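# ทำนายผลผลิตการเกษตร โดยใช้ผลผลิตปีก่อน ๆ ด้วย SARIMAX (statsmodels)

เอกสาร SARIMAX: https://www.statsmodels.org/stable/generated/statsmodels.tsa.statespace.sarimax.SARIMAX.html

เอกสาร Prophet (อ้างอิงเพิ่มเติม): https://facebook.github.io/prophet/docs/quick_start.html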

ขั้นตอนติดตั้ง ให้สร้าง environment ที่ชื่อ oae โดยใช้ environment.yml
และคัดลอกข้อมูล OAE-process มาไว้ใน folder นี้

In [1]:
import numpy as np
import pandas as pd

import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm


from utils.preproc import read_yield, get_yeild_area, read_annually_data, read_month_data, read_month_data_area, add_regressor
from utils.preproc import rice_inseason_harvest_season, rice_offseason_harvest_season, corn_harvest_season, cassava_harvest_season, rubber_harvest_season

py.init_notebook_mode(connected=True)

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import warnings
warnings.filterwarnings("ignore")

Model parameters

In [2]:
# Per-product configuration, keyed by product type (selected via `ptype`).
# Each entry holds the input workbooks (yield, cost, price) and the SARIMAX
# orders: (p, d, q) is passed as `order`, and (P, D, Q) forms the first three
# terms of `seasonal_order` (seasonal period 12) inside predict().
params = {
    # Off-season rice.
    'rice_off-season': {
        'yield_file' : 'OAE-process/OAE-ผลผลิตข้าวนาปรัง.xlsx',
        'cost_file' : 'OAE-process/OAE-ต้นทุนรวมต่อไร่ข้าวนาปรัง.xlsx',
        'price_file' : 'OAE-process/OAE-ราคาข้าวเปลือกเจ้า.xlsx',
        # Non-seasonal ARIMA order.
        'p': 1,
        'd': 0,
        'q': 0,
        # Seasonal order.
        'P': 0,
        'D': 1,
        'Q': 1,
    },
    
    # Corn.
    'corn': {
        'yield_file' : 'OAE-process/OAE-ผลผลิตข้าวโพดทั้งหมด.xlsx',
        'cost_file' : 'OAE-process/OAE-ต้นทุนรวมต่อไร่ข้าวโพดเลี้ยงสัตว์.xlsx',
        'price_file' : 'OAE-process/OAE-ราคาข้าวโพดเลี้ยงสัตว์.xlsx',
        # Non-seasonal ARIMA order.
        'p': 2,
        'd': 0,
        'q': 0,
        # Seasonal order.
        'P': 0,
        'D': 1,
        'Q': 1,
    },
}

## User Param: Select Product Type

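วิธีใช้ ขั้นตอนที่ 1: เลือกชนิดของผลผลิตทางการเกษตรที่ต้องการ ในตัวแปร `ptype` เช่น ข้าวนาปี (rice_in-season), ข้าวนาปรัง (rice_off-season), ข้าวโพด (corn), ปาล์มน้ำมัน (palm), มันสำปะหลัง (cassava) โดยค่าที่เลือกต้องเป็น key ที่มีอยู่ใน `params` ด้านบน (ขณะนี้กำหนดไว้เฉพาะ rice_off-season และ corn)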

In [3]:
# Product type to model; it is used as the lookup key into `params`, so it
# must be one of the keys defined there.
ptype = 'rice_off-season'

In [4]:
# Resolve the configuration of the selected product and unpack the three
# input workbook paths it declares.
param = params[ptype]
yield_file, cost_file, price_file = (
    param[key] for key in ('yield_file', 'cost_file', 'price_file')
)

In [5]:
# Yield data: `df` is the frame passed to get_yeild_area(); `df_group.index`
# supplies the Province choices of the dashboard and its "value" column is
# printed there as the total yield.
df, df_group = read_yield(yield_file)

# Cost data, later passed to add_regressor as the 'cost' regressor.
df_cost = read_annually_data(cost_file)

# Raw precipitation workbook; it is narrowed to one province inside predict()
# via read_month_data_area().
precipitation_file = 'OAE-process/OAE-ปริมาณน้ำฝน-ฝนตก.xlsx'
df_precip = pd.read_excel(precipitation_file)

# Price data, later passed to add_regressor as the 'price' regressor.
df_price = read_month_data(price_file)

In [6]:
# Preview series for a single province ('Phetchabun'): index by the 'ds'
# column and conform to month-start frequency, so months without a row show
# up as NaN.
df_yield = (
    get_yeild_area(df, 'Phetchabun')
    .set_index('ds')
    .resample('MS')
    .asfreq()
)

# Dashboard

ขั้นตอนที่ 2 รัน Dashboard

In [7]:
@widgets.interact_manual(Province=df_group.index)
def predict(Province=df_group.index[0]):
    """Fit a SARIMAX model for one province and evaluate it on a hold-out window.

    Registered as an ``interact_manual`` dashboard whose ``Province`` choices
    come from ``df_group.index``.

    Steps: build the province's yield series, attach the exogenous regressors
    (cost, precipitation, price), hold out the 12 most recent rows, fit
    SARIMAX with the orders from the module-level ``param``, then print the
    model summary and error metrics and plot train / forecast / test.

    Args:
        Province: a label from ``df_group.index``.

    Raises:
        ValueError: if the training or hold-out split is empty once the
            regressors are attached and incomplete rows are dropped, or if
            the hold-out window does not lie after the training window.
    """
    holdout = 12  # number of most recent rows reserved for evaluation
    col_regressor = ['cost', 'precipitation', 'price']

    df_yield = get_yeild_area(df, Province)
    print(f'Total Yield from {df.date.min().strftime("%b %Y")} to {df.date.max().strftime("%b %Y")} = {df_group.loc[Province,"value"]}')

    df_yield['year'] = pd.DatetimeIndex(df_yield['ds']).year
    # numeric_only=True: the datetime 'ds' column cannot be summed, and
    # pandas >= 2.0 raises instead of silently dropping such columns.
    print(df_yield.groupby('year').sum(numeric_only=True).head())

    df_precipitation = read_month_data_area(df_precip, Province)

    @add_regressor([df_cost, df_precipitation, df_price], col_regressor)
    def add_features(df):
        return df

    def prepare(frame):
        # Month-start frequency, regressors attached, incomplete rows
        # removed, chronological order guaranteed before modelling.
        monthly = frame.set_index('ds').resample('MS').asfreq()
        return add_features(monthly).dropna().sort_index()

    # forecast() extrapolates past the END of the training sample, so the
    # hold-out must be the most recent rows. Sorting first makes the split
    # independent of the row order returned by get_yeild_area(); for
    # newest-first data this selects the same rows as the previous
    # positional split.
    df_yield = df_yield.sort_values('ds')
    df_train = prepare(df_yield[:-holdout])
    df_test = prepare(df_yield[-holdout:])

    if df_train.empty or df_test.empty:
        raise ValueError(
            f'Not enough complete data for {Province!r}: '
            f'{len(df_train)} training rows, {len(df_test)} test rows'
        )
    # Sanity check that the forecast is evaluated on dates that follow the
    # training data (the forecast index is relabelled below, so comparing
    # it with the test index afterwards would always succeed).
    if df_test.index.min() <= df_train.index.max():
        raise ValueError('Hold-out window must start after the training window ends')

    # Extracted after sorting so the model sees a chronological series.
    endog = df_train.loc[:, 'y']
    exog = df_train.loc[:, col_regressor]

    my_order = (param['p'], param['d'], param['q'])
    my_seasonal_order = (param['P'], param['D'], param['Q'], 12)

    # define model
    model = SARIMAX(endog=endog, exog=exog, order=my_order, seasonal_order=my_seasonal_order)

    model_fit = model.fit()
    print(model_fit.summary())

    exog_forecast = df_test.loc[:, col_regressor]

    # get the predictions and residuals
    predictions = model_fit.forecast(len(exog_forecast), exog=exog_forecast)
    # Relabel with the test dates so the subtraction below aligns row by row.
    predictions.index = exog_forecast.index
    # Yield cannot be negative.
    predictions = predictions.clip(lower=0)

    residuals = df_test['y'] - predictions
    mae = np.mean(np.abs(residuals))
    rmse = np.sqrt(np.mean(residuals**2))
    total_error = abs(1 - (np.sum(predictions) / np.sum(df_test['y']))) * 100

    # Report the error metrics for the hold-out window.
    print(f'\nPredict Yield from {df_test.index.min().strftime("%b %Y")} to {df_test.index.max().strftime("%b %Y")}')
    print('RMSE: %.2f' % rmse )
    print('MAE: %.2f' % mae )

    print(f"Total Error: {total_error:.3f} %")
    print(f"Forecast: {np.sum(predictions):.2f}")
    print('Ground Truth:', np.sum(df_test['y']))

    py.iplot([
        go.Scatter(x=df_train.index, y=df_train['y'], name='train', line=dict(width=3)),
        go.Scatter(x=predictions.index, y=predictions, name='yhat', line=dict(width=3)),
        go.Scatter(x=df_test.index, y=df_test['y'], name='test', marker=dict(color='blue', size=12), line=dict(width=3)),
    ])

interactive(children=(Dropdown(description='Province', options=('Suphan Buri', 'Phra Nakhon Si Ayutthaya', 'Ph…