# ทำนายผลผลิตข้าวโพด โดยใช้ผลผลิตปีก่อน ๆ ด้วย Prophet

https://facebook.github.io/prophet/docs/quick_start.html

In [1]:
import numpy as np
import pandas as pd
from fbprophet import Prophet

import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls

from fbprophet.plot import plot_plotly, plot_components_plotly

py.init_notebook_mode(connected=True)

Get data

In [2]:
# Corn-yield records per area; the code in this notebook relies on the columns
# 'area', 'date' and 'value'.
yield_file = 'OAE-process/OAE-ผลผลิตข้าวโพดทั้งหมด.xlsx'
df = pd.read_excel(yield_file)

# Total 'value' per area, largest first.  numeric_only=True keeps the 'date'
# column (used as datetimes further down) out of the aggregation: pandas >= 2.0
# raises a TypeError when asked to sum datetimes instead of dropping them.
areas = df.groupby(['area']).sum(numeric_only=True).sort_values(by='value', ascending=False)

In [3]:
def data_in_area(df, area):
    """Return the rows of one area as a Prophet-style ('ds', 'y') frame.

    Args:
        df: DataFrame with 'date', 'area' and 'value' columns.
        area: Value of the 'area' column to keep.

    Returns:
        A new DataFrame with columns 'ds' (from 'date') and 'y' (from
        'value'), ordered newest date first, with a fresh 0..n-1 index.
    """
    newest_first = df.sort_values(by='date', ascending=False)
    matching = newest_first.loc[newest_first['area'] == area, ['date', 'value']]
    return matching.reset_index(drop=True).rename(columns={'date': 'ds', 'value': 'y'})

In [4]:
def resample_year2month(df):
    """Upsample a sparser 'date'-keyed table to one row per month.

    Args:
        df: DataFrame with a datetime-like 'date' column.

    Returns:
        A new DataFrame indexed by month-start timestamps (index named
        'ds'); months that had no row of their own carry forward the values
        of the most recent earlier row.
    """
    monthly = df.set_index('date').rename_axis('ds').resample('MS').asfreq()
    # .ffill() replaces fillna(method='ffill'), which pandas deprecated in 2.1.
    return monthly.ffill()

In [5]:
# Source workbooks for the extra regressors (file names, translated: total
# cost per rai of feed corn, rainfall amount, number of rainy days, feed-corn
# price).
cost_file = 'OAE-process/OAE-ต้นทุนรวมต่อไร่ข้าวโพดเลี้ยงสัตว์.xlsx'
precipitation_file = 'OAE-process/OAE-ปริมาณน้ำฝน-ฝนตก.xlsx'
rainday_file = 'OAE-process/OAE-ปริมาณน้ำฝน-จำนวนวันฝนตก.xlsx'
price_file = 'OAE-process/OAE-ราคาข้าวโพดเลี้ยงสัตว์.xlsx'

# Raw tables, one per workbook.
df_cost = pd.read_excel(cost_file)
df_precipitation = pd.read_excel(precipitation_file)
df_rainday = pd.read_excel(rainday_file)
df_price = pd.read_excel(price_file)

# Cost is resampled to month starts with values carried forward.
df_cost_spl = resample_year2month(df_cost)

# 'Nan' here is an area label handed to data_in_area (it also appears among
# the province options of the dashboard), not a missing-value marker.
df_precipitation_nan = data_in_area(df_precipitation, 'Nan').set_index('ds')
df_rainday_nan = data_in_area(df_rainday, 'Nan').set_index('ds')

# Price is used as-is, keyed by its 'date' column.
df_price_spl = df_price.set_index('date')

Utility functions

In [6]:
def add_features(df):
    """Attach the extra regressor columns to a frame keyed by 'ds'.

    Looks up each row's 'ds' in the module-level tables df_cost_spl,
    df_precipitation_nan, df_rainday_nan and df_price_spl and adds the
    columns 'cost', 'precipitation', 'rainday' and 'price'.

    Args:
        df: DataFrame with a 'ds' column of dates.

    Returns:
        A new DataFrame sorted newest 'ds' first with the four columns
        added.  Missing values are back-filled in that order, i.e. a gap is
        filled from the next row down, which holds an earlier date.
    """
    df = df.join(df_cost_spl['value'].rename('cost'), on='ds')
    df = df.join(df_precipitation_nan['y'].rename('precipitation'), on='ds')
    df = df.join(df_rainday_nan['y'].rename('rainday'), on='ds')
    df = df.join(df_price_spl['value'].rename('price'), on='ds')
    # .bfill() replaces fillna(method='bfill'), which pandas deprecated in 2.1.
    return df.sort_values(by='ds', ascending=False).bfill()

def is_harvest_season(ds):
    """Tell whether a date falls in October, November or December.

    Args:
        ds: Anything pd.to_datetime can turn into a single timestamp.

    Returns:
        True when the month is between 10 and 12 inclusive, else False.
    """
    return 10 <= pd.to_datetime(ds).month <= 12

# Dashboard

In [7]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
%matplotlib inline

In [25]:
@widgets.interact_manual(Province=areas.index[:30])
def predict(Province=areas.index[0]):
    """Fit a Prophet model for one province and evaluate it on a hold-out.

    The newest 12 rows of the province's yield series are held out, a
    Prophet model with the 'cost', 'precipitation', 'rainday' and 'price'
    regressors plus a conditional 'on_season' seasonality is fitted on the
    rest, and a 24-step month-start forecast is produced.  RMSE, MAE and the
    relative error of the summed forecast are printed for the hold-out
    window, followed by three plotly figures.

    Args:
        Province: Area name to model.  The widget offers the first 30
            entries of ``areas.index`` (areas with the largest total value).

    Returns:
        None.  Results are printed and plotted.

    Raises:
        AssertionError: If the forecast dates do not line up with the
            hold-out dates.
    """
    df_yield = data_in_area(df, Province)
    print(f'Total Yield from {df.date.min().strftime("%b %Y")} to {df.date.max().strftime("%b %Y")} = {areas.loc[Province,"value"]}')

    # data_in_area returns rows newest first, so the first 12 rows are the
    # most recent observations and serve as the hold-out set.
    df_train, df_test = df_yield[12:], df_yield[:12]
    df_train = add_features(df_train)
    df_test = add_features(df_test)
    df_train['on_season'] = df_train['ds'].apply(is_harvest_season)

    df_yield['year'] = pd.DatetimeIndex(df_yield['ds']).year
    # Sum only 'y': pandas >= 2.0 raises when asked to sum the datetime 'ds'.
    print(df_yield.groupby('year')[['y']].sum().head())

    model = Prophet(changepoint_prior_scale=0.01, seasonality_prior_scale=0.1, holidays_prior_scale=10)
    model.add_seasonality(name='monthly_on_season', period=30.5, fourier_order=3, condition_name='on_season')
    for col in ['cost', 'precipitation', 'rainday', 'price']:
        model.add_regressor(col, prior_scale=0.1, mode='multiplicative')

    model.fit(df_train)

    future = model.make_future_dataframe(periods=24, freq="MS")
    future = add_features(future)
    future['on_season'] = future['ds'].apply(is_harvest_season)

    forecast = model.predict(future)
    # Assign the clipped column back: a chained `forecast.yhat.clip(inplace=True)`
    # does not update `forecast` under pandas Copy-on-Write.
    forecast['yhat'] = forecast['yhat'].clip(lower=0)
    forecast = forecast.sort_values(by='ds', ascending=False).reset_index(drop=True)

    # With rows newest first, 0-11 are the second forecast year and 12-23 the
    # first one, which is expected to coincide with the hold-out window.
    holdout = forecast[12:24]
    yhat = holdout['yhat'].to_numpy()
    y_true = df_test['y'].to_numpy()

    # Sanity check that we are evaluating the forecast at the same dates.
    assert np.all(holdout['ds'].to_numpy() == df_test['ds'].to_numpy())
    # Root mean squared error and mean absolute error over the hold-out.
    residual = yhat - y_true
    print('RMSE: %f' % np.sqrt(np.mean(residual**2)))
    print('MAE: %f' % np.mean(np.abs(residual)))

    # Relative error (percent) of the summed forecast against the summed truth.
    total_error = abs(1 - (np.sum(yhat) / np.sum(y_true))) * 100
    print('Total Error: %.3f' % total_error)
    print('Forecast:', np.sum(yhat))
    print('Ground Truth:', np.sum(y_true))

    py.iplot([
        go.Scatter(x=df_train['ds'], y=df_train['y'], name='train', line=dict(width=3)),
        go.Scatter(x=forecast['ds'], y=forecast['yhat'], name='yhat', line=dict(width=3)),
#         go.Scatter(x=forecast['ds'], y=forecast['yhat_upper'], fill='tonexty', mode='none', name='upper'),
#         go.Scatter(x=forecast['ds'], y=forecast['yhat_lower'], fill='tonexty', mode='none', name='lower'),
        go.Scatter(x=forecast['ds'], y=forecast['trend'], name='Trend'),
        go.Scatter(x=df_test['ds'], y=df_test['y'], name='test', marker=dict(color='blue', size=12), line=dict(width=3)),
    ])

    py.iplot(plot_plotly(model, forecast))

    py.iplot(plot_components_plotly(model, forecast))

    # Optional: export the first figure to a standalone HTML file.
#     import plotly.io as pio
#     fig = go.Figure([
#         go.Scatter(x=df_train['ds'], y=df_train['y'], name='train', line=dict(width=3)),
#         go.Scatter(x=forecast['ds'], y=forecast['yhat'], name='yhat', line=dict(width=3)),
# #         go.Scatter(x=forecast['ds'], y=forecast['yhat_upper'], fill='tonexty', mode='none', name='upper'),
# #         go.Scatter(x=forecast['ds'], y=forecast['yhat_lower'], fill='tonexty', mode='none', name='lower'),
#         go.Scatter(x=forecast['ds'], y=forecast['trend'], name='Trend'),
#         go.Scatter(x=df_test['ds'], y=df_test['y'], name='test', marker=dict(color='blue', size=12), line=dict(width=3)),
#     ])
#     fig.update_layout(title_text='Corn Production at ' + Province)
#     pio.write_html(fig, file=f'corn_prod_{Province}.html', auto_open=True)
    

interactive(children=(Dropdown(description='Province', options=('Phetchabun', 'Nakhon Ratchasima', 'Nan', 'Tak…