## Read Data

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing
import matplotlib as mpl
import matplotlib.pyplot as plt   # data visualization
import seaborn as sns  

In [None]:
# Load the raw dataset from 'data_file.csv' (path is relative to the working directory).
df = pd.read_csv('data_file.csv')

# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Print a summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
#df.value_counts()

In [None]:
#df['id'].unique()

In [None]:
# NOTE(review): the GroupBy object is neither assigned nor aggregated, so this
# cell only displays its repr — confirm whether an aggregation was intended.
df.groupby(['id'])

In [None]:
# Order the rows by year, then by month within each year (both ascending).
sort_columns = ['year', 'month']
df = df.sort_values(by=sort_columns, ascending=[True, True])

In [None]:
#df.tail(10)

In [None]:
def df_practice(i, data=None):
    """Return the rows that belong to a single id.

    Parameters
    ----------
    i : scalar
        Value to match against the ``id`` column.
    data : pandas.DataFrame, optional
        Frame to filter. When omitted, the notebook-level ``df`` is used, so
        existing ``df_practice(i)`` calls behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        The rows whose ``id`` equals ``i`` (an empty frame if none match).
    """
    frame = df if data is None else data
    return frame[frame['id'] == i]

## Prepare Data for Modelling

In [None]:
df.month=df.month.astype(str)

In [None]:
df.year=df.year.astype(str)

In [None]:
df.head(1)

In [None]:
df['ds'] = pd.to_datetime(df['month']+df['year'],format='%m%Y',errors='coerce')

In [None]:
df_prophet = df1[['ds','production']]
df_prophet.columns = ['ds','y']

In [None]:
### Get data for practice 1
df1 = df[df['id']==1]
df1.tail(3)

## Use Prophet for Modelling

In [None]:
from prophet import Prophet

In [None]:
# Hyper-parameters used for the single-series model
prophet_kwargs = {
    'seasonality_mode': 'multiplicative',
    'changepoint_prior_scale': 0.4,
    'seasonality_prior_scale': 1.0,
    'n_changepoints': 100,
}
model = Prophet(**prophet_kwargs)
# fit the model on the prepared (ds, y) frame
model.fit(df_prophet)

In [None]:
# Define the period for which we want a prediction: the twelve month-start
# dates of 2020, used for an in-sample prediction.
# NOTE(review): presumably 2020 is the last year present in the data — verify.
future = pd.DataFrame({'ds': pd.date_range(start='2020-01-01', periods=12, freq='MS')})

In [None]:
# use the model to make a forecast for the dates in `future`
forecast = model.predict(future)
# summarize the forecast: point estimate plus lower/upper bounds
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head())
# plot forecast
model.plot(forecast)
# matplotlib.pyplot is imported as `plt` at the top of the notebook
plt.show()

## Calculate MAPE

In [None]:
import numpy as np
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are converted to float NumPy arrays first, so plain lists are
    accepted and pandas Series are compared by position instead of being
    aligned on their index labels.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order and of the same length as ``y_true``.

    Returns
    -------
    float
        ``mean(|(y_true - y_pred) / y_true|) * 100``.

    Notes
    -----
    A zero in ``y_true`` makes the ratio a division by zero, so the result is
    ``inf`` or ``nan`` in that case.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [None]:
y_pred = forecast['yhat']
type(y_pred.values)

In [None]:
y_true = df_prophet['y'][-12:]
type(y_true.values)


In [None]:
MAPE = mean_absolute_percentage_error(y_true.values, y_pred.values)
MAPE

## Improve the Model

In [None]:
pip install dask

In [None]:
from dask.distributed import Client
# NOTE(review): no scheduler address is passed; per the Dask documentation this
# should start a local cluster and connect to it — confirm that is the intended target.
client = Client()  # connect to the cluster


In [None]:
import itertools
import numpy as np
import pandas as pd


from sklearn.model_selection import ParameterGrid
# Candidate hyper-parameter values; every combination is evaluated below.
param_grid = {'seasonality_mode':('multiplicative','additive'),
               'changepoint_prior_scale':[0.4,0.5],
              'seasonality_prior_scale': [1.0, 10.0],
              'n_changepoints' : [100,150,200]}

#param_grid = {  
#    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
#    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
#}

# Generate all combinations of parameters (one dict of keyword arguments per model)
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
mape = []  # Store the MAPEs for each params here

# ParameterGrid reports its size directly, so no counting loop is needed.
grid = ParameterGrid(param_grid)
cnt = len(grid)

print('Total Possible Models',cnt)


In [None]:
# cross_validation / performance_metrics are not exported by the top-level
# `prophet` package; they live in prophet.diagnostics.
from prophet.diagnostics import cross_validation, performance_metrics

# Use cross validation to evaluate all parameters.
# Start from an empty list so that re-running this cell does not append a
# second set of scores, which would no longer line up with all_params.
mape = []
for params in all_params:
    model = Prophet(**params).fit(df_prophet)  # Fit model with given params
    df_cv = cross_validation(model, horizon='90 days', parallel="dask")
    df_p = performance_metrics(df_cv, rolling_window=0.1)
    # keep the MAPE from the first row of the metrics table
    mape.append(df_p['mape'].values[0])

In [None]:
# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['mape'] = mape
print(tuning_results)

In [None]:
best_params = all_params[np.argmin(mape)]
print(best_params)
# {'seasonality_mode': 'multiplicative', 'changepoint_prior_scale': 0.4, 'seasonality_prior_scale': 1.0, 'n_changepoints': 100}

## Make Out-of-Sample Predictions and Write Results

In [None]:
# Period to predict out-of-sample: the first four month-start dates of 2021.
# The frame is the same for every id, so it is built once before the loop.
future = pd.DataFrame({'ds': pd.date_range(start='2021-01-01', periods=4, freq='MS')})

for i in df['id'].unique():
    # training data for this id, with the two columns named 'ds' / 'y'
    df_i = df_practice(i)
    df_prophet = df_i[['ds','production']].rename(columns={'production': 'y'})
    # define the model
    model = Prophet(seasonality_mode='multiplicative', changepoint_prior_scale=0.4,
                    seasonality_prior_scale=1.0, n_changepoints=100)
    # fit the model with seasonality
    model.fit(df_prophet)
    # use the model to make a forecast
    forecast = model.predict(future)
    # write results: one row per forecasted month, in the input file's id/year/month/production layout
    forecast['id'] = i
    forecast['year'] = forecast['ds'].dt.year
    forecast['month'] = forecast['ds'].dt.month
    forecast['production'] = forecast['yhat']
    result_df = forecast[['id','year','month','production']]
    # NOTE: append mode without a header — re-running this cell adds the same
    # rows to results.csv again.
    result_df.to_csv('results.csv',mode = 'a',header=False, index=False)