In [None]:
import datetime

import pandas as pd

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

## Use the sample data: https://docs.microsoft.com/ja-jp/azure/open-datasets/dataset-us-producer-price-index-commodities?tabs=azureml-opendatasets
from azureml.opendatasets import UsLaborPPICommodity

import warnings

import matplotlib.pyplot as plt 

In [None]:
## Data import
## Instantiate the US PPI commodities open-dataset handle and materialize it as a pandas DataFrame
## NOTE(review): presumably this fetches the data remotely, so the cell may be slow - confirm
labor = UsLaborPPICommodity()
labor_df = labor.to_pandas_dataframe()

In [None]:
## Preview the first rows of the loaded frame
labor_df.head()

In [None]:
## Flag the rows whose period code is "M13" (the average period) and keep everything else
is_average_period = labor_df['period'] == 'M13'
labor_df = labor_df[~is_average_period]

In [None]:
## Check the (rows, columns) dimensions of the frame at this point
labor_df.shape

In [None]:
## Extract month: drop the leading letter of the period code (e.g. "M01") and parse the rest as an integer
period_num = labor_df['period'].str[1:].astype(int)
## Assign Year-Month: build first-of-month timestamps from the year and month columns in a
## single vectorized call (no row-wise apply / positional row indexing), and attach both
## columns with assign() so a new frame is produced instead of writing into a filtered slice
labor_df = labor_df.assign(
	period_num=period_num,
	yyyymm=pd.to_datetime(
		pd.DataFrame({'year': labor_df['year'], 'month': period_num, 'day': 1})
	),
)

In [None]:
## Designate specific code
ItemCode = '120922'

## Keep only the rows of that item and order them chronologically: the line plot below and
## any time-series model fed from df_item need the observations in date order, which the
## source frame does not guarantee. A stable sort keeps the existing order among equal dates.
df_item = labor_df.query("item_code == @ItemCode").sort_values('yyyymm', kind='stable')

## Plot the selected series over time
plt.plot(df_item['yyyymm'], df_item['value'])
plt.xlabel('date')
plt.ylabel('values')
plt.title('Item {}'.format(ItemCode))
plt.show()

In [None]:
## Cut-off date for the split: rows dated on or before it form the training set
train_date = '2016-12-31'

## Build one boolean mask per partition, then slice the selected series with each
on_or_before_cutoff = df_item['yyyymm'] <= train_date
after_cutoff = df_item['yyyymm'] > train_date
df_train = df_item[on_or_before_cutoff]
df_test = df_item[after_cutoff]

## Check size
print(df_train.shape, df_test.shape)

In [None]:
## Calculate mean squared error for each order
def evaluate_arima_model(X_train, X_test, arima_order):
	"""Score one ARIMA order with one-step-ahead, walk-forward forecasts.

	For every value in ``X_test`` a new ARIMA model of order ``arima_order`` is
	fitted on all observations seen so far, the first element of its forecast is
	recorded, and the true value is then appended to the observations.

	Args:
		X_train: Training observations; must provide ``.copy()`` and be iterable.
		X_test: Hold-out observations, iterated in order.
		arima_order: Order passed to ``ARIMA`` as ``order=``.

	Returns:
		Tuple ``(mse, predictions)``: the mean squared error between ``X_test``
		and the forecasts, and the list of forecasts (one per test value).
	"""
	# Rolling record of known observations, seeded from a copy of the training data
	observed = list(X_train.copy())
	forecasts = []
	for actual in X_test:
		# Refit on everything observed so far, then take the next-step forecast
		fitted = ARIMA(observed, order=arima_order).fit()
		forecasts.append(fitted.forecast()[0])
		# Reveal the true value before forecasting the following step
		observed.append(actual)
	error = mean_squared_error(X_test, forecasts)
	return error, forecasts

## Grid search for each order with ARIMA models
def explore_min_mse(X_train, X_test, p_values, d_values, q_values):
	"""Evaluate every (p, d, q) combination and collect the scores.

	Both inputs are cast to float32, then each order from the Cartesian product
	of ``p_values``, ``d_values`` and ``q_values`` is scored with
	``evaluate_arima_model``. An order whose evaluation raises an ordinary
	exception is skipped with a warning; ``KeyboardInterrupt`` and ``SystemExit``
	are not caught, so a long search can still be interrupted.

	Args:
		X_train: Training observations; must support ``.astype``.
		X_test: Hold-out observations; must support ``.astype``.
		p_values: Iterable of candidate p values.
		d_values: Iterable of candidate d values.
		q_values: Iterable of candidate q values.

	Returns:
		pandas DataFrame with columns ``order``, ``mse`` and ``predictions``,
		one row per order that was evaluated successfully (possibly empty).
	"""
	from itertools import product

	X_train, X_test = X_train.astype('float32'), X_test.astype('float32')
	fit_results = []
	# product() yields the orders in the same sequence as nested p / d / q loops
	for order in product(p_values, d_values, q_values):
		try:
			## Model generation and evaluation for each order
			mse, predictions = evaluate_arima_model(X_train, X_test, order)
		except Exception as exc:
			# Best effort: one failing order must not abort the whole search,
			# but report it instead of dropping it silently
			warnings.warn('ARIMA order {} skipped: {!r}'.format(order, exc), stacklevel=2)
			continue
		fit_results.append([order, mse, predictions])
	return pd.DataFrame(fit_results, columns=['order', 'mse', 'predictions'])

In [None]:
## Define range of each parameter
p_values = [0, 1, 2, 4, 6, 8, 10]
d_values = range(0, 3)
q_values = range(0, 3)

## Train and test
## Warnings are silenced only while the grid search runs; the previous warning filters are
## restored afterwards so later cells still report their own warnings
with warnings.catch_warnings():
	warnings.simplefilter("ignore")
	df_forecast_results = explore_min_mse(df_train.value, df_test.value, p_values, d_values, q_values)

## Stop with a clear message when no order could be evaluated, rather than failing later
## on an empty selection
if df_forecast_results.empty:
	raise RuntimeError("ARIMA grid search produced no results: every candidate order failed")

## Pick one candidate with the minimum errors (a one-row Series holding its prediction list)
best_predictions = df_forecast_results.sort_values('mse', ascending=True).head(1)['predictions']

In [None]:
## Hold-out observations and their dates, copied so the new column is not written into df_test
df_future_values = df_test.loc[:, ['value', 'yyyymm']].copy()

## Append predictive values: the prediction list stored in the single selected entry
df_future_values['pred_value'] = best_predictions.iloc[0]


In [None]:
## Plot the result: training history, hold-out actuals and forecasts on a single axis

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(df_train['yyyymm'], df_train['value'], label='training data')
ax.plot(df_future_values['yyyymm'], df_future_values['value'], label="actual value")
ax.plot(df_future_values['yyyymm'], df_future_values['pred_value'], label="pred value")
ax.legend()
ax.set_xlabel('date')
ax.set_ylabel('values')
ax.set_title('Forecasting sample for {}'.format(ItemCode))

plt.show()