In [None]:
#Import all necessary libraries
#------------------------------
import os
import plotly.io as pio
import plotly.express as px
import regex as re
import pandas as pd
import matplotlib 
from fbprophet import Prophet
from sklearn.metrics import mean_absolute_percentage_error

In [None]:
#Open raw data
#-----------------------------------------------------------------------

# The CSV location can be overridden with the SALES_DATA_CSV environment
# variable; when it is unset the original hard-coded location is used.
path_csv = os.environ.get(
    'SALES_DATA_CSV',
    '/Users/mattiaficarelli/Documents/GitHub/nhsx/multiverse_sales_modelling/raw_data/sales_data_raw.csv',
)
df = pd.read_csv(path_csv)

# Count the rows (orders) per order date. The dates are parsed *before*
# grouping so the groups are sorted chronologically rather than as raw
# strings; the running total below depends on that order.
daily_orders = (
    df.assign(ds=pd.to_datetime(df['OrderDate']))
    .groupby('ds')
    .size()
    .reset_index(name='Orders')
)

#Group data by month
#-----------------------------------------------------------------------
# df_prophet holds one row per order date: 'ds' is the month label (YYYY-MM)
# of that date and 'y' is the running total of orders up to that date.
df_prophet = pd.DataFrame({
    'ds': daily_orders['ds'].dt.strftime('%Y-%m'),
    'y': daily_orders['Orders'].cumsum(),
})

# NOTE(review): 'y' is already a running total, so summing it within a month
# adds up every daily running total in that month rather than giving a
# month-end cumulative count or a monthly order count. Kept as in the
# original so results do not change silently - confirm the intended target.
df_prophet_1 = df_prophet.groupby('ds').sum().reset_index()

#Initialise prophet model
#-------------------------------------------------------------------
# Fit on the full monthly series; the forecast cells below use this model.
model = Prophet()
model.fit(df_prophet_1)

In [None]:
#Save the prepared Prophet input data (df_prophet) to .csv
#-----------------------------------------------------------------------
# Create the output folder first so the write does not fail when "data/"
# does not exist yet (e.g. on a fresh checkout).
os.makedirs("data", exist_ok=True)
df_prophet.to_csv("data/cleaned_data_prophet.csv")

In [None]:
# Check the fitted model against the 17 months from 2015-01 to 2016-05
#---------------------------------------------------------------------------------

# One row per month, dated on the first day of the month.
historical_check = pd.DataFrame(
    {'ds': pd.date_range(start='2015-01-01', end='2016-05-01', freq='MS')}
)
historical_check_forecast = model.predict(historical_check)


# Keep only the point forecast and its uncertainty bounds in a summary df
#-------------------------------------------------

summary_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
historical_forecast = historical_check_forecast[summary_columns].copy()

#Format and plot graphical output of model
#------------------------------------------

# model.plot returns the figure it drew; label that figure's axes directly.
historical_fig = model.plot(historical_check_forecast)
historical_ax = historical_fig.gca()
historical_ax.set_xlabel("Date")
historical_ax.set_ylabel("Total sales")


In [None]:
# Use the model to predict the 19 months from 2016-06 to 2017-12
#---------------------------------------------------------------------------------

# One row per month, dated on the first day of the month.
future = pd.DataFrame(
    {'ds': pd.date_range(start='2016-06-01', end='2017-12-01', freq='MS')}
)
forecast = model.predict(future)

# Keep only the point forecast and its uncertainty bounds in a summary df
#-------------------------------------------------

forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
future_forecast = forecast[forecast_columns].copy()


#Format and plot graphical output of future forecast
#---------------------------------------------------
# model.plot returns the figure it drew; label that figure's axes directly.
future_fig = model.plot(forecast)
future_ax = future_fig.gca()
future_ax.set_xlabel("Date")
future_ax.set_ylabel("Total sales")



In [None]:
# Manually evaluate the forecast model by holding out the last 6 months of data
#---------------------------------------------------------------------------------

holdout_months = 6
train = df_prophet_1.iloc[:-holdout_months]
holdout = df_prophet_1.iloc[-holdout_months:]

# Define and fit the model to data (with last 6 months hidden from it)
#---------------------------------------------------------------------------------
# NOTE: this rebinds `model`, replacing the model fitted on the full series.
# Re-run the fitting cell before re-running the earlier forecast cells.
model = Prophet()
model.fit(train)

# Build the forecast dates from the held-out rows themselves
#---------------------------------------------------------------------------------
# Taking the dates from `holdout` (instead of hard-coding a month range)
# guarantees one prediction per held-out observation, so y_true and y_pred
# always have the same length and refer to the same months.
future = pd.DataFrame({'ds': pd.to_datetime(holdout['ds'])}).reset_index(drop=True)

# use the model to make a forecast
forecast = model.predict(future)

# expected and predicted values for the held-out months
# (the error metric is computed from these in the next cell)
y_true = holdout['y'].values
y_pred = forecast['yhat'].values
assert len(y_true) == len(y_pred), "expected one prediction per held-out month"

# plot the hold-out forecast
#---------------------------------------------------------------------------------
# model.plot returns the figure it drew; label that figure's axes directly.
holdout_fig = model.plot(forecast)
holdout_ax = holdout_fig.gca()
holdout_ax.set_xlabel("Month")
holdout_ax.set_ylabel("Total Sales")


In [None]:
#Calculate the mean absolute percentage error (MAPE) of the hold-out forecast
#---------------------------------------------------------------------------------
# y_true / y_pred are the held-out actuals and the model's predictions for them.
mape = mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred)
mape