In [None]:
import numpy  as np
import pandas as pd 
from random import uniform
from scipy.stats import pearsonr
from fbprophet import Prophet
import datetime, logging
logging.getLogger().setLevel(logging.CRITICAL)

from matplotlib import pyplot as plt 
%matplotlib inline 

from JModels import *

In [None]:
######################################################################
######################################################################

In [None]:
# Read the "hunting blind" Google Trends export and rename its two columns
# to the (ds, y) pair used by the Prophet fits further down.
fn = "DATA/HUNT GOOGLE TRENDS.csv"
df_hunt = pd.read_csv(fn)
df_hunt.columns = ['ds', 'y']
df_hunt = df_hunt.assign(ds=pd.to_datetime(df_hunt['ds']))
print(df_hunt.shape[0])  # row count

In [None]:
# Read the lawn-mower Google Trends export and rename its two columns to
# (ds, y); 'ds' is parsed into datetimes.
fn = "DATA/MOWER GOOGLE TRENDS.csv"
df_season = pd.read_csv(fn)
df_season.columns = ['ds', 'y']
df_season = df_season.assign(ds=pd.to_datetime(df_season['ds']))
print(df_season.shape[0])  # row count

In [None]:
# Read the mower-sales training file and rename its two columns to (ds, y);
# 'ds' is parsed into datetimes.
fn = "DATA/MOWER SALES TRAIN.csv"
df_train = pd.read_csv(fn)
df_train.columns = ['ds', 'y']
df_train = df_train.assign(ds=pd.to_datetime(df_train['ds']))
print(df_train.shape[0])  # row count

In [None]:
# Read the mower-sales hold-out file and rename its two columns to (ds, y);
# 'ds' is parsed into datetimes.
fn = "DATA/MOWER SALES TEST.csv"
df_test = pd.read_csv(fn)
df_test.columns = ['ds', 'y']
df_test = df_test.assign(ds=pd.to_datetime(df_test['ds']))
print(df_test.shape[0])  # row count

In [None]:
######################################################################
######################################################################

In [None]:
# Preview the first rows of df_hunt as loaded from the CSV.
df_hunt.head()

In [None]:
# Put the search series on a gap-free daily calendar:
#   1. move every timestamp forward by three days,
#   2. left-join the shifted rows onto a continuous daily date range,
#   3. fill the days without an observation using pandas' default interpolation.
# NOTE(review): this cell overwrites df_hunt, so executing it a second time
# shifts the dates by another three days. Run it once per kernel session.

dates1 = df_hunt['ds']
dates2 = dates1 + datetime.timedelta(days=3)
df_hunt['ds'] = dates2
base_dates = pd.DataFrame({'ds': pd.date_range(start=min(dates2), end=max(dates2))})
df_hunt = base_dates.merge(df_hunt, how='left', on='ds').interpolate()

In [None]:
# Preview df_hunt again, now after the date shift, daily join and interpolation.
df_hunt.head()

In [None]:
# Hold out the last 730 rows of the daily series as the test window;
# everything before them is used for training.
TestSetSize = 2 * 365
df_hunt_test = df_hunt.iloc[-TestSetSize:]
df_hunt_train = df_hunt.iloc[:-TestSetSize]

In [None]:
# Line chart of the training portion of the search index.
fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(df_hunt_train['ds'], df_hunt_train['y'])
ax.set_title('Historical Search Index for "hunting blind"', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Index', size=16)
plt.show()

In [None]:
# Forecast horizon controls.
#   past_days   - size of the optional back-cast window. The padding below adds
#                 the (past_days - 1) days that precede the first training
#                 date; 0 or 1 adds nothing.
#   future_days - number of days to forecast past the end of the training data.
past_days   =  0
future_days = 365*2

# Fit Prophet with multiplicative seasonality on the training split.
m = Prophet(
    seasonality_mode = 'multiplicative'
)

m.fit(df_hunt_train)
fut = m.make_future_dataframe(future_days)

#-----------------------------------------------------------------
# Prepend the back-cast dates. The padding stays a DatetimeIndex so that 'ds'
# keeps its datetime dtype even when the padding is empty; building it from an
# empty Python list forced concat to ignore the empty frame when choosing the
# dtype, which newer pandas versions deprecate.
first_date = fut['ds'].iloc[0].normalize()
pad_dates = pd.date_range(end=first_date, periods=past_days)[:-1]  # drop first_date itself
fut0 = pd.DataFrame({'ds': pad_dates})
fut = pd.concat([fut0, fut], ignore_index=True)

In [None]:
# Run the fitted model over the prediction frame and keep the point forecast
# with its uncertainty bounds.
f1 = m.predict(fut)
Cols = ['ds', 'yhat_lower', 'yhat', 'yhat_upper']
f2 = f1.loc[:, Cols].copy()

# Date-indexed copy of the forecast ('ds' stays available as a column too).
ForecastDF = f2.set_index('ds', drop=False)
yhat_df = ForecastDF.loc[:, ['yhat']].copy()

# Largest predicted value; used to scale the y-axis of the forecast plot.
max_pred = yhat_df['yhat'].max()

In [None]:
# Number of rows in the forecast frame f2.
print('Total Forecast Size:',len(f2)) 

In [None]:
# Prophet's forecast plot for the "hunting blind" search index.
# The labels previously read "Expected Sales" / "Sales" (copied from the sales
# section); this model is fitted on the search index, so they now say so.
print('m.seasonality_prior_scale :',m.seasonality_prior_scale)
m.plot(f1)
plt.title('Forecasted Search Index for "hunting blind"',size=18)
plt.xlabel('Date',size=14)
plt.ylabel('Index',size=14)
plt.ylim([-0.1,max_pred*1.1])  # leave 10% headroom above the largest prediction
plt.show()

In [None]:
# Plot the individual components of the forecast (Prophet's component plot).
m.plot_components(f1); 

In [None]:
# Attach the forecast to the training rows by date and score the fit.
# simple_rmse comes from the JModels star import.
InSampleDF = df_hunt_train.merge(f1[['ds', 'yhat']], how='left', on='ds')
actuals = InSampleDF['y']
preds = InSampleDF['yhat']

InSampleRmse = simple_rmse(actuals, preds)
InSampleCor = pearsonr(actuals, preds)[0]  # [0] is the correlation coefficient

print('In-Sample Correlation:', round(InSampleCor, 6))
print('In-Sample RMSE Error: ', round(InSampleRmse, 6))

In [None]:
# Attach the forecast to the held-out rows by date and score it.
# simple_rmse comes from the JModels star import.
OutSampleDF = df_hunt_test.merge(f1[['ds', 'yhat']], how='left', on='ds')
actuals = OutSampleDF['y']
preds = OutSampleDF['yhat']

OutSampleRmse = simple_rmse(actuals, preds)
OutSampleCor = pearsonr(actuals, preds)[0]  # [0] is the correlation coefficient

print('Out-Sample Correlation:', round(OutSampleCor, 6))
print('Out-Sample RMSE Error: ', round(OutSampleRmse, 6))

In [None]:
# Predicted-vs-actual scatter for the training rows, with the y = x reference
# line drawn in green over the 0..65 range.
actuals = InSampleDF['y']
preds = InSampleDF['yhat']
x_line = np.linspace(0, 65, 100)

fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(preds, actuals, alpha=0.2)
ax.plot(x_line, x_line, color='green')
ax.set_title('Goodness-of-Fit Plot (Training Set)', size=20)
ax.set_xlabel('Predicted Index Value', size=16)
ax.set_ylabel('Actual Index Value', size=16)
plt.show()

In [None]:
# Predicted-vs-actual scatter for the held-out rows, with the y = x reference
# line drawn in green over the 0..65 range.
actuals = OutSampleDF['y']
preds = OutSampleDF['yhat']
x_line = np.linspace(0, 65, 100)

fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(preds, actuals, alpha=0.2)
ax.plot(x_line, x_line, color='green')
ax.set_title('Goodness-of-Fit Plot (Testing Set)', size=20)
ax.set_xlabel('Predicted Index Value', size=16)
ax.set_ylabel('Actual Index Value', size=16)
plt.show()

In [None]:
# Preview the date-indexed point forecast.
yhat_df.head()

In [None]:
# Overlay the training data, the held-out data and the model prediction on one
# time axis, starting the view at 2014-01-01.
x1, y1 = df_hunt_train['ds'], df_hunt_train['y']
x2, y2 = df_hunt_test['ds'], df_hunt_test['y']
x3, y3 = list(yhat_df.index), list(yhat_df['yhat'])

fig, ax = plt.subplots(figsize=(11, 6))
for xs, ys, series_label in (
    (x1, y1, 'Training Set'),
    (x2, y2, 'Testing Set'),
    (x3, y3, 'Prediction'),
):
    ax.plot(xs, ys, label=series_label)
ax.set_title('Historical Search Index for "hunting blind"', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Index', size=16)
ax.legend(loc='upper left')
ax.set_xlim([datetime.datetime(2014, 1, 1), max(x3)])
plt.show()

In [None]:
######################################################################
######################################################################

In [None]:
#df_train.head() 

In [None]:
# Line chart of the sales training data.
fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(df_train['ds'], df_train['y'])
ax.set_title('Historical Sales', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Sales', size=16)
plt.show()

In [None]:
######################################################################
######################################################################

In [None]:
# Horizon for the direct sales forecast: no back-cast window, 60 days ahead.
past_days, future_days = 0, 60

In [None]:
# Method #1: fit Prophet directly on the sales history.
# mcmc_samples > 0 requests MCMC sampling rather than a point estimate, and
# uncertainty_samples sets the number of draws used for the forecast intervals
# (per Prophet's documentation).
# NOTE(review): no random seed is set, so results may differ between runs.
m = Prophet(
    seasonality_mode = 'multiplicative',
    mcmc_samples = 500,
    uncertainty_samples = 500
)

m.fit(df_train)
fut = m.make_future_dataframe(future_days)

#-----------------------------------------------------------------
# Prepend the optional back-cast dates: the (past_days - 1) days before the
# first date in the frame. The padding stays a DatetimeIndex so that 'ds' keeps
# its datetime dtype even when the padding is empty; building it from an empty
# Python list forced concat to ignore the empty frame when choosing the dtype,
# which newer pandas versions deprecate.
first_date = fut['ds'].iloc[0].normalize()
pad_dates = pd.date_range(end=first_date, periods=past_days)[:-1]  # drop first_date itself
fut0 = pd.DataFrame({'ds': pad_dates})
fut = pd.concat([fut0, fut], ignore_index=True)

In [None]:
# Run the fitted sales model over the prediction frame and keep the point
# forecast with its uncertainty bounds.
f1 = m.predict(fut)
Cols = ['ds', 'yhat_lower', 'yhat', 'yhat_upper']
f2 = f1.loc[:, Cols].copy()

# Date-indexed copy of the forecast ('ds' stays available as a column too).
ForecastDF = f2.set_index('ds', drop=False)
yhat_df = ForecastDF.loc[:, ['yhat']].copy()

# Largest predicted value; used to scale the y-axis of the forecast plot.
max_pred = yhat_df['yhat'].max()

In [None]:
# Number of rows in the forecast frame f2.
print('Forecast Size:',len(f2)) 

In [None]:
# Prophet's forecast plot for the sales model, relabelled and with the y-axis
# capped 10% above the largest prediction.
print('m.seasonality_prior_scale :', m.seasonality_prior_scale)
forecast_fig = m.plot(f1)
forecast_ax = forecast_fig.gca()
forecast_ax.set_title('Expected Sales', size=18)
forecast_ax.set_xlabel('Date', size=14)
forecast_ax.set_ylabel('Sales', size=14)
forecast_ax.set_ylim([-0.1, max_pred * 1.1])
plt.show()

In [None]:
# Plot the individual components of the sales forecast (Prophet's component plot).
m.plot_components(f1); 

In [None]:
######################################################################
######################################################################

In [None]:
#f1.head().T

In [None]:
# Method #1, training period: join the forecast onto the actual sales by date
# and score it. simple_rmse comes from the JModels star import.
InSampleDF = df_train.merge(f1[['ds', 'yhat']], how='left', on='ds')
actuals = InSampleDF['y']
preds = InSampleDF['yhat']

InSampleRmse = simple_rmse(actuals, preds)
InSampleCor = pearsonr(actuals, preds)[0]  # [0] is the correlation coefficient

print('In-Sample Correlation:', round(InSampleCor, 6))
print('In-Sample RMSE Error: ', round(InSampleRmse, 6))

In [None]:
# Method #1, test period: join the forecast onto the held-out sales by date
# and score it. simple_rmse comes from the JModels star import.
OutSampleDF = df_test.merge(f1[['ds', 'yhat']], how='left', on='ds')
actuals = OutSampleDF['y']
preds = OutSampleDF['yhat']

OutSampleRmse = simple_rmse(actuals, preds)
OutSampleCor = pearsonr(actuals, preds)[0]  # [0] is the correlation coefficient

print('Out-Sample Correlation:', round(OutSampleCor, 6))
print('Out-Sample RMSE Error: ', round(OutSampleRmse, 6))

In [None]:
# Snapshot the method #1 frames: InSampleDF / OutSampleDF are rebound later
# when method #2 is scored.
InSampleDF1, OutSampleDF1 = InSampleDF.copy(), OutSampleDF.copy()

In [None]:
######################################################################
######################################################################

In [None]:
# Preview the first rows of df_season as loaded from the CSV.
df_season.head()

In [None]:
# Put the lawn-mower search series on a gap-free daily calendar:
#   1. move every timestamp forward by three days,
#   2. left-join the shifted rows onto a continuous daily date range,
#   3. fill the days without an observation using pandas' default interpolation.
# NOTE(review): this cell overwrites df_season, so executing it a second time
# shifts the dates by another three days. Run it once per kernel session.

dates1 = df_season['ds']
dates2 = dates1 + datetime.timedelta(days=3)
df_season['ds'] = dates2
base_dates = pd.DataFrame({'ds': pd.date_range(start=min(dates2), end=max(dates2))})
df_season = base_dates.merge(df_season, how='left', on='ds').interpolate()

In [None]:
# Preview df_season again, now after the date shift, daily join and interpolation.
df_season.head()

In [None]:
# Line chart of the daily lawn-mower search index.
fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(df_season['ds'], df_season['y'])
ax.set_title('Google Trends Index Over 5 Yrs for "buy lawn mower"', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Search Index', size=16)
plt.show()

In [None]:
######################################################################

In [None]:
# Horizon for the search-index forecast: no back-cast window, 400 days ahead.
past_days, future_days = 0, 400

In [None]:
# Fit Prophet with multiplicative seasonality on the full daily search index.
m = Prophet(
    seasonality_mode = 'multiplicative'
)

m.fit(df_season)
fut = m.make_future_dataframe(future_days)

#-----------------------------------------------------------------
# Prepend the optional back-cast dates: the (past_days - 1) days before the
# first date in the frame. The padding stays a DatetimeIndex so that 'ds' keeps
# its datetime dtype even when the padding is empty; building it from an empty
# Python list forced concat to ignore the empty frame when choosing the dtype,
# which newer pandas versions deprecate.
first_date = fut['ds'].iloc[0].normalize()
pad_dates = pd.date_range(end=first_date, periods=past_days)[:-1]  # drop first_date itself
fut0 = pd.DataFrame({'ds': pad_dates})
fut = pd.concat([fut0, fut], ignore_index=True)

In [None]:
# Run the fitted search-index model over the prediction frame and keep the
# point forecast with its uncertainty bounds.
f1 = m.predict(fut)
Cols = ['ds', 'yhat_lower', 'yhat', 'yhat_upper']
f2 = f1.loc[:, Cols].copy()

# Date-indexed copy of the forecast ('ds' stays available as a column too).
ForecastDF = f2.set_index('ds', drop=False)
yhat_df = ForecastDF.loc[:, ['yhat']].copy()

# Largest predicted value; used to scale the y-axis of the forecast plot.
max_pred = yhat_df['yhat'].max()

In [None]:
# Prophet's forecast plot for the "buy lawn mower" search index.
# The labels previously read "Expected Sales" / "Sales" (copied from the sales
# section); this model is fitted on the search index, so they now say so.
print('m.seasonality_prior_scale :',m.seasonality_prior_scale)
m.plot(f1)
plt.title('Forecasted Search Index for "buy lawn mower"',size=18)
plt.xlabel('Date',size=14)
plt.ylabel('Search Index',size=14)
plt.ylim([-0.1,max_pred*1.1])  # leave 10% headroom above the largest prediction
plt.show()

In [None]:
# Plot the individual components of the search-index forecast (Prophet's component plot).
m.plot_components(f1); 

In [None]:
######################################################################
######################################################################

In [None]:
# Preview the date-indexed point forecast of the search index.
yhat_df.head()

In [None]:
# Preview the observed daily search index for comparison with the forecast above.
df_season.head()

In [None]:
# Overlay the observed search index and Prophet's fitted/forecast curve.
# The figure previously had no title, axis labels or legend, so the two lines
# could not be told apart; labels are added so it reads on its own.
x1,y1 = df_season['ds'], df_season['y']
x2,y2 = list(yhat_df.index),yhat_df['yhat']

plt.figure(figsize=(11, 6))
plt.plot(x1,y1,label='Observed Index')
plt.plot(x2,y2,label='Prophet Fit / Forecast')
plt.title('Observed vs. Fitted Search Index for "buy lawn mower"',size=20)
plt.xlabel('Date',size=16)
plt.ylabel('Search Index',size=16)
plt.legend(loc='upper left')
plt.show()

In [None]:
# Flatten the date-indexed forecast into a plain two-column frame:
# 'ds' (date) and 'season' (the fitted/forecast search index).
fitted_season = pd.DataFrame({
    'ds': list(yhat_df.index),
    'season': list(yhat_df['yhat']),
})
fitted_season.head()

In [None]:
######################################################################
######################################################################

In [None]:
# Attach the fitted search index to each training-sales row by date.
df_train2 = df_train.merge(fitted_season, how='left', on='ds')
df_train2.head()

In [None]:
# Scatter of sales against the fitted search index.
# x and y are kept as notebook-level names: the following cells read them.
x = df_train2['season']
y = df_train2['y']

fig, ax = plt.subplots(figsize=(9, 9))
ax.scatter(x, y)
ax.set_title('Google Trends Index Over 5 Yrs for "buy lawn mower"', size=20)
ax.set_xlabel('Search Index - Google Trends', size=16)
ax.set_ylabel('Historical Sales', size=16)
plt.show()

In [None]:
# Correlation and RMSE between sales (y) and the fitted search index (x),
# both defined in the scatter cell above. simple_rmse comes from JModels.
Cor = pearsonr(y, x)[0]
Rmse = simple_rmse(y, x)
for metric in (Cor, Rmse):
    print(round(metric, 5))

In [None]:
# Fit a PolyFit model (from the JModels star import) mapping the fitted search
# index (x) to sales (y), then evaluate it on an even grid for plotting.
# NOTE(review): the argument 3 is presumably the polynomial degree - confirm
# against JModels.PolyFit.
poly_model = PolyFit(3)
poly_model.fit(x,y)
x_line = np.linspace(5,95,500)  # 500 evenly spaced index values from 5 to 95
y_line = poly_model.predict(x_line) 

In [None]:
# Same scatter as before, with the fitted PolyFit curve overlaid in green.
x = df_train2['season']
y = df_train2['y']

fig, ax = plt.subplots(figsize=(9, 9))
ax.scatter(x, y)
ax.plot(x_line, y_line, color='green')
ax.set_title('Google Trends Index Over 5 Yrs for "buy lawn mower"', size=20)
ax.set_xlabel('Search Index - Google Trends', size=16)
ax.set_ylabel('Historical Daily Sales', size=16)
plt.show()

In [None]:
# Preview the training sales joined with the fitted search index.
df_train2.head() 

In [None]:
# Add the model's sales estimate and the residual (actual minus estimate)
# as new columns on df_train2.
df_train2['y_pred'] = poly_model.predict(df_train2['season'])
df_train2['resid'] = df_train2['y'].sub(df_train2['y_pred'])

In [None]:
# Actual training sales and the PolyFit estimate on a shared time axis.
fig, ax = plt.subplots(figsize=(11, 6))
for column in ('y', 'y_pred'):
    ax.plot(df_train2['ds'], df_train2[column])
ax.set_title('Historical Sales', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Sales', size=16)
plt.show()

In [None]:
# Residuals (actual minus estimated sales) over time.
fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(df_train2['ds'], df_train2['resid'])
ax.set_title('Residual Sales', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Residual Sales', size=16)
plt.show()

In [None]:
# Method #2 prediction: push every fitted search-index value (history and
# future) through the PolyFit model; fitted_season itself is left untouched.
fitted_season2 = fitted_season.assign(
    yhat=poly_model.predict(fitted_season['season'])
)

In [None]:
# Actual training sales against the method #2 estimate over the full date span.
# x1/y1/x2/y2 are kept as notebook-level names: the next cell re-plots them.
x1, y1 = df_train2['ds'], df_train2['y']
x2, y2 = fitted_season2['ds'], fitted_season2['yhat']

fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(x1, y1)
ax.plot(x2, y2)
ax.set_title('Estimated Historical Sales', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Est. Sales', size=16)
plt.show()

In [None]:
# Same two series as the previous cell (x1/y1 and x2/y2 are defined there),
# zoomed to the window 2017-03-01 .. 2020-03-01.
zoom_window = [datetime.datetime(2017, 3, 1), datetime.datetime(2020, 3, 1)]

fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(x1, y1)
ax.plot(x2, y2)
ax.set_title('Estimated Historical Sales', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Est. Sales', size=16)
ax.set_xlim(zoom_window)
plt.show()

In [None]:
# Method #2, training period: join the estimate onto the actual sales by date
# and score it. Only 'ds' and 'yhat' are taken from fitted_season2, because
# df_train2 already has a 'season' column and merging the whole frame produced
# duplicated 'season_x' / 'season_y' columns. This also matches the
# out-of-sample cell, which selects the same two columns.
# simple_rmse comes from the JModels star import.
InSampleDF = pd.merge(df_train2,fitted_season2[['ds','yhat']],how='left',on='ds')
actuals, preds = InSampleDF['y'], InSampleDF['yhat']

InSampleCor  = pearsonr(actuals,preds)[0]  # [0] is the correlation coefficient
InSampleRmse = simple_rmse(actuals,preds)

print('In-Sample Correlation:',round(InSampleCor ,6))
print('In-Sample RMSE Error: ',round(InSampleRmse,6))

In [None]:
# Method #2, test period: join the estimate onto the held-out sales by date
# and score it. simple_rmse comes from the JModels star import.
OutSampleDF = df_test.merge(fitted_season2[['ds', 'yhat']], how='left', on='ds')
actuals = OutSampleDF['y']
preds = OutSampleDF['yhat']

OutSampleRmse = simple_rmse(actuals, preds)
OutSampleCor = pearsonr(actuals, preds)[0]  # [0] is the correlation coefficient

print('Out-Sample Correlation:', round(OutSampleCor, 6))
print('Out-Sample RMSE Error: ', round(OutSampleRmse, 6))

In [None]:
# Snapshot the method #2 frames for the side-by-side comparison below.
InSampleDF2, OutSampleDF2 = InSampleDF.copy(), OutSampleDF.copy()

In [None]:
######################################################################
######################################################################

In [None]:
# Two-panel predicted-vs-actual figure for method #1: training rows on the
# left (green), test rows on the right (blue), each with a 0..30 diagonal.
y1, x1 = InSampleDF1['y'], InSampleDF1['yhat']
y2, x2 = OutSampleDF1['y'], OutSampleDF1['yhat']

panels = (
    (x1, y1, 'g', 'Training Set Fit'),
    (x2, y2, 'b', 'Testing Set Fit'),
)

fig, ax = plt.subplots(1, 2, figsize=(12, 5))
for axis, (predicted, actual, colour, panel_title) in zip(ax, panels):
    axis.scatter(predicted, actual, c=colour)
    axis.plot(np.linspace(0, 30, 2), np.linspace(0, 30, 2))
    axis.set_title(panel_title, fontsize=16)
    axis.set_xlabel('Predicted Sales', fontsize=14)
    axis.set_ylabel('Actual Sales', fontsize=14)

fig.suptitle('Method #1', fontsize=20)

# Figure margins and the gaps between the two panels (figure fractions).
plt.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9,
                    wspace=0.3, hspace=0.3)

In [None]:
# Two-panel predicted-vs-actual figure for method #2: training rows on the
# left (green), test rows on the right (blue), each with a 0..30 diagonal.
y1, x1 = InSampleDF2['y'], InSampleDF2['yhat']
y2, x2 = OutSampleDF2['y'], OutSampleDF2['yhat']

panels = (
    (x1, y1, 'g', 'Training Set Fit'),
    (x2, y2, 'b', 'Testing Set Fit'),
)

fig, ax = plt.subplots(1, 2, figsize=(12, 5))
for axis, (predicted, actual, colour, panel_title) in zip(ax, panels):
    axis.scatter(predicted, actual, c=colour)
    axis.plot(np.linspace(0, 30, 2), np.linspace(0, 30, 2))
    axis.set_title(panel_title, fontsize=16)
    axis.set_xlabel('Predicted Sales', fontsize=14)
    axis.set_ylabel('Actual Sales', fontsize=14)

fig.suptitle('Method #2', fontsize=20)

# Figure margins and the gaps between the two panels (figure fractions).
plt.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9,
                    wspace=0.3, hspace=0.3)

In [None]:
######################################################################
######################################################################

In [None]:
# Results frame for the test period: actual sales next to both methods'
# predictions, plus a running total ('<name>_C') of each of the three series.
RDF = pd.DataFrame({
    'Date': OutSampleDF1['ds'],
    'Sales': OutSampleDF1['y'],
    'PredsM1': OutSampleDF1['yhat'],
    'PredsM2': OutSampleDF2['yhat'],
})

for col in ('Sales', 'PredsM1', 'PredsM2'):
    RDF[f'{col}_C'] = RDF[col].cumsum()

In [None]:
# Preview the first rows of the results frame.
RDF.head()

In [None]:
# Daily actual sales against both methods' predictions over the test period.
fig, ax = plt.subplots(figsize=(11, 6))
for column, series_label in (
    ('Sales', 'Actual Sales'),
    ('PredsM1', 'Method #1'),
    ('PredsM2', 'Method #2'),
):
    ax.plot(RDF['Date'], RDF[column], label=series_label)
ax.set_title('Forecasted Daily Sales', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Sales', size=16)
ax.legend(loc='upper left')
ax.set_ylim([0, 27])
plt.show()

In [None]:
# Running totals of actual sales and of both methods' predictions.
fig, ax = plt.subplots(figsize=(11, 6))
for column, series_label in (
    ('Sales_C', 'Actual Sales'),
    ('PredsM1_C', 'Method #1'),
    ('PredsM2_C', 'Method #2'),
):
    ax.plot(RDF['Date'], RDF[column], label=series_label)
ax.set_title('Forecasted Daily Sales - Cumulative', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Sales', size=16)
ax.legend(loc='upper left')
ax.set_ylim([0, 800])
plt.show()

In [None]:
# Last row of the results frame: the cumulative columns hold the period totals.
RDF.tail(1)

In [None]:
# Compare each method's cumulative prediction at the end of the test period
# with the cumulative actual sales, as a signed percentage error.
FinalSales = RDF['Sales_C'].tolist()[-1]
FinalPred1 = RDF['PredsM1_C'].tolist()[-1]
FinalPred2 = RDF['PredsM2_C'].tolist()[-1]


def _pct_error(predicted_total, actual_total):
    """Signed error of predicted_total relative to actual_total, in percent (3 dp)."""
    return round(100*((predicted_total-actual_total)/actual_total), 3)


ErrorM1 = _pct_error(FinalPred1, FinalSales)
ErrorM2 = _pct_error(FinalPred2, FinalSales)

print('Comparing the Cumulative Results')
print('Method 1 Error =', ErrorM1, '%')
print('Method 2 Error =', ErrorM2, '%')

In [None]:
######################################################################
######################################################################

In [None]:
# [END]