In [None]:
import numpy  as np
import pandas as pd 
from random import uniform
from scipy.stats import pearsonr
from fbprophet import Prophet
import datetime, logging
logging.getLogger().setLevel(logging.CRITICAL)

from matplotlib import pyplot as plt 
%matplotlib inline 

from JModels import *

In [None]:
######################################################################
######################################################################

In [None]:
# Read the "HUNT GOOGLE TRENDS" CSV and rename its two columns to
# 'ds' (parsed as datetimes below) and 'y' (the observed value).
fn = "DATA/HUNT GOOGLE TRENDS.csv"
df_hunt = pd.read_csv(fn)
df_hunt.columns = ['ds', 'y']
df_hunt = df_hunt.assign(ds=pd.to_datetime(df_hunt['ds']))
print(df_hunt.shape[0])  # number of rows loaded

In [None]:
# Read the "POOL GOOGLE TRENDS" CSV and rename its two columns to
# 'ds' (parsed as datetimes below) and 'y' (the observed value).
fn = "DATA/POOL GOOGLE TRENDS.csv"
df_season = pd.read_csv(fn)
df_season.columns = ['ds', 'y']
df_season = df_season.assign(ds=pd.to_datetime(df_season['ds']))
print(df_season.shape[0])  # number of rows loaded

In [None]:
# Read the "POOL SALES TRAIN" CSV and rename its two columns to
# 'ds' (parsed as datetimes below) and 'y' (the observed value).
fn = "DATA/POOL SALES TRAIN.csv"
df_train = pd.read_csv(fn)
df_train.columns = ['ds', 'y']
df_train = df_train.assign(ds=pd.to_datetime(df_train['ds']))
print(df_train.shape[0])  # number of rows loaded

In [None]:
# Read the "POOL SALES TEST" CSV and rename its two columns to
# 'ds' (parsed as datetimes below) and 'y' (the observed value).
fn = "DATA/POOL SALES TEST.csv"
df_test = pd.read_csv(fn)
df_test.columns = ['ds', 'y']
df_test = df_test.assign(ds=pd.to_datetime(df_test['ds']))
print(df_test.shape[0])  # number of rows loaded

In [None]:
######################################################################
######################################################################

In [None]:
# Preview the first five rows of df_hunt as loaded.
df_hunt.head(n=5)

In [None]:
# Put the series on a gap-free daily calendar:
#   1. shift every timestamp forward by 3 days,
#   2. left-join the shifted observations onto a daily date range that
#      spans the first to the last shifted date,
#   3. fill the days that have no observation by interpolation.
#
# NOTE: df_hunt is rewritten here, so running this cell a second time
# shifts the dates by a further 3 days. Reload df_hunt before re-running.

dates1 = df_hunt['ds']
dates2 = dates1 + datetime.timedelta(days=3)
df_hunt['ds'] = dates2

base_dates = pd.DataFrame({'ds': pd.date_range(start=min(dates2), end=max(dates2))})
df_hunt = base_dates.merge(df_hunt, how='left', on='ds').interpolate()

In [None]:
# Preview the first five rows of df_hunt in its current state.
df_hunt.head(n=5)

In [None]:
# Hold out the last TestSetSize (730) rows as the test set; every earlier
# row is used for training.
TestSetSize = 2 * 365
df_hunt_train, df_hunt_test = df_hunt.iloc[:-TestSetSize], df_hunt.iloc[-TestSetSize:]

In [None]:
# Line chart of the training portion of the series.
x, y = df_hunt_train['ds'], df_hunt_train['y']

fig, ax = plt.subplots(figsize=(11, 6))
ax.plot(x, y)
ax.set_title('Historical Search Index for "hunting blind"', size=20)
ax.set_xlabel('Date', size=16)
ax.set_ylabel('Index', size=16)
plt.show()

In [None]:
# Fit Prophet on the training slice and build `fut`, the frame of dates
# that will be scored by m.predict().
past_days   = 0        # number of days to back-cast before the first date in `fut`
future_days = 365*2    # number of days to forecast beyond the training data

m = Prophet(
    seasonality_mode = 'multiplicative'
)

m.fit(df_hunt_train)
fut = m.make_future_dataframe(future_days)

#-----------------------------------------------------------------
# Prepend the back-cast dates to `fut`.
#
# The range is generated so that it ENDS on the first date already present
# in `fut`; that last element is then dropped to avoid a duplicate row.
# Asking for past_days + 1 periods therefore leaves exactly `past_days`
# extra dates (the previous `periods=past_days` left one fewer).
#
# The dates are kept as a DatetimeIndex so that `fut0['ds']` is
# datetime-typed even when it is empty (past_days == 0); a column built
# from an empty Python list is not, and concatenating it with the datetime
# 'ds' column depends on pandas' deprecated treatment of empty entries.
first_date = fut['ds'].iloc[0].normalize()
dates2 = pd.date_range(end=first_date, periods=past_days + 1)[:-1]
fut0 = pd.DataFrame({'ds': dates2})
fut  = pd.concat([fut0, fut], ignore_index=True)

In [None]:
# Score every date in `fut`, keep the yhat column together with its
# lower/upper companions, and derive a date-indexed copy for plotting.
f1 = m.predict(fut)
Cols = ['ds', 'yhat_lower', 'yhat', 'yhat_upper']
f2 = f1.loc[:, Cols].copy()

# Same columns as f2, but indexed by 'ds' (the 'ds' column is kept too).
ForecastDF = f2.set_index('ds', drop=False)
yhat_df = ForecastDF.loc[:, ['yhat']].copy()

# Largest yhat value; used later to set the y-axis limit.
max_pred = yhat_df['yhat'].max()

In [None]:
# Number of rows in the forecast frame f2.
print('Total Forecast Size:', f2.shape[0])

In [None]:
# Prophet's built-in forecast figure for the model fitted on the
# "hunting blind" search-index series. The title and y-label name that
# quantity (they previously said "Sales", which this model does not predict).
print('m.seasonality_prior_scale :',m.seasonality_prior_scale)
m.plot(f1)
plt.title('Forecast Search Index for "hunting blind"',size=18)
plt.xlabel('Date',size=14)
plt.ylabel('Index',size=14)
# Upper limit is 110% of the largest yhat value, so anything drawn above
# that level is cropped from view.
plt.ylim([-0.1,max_pred*1.1])
plt.show()

In [None]:
# Draw the component plots for forecast f1. Assigning the return value
# keeps it from being echoed as the cell's output.
components_fig = m.plot_components(f1)

In [None]:
# In-sample fit: attach the model's yhat to each training row by date,
# then report the Pearson correlation and the RMSE between 'y' and 'yhat'.
InSampleDF = df_hunt_train.merge(f1[['ds', 'yhat']], how='left', on='ds')
actuals = InSampleDF['y']
preds = InSampleDF['yhat']

# pearsonr returns the coefficient first; only that element is kept.
InSampleCor = pearsonr(actuals, preds)[0]
# simple_rmse is not defined in this notebook; presumably it comes from the
# `from JModels import *` line and computes root-mean-square error (confirm).
InSampleRmse = simple_rmse(actuals, preds)

print('In-Sample Correlation:', round(InSampleCor, 6))
print('In-Sample RMSE Error: ', round(InSampleRmse, 6))

In [None]:
# Out-of-sample fit: attach the model's yhat to each held-out row by date,
# then report the Pearson correlation and the RMSE between 'y' and 'yhat'.
OutSampleDF = df_hunt_test.merge(f1[['ds', 'yhat']], how='left', on='ds')
actuals = OutSampleDF['y']
preds = OutSampleDF['yhat']

# pearsonr returns the coefficient first; only that element is kept.
OutSampleCor = pearsonr(actuals, preds)[0]
# simple_rmse is not defined in this notebook; presumably it comes from the
# `from JModels import *` line and computes root-mean-square error (confirm).
OutSampleRmse = simple_rmse(actuals, preds)

print('Out-Sample Correlation:', round(OutSampleCor, 6))
print('Out-Sample RMSE Error: ', round(OutSampleRmse, 6))

In [None]:
# Predicted-vs-actual scatter for the training rows. The green y = x line
# marks where perfectly predicted points would fall; it is drawn over the
# fixed range 0..65.
actuals, preds = InSampleDF['y'], InSampleDF['yhat']
x_line = np.linspace(0, 65, 100)

fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(preds, actuals, alpha=0.2)
ax.plot(x_line, x_line, color='green')
ax.set_title('Goodness-of-Fit Plot (Training Set)', size=20)
ax.set_xlabel('Predicted Index Value', size=16)
ax.set_ylabel('Actual Index Value', size=16)
plt.show()

In [None]:
# Predicted-vs-actual scatter for the held-out rows. The green y = x line
# marks where perfectly predicted points would fall; it is drawn over the
# fixed range 0..65.
actuals, preds = OutSampleDF['y'], OutSampleDF['yhat']
x_line = np.linspace(0, 65, 100)

fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(preds, actuals, alpha=0.2)
ax.plot(x_line, x_line, color='green')
ax.set_title('Goodness-of-Fit Plot (Testing Set)', size=20)
ax.set_xlabel('Predicted Index Value', size=16)
ax.set_ylabel('Actual Index Value', size=16)
plt.show()

In [None]:
# Preview the first five rows of the date-indexed yhat frame.
yhat_df.head(n=5)

In [None]:
# Overlay the training series, the held-out series and the model's yhat
# on one date axis.
x1, y1 = df_hunt_train['ds'], df_hunt_train['y']
x2, y2 = df_hunt_test['ds'] , df_hunt_test['y']
x3, y3 = list(yhat_df.index), list(yhat_df['yhat'])

plt.figure(figsize=(11, 6))
plt.plot(x1,y1,label='Training Set')
plt.plot(x2,y2,label='Testing Set')
plt.plot(x3,y3,label='Prediction')
plt.title('Historical Search Index for "hunting blind"',size=20)
plt.xlabel('Date',size=16)
plt.ylabel('Index',size=16)
plt.legend(loc='upper left')
# Start the view at 1 Jan 2014. Both limits are given as Timestamps: a bare
# '2014' string on a date axis is only understood when a string-parsing
# unit converter happens to be registered, and otherwise fails to convert.
plt.xlim([pd.Timestamp('2014-01-01'), max(x3)])
plt.show()

In [None]:
######################################################################
######################################################################

In [None]:
### For student to complete ...... 