# Weighted Forecasting for Time Series Store Forecasts
* By Alex Dance https://www.linkedin.com/in/alex-dance/
* This notebook is one of several notebooks for a project to improve store and product forecasts
1.	EDA – Exploratory Data Analysis – includes working with annual forecasts
2.	Main Modelling
3.	XG Boost modelling by Month
4.	Weighted average
5.	ARIMA – Month and Other Modelling
6.	Deep Learning

* This forecast does not use a fitted model; instead, weightings (month, year, weekday and store/item) are calculated from the training data and multiplied by the average sales to give a forecast

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

In [None]:
from sklearn.metrics import mean_squared_error , mean_absolute_error

In [None]:
class color:
    """Terminal escape codes used to make printed headings stand out."""

    # ANSI escape sequence ESC[1m, kept under the name BOLD for use in print().
    BOLD = '\033[1m'

In [None]:
# Load the raw training data. Later cells rely on the `date`, `store`,
# `item` and `sales` columns of this frame.
df = pd.read_csv("../input/demand-forecasting-kernels-only/train.csv")
df.head()

In [None]:
# NOTE(review): this value is reassigned to "2016-12-31" in the Feature
# Engineering section before `split` is used for the first time, so this
# assignment currently has no effect.
split = "2017-01-01"

In [None]:
# Convert the `date` column to datetimes so the `.dt` accessors and the
# date-based train/test split further down can be used.
df['date'] =  pd.to_datetime(df['date'])

# Function for Error calculations

In [None]:
def calculate_errorb(test_sales, test_prediction):
    """Return standard error metrics of a forecast against the actual sales.

    Values are compared position by position (any pandas index is ignored).

    Parameters
    ----------
    test_sales : array-like
        Actual (observed) values.
    test_prediction : array-like
        Predicted values, with the same shape and order as ``test_sales``.

    Returns
    -------
    dict
        ``'MSE_test'``: mean squared error.
        ``'MAE_test'``: mean absolute error.
        ``'MAPE'``: mean absolute percentage error, in percent (0-100 scale).
        Observations whose actual value is 0 are left out because the
        percentage error is undefined there; ``nan`` if every actual is 0.
        ``'RMSE'``: root mean squared error (square root of ``MSE_test``).

    Raises
    ------
    ValueError
        If the inputs are empty or do not have the same shape.
    """
    actual = np.asarray(test_sales, dtype=float)
    predicted = np.asarray(test_prediction, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"test_sales and test_prediction must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("cannot compute error metrics on empty input")

    error = predicted - actual
    MSE_test = float(np.mean(error ** 2))        # Mean Squared Error (MSE)
    MAE_test = float(np.mean(np.abs(error)))     # Mean Absolute Error (MAE)
    RMSE = float(np.sqrt(MSE_test))              # Root Mean Squared Error (RMSE)

    # Mean Absolute Percentage Error (MAPE): |error| relative to the actual
    # value, skipping zero actuals to avoid dividing by zero.
    nonzero = actual != 0
    if nonzero.any():
        MAPE = float(np.mean(np.abs(error[nonzero] / actual[nonzero])) * 100)
    else:
        MAPE = float('nan')

    return {'MSE_test': MSE_test, 'MAE_test': MAE_test, 'MAPE': MAPE, 'RMSE': RMSE}

# Feature Engineering

In [None]:
# Boundary date between the training and test periods; used further down to
# slice the date-indexed frame into df_train and df_test.
split = "2016-12-31"

In [None]:
# Single string key of the form "<item>-<store>" identifying each series.
df['ItemStoreCombined'] = df['item'].astype(str) + '-' + df['store'].astype(str)

In [None]:
df.head()

In [None]:
# Calendar features derived from the sale date; these are the grouping keys
# for the weightings computed further down.
df['dayofweek'] = df['date'].dt.dayofweek
df['quarter'] = df['date'].dt.quarter
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year
df['dayofyear'] = df['date'].dt.dayofyear
df['dayofmonth'] = df['date'].dt.day
# `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0.
# `isocalendar().week` yields the same ISO-8601 week number; it is cast to a
# plain int64 to match what the old accessor returned.
# NOTE(review): the cast assumes `date` contains no missing values (NaT).
df['weekofyear'] = df['date'].dt.isocalendar().week.astype('int64')

In [None]:
# Index the frame by date so it can be sliced by date when splitting into
# training and test data below.
df = df.set_index('date')


# Splitting into Train and Test Sets

In [None]:
df.head()

In [None]:
# Training period: every row dated up to and including `split`
# (label slices on a date index include the end point).
df_train = df.loc[:split]

In [None]:
# Test period: every row dated strictly AFTER `split`.
# A label slice starting at `split` would include the `split` day itself,
# and the training slice already ends on that day (inclusive), so the same
# rows would appear in both the training and the test data.
df_test = df.loc[df.index > pd.Timestamp(split)]

In [None]:
df_train.head()

In [None]:
# Independent copy of the test frame, so the column drop applied to
# df_test_final in the next cell leaves df_test itself unchanged.
df_test_final = df_test.copy()

In [None]:
# Strip the derived calendar features from the copy of the test frame
# (`dayofmonth` is not in the list and therefore stays).
df_test_final = df_test_final.drop(
    columns=['dayofweek', 'quarter', 'month', 'year', 'dayofyear', 'weekofyear']
)

In [None]:
# Column names of the training frame, as a plain list.
train_cols = df_train.columns.tolist()

In [None]:
print(train_cols)

In [None]:
# Re-select every column of the training frame by name (train_cols was built
# from df_train.columns, so no column is added or removed here).
# NOTE(review): presumably done to get a fresh frame, detached from `df`,
# before new columns are added below; confirm.
df_train = df_train.loc[:,train_cols] 

In [None]:
# Column names of the test frame, as a plain list.
test_cols = df_test.columns.tolist()

In [None]:
# Re-select every column of the test frame by name (test_cols was built from
# df_test.columns, so no column is added or removed here).
# NOTE(review): presumably done to get a fresh frame, detached from `df`,
# before new columns are added below; confirm.
df_test = df_test.loc[:,test_cols] 

# Now Working on Training review

In [None]:
# Number the training years 1, 2, 3, ... counting from the earliest year
# found in the training index.
df_train['Calculated_year'] = df_train.index.year - df_train.index.year.min() + 1

In [None]:
df_train.head()

In [None]:
df_train['Calculated_year'].value_counts()

# Now working through the weightings
* This is the way a lot of standard forecasts are produced, often in Excel
* This solution is included to see how it compares with the modelling approaches
* In summary, it calculates a weighting for each month (and likewise for year, weekday and store/item) and then applies those weightings to an overall average forecast
* Multiple different weightings can be applied
* With such a high number of rows this would be impractical using Excel

In [None]:
# Month weighting: relative deviation of each month's mean sales from the
# overall mean of the training sales, i.e. (month mean - mean) / mean.
# Only the `sales` column is aggregated: aggregating the whole frame also
# hits the string column `ItemStoreCombined`, which raises on pandas 2.x.
# The result is a one-column DataFrame indexed by month.
month_weighting = (
    (df_train.groupby('month')['sales'].mean() - np.nanmean(df_train['sales']))
    / np.nanmean(df_train['sales'])
).to_frame('month_weighting')
# Attach the weighting of its month to every training row.
df_train = df_train.join(month_weighting, how='left', on='month')

In [None]:
df_train.tail()

In [None]:
month_weighting.head(13)

In [None]:
# Year weighting: relative deviation of each year's mean sales from the
# overall mean of the training sales, i.e. (year mean - mean) / mean.
# Only the `sales` column is aggregated: aggregating the whole frame also
# hits the string column `ItemStoreCombined`, which raises on pandas 2.x.
# The result is a one-column DataFrame indexed by year.
year_weighting = (
    (df_train.groupby('year')['sales'].mean() - np.nanmean(df_train['sales']))
    / np.nanmean(df_train['sales'])
).to_frame('year_weighting')

In [None]:
print(year_weighting)

In [None]:
# Growth-rate constant used to fill in an extra year weighting below.
# Presumably a compound annual growth rate (9.6%); confirm how it was derived.
# Only for use on the training data; can be adjusted.
CAGR = 0.096 #only for using on the train data - can be adjusted

In [None]:
# Add an extra row labelled 6 holding three times the CAGR value.
# NOTE(review): year_weighting is indexed by the calendar `year` column, and
# the join below matches on `year`, so a row labelled 6 can never match a
# training row. It looks as if a `Calculated_year`-style label was intended;
# confirm. np.mean() of the scalar CAGR simply returns CAGR.
year_weighting.loc[6,:] =  np.mean(CAGR)*3
# Attach the weighting of its calendar year to every training row.
df_train=df_train.join(year_weighting,how='left',on='year')

In [None]:
# Weekday weighting: relative deviation of each weekday's mean sales from the
# overall mean of the training sales, i.e. (weekday mean - mean) / mean.
# Only the `sales` column is aggregated: aggregating the whole frame also
# hits the string column `ItemStoreCombined`, which raises on pandas 2.x.
# The result is a one-column DataFrame indexed by dayofweek.
weekday_weighting = (
    (df_train.groupby('dayofweek')['sales'].mean() - np.nanmean(df_train['sales']))
    / np.nanmean(df_train['sales'])
).to_frame('weekday_weighting')
# Attach the weighting of its weekday to every training row.
df_train = df_train.join(weekday_weighting, how='left', on='dayofweek')

In [None]:
# Store/item weighting: relative deviation of each (store, item) pair's mean
# sales from the overall mean of the training sales, i.e.
# (pair mean - mean) / mean.
# Only the `sales` column is aggregated: aggregating the whole frame also
# hits the string column `ItemStoreCombined`, which raises on pandas 2.x.
# The result is a one-column DataFrame indexed by (store, item).
store_item_weighting = (
    (df_train.groupby(['store', 'item'])['sales'].mean() - np.nanmean(df_train['sales']))
    / np.nanmean(df_train['sales'])
).to_frame('store_item_weighting')
# Attach the weighting of its store/item pair to every training row.
df_train = df_train.join(store_item_weighting, how='left', on=['store', 'item'])

In [None]:
# Combined multiplier per row: the product of (1 + weighting) over the month,
# year, weekday and store/item weightings.
# np.prod is used because its alias np.product was deprecated in NumPy 1.25
# and removed in NumPy 2.0.
df_train['product_combined_weighting'] = np.prod(
    df_train.loc[:, ['month_weighting', 'year_weighting', 'weekday_weighting', 'store_item_weighting']] + 1,
    axis=1,
)

In [None]:
df_train.sample()

In [None]:
df_train.tail()

In [None]:
df_train.Calculated_year.nunique()

In [None]:
df_train.Calculated_year.value_counts()

In [None]:
print(weekday_weighting)

In [None]:
print(month_weighting)

In [None]:
print(store_item_weighting)

In [None]:
df_train.head()

In [None]:
# In-sample forecast: combined multiplier times the mean training sales
# (the mean is rounded to one decimal first), rounded to whole units.
df_train['sales_prediction'] = np.round(
    df_train['product_combined_weighting']
    * np.round(np.nanmean(df_train['sales']), 1)
)

In [None]:
# Mean of the training sales (NaN ignored); the base level that the test
# weightings are multiplied with further down.
average_train_sales = np.nanmean(df_train['sales'])
print(average_train_sales)

In [None]:
df_train.head()

# now apply the weightings to the test sales

In [None]:
# Give every test row the month weighting learned from the training data.
df_test = df_test.join(month_weighting, on='month', how='left')

In [None]:
# Every test row gets the same year number.
# NOTE(review): 5 presumably continues the 1, 2, 3, ... numbering used for
# the training years; confirm it matches the training data's year range.
df_test['Calculated_year'] =  5

In [None]:
# Fixed year weighting applied to every row of the test data.
year_weighting_17 =0.22  # calculated separately

In [None]:
# Unlike the other weightings, the year weighting is not joined from the
# training data: the same constant is used for every test row.
df_test['year_weighting'] = year_weighting_17

In [None]:
# Give every test row the weekday weighting learned from the training data.
df_test = df_test.join(weekday_weighting, on='dayofweek', how='left')

In [None]:
# Give every test row the store/item weighting learned from the training data.
df_test = df_test.join(store_item_weighting, on=['store', 'item'], how='left')

In [None]:
df_test.head()

In [None]:
# Combined multiplier per test row: the product of (1 + weighting) over the
# month, year, weekday and store/item weightings.
# np.prod is used because its alias np.product was deprecated in NumPy 1.25
# and removed in NumPy 2.0.
df_test['smry_product'] = np.prod(
    df_test.loc[:, ['month_weighting', 'year_weighting', 'weekday_weighting', 'store_item_weighting']] + 1,
    axis=1,
)

In [None]:
# Forecast for the test period: mean training sales scaled by each row's
# combined multiplier.
df_test['weighted_sales_prediction'] = average_train_sales * df_test['smry_product']

In [None]:
average_train_sales

In [None]:
df_test.sum()

In [None]:
df_test.head()

In [None]:
# Root mean squared error of the weighted forecast on the test data:
# square the errors, average them, then take the square root.
# (Taking the square root of each squared error before averaging would give
# the mean absolute error instead.)
RMSE_weighted = np.sqrt(
    np.mean((df_test['weighted_sales_prediction'] - df_test['sales']) ** 2)
)
print(RMSE_weighted)