# Time Series / Forecasts: From the basic solution to the complex – daily and monthly – by store and by product



# By Alex Dance 
* https://www.linkedin.com/in/alex-dance/
* This notebook is one of several notebooks for a project to improve store forecasts
1.	EDA – Exploratory Data Analysis – includes working with annual forecasts
2.	Main Modelling
3.	XGBoost modelling by Month
4.	Weighted average
5.	ARIMA – Month and Other Modelling
6.	Deep Learning


* Data is from the Kaggle Dataset https://www.kaggle.com/c/demand-forecasting-kernels-only/overview


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns

In [None]:
df = pd.read_csv("../input/demand-forecasting-kernels-only/train.csv")

In [None]:
# Preview the first rows of the loaded training data.
df.head()

In [None]:
df.shape

# EDA

In [None]:
df.shape

In [None]:
df.nunique()

In [None]:
df['sales'].sum()

# Other Data Files - That were not used

In [None]:
df.max()

# Manipulation

In [None]:
df['date'] =  pd.to_datetime(df['date'])

In [None]:
df = df.set_index('date')

In [None]:
df.head()

In [None]:
# Keep everything except the store / item identifier columns.
df_sales_only = df.drop(columns=['store', 'item'])

In [None]:
# Derive calendar features from the datetime index in a single step.
df = df.assign(
    day=df.index.day,
    month=df.index.month,
    year=df.index.year,
    dayofweek=df.index.dayofweek,
)


In [None]:
print(df)

In [None]:
sns.boxplot(x="dayofweek", y="sales", data=df)

# Working With individual products and individual stores

In [None]:
df.groupby(['store','item']).size()

In [None]:
df[(df.store==1) & (df.item==1)]

In [None]:
df[(df.store==1) & (df.item==1)]['sales'].plot()

In [None]:
df[(df.store==1) & (df.item==1)]['sales']

In [None]:
df_1_1 = df[(df.store==1) & (df.item==1)]['sales']
split = "2017-01-01"

In [None]:
# Sales series for items 1-2 in stores 1-2, selected with one .loc call each
# (row mask + column label) instead of chained indexing.
df_i1_s1 = df.loc[(df.store == 1) & (df.item == 1), 'sales']
df_i1_s2 = df.loc[(df.store == 2) & (df.item == 1), 'sales']
df_i2_s1 = df.loc[(df.store == 1) & (df.item == 2), 'sales']
df_i2_s2 = df.loc[(df.store == 2) & (df.item == 2), 'sales']

In [None]:
df['sales'].resample('W').sum().plot()

In [None]:
# Total sales per calendar month. An offset object is used instead of the 'M'
# string alias, which newer pandas releases deprecate in favour of 'ME'.
df['sales'].resample(pd.offsets.MonthEnd()).sum().plot()

In [None]:
# Show each store/item series as a one-column DataFrame.
# The results are not assigned, so only the last expression is rendered by the notebook.
df_i1_s1.to_frame()
df_i1_s2.to_frame()
df_i2_s1.to_frame()
df_i2_s2.to_frame()  # was df_i2_s1 a second time (copy-paste slip)

In [None]:
split1 = "2013-12-31"

In [None]:
# Label-based slices of each series from its start up to and including `split1`.
df_i1_s1_a = df_i1_s1.loc[:split1]
df_i1_s2_a = df_i1_s2.loc[:split1]
df_i2_s1_a = df_i2_s1.loc[:split1]
df_i2_s2_a = df_i2_s2.loc[:split1]


In [None]:
df_i1_s1_a.resample('W').sum().plot()

In [None]:
df_i1_s2_a.head(10)

In [None]:
df_i1_s2_a.resample('D').sum().plot()

In [None]:
df_i2_s1_a.head()

In [None]:
df_i2_s2_a.resample('W').sum().plot()

In [None]:
df_i2_s1_a.resample('W').sum().plot()

In [None]:
df_1_1.to_frame()

In [None]:
df_1_1 = df_1_1.to_frame()

In [None]:
df_xg_1_1= df_1_1.copy()  # it was here

In [None]:
df_1_1.head()

# Exploring Shifting

In [None]:
df_1_1['sales-1'] = df_1_1['sales'].shift(1)

In [None]:
df_1_1['sales+2'] = df_1_1['sales'].shift(-2)

In [None]:
df_1_1.head()

In [None]:
df_1_1['sales-2'] = df_1_1['sales'].shift(2)

In [None]:
df_1_1.head()

In [None]:
df_1_1 = df_1_1.dropna()

In [None]:
df_1_1.head()

In [None]:
df_1_1.tail()

In [None]:
df_1_1['sales+1'] = df_1_1['sales'].shift(-1)

In [None]:
df_1_1.head()

In [None]:
df_1_1.tail()

# Looking at the Data

In [None]:
# Month x item matrix of summed sales, as a plain NumPy array.
# The aggregation is named by string: passing the NumPy callable (np.sum)
# makes recent pandas versions emit a FutureWarning.
agg_month_item = pd.pivot_table(
    df, index='month', columns='item', values='sales', aggfunc='sum'
).values

In [None]:
print(agg_month_item)

In [None]:
df["2017-01-03" : "2017-01-20"].sales.sum()

In [None]:
df.dtypes

In [None]:
# All rows from January 2017. Partial-date row selection must go through .loc:
# df["2017-01"] is treated as a column lookup on current pandas and raises KeyError.
df.loc["2017-01"]

# Plotting 1

In [None]:
# Total sales per calendar month. An offset object is used instead of the 'M'
# string alias, which newer pandas releases deprecate in favour of 'ME'.
df.sales.resample(pd.offsets.MonthEnd()).sum().plot()

In [None]:
df["2017-01-01" : "2017-12-31"].sales.resample('W').sum().plot()

In [None]:
df["2016-01-01" : "2016-12-31"].sales.resample('W').sum().plot()

In [None]:
df["2017-01-03" : "2017-12-31"].sales.resample('W').sum().plot(kind="hist")

In [None]:
df.sales.resample('B').sum().plot()

In [None]:
# Daily sales totals for January 2017, using the documented upper-case 'D' alias
# (matches the other daily resample in this notebook).
df.loc["2017-01-01":"2017-01-31", "sales"].resample('D').sum().plot()

# Autocorrelation (ACF) Analysis

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

In [None]:
# Daily sales totals for January 2017, using the documented upper-case 'D' alias
# (matches the other daily resample in this notebook).
Jan2017 = df.loc["2017-01-01":"2017-01-31", "sales"].resample('D').sum()

In [None]:
plot_acf(Jan2017)

In [None]:
plt.plot(Jan2017)

In [None]:
# Month-end sales totals for 2017. The lower-case 'm' alias is undocumented;
# an explicit offset object behaves the same on every pandas version.
T2017 = df.loc["2017-01-01":"2017-12-31", "sales"].resample(pd.offsets.MonthEnd()).sum()

In [None]:
#plot_acf(T2017)

In [None]:
df_1_1_Jan_17 = df_1_1["2017-01-01" : "2017-01-31"]

In [None]:
df_1_1_Jan_17 = df_1_1_Jan_17['sales']

In [None]:
# Year-end sales totals for 2013-2017. The lower-case 'y' alias is undocumented;
# an explicit offset object behaves the same on every pandas version.
Byyear = df.loc["2013-01-01":"2017-12-31", "sales"].resample(pd.offsets.YearEnd()).sum()

In [None]:
plot_acf(Byyear)

# Graph 2

In [None]:
# Point plot of sales against year, using the same data=/x=/y= call form as the box plots.
sns.pointplot(data=df, x='year', y='sales')

# More Working With Data

In [None]:
# Capitalised year / month columns taken from the datetime index.
df['Year'] = df.index.year
df['Month'] = df.index.month

# A bare expression is only rendered when it is the last statement of a cell,
# so the sample has to be shown explicitly.
display(df.sample(5, random_state=0))

# Line plot of every 2017 sales record.
ax = df.loc['2017', 'sales'].plot()

In [None]:
sns.boxplot(data=df, x='Year', y='sales');

In [None]:
sns.boxplot(data=df, x='Month', y='sales');

In [None]:
sns.boxplot(data=df, x='store', y='sales');

In [None]:
sns.boxplot(data=df, x='item', y='sales');

# Working with data

In [None]:
daybyweek = df.groupby(['dayofweek']).agg({'sales':'sum'})
print(daybyweek)

In [None]:
day = df.groupby(['day']).agg({'sales':'sum'})
print(day)

In [None]:
# Total sales for every (item, store) combination.
# NOTE(review): this rebinds `day`, which the preceding cell assigned to the totals
# grouped by the `day` column; after this cell `day` no longer holds those values.
day = df.groupby(['item','store' ]).agg({'sales':'sum'})
print(day)

In [None]:
SalesByDay = df.groupby(['date']).agg({'sales':'sum'})

In [None]:
print (SalesByDay)

In [None]:
SalesByDay.plot()

In [None]:
salesstore = df.groupby(['date','store']).agg({'sales' : 'sum'})

In [None]:
# Total sales per year. The aggregation is named by string because passing
# np.sum makes recent pandas versions emit a FutureWarning.
sales_by_year = pd.pivot_table(df, index='year', values='sales', aggfunc='sum')
print(sales_by_year)

# By ITEM

In [None]:
df[df.item == 1]['sales'].plot()

In [None]:
df[df.item == 2]['sales'].plot()

In [None]:
df[df.item == 20]['sales'].plot()

In [None]:
storetotal = df.groupby(['store']).agg({'sales':'sum'})

In [None]:
# Grand total across stores, using the vectorised Series.sum() rather than
# Python's built-in sum(), which iterates element by element.
print(storetotal['sales'].sum())

In [None]:
print(storetotal)

In [None]:
itemtotal = df.groupby(['item']).agg({'sales':'sum'})

In [None]:
# Grand total across items, using the vectorised Series.sum() rather than
# Python's built-in sum(), which iterates element by element.
print(itemtotal['sales'].sum())

In [None]:
store1 = df[df.store == 1]['sales']
store1.plot()

In [None]:
store5 = df[df.store == 5]['sales']
store5.plot(color='green')

In [None]:
df['sales'].plot(linewidth=0.5);

# Working with Totals and Averages


In [None]:
grand_avg = df.sales.mean()

In [None]:
# Mean sales for every store (rows) x item (columns) pair.
# The aggregation is named by string because passing np.mean makes recent
# pandas versions emit a FutureWarning.
store_item_table = pd.pivot_table(
    df, index='store', columns='item', values='sales', aggfunc='mean'
)
display(store_item_table)

In [None]:
# Mean sales per month, then scaled by the overall mean (`grand_avg`).
# The aggregation is named by string because passing np.mean makes recent
# pandas versions emit a FutureWarning.
month_table = pd.pivot_table(df, index='month', values='sales', aggfunc='mean')
month_table.sales /= grand_avg

In [None]:
print(month_table)

In [None]:
# Mean sales per day of week, then scaled by the overall mean (`grand_avg`).
# The aggregation is named by string because passing np.mean makes recent
# pandas versions emit a FutureWarning.
dow_table = pd.pivot_table(df, index='dayofweek', values='sales', aggfunc='mean')
dow_table.sales /= grand_avg
print(dow_table)

In [None]:
# Mean sales per year, then scaled by the overall mean (`grand_avg`).
# The aggregation is named by string because passing np.mean makes recent
# pandas versions emit a FutureWarning.
year_table = pd.pivot_table(df, index='year', values='sales', aggfunc='mean')
year_table /= grand_avg

In [None]:
print(year_table)

In [None]:
year_table.info()

# Working with year

In [None]:
#year_table.drop([2017])

In [None]:
years = np.arange(2013, 2019)
annual_sales_avg = year_table.values.squeeze()

In [None]:
print(annual_sales_avg)

In [None]:
print(year_table)

In [None]:
# Fit polynomial trends of degree 1, 2 and 3 to the relative annual sales.
# `years[:-1]` leaves out the final entry of `years`, so that entry is not part of the fit.
p1, p2, p3 = (
    np.poly1d(np.polyfit(years[:-1], annual_sales_avg, degree))
    for degree in (1, 2, 3)
)

In [None]:
# Plot the relative annual sales (black dots) together with the three polynomial fits.
plt.figure(figsize=(8,6))
plt.plot(years[:-1], annual_sales_avg, 'ko', label='Observed')
# Only the line style goes in the format string; the colour is set once through
# `color=`. Giving a colour in both places makes matplotlib warn about a redundant definition.
plt.plot(years, p1(years), '-', color='red', label='Degree 1 fit')
plt.plot(years, p2(years), '-', color='blue', label='Degree 2 fit')
plt.plot(years, p3(years), '-', color='green', label='Degree 3 fit')
plt.xlim(2012.5, 2018.5)
plt.title("Relative Sales by Year")
plt.ylabel("Relative Sales")
plt.xlabel("Year")
plt.legend()  # identifies which curve belongs to which fit
plt.show()


In [None]:
# Report the value of each fitted polynomial at 2017, to four decimal places.
# NOTE(review): the fits are built from `years[:-1]`; if that range already contains 2017,
# these are in-sample fitted values rather than a forecast - confirm the intended year.
print(
    f"2017 Relative Sales by Degree-1 (Linear) Fit = {p1(2017):.4f}\n"
    f"2017 Relative Sales by Degree-2 (Quadratic) Fit = {p2(2017):.4f}\n"
    f"2017 Relative Sales by Degree-3 (3 degrees) Fit = {p3(2017):.4f}"
)