# 35 Benchmark Model Using Rolling Averages

In [2]:
# %pwd
# %cd ..

/Users/jordannieusma/Documents/Documents-Laptop/GitHub


## Import packages and set display

In [57]:
# usual packages
import pandas as pd
import numpy as np
#import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import glob
import time
import pickle
import warnings

import bridgestone as b

In [6]:
# Let pandas render up to 100 columns and 100 rows before truncating a frame.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

# Suppress every warning for the rest of the session (this also hides
# deprecation notices from pandas).
warnings.filterwarnings("ignore")

## Load data

In [14]:
# Read only the three columns the benchmark needs, then order the rows by
# date and article. ARTICLE_ID is loaded as a categorical, UNITS as float64.
sales = pd.read_csv(
    "final_sales.csv",
    usecols=["ARTICLE_ID", "DATE", "UNITS"],
    dtype={"ARTICLE_ID": "category", "UNITS": np.float64},
    parse_dates=["DATE"],
    low_memory=False,
).sort_values(by=["DATE", "ARTICLE_ID"])

# Monthly UNITS total per article, indexed by the month-end DATE, with helper
# month / year columns.
#
# observed=False is spelled out on purpose: ARTICLE_ID is categorical, and
# pandas has announced that the implicit default for `observed` will change.
# Pinning it keeps the result (one row per article category per month) the same
# across pandas versions instead of depending on the installed default.
#
# DATE is already datetime64 after grouping with pd.Grouper, so it is not
# converted again.
totalMo = (
    sales
    .groupby(["ARTICLE_ID", pd.Grouper(key="DATE", freq="M")], observed=False)["UNITS"]
    .sum()
    .reset_index()
    .assign(
        month=lambda monthly: monthly["DATE"].dt.month,
        year=lambda monthly: monthly["DATE"].dt.year,
    )
    .set_index("DATE")
)

In [16]:
# Sanity check on the monthly table: its dimensions and how many distinct
# article ids it holds (missing ids, if any, are counted as one value).
n_articles = totalMo["ARTICLE_ID"].nunique(dropna=False)
print("Shape totalMo: ", totalMo.shape)
print("Validate that unique article ids = 435: ", n_articles)

Shape totalMo:  (18705, 4)
Validate that unique article ids = 435:  435


### Store July thru Oct 2018 as separate variables

It is useful to set these as variables and have them handy later for comparisons. 

In [58]:
# Slice the monthly table into one frame per month, July through October 2018,
# using partial-string lookups on the DATE index.
july2018, aug2018, sept2018, oct2018 = (
    totalMo.loc[month_label]
    for month_label in ('2018-07', '2018-08', '2018-09', '2018-10')
)

# Print each slice's shape for a quick visual check.
print(
    'Shapes should be 435 x 4: ',
    *(month_frame.shape for month_frame in (july2018, aug2018, sept2018, oct2018)),
)

Shapes should be 435 x 4:  (435, 4) (435, 4) (435, 4) (435, 4)


## Benchmark 1: Predict Using Avg Last Two Months

In [45]:
# Put each article's August and September 2018 totals on one row
# (UNITS_x = August, UNITS_y = September); September's month/year are dropped.
aug_sept = pd.merge(aug2018, sept2018, on='ARTICLE_ID').drop(columns=['month_y', 'year_y'])

# Two-month mean of units sold, left unrounded.
aug_sept['AugSept2018Avg'] = (aug_sept['UNITS_x'] + aug_sept['UNITS_y']) / 2

# Keep only the article id and its average.
avg_last_two_months = aug_sept.drop(columns=['UNITS_x', 'month_x', 'year_x', 'UNITS_y'])
avg_last_two_months

Unnamed: 0,ARTICLE_ID,AugSept2018Avg
0,106242,558.0
1,106259,683.5
2,106310,779.0
3,106497,1739.0
4,106650,709.5
...,...,...
430,98031,365.5
431,98048,827.5
432,98065,2638.5
433,98099,2876.0


In [76]:
# Pair each article's two-month average (the prediction) with its October 2018
# units (the actual), dropping October's month/year helper columns.
two_month_eval = (
    avg_last_two_months
    .merge(oct2018, on='ARTICLE_ID')
    .rename(columns={'AugSept2018Avg': 'Prediction', 'UNITS': 'Actual'})
    .drop(columns=['month', 'year'])
)
two_month_eval['AbsDiff'] = (two_month_eval['Actual'] - two_month_eval['Prediction']).abs()

# Score the benchmark with the project's WMAPE helper.
result_last_two_months = b.get_wmape(two_month_eval, actuals='Actual', predictions='Prediction')
print("WMAPE using last two months: ", result_last_two_months)

WMAPE using last two months:  0.1187478107079956


## Benchmark 2: Predict Using Avg Last Three Months

In [73]:
# Get the average sales from July-Sept 2018.
# After the two merges: UNITS_x = July, UNITS_y = August, UNITS = September;
# only September's month/year columns survive until the final drop.
july_aug = pd.merge(july2018, aug2018, on='ARTICLE_ID').drop(columns=['month_y', 'year_y'])
july_aug_sept = july_aug.merge(sept2018, on='ARTICLE_ID').drop(columns=['month_x', 'year_x'])

# Three-month mean, rounded to whole units.
# NOTE(review): the two-month benchmark leaves its average unrounded, so the
# two benchmarks are not computed identically - confirm this is intended.
three_month_total = july_aug_sept['UNITS_x'] + july_aug_sept['UNITS_y'] + july_aug_sept['UNITS']
july_aug_sept['JulyAugSept2018Avg'] = (three_month_total / 3).round(0)

# Keep only the article id and its average.
avg_last_three_months = july_aug_sept.drop(columns=['UNITS_x', 'UNITS_y', 'UNITS', 'month', 'year'])

# Check top 5 rows
avg_last_three_months.head()

Unnamed: 0,ARTICLE_ID,UNITS_x,UNITS_y,UNITS,month,year,JulyAugSept2018Avg
0,106242,431.0,552.0,564.0,9,2018,516.0
1,106259,709.0,722.0,645.0,9,2018,692.0
2,106310,1059.0,1024.0,534.0,9,2018,872.0
3,106497,1457.0,1713.0,1765.0,9,2018,1645.0
4,106650,555.0,676.0,743.0,9,2018,658.0
...,...,...,...,...,...,...,...
430,98031,408.0,410.0,321.0,9,2018,380.0
431,98048,979.0,922.0,733.0,9,2018,878.0
432,98065,2457.0,2799.0,2478.0,9,2018,2578.0
433,98099,2986.0,2823.0,2929.0,9,2018,2913.0


In [78]:
# Pair each article's three-month average (the prediction) with its October
# 2018 units (the actual), dropping October's month/year helper columns.
temp = (
    avg_last_three_months
    .merge(oct2018, on='ARTICLE_ID')
    .rename(columns={'JulyAugSept2018Avg': 'Prediction', 'UNITS': 'Actual'})
    .drop(columns=['month', 'year'])
)
temp['AbsDiff'] = (temp['Actual'] - temp['Prediction']).abs()

# Score the benchmark with the project's WMAPE helper.
result_last_three_months = b.get_wmape(temp, actuals='Actual', predictions='Prediction')
print("WMAPE using last three months: ", result_last_three_months)

WMAPE using last three months:  0.13184375742522114


Measured against October 2018 actuals, the average of the last two months of sales (August–September 2018) gives a lower WMAPE than the average of the last three months (July–September 2018): about 0.119 versus 0.132. We will therefore use the two-month average as our model benchmark.