In [24]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing, Holt
from sklearn.metrics import mean_squared_error
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import datetime, timedelta
import numpy as np
import math
import warnings
import csv

# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')

# Load the raw sales export and discard the columns this analysis never reads.
df = pd.read_csv('Sales Final .csv').drop(columns=['Item Code', 'Item Desc', 'Total'])

# Distinct categories, in order of first appearance in the file.
unique_items_list = pd.unique(df['Category']).tolist()
print(len(unique_items_list))
print(unique_items_list)

# One sub-frame per category, keyed by the category name.
data_frames_dict = {category: frame for category, frame in df.groupby('Category')}

# Report how many rows each category contributes.
for item in unique_items_list:
    row_count = len(data_frames_dict[item])
    print(item, row_count, sep="-->")



9
['Fries', 'Sausages', 'Entries', 'Apetizers', 'Desserts', 'Sandwichs', 'Pizzas', 'Tapas', 'Hamburgers']
Fries-->416
Sausages-->304
Entries-->307
Apetizers-->416
Desserts-->102
Sandwichs-->446
Pizzas-->1098
Tapas-->362
Hamburgers-->780


In [25]:
# Pull the three columns of interest out of the raw frame as plain lists.
sales_list = df['Quantity'].tolist()
date_list = df['Date'].tolist()
category_list = df['Category'].tolist()

# Assemble a fresh frame: parsed dates (which also supply the row index),
# the numeric sales quantities and the category labels.
df1 = pd.DataFrame(
    {
        'Date': pd.to_datetime(df['Date']),
        'Quantity': sales_list,
        'Category': category_list,
    },
    columns=['Date', 'Quantity', 'Category'],
)

# Calendar helper columns derived from the parsed date.
df1['Month'] = df1['Date'].dt.month
df1['Year'] = df1['Date'].dt.year

# From here on `df` is the date-indexed view of that frame.
df = df1.set_index('Date')

df.head(5)

Unnamed: 0_level_0,Quantity,Category,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-01,51,Fries,1,2015
2015-01-01,19,Fries,1,2015
2015-01-01,60,Fries,1,2015
2015-01-01,48,Fries,1,2015
2015-01-01,15,Fries,1,2015


In [None]:
# Month-start timestamps for every calendar month covered by the analysis
# (January 2015 through April 2019, i.e. 52 months).
MONTH_INDEX = pd.date_range('2015-01-01', '2019-04-01', freq='MS')

# Share of the monthly series used for fitting; the remainder is held out.
TRAIN_FRACTION = 0.78


def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between two equally long sequences."""
    return math.sqrt(mean_squared_error(actual, predicted))


def monthly_sales(category_frame):
    """Aggregate one category's rows into monthly totals.

    Parameters
    ----------
    category_frame : pandas.DataFrame
        Rows of a single category; must provide 'Date' and 'Quantity' columns.

    Returns
    -------
    pandas.DataFrame
        A single 'Sales' column indexed by ``MONTH_INDEX``. Months without any
        rows get a total of 0. The input frame is not modified.
    """
    quantity = pd.Series(
        category_frame['Quantity'].values,
        index=pd.to_datetime(category_frame['Date'].values),
    )
    # Resample to month starts, then align on the fixed analysis window so that
    # every category yields a series of identical length and frequency.
    totals = quantity.resample('MS').sum().reindex(MONTH_INDEX, fill_value=0)
    return totals.rename('Sales').to_frame()


def plot_models(sales, fitted_models, forecasts, colors, title, **line_kwargs):
    """Draw fitted values, forecasts and actual sales on one shared axes.

    Parameters
    ----------
    sales : pandas.DataFrame
        Frame holding the actual monthly 'Sales'.
    fitted_models : sequence
        Fitted statsmodels results exposing ``fittedvalues``.
    forecasts : sequence of pandas.Series
        Out-of-sample forecasts, one per fitted model.
    colors : sequence of str
        One matplotlib colour per model.
    title : str
        Title of the figure.
    **line_kwargs
        Extra keyword arguments forwarded to every model line
        (for example ``marker='o'`` or ``style='--'``).
    """
    # An explicit axes is required: DataFrame.plot opens a new figure when no
    # `ax` is given, which would separate the actual sales from the model curves.
    fig, ax = plt.subplots()
    for fitted, forecast, color in zip(fitted_models, forecasts, colors):
        fitted.fittedvalues.plot(ax=ax, color=color, **line_kwargs)
        forecast.plot(ax=ax, color=color, legend=True, **line_kwargs)
    sales.plot(ax=ax, marker='o', color='black', legend=True)
    ax.set_title(title)
    plt.show()
    plt.close(fig)


# NOTE(review): the statsmodels keyword names below (`smoothing_slope`, `damped`,
# `use_boxcox` passed to `fit`) are kept exactly as written. Newer statsmodels
# releases appear to rename them (`smoothing_trend`, `damped_trend`, `use_boxcox`
# on the constructor) -- confirm against the installed version before changing.
for item in unique_items_list:
    # Actual sales per month for this category.
    categoryList = monthly_sales(data_frames_dict[item])

    # TRAINING and TESTING
    size = int(len(categoryList) * TRAIN_FRACTION)
    train = categoryList['Sales'].iloc[:size]
    test = categoryList['Sales'].iloc[size:]
    # Forecast exactly as many steps as were held out, so the RMSE inputs
    # always have matching lengths.
    horizon = len(test)

    # SIMPLE EXPONENTIAL SMOOTHING (SES)
    print("SIMPLE EXPONENTIAL SMOOTHING METHOD")
    fit1 = SimpleExpSmoothing(train).fit(smoothing_level=0.2, optimized=False)
    fcast1 = fit1.forecast(horizon).rename(r'$\alpha=0.2$')
    print(fcast1)

    fit2 = SimpleExpSmoothing(train).fit(smoothing_level=0.6, optimized=False)
    fcast2 = fit2.forecast(horizon).rename(r'$\alpha=0.6$')

    # Let statsmodels choose the smoothing level itself.
    fit3 = SimpleExpSmoothing(train).fit()
    fcast3 = fit3.forecast(horizon).rename(r'$\alpha=%s$'%fit3.model.params['smoothing_level'])

    plot_models(
        categoryList,
        [fit1, fit2, fit3],
        [fcast1, fcast2, fcast3],
        ['blue', 'red', 'green'],
        '%s: simple exponential smoothing' % item,
        marker='o',
    )

    # Print RMSE
    print("The RMSE for SIMPLE EXPONENTIAL SMOOTHING with alpha = 0.2 is", '%.2f'%measure_rmse(test, fcast1))
    print("The RMSE for SIMPLE EXPONENTIAL SMOOTHING with alpha = 0.6 is", '%.2f'%measure_rmse(test, fcast2))
    print("The RMSE for SIMPLE EXPONENTIAL SMOOTHING with auto optimization is", '%.2f'%measure_rmse(test, fcast3))
    print('\n')

    # Holt's METHOD
    print("Holt’s METHOD")

    fit1 = Holt(train).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
    fcast1 = fit1.forecast(horizon).rename("Holt's linear trend")

    fit2 = Holt(train, exponential=True).fit(smoothing_level=0.8, smoothing_slope=0.2, optimized=False)
    fcast2 = fit2.forecast(horizon).rename("Exponential trend")

    fit3 = Holt(train, damped=True).fit(smoothing_level=0.8, smoothing_slope=0.2)
    fcast3 = fit3.forecast(horizon).rename("Additive damped trend")

    plot_models(
        categoryList,
        [fit1, fit2, fit3],
        [fcast1, fcast2, fcast3],
        ['blue', 'red', 'green'],
        "%s: Holt's method" % item,
        marker='o',
    )

    # Print RMSE
    print("The RMSE for Holt’s METHOD with explicitly given smoothing parameters alpha = 0.8 and beta = 0.2 is", '%.2f'%measure_rmse(test, fcast1))
    print("The RMSE for Holt’s METHOD with exponential model rather than a Holt’s additive model", '%.2f'%measure_rmse(test, fcast2))
    print("The RMSE for Holt’s METHOD with damped version of the Holt’s additive model but allow the dampening parameter ϕ to be optimized while fixing the values for α=0.8, β*=0.2.", '%.2f'%measure_rmse(test, fcast3))
    print('\n')

    # Holt-Winters’ METHOD
    # Box-Cox and multiplicative seasonality need strictly positive data, so a
    # category with a zero-sales month in the training window will be rejected
    # by statsmodels here.
    print("Holt-Winters’ METHOD")
    fit1 = ExponentialSmoothing(train, seasonal_periods=4, trend='add', seasonal='add').fit(use_boxcox=True)
    fit2 = ExponentialSmoothing(train, seasonal_periods=4, trend='add', seasonal='mul').fit(use_boxcox=True)
    fit3 = ExponentialSmoothing(train, seasonal_periods=4, trend='add', seasonal='add', damped=True).fit(use_boxcox=True)
    fit4 = ExponentialSmoothing(train, seasonal_periods=4, trend='add', seasonal='mul', damped=True).fit(use_boxcox=True)

    fcast1 = fit1.forecast(horizon).rename('add trend, add seasonal')
    fcast2 = fit2.forecast(horizon).rename('add trend, mul seasonal')
    fcast3 = fit3.forecast(horizon).rename('add damped trend, add seasonal')
    fcast4 = fit4.forecast(horizon).rename('add damped trend, mul seasonal')

    plot_models(
        categoryList,
        [fit1, fit2, fit3, fit4],
        [fcast1, fcast2, fcast3, fcast4],
        ['red', 'green', 'purple', 'blue'],
        '%s: Holt-Winters method' % item,
        style='--',
    )

    # RMSE values are formatted to two decimals, like the other two methods.
    print("The RMSE for Holt-Winters’ METHOD with additive trend, additive seasonal of period season_length=4 and a Box-Cox transformation is", '%.2f'%measure_rmse(test, fcast1))
    print("The RMSE for Holt-Winters’ METHOD with additive trend, multiplicative seasonal of period season_length=4 and a Box-Cox transformation", '%.2f'%measure_rmse(test, fcast2))
    print("The RMSE for Holt-Winters’ METHOD with additive damped trend, additive seasonal of period season_length=4 and a Box-Cox transformation", '%.2f'%measure_rmse(test, fcast3))
    print("The RMSE for Holt-Winters’ METHOD with additive damped trend, multiplicative seasonal of period season_length=4 and a Box-Cox transformation", '%.2f'%measure_rmse(test, fcast4))