# Import packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import datetime
import holidays
import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# which likely hides pandas deprecation / chained-assignment warnings
# raised by later cells - consider narrowing the filter.
warnings.filterwarnings('ignore')

# US holiday calendar; later cells query it by date to get a holiday name
us_holidays = holidays.US()

# Load Data

In [2]:
# File path to the csv file.
# Raw string: the Windows-style backslash must stay literal. In a normal
# string "\F" is an invalid escape sequence, which Python warns about.
csv_file = r"<path to>\ForecastingData.csv"

# Read csv file into dataframe
df = pd.read_csv(csv_file)

# Show the first 5 rows in the dataframe
df.head()

Unnamed: 0,ID1,ID2,Time,Value,RDPI,Year,Month,WeekOfMonth,WeekOfYear,IsUsNewYearsDay,...,Lag17,Lag18,Lag19,Lag20,Lag21,Lag22,Lag23,Lag24,Lag25,Lag26
0,1,2,12/7/2019 9:00:04 AM,5.123964,11753.2,2019,12,1,49,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2,11/30/2019 9:00:04 AM,4.836282,11753.2,2019,11,5,48,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,2,11/23/2019 9:00:04 AM,5.337538,11753.2,2019,11,4,47,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,2,11/16/2019 9:00:04 AM,5.347108,11753.2,2019,11,3,46,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,2,11/9/2019 9:00:04 AM,5.075174,11696.6,2019,11,2,45,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Filter by the store and item IDs to predict

In [3]:
# Ids to predict
ID1 = 2
ID2 = 1
# Keep only the rows for the ids we want to predict. The explicit .copy()
# makes df an independent frame, so the column assignments in later cells
# modify it directly rather than a slice of the full dataset.
df = df.loc[(df['ID1'] == ID1) & (df['ID2'] == ID2)].copy()

df.head()

Unnamed: 0,ID1,ID2,Time,Value,RDPI,Year,Month,WeekOfMonth,WeekOfYear,IsUsNewYearsDay,...,Lag17,Lag18,Lag19,Lag20,Lag21,Lag22,Lag23,Lag24,Lag25,Lag26
163,2,1,12/7/2019 9:00:04 AM,5.214936,11753.2,2019,12,1,49,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
164,2,1,11/30/2019 9:00:04 AM,5.278115,11753.2,2019,11,5,48,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
165,2,1,11/23/2019 9:00:04 AM,4.882802,11753.2,2019,11,4,47,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
166,2,1,11/16/2019 9:00:04 AM,4.718499,11753.2,2019,11,3,46,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
167,2,1,11/9/2019 9:00:04 AM,4.912655,11696.6,2019,11,2,45,False,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Create the next 4 weeks for predictions

In [4]:
# Parse the Time column into datetimes, then order the rows chronologically
df = (
    df.assign(Time=pd.to_datetime(df['Time'].astype(str)))
      .sort_values(by='Time')
)

In [5]:
# Take the final row's timestamp and keep only its calendar date
latest_timestamp = df['Time'].iloc[-1]
lastdate = latest_timestamp.date()
print(lastdate)

2019-12-07


In [6]:
# Add the next four weekly dates as placeholder rows to predict.
# RDPI is carried forward from the most recent row; Value is a 0 placeholder.
latest_rdpi = df['RDPI'].iloc[-1]
future_rows = pd.DataFrame([
    {'ID1': ID1,
     'ID2': ID2,
     'Time': lastdate + datetime.timedelta(weeks=week),
     'Value': 0,
     'RDPI': latest_rdpi,
     'IsUsNewYearsDay': False,
     'IsUsLaborDay': False,
     'IsBlackFriday': False,
     'IsChristmasDay': False}
    for week in range(1, 5)  # 1..4 weeks after the last known date
])
# DataFrame.append was removed in pandas 2.0. A single concat is the supported
# replacement and avoids re-copying the whole frame once per added row.
df = pd.concat([df, future_rows], ignore_index=True)

# Add Lag features for prev 26 weeks

In [7]:
# Add lag features: LagN holds the Value from N rows earlier (N = 1..26).
# Series.shift already operates on the whole column, so one pass over the 26
# lags is enough - the previous outer loop over every row recomputed the
# identical columns once per row.
for shift in range(1, 27):
    df['Lag' + str(shift)] = df['Value'].shift(shift)

In [8]:
# Keep only the last 4 rows and renumber them from 0
predictionDf = df.tail(4).reset_index(drop=True)
predictionDf

Unnamed: 0,ID1,ID2,Time,Value,RDPI,Year,Month,WeekOfMonth,WeekOfYear,IsUsNewYearsDay,...,Lag17,Lag18,Lag19,Lag20,Lag21,Lag22,Lag23,Lag24,Lag25,Lag26
0,2,1,2019-12-14,0.0,11753.2,,,,,False,...,4.682131,5.062595,5.075174,4.804021,4.736198,4.955827,4.997212,4.912655,5.575949,5.214936
1,2,1,2019-12-21,0.0,11753.2,,,,,False,...,5.036953,4.682131,5.062595,5.075174,4.804021,4.736198,4.955827,4.997212,4.912655,5.575949
2,2,1,2019-12-28,0.0,11753.2,,,,,False,...,4.983607,5.036953,4.682131,5.062595,5.075174,4.804021,4.736198,4.955827,4.997212,4.912655
3,2,1,2020-01-04,0.0,11753.2,,,,,False,...,4.89784,4.983607,5.036953,4.682131,5.062595,5.075174,4.804021,4.736198,4.955827,4.997212


# Create Time Features

In [9]:
# Derive the calendar features from the forecast dates (parse once, reuse).
forecast_times = pd.to_datetime(predictionDf['Time'])
predictionDf['Year'] = forecast_times.dt.year
predictionDf['Month'] = forecast_times.dt.month
# Week of month: days 1-7 -> 1, 8-14 -> 2, ...
predictionDf['WeekOfMonth'] = forecast_times.dt.day.apply(lambda day: math.ceil(int(day)/7))
# Series.dt.week was deprecated and then removed from pandas;
# isocalendar().week gives the ISO week number, cast back to a plain int.
predictionDf['WeekOfYear'] = forecast_times.dt.isocalendar().week.astype(int)

In [10]:
# Inspect the derived calendar columns
predictionDf.loc[:, ['Time','Year','WeekOfMonth','WeekOfYear']]

Unnamed: 0,Time,Year,WeekOfMonth,WeekOfYear
0,2019-12-14,2019,2,50
1,2019-12-21,2019,3,51
2,2019-12-28,2019,4,52
3,2020-01-04,2020,1,1


# Create holiday column values

In [11]:
def createHolidayFeatures(index, frame=None, calendar=None):
    """Flag the holidays that fall within the 7 days starting at a row's Time.

    Each of the 7 dates beginning at ``frame.loc[index, 'Time']`` is looked up
    in ``calendar``; when the returned name is one of the tracked holidays,
    the matching flag column of that row is set to True.

    Parameters
    ----------
    index : label of the row to update.
    frame : DataFrame updated in place. Defaults to the notebook-level
        ``predictionDf``. Must contain ``Time`` and the four holiday columns.
    calendar : object with ``.get(date)`` returning a holiday name or None.
        Defaults to the notebook-level ``us_holidays``.

    Returns
    -------
    None; ``frame`` is modified in place.
    """
    if frame is None:
        frame = predictionDf
    if calendar is None:
        calendar = us_holidays

    # Holiday name -> flag column. A Thanksgiving match sets IsBlackFriday.
    flag_column = {
        "Thanksgiving": 'IsBlackFriday',
        # NOTE(review): newer releases of the holidays package appear to use
        # the name "Thanksgiving Day"; accepted too - confirm for your version.
        "Thanksgiving Day": 'IsBlackFriday',
        "Labor Day": 'IsUsLaborDay',
        "Christmas Day": 'IsChristmasDay',
        "New Year's Day": 'IsUsNewYearsDay',
    }

    week_start = frame.loc[index, 'Time']
    for days in range(0, 7):
        holiday = calendar.get(week_start + datetime.timedelta(days=days))
        column = flag_column.get(holiday)
        if column is not None:
            # One .loc write instead of chained indexing (frame[col][index] = ...),
            # which may assign to a temporary copy. True (not 1) keeps the
            # boolean dtype of the flag columns.
            frame.loc[index, column] = True

In [12]:
# Apply the holiday lookup to each forecast row in turn
for index in range(predictionDf.shape[0]):
    createHolidayFeatures(index)

In [13]:
# Inspect the holiday flags next to their dates
predictionDf.loc[:, ['Time','IsBlackFriday','IsUsLaborDay','IsChristmasDay','IsUsNewYearsDay']]

Unnamed: 0,Time,IsBlackFriday,IsUsLaborDay,IsChristmasDay,IsUsNewYearsDay
0,2019-12-14,False,False,False,False
1,2019-12-21,False,False,True,False
2,2019-12-28,False,False,False,True
3,2020-01-04,False,False,False,False


# Create Fourier Features

In [14]:
def createFourierFeatures(index, weekofyear, frame=None, seasonality=52):
    """Write the first four Fourier (cos/sin) seasonality terms for one row.

    For s = 1..4 sets ``FreqCos<s>`` and ``FreqSin<s>`` of row ``index`` to
    cos / sin of ``weekofyear * 2 * pi * s / seasonality``.

    Parameters
    ----------
    index : label of the row to update.
    weekofyear : week number used as the position within the season.
    frame : DataFrame updated in place. Defaults to the notebook-level
        ``predictionDf``.
    seasonality : length of one season in weeks (default 52).

    Returns
    -------
    None; ``frame`` is modified in place.
    """
    if frame is None:
        frame = predictionDf

    for s in range(1, 5):
        angle = weekofyear*2*math.pi*s/seasonality
        # One .loc write per cell instead of chained indexing
        # (frame[col].loc[index] = ...), which may assign to a temporary copy
        # and raises KeyError when the column does not exist yet.
        frame.loc[index, 'FreqCos' + str(s)] = math.cos(angle)
        frame.loc[index, 'FreqSin' + str(s)] = math.sin(angle)

In [15]:
# Series.iteritems() was removed in pandas 2.0; items() yields the same
# (row label, value) pairs.
for index, weekofyear in predictionDf['WeekOfYear'].items():
    createFourierFeatures(index, weekofyear)

In [16]:
# Inspect the generated Fourier terms
predictionDf.loc[:, ['FreqCos1', 'FreqSin1', 'FreqCos2', 'FreqSin2', 'FreqCos3', 'FreqSin3', 'FreqCos4', 'FreqSin4']]

Unnamed: 0,FreqCos1,FreqSin1,FreqCos2,FreqSin2,FreqCos3,FreqSin3,FreqCos4,FreqSin4
0,0.970942,-0.2393157,0.885456,-0.4647232,0.748511,-0.6631227,0.568065,-0.8229839
1,0.992709,-0.1205367,0.970942,-0.2393157,0.935016,-0.3546049,0.885456,-0.4647232
2,1.0,6.432491e-16,1.0,1.286498e-15,1.0,-7.347881e-16,1.0,2.572996e-15
3,0.992709,0.1205367,0.970942,0.2393157,0.935016,0.3546049,0.885456,0.4647232


In [17]:
#Optional: graph fourier features
#sns.pointplot(x = 'Value', y = 'FreqCos1', data = df)
#sns.pointplot(x = 'Value', y = 'FreqSin1', data = df)

In [18]:
#visualize the columns included in the df for debugging purposes
#list(df.columns) 

# Filter Data and Create Dictionary for API Post

In [21]:
# Build the lookup used for the API payload: row label -> {column: value}
time_as_text = predictionDf['Time'].astype(str)
predictionDf['Time'] = time_as_text
# Transposing turns each row into a column, so to_dict('dict') is keyed by row label
frame_by_row = predictionDf.T
pre_dict = frame_by_row.to_dict('dict')
#print(pre_dict)

# Create function to get prediction from API

In [27]:

import urllib.request
import json
import os
import ssl

def getPrediction(index):
    """Request a forecast for one prepared row from the scoring web service.

    Posts ``pre_dict[index]`` as the single ``input0`` record and returns the
    decoded JSON reply.

    Parameters
    ----------
    index : key into the notebook-level ``pre_dict``.

    Returns
    -------
    The decoded JSON response, or None when the service answers with an HTTP
    error (the status and error body are printed in that case).
    """
    # construct data object for post
    data = {
        "Inputs": {"input0": [pre_dict[index]]},
        "GlobalParameters": {},
    }
    #print(data)
    body = str.encode(json.dumps(data))

    url = '<endpoint>'
    # Replace this with the API key for the web service.
    # NOTE(review): avoid committing a real key in the notebook; consider
    # reading it from an environment variable instead.
    api_key = '<key>'
    headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

    req = urllib.request.Request(url, body, headers)

    try:
        # The context manager closes the HTTP response even if reading or
        # decoding it fails.
        with urllib.request.urlopen(req) as response:
            return json.loads(response.read())
    except urllib.error.HTTPError as error:
        print(f'The request failed with status code: {str(error.code)} Error Info {error.info()}')
        error_text = error.read().decode("utf8", 'ignore')
        try:
            print(json.loads(error_text))
        except ValueError:
            # The error body is not valid JSON; show it raw rather than
            # raising a second exception from inside the handler.
            print(error_text)
        return None

# Get predictions

In [25]:

# Request and report a forecast for every prepared row
for index in range(len(predictionDf)):
    predictionResult = getPrediction(index)
    #print(predictionResult)
    if predictionResult is None:
        # getPrediction returns None after printing an HTTP error; skip this
        # row instead of failing with a TypeError on the lookups below.
        continue
    output = predictionResult['Results']['WebServiceOutput0'][0]
    # Values are rounded up to whole numbers before being reported
    prediction = math.ceil(output['Forecast'])
    storeId = math.ceil(output['ID1'])
    itemId = math.ceil(output['ID2'])
    time = predictionDf['Time'][index]
    print(f'For week {time} item {itemId} the prediction is {prediction} items in store {storeId}')

For week 2019-12-14 item 1 the prediction is 134 items in store 2
For week 2019-12-21 item 1 the prediction is 131 items in store 2
For week 2019-12-28 item 1 the prediction is 143 items in store 2
For week 2020-01-04 item 1 the prediction is 132 items in store 2
