In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import holidays 
from datetime import datetime, date
from sklearn.preprocessing import OneHotEncoder
%matplotlib inline

The purpose of this notebook is to predict sales for different items over a 90-day period. It accompanies the Kaggle competition found at https://www.kaggle.com/c/demand-forecasting-kernels-only. I chose to use a neural network for this project because I was interested to see how it would perform compared to other projects that used ARIMA models.

In [13]:
# Load the competition files from the working directory.
# The kernel that was uploaded to Kaggle read the same files from
# '../input/train.csv' and '../input/test.csv' instead.
train, test = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv')
)

In [14]:
# Preview the first rows to confirm the raw columns: date, store, item, sales.
train.head()

Unnamed: 0,date,store,item,sales
0,2013-01-01,1,1,13
1,2013-01-02,1,1,11
2,2013-01-03,1,1,14
3,2013-01-04,1,1,13
4,2013-01-05,1,1,10


In [15]:
# Summary statistics for the numeric columns (store, item, sales).
train.describe()

Unnamed: 0,store,item,sales
count,913000.0,913000.0,913000.0
mean,5.5,25.5,52.250287
std,2.872283,14.430878,28.801144
min,1.0,1.0,0.0
25%,3.0,13.0,30.0
50%,5.5,25.5,47.0
75%,8.0,38.0,70.0
max,10.0,50.0,231.0


In [16]:
# Column dtypes and non-null counts; `date` is still an object column here,
# so it has to be parsed before any date features can be derived.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 913000 entries, 0 to 912999
Data columns (total 4 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   date    913000 non-null  object
 1   store   913000 non-null  int64 
 2   item    913000 non-null  int64 
 3   sales   913000 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 27.9+ MB


## Neural Net

In order to make a useful neural network, I extracted important information from the date column. I engineered features for the day of the week, the day of the year, whether a particular day was a US holiday, and other date parts. Most of these features are mapped onto a scale from -1 to 1 by passing them through a cosine function. This gives the features a cyclical pattern that repeats year over year. I felt this would help the neural net account for the cyclical nature of sales from year to year and month to month.

In [17]:
def createmodeldataframe(mydf):
    """Build the model-ready feature frame from raw competition rows.

    Parameters
    ----------
    mydf : pandas.DataFrame
        Must contain ``date`` (anything ``pd.to_datetime`` can parse),
        ``store`` and ``item`` columns. Any other columns (for example
        ``sales`` or ``id``) are passed through untouched. The input frame
        is not modified.

    Returns
    -------
    pandas.DataFrame
        The pass-through columns, followed by the date features
        (``dayofweek``, ``month``, ``dayinmonth``, ``dayinyear``, ``year``,
        ``US_holiday``) and one indicator column per distinct store and item,
        named ``store_<value>`` / ``item_<value>``. The ``date``, ``store``
        and ``item`` columns themselves are dropped.

    Notes
    -----
    The one-hot encoder is fitted on every call, so two frames only get the
    same columns in the same order when they contain the same set of stores
    and items.
    """
    # Work on a copy so the caller's frame keeps its original columns/dtypes.
    frame = mydf.copy()
    frame["date"] = pd.to_datetime(frame["date"])
    when = frame["date"].dt

    # Cyclical encodings: each calendar position is mapped onto a cosine wave
    # (with a small phase shift) so the feature repeats with its period.
    frame['dayofweek'] = np.cos((2*np.pi*when.dayofweek/7) - .4)
    frame['month'] = np.cos((2*np.pi*when.month/12) - .1)
    frame['dayinmonth'] = np.cos((2*np.pi*when.day/30) - .1)
    frame['dayinyear'] = np.cos((2*np.pi*when.dayofyear/365) - .01)
    frame['year'] = when.year.astype('int64')

    # 1 when the date is a US holiday according to the `holidays` package.
    usholidays = holidays.US()
    frame['US_holiday'] = frame['date'].apply(lambda day: day in usholidays).astype(int)

    # One-hot encode store and item. String column names keep the frame's
    # labels a single type (newer scikit-learn estimators reject mixed
    # int/str column names), and reusing the frame's index keeps the concat
    # row-aligned even when the input does not have a default RangeIndex.
    onehot_source = ["store", "item"]
    ohe = OneHotEncoder()
    encoded = ohe.fit_transform(frame[onehot_source]).toarray()
    onehot_names = [
        f"{column}_{category}"
        for column, categories in zip(onehot_source, ohe.categories_)
        for category in categories
    ]
    df3 = pd.DataFrame(encoded, index=frame.index, columns=onehot_names)

    lastforecast = pd.concat([frame, df3], axis=1)
    cleanforecast = lastforecast.drop(['date', 'store', 'item'], axis=1)
    return cleanforecast

In [18]:
# Turn the raw training rows into the feature frame used for modelling
# (engineered date columns plus one-hot store/item; `sales` is carried along).
cleanforecast = createmodeldataframe(train)

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam

In [21]:
# Separate the regression target (`sales`) from the predictor columns.
y_train = cleanforecast.loc[:, 'sales']
X_train = cleanforecast.drop(columns=['sales'])

In [22]:

# Learn the per-column min/max on the training features, then rescale them.
# The fitted `scaler` is kept so the test features can get the same scaling.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)

In [23]:
from tensorflow.keras.callbacks import EarlyStopping
# Callback configured to watch `val_loss` (lower is better) with a patience of
# 50 epochs.
# NOTE(review): in the cells shown, this callback is never passed to
# model.fit, and that fit call supplies no validation data, so `val_loss`
# would not be available - confirm whether early stopping was intended.
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=50)

In [24]:
# Fully connected regression network: six ReLU hidden layers that narrow from
# 66 to 15 units, dropout after the second and third hidden layers, and a
# single linear output unit for the sales value.
model = Sequential([
    Dense(66, activation='relu'),
    Dense(40, activation='relu'),
    Dropout(.3),
    Dense(30, activation='relu'),
    Dropout(.2),
    Dense(20, activation='relu'),
    Dense(15, activation='relu'),
    Dense(15, activation='relu'),
    Dense(1),
])

# Mean squared error loss, optimised with Adam at its default settings.
model.compile(loss='mse', optimizer='adam')

In [26]:
# Train on the whole training set for 60 epochs in mini-batches of 256 rows.
model.fit(
    X_train,
    y_train.to_numpy(),
    epochs=60,
    batch_size=256,
)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


<tensorflow.python.keras.callbacks.History at 0x2453c4ca040>

## Submission

In [28]:
# Build the test features with the same pipeline that produced the training
# features, keeping the row ids aside for the submission file.
testdf = pd.read_csv("test.csv")
testdata = createmodeldataframe(testdf)
testids = testdata.id
# `id` is only a row identifier, not a feature the scaler/model were fit on.
testdata = testdata.drop('id', axis=1)
testdata = scaler.transform(testdata)
# The network ends in a single output unit, so predict() yields one column per
# row; flatten it to 1-D so it can be used directly as a DataFrame column.
testpreds = model.predict(testdata).ravel()

# added a slight change to predictions to get better results. After looking at a graph of
# predicted vs actual_preds on some of my first attempts at this NN, seemed like a transformation
# would allow predictions to more closely align with actual results seen
# The linear output layer can emit negative values, and a negative number
# raised to a fractional power is NaN; sales cannot be negative, so floor the
# predictions at 0 before applying the power transform.
testpreds = np.clip(testpreds, 0, None)**1.07

In [None]:
# Create submission
# np.ravel guarantees a 1-D `sales` column: a DataFrame column cannot be built
# from a 2-D (n_rows, 1) prediction array.
sub = pd.DataFrame({'id': testids, 'sales': np.ravel(testpreds)})
sub.to_csv('submission.csv', index=False)
sub.head()