# Neural Network in TensorFlow
Now we try a simple Multilayer Perceptron Network; i.e. a simple Neural Network

We will reconsider the data features prepared in a previous notebook: 3_XGBoost

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from pathlib import Path

root_path = '../input/demand-forecasting-kernels-only/'
train_original = pd.read_csv(Path(root_path, 'train.csv'), low_memory=False, 
                    parse_dates=['date'], index_col=['date'])
test_original = pd.read_csv(Path(root_path, 'test.csv'), low_memory=False, 
                   parse_dates=['date'], index_col=['date'])
sample_sub_original = pd.read_csv(Path(root_path, 'sample_submission.csv'))

In [2]:
def SMAPE(forecast, actual):
    """
    Symmetric Mean Absolute Percentage Error: from https://www.kaggle.com/enolac5/time-series-arima-dnn-xgboost-comparison
    """
    masked_arr = ~((forecast==0)&(actual==0))
    diff = abs(forecast[masked_arr] - actual[masked_arr])
    avg = (abs(forecast[masked_arr]) + abs(actual[masked_arr]))/2
    
    print('SMAPE Error Score: ' + str(round(sum(diff/avg)/len(forecast) * 100, 2)) + ' %')

In [3]:
train = train_original.copy()
test = test_original.copy()

train['weekday'] = train.index.dayofweek
train['dayofyear'] = train.index.dayofyear
train['year'] = train.index.year
train['month'] = train.index.month
train = train.reset_index()

test['weekday'] = test.index.dayofweek
test['dayofyear'] = test.index.dayofyear
test['year'] = test.index.year
test['month'] = test.index.month
test = test.reset_index()

In [4]:
# One Hot Encode Months + drop first column to remove dummy variable trap
temp = pd.get_dummies(train['month'], prefix='is_month', drop_first=True)

for col in temp.columns:
    train[col] = temp[col]
    
# Determines if day is a weekend
# Assumption: Friday is NOT a weekend
train['is_weekend'] = train['weekday'] // 5 # 0 for Mon-Fri (0-4), 1 for Sat (5) and Sun (6)

from itertools import product

avg_sales = np.zeros(shape=(10, 50)) # (num_store, num_item)

for store, item in list(product(range(1, 11), range(1, 51))):
    avg_sales[store-1, item-1] = train.query(f'store == {store} & item == {item}')['sales'].mean()
    
avg_sales_col = []

for _, row in train.iterrows():
    store, item = row['store'], row['item']
    avg_sales_col.append(avg_sales[store-1, item-1])
    
train['avg_sales'] = avg_sales_col

In [5]:
# One Hot Encode Months + drop first column to remove dummy variable trap
temp = pd.get_dummies(test['month'], prefix='is_month', drop_first=True)

for col in temp.columns:
    test[col] = temp[col]

# add in columns manually (test data spans Jan-Mar only)
for i in range(4, 13):
    test[f'is_month_{i}'] = 0
    
# Determines if day is a weekend
# Assumption: Friday is NOT a weekend
test['is_weekend'] = test['weekday'] // 5 # 0 for Mon-Fri (0-4), 1 for Sat (5) and Sun (6)

# Recalculate avg_sales column for test dataframe
avg_sales_col = []

for _, row in test.iterrows():
    store, item = row['store'], row['item']
    avg_sales_col.append(avg_sales[store-1, item-1])
    
test['avg_sales'] = avg_sales_col

### Prepare `X` and `y` dataframes

In [6]:
X = train.copy()
X.drop(['sales','date'], axis=1, inplace=True)
y = train['sales']

In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

### Create the model
We create a simple NN

In [8]:
tf.keras.backend.clear_session()
tf.random.set_seed(42)

model = Sequential([
    Dense(20, activation="relu"),
    Dense(10, activation="relu"),
    Dense(1),
])

model.compile(loss='mse', optimizer=Adam())

In [9]:
print(f"X: {X.shape}")
print(f"y: {y.shape}")

X: (913000, 19)
y: (913000,)


In [10]:
epochs = 50
batch_size = 256

model.fit(X, y, epochs=epochs, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fcfe80ae510>

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                400       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 621
Trainable params: 621
Non-trainable params: 0
_________________________________________________________________


In [12]:
X_test = test.copy()
X_test.drop(['id', 'date'], axis=1, inplace=True)

test_preds = model.predict(X_test)

sample_sub = sample_sub_original.copy()
sample_sub['sales'] = test_preds
sample_sub['sales'] = np.round(sample_sub['sales']).astype(int)
sample_sub.head()

Unnamed: 0,id,sales
0,0,9
1,1,11
2,2,12
3,3,13
4,4,14


In [13]:
sample_sub.to_csv('./submission.csv', index=False) # Public Score of 19.92