In [1]:
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
%cd "/gdrive/MyDrive/BlackBelt/DLFundamentals/009A_Assignment_1_BigMart/"
!pwd

/gdrive/MyDrive/BlackBelt/DLFundamentals/009A_Assignment_1_BigMart
/gdrive/MyDrive/BlackBelt/DLFundamentals/009A_Assignment_1_BigMart


# Steps to build a Neural Network using Keras

<ol>1. Loading the dataset</ol>
<ol>2. Creating training and validation set</ol>
<ol>3. Defining the architecture of the model</ol>
<ol>4. Compiling the model (defining loss function, optimizer)</ol>
<ol>5. Training the model</ol>
<ol>6. Evaluating model performance on training and validation set</ol>

In [3]:
# importing the required libraries
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# loading the pre-processed dataset
data = pd.read_csv('train_modified.csv')

In [5]:
# looking at the first five rows of the dataset
data.head()

Unnamed: 0,Item_Identifier,Item_Weight,Item_Visibility,Item_MRP,Outlet_Identifier,Item_Outlet_Sales,Outlet_Years,Item_Fat_Content_LF,Item_Fat_Content_R,Item_Type_Baking Goods,...,Outlet_Location_Type_Tier 1,Outlet_Location_Type_Tier 2,Outlet_Location_Type_Tier 3,Outlet_Type_Grocery Store,Outlet_Type_Supermarket Type1,Outlet_Type_Supermarket Type2,Outlet_Type_Supermarket Type3,Item_Type_Combined_Drinks,Item_Type_Combined_Food,Item_Type_Combined_Non-Consumable
0,FDA15,9.3,0.016047,249.8092,OUT049,3735.138,14,1,0,0,...,1,0,0,0,1,0,0,0,1,0
1,DRC01,5.92,0.019278,48.2692,OUT018,443.4228,4,0,1,0,...,0,0,1,0,0,1,0,1,0,0
2,FDN15,17.5,0.01676,141.618,OUT049,2097.27,14,1,0,0,...,1,0,0,0,1,0,0,0,1,0
3,FDX07,19.2,0.0,182.095,OUT010,732.38,15,0,1,0,...,0,0,1,1,0,0,0,0,1,0
4,NCD19,8.93,0.0,53.8614,OUT013,994.7052,26,1,0,0,...,0,0,1,0,1,0,0,0,0,1


In [6]:
# checking missing values
data.isnull().sum()

Item_Identifier                      0
Item_Weight                          0
Item_Visibility                      0
Item_MRP                             0
Outlet_Identifier                    0
Item_Outlet_Sales                    0
Outlet_Years                         0
Item_Fat_Content_LF                  0
Item_Fat_Content_R                   0
Item_Type_Baking Goods               0
Item_Type_Breads                     0
Item_Type_Breakfast                  0
Item_Type_Canned                     0
Item_Type_Dairy                      0
Item_Type_Frozen Foods               0
Item_Type_Fruits and Vegetables      0
Item_Type_Hard Drinks                0
Item_Type_Health and Hygiene         0
Item_Type_Household                  0
Item_Type_Meat                       0
Item_Type_Others                     0
Item_Type_Seafood                    0
Item_Type_Snack Foods                0
Item_Type_Soft Drinks                0
Item_Type_Starchy Foods              0
Outlet_Identifier_1_OUT01

In [7]:
# checking the data type
data.dtypes

Item_Identifier                       object
Item_Weight                          float64
Item_Visibility                      float64
Item_MRP                             float64
Outlet_Identifier                     object
Item_Outlet_Sales                    float64
Outlet_Years                           int64
Item_Fat_Content_LF                    int64
Item_Fat_Content_R                     int64
Item_Type_Baking Goods                 int64
Item_Type_Breads                       int64
Item_Type_Breakfast                    int64
Item_Type_Canned                       int64
Item_Type_Dairy                        int64
Item_Type_Frozen Foods                 int64
Item_Type_Fruits and Vegetables        int64
Item_Type_Hard Drinks                  int64
Item_Type_Health and Hygiene           int64
Item_Type_Household                    int64
Item_Type_Meat                         int64
Item_Type_Others                       int64
Item_Type_Seafood                      int64
Item_Type_

In [8]:
# looking at the shape of the data
data.shape

(8523, 48)

In [10]:
# separating the independent and dependent variables

# storing all the independent variables as X
X = data.drop(['Item_Outlet_Sales','Item_Identifier','Outlet_Identifier'], axis=1)

# storing the dependent variable as y
y = data['Item_Outlet_Sales']

In [11]:
# shape of independent and dependent variables
X.shape, y.shape

((8523, 45), (8523,))

## 2. Creating training and validation set

In [12]:
# Creating training and validation set

# stratify will make sure that the distribution of classes in train and validation set it similar
# random state to regenerate the same train and validation set
# test size 0.2 will keep 20% data in validation and remaining 80% in train set

X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=10,test_size=0.2)

In [13]:
# shape of training and validation set
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)

(((6818, 45), (6818,)), ((1705, 45), (1705,)))

## 3. Defining the architecture of the model

In [14]:
# importing the sequential model
from keras.models import Sequential
# importing different layers from keras
from keras.layers import InputLayer, Dense 

In [15]:
import tensorflow as tf

In [16]:
# number of input neurons
X_train.shape

(6818, 45)

In [17]:
# number of features in the data
X_train.shape[1]

45

In [18]:
# defining input neurons
input_neurons = X_train.shape[1]

In [19]:
# define number of output neurons
output_neurons = 1

In [20]:
# activation function of different layers

# for now I have picked relu as an activation function for hidden layers, you can change it as well
# since it is a regression problem, I have used linear activation function in the final layer

In [21]:
# define hidden layers and neuron in each layer
number_of_hidden_layers = 2
neuron_hidden_layer_1 = 1024
neuron_hidden_layer_2 = 64

In [22]:
model = Sequential()
model.add(InputLayer(input_shape=(input_neurons,)))
model.add(Dense(units=neuron_hidden_layer_1, activation='relu'))
model.add(Dense(units=neuron_hidden_layer_2, activation='relu'))
model.add(Dense(units=output_neurons, activation='linear'))

In [23]:
# summary of the model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1024)              47104     
                                                                 
 dense_1 (Dense)             (None, 64)                65600     
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 112,769
Trainable params: 112,769
Non-trainable params: 0
_________________________________________________________________


In [24]:
# compiling the model

# loss as binary_crossentropy, since we have binary classification problem
# defining the optimizer as adam
# Evaluation metric as accuracy

model.compile(loss='mse',optimizer='Adam',metrics=['mse'])

In [25]:
# training the model

# passing the independent and dependent features for training set for training the model

# validation data will be evaluated at the end of each epoch

# setting the epochs as 50

# storing the trained model in model_history variable which will be used to visualize the training process

model_history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [26]:
# getting predictions for the validation set
# prediction = model.predict_classes(X_test)
prediction = model.predict(X_test)
mse = tf.keras.losses.MeanSquaredError()
mse(prediction, y_test).numpy()



4612492.988727846

In [27]:
mae = tf.keras.losses.MeanAbsoluteError()
mae(prediction,y_test).numpy()

1669.0763992986742

In [32]:
from keras import backend as K
# def root_mean_squared_error(y_true, y_pred):
#         return K.sqrt(K.mean(K.square(y_pred - y_true)))

# def root_mean_squared_error(y_true, y_pred):
#         return K.sqrt(K.mean(K.square(y_pred - y_true)))
rmse = tf.keras.metrics.RootMeanSquaredError()

rmse(y_test, prediction).numpy()

1070.9346

In [33]:
# predict on test dataset.
test_data = pd.read_csv('test_modified.csv')
test_data.head()

Unnamed: 0,Item_Identifier,Item_Weight,Item_Visibility,Item_MRP,Outlet_Identifier,Outlet_Years,Item_Fat_Content_LF,Item_Fat_Content_R,Item_Type_Baking Goods,Item_Type_Breads,...,Outlet_Location_Type_Tier 1,Outlet_Location_Type_Tier 2,Outlet_Location_Type_Tier 3,Outlet_Type_Grocery Store,Outlet_Type_Supermarket Type1,Outlet_Type_Supermarket Type2,Outlet_Type_Supermarket Type3,Item_Type_Combined_Drinks,Item_Type_Combined_Food,Item_Type_Combined_Non-Consumable
0,FDW58,20.75,0.007565,107.8622,OUT049,14,1,0,0,0,...,1,0,0,0,1,0,0,0,1,0
1,FDW14,8.3,0.038428,87.3198,OUT017,6,0,1,0,0,...,0,1,0,0,1,0,0,0,1,0
2,NCN55,14.6,0.099575,241.7538,OUT010,15,1,0,0,0,...,0,0,1,1,0,0,0,0,0,1
3,FDQ58,7.315,0.015388,155.034,OUT017,6,1,0,0,0,...,0,1,0,0,1,0,0,0,1,0
4,FDY38,13.6,0.118599,234.23,OUT027,28,0,1,0,0,...,0,0,1,0,0,0,1,0,1,0


In [34]:
test_data.dtypes

Item_Identifier                       object
Item_Weight                          float64
Item_Visibility                      float64
Item_MRP                             float64
Outlet_Identifier                     object
Outlet_Years                           int64
Item_Fat_Content_LF                    int64
Item_Fat_Content_R                     int64
Item_Type_Baking Goods                 int64
Item_Type_Breads                       int64
Item_Type_Breakfast                    int64
Item_Type_Canned                       int64
Item_Type_Dairy                        int64
Item_Type_Frozen Foods                 int64
Item_Type_Fruits and Vegetables        int64
Item_Type_Hard Drinks                  int64
Item_Type_Health and Hygiene           int64
Item_Type_Household                    int64
Item_Type_Meat                         int64
Item_Type_Others                       int64
Item_Type_Seafood                      int64
Item_Type_Snack Foods                  int64
Item_Type_

In [35]:
# storing all the independent variables as X
X_sub = test_data.drop(['Item_Identifier','Outlet_Identifier'], axis=1)

# storing the dependent variable as y
# y = data['Item_Outlet_Sales']

In [36]:
prediction = model.predict(X_sub)



In [37]:
#Define a dataframe with IDs for submission:
sub = test_data[['Item_Identifier','Outlet_Identifier']]
sub['Item_Outlet_Sales'] = prediction

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [38]:
sub.head()

Unnamed: 0,Item_Identifier,Outlet_Identifier,Item_Outlet_Sales
0,FDW58,OUT049,1734.959351
1,FDW14,OUT017,1421.753418
2,NCN55,OUT010,605.368042
3,FDQ58,OUT017,2508.496338
4,FDY38,OUT027,5535.45752


In [39]:
#Export submission file
sub.to_csv("submission.csv",index=False)