# Setting up the Environment

In [1]:
import numpy as np
import pandas as pd

# Getting the Data

In [2]:
data = pd.read_csv("./train.csv")
data.sample(frac = 1).reset_index(drop = True)
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
print("No of Examples: ",data.shape[0])

No of Examples:  42000


# Splitting the Data into Train and Validation

In [4]:
train_data = data.iloc[:33600]
valid_data = data.iloc[33600:]

In [5]:
print(train_data.shape)
print(valid_data.shape)

(33600, 785)
(8400, 785)


In [6]:
X_train = train_data.iloc[:,1:].reset_index(drop = True)
y_train = train_data.iloc[:,0].reset_index(drop = True)

X_valid = valid_data.iloc[:,1:].reset_index(drop = True)
y_valid = valid_data.iloc[:,0].reset_index(drop = True)

In [7]:
print("X_train : ", X_train.shape)
print("y_train : ", y_train.shape)
print("X_valid : ", X_valid.shape)
print("y_valid : ", y_valid.shape)

X_train :  (33600, 784)
y_train :  (33600,)
X_valid :  (8400, 784)
y_valid :  (8400,)


# Getting the Train Input and Labels into Shape

In [8]:
X_train = X_train.to_numpy().transpose()

In [9]:
# One Hot Encoding the Labels
label_mat = np.zeros((10, y_train.shape[0]))

for i in range(y_train.shape[0]):
    label_mat[y_train[i]][i] = 1
    
y_train_onehot = label_mat

In [10]:
print("X_train : ", X_train.shape)
print("y_train : ", y_train_onehot.shape)

X_train :  (784, 33600)
y_train :  (10, 33600)


# Rescaling the Train Input

In [11]:
X_train = X_train / 255

# Modelling the Neural Network and Functions

In [12]:
def sigmoid(x):
    return 1.0/(1+ np.exp(-x))
  
def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1.0 - s) 

class NeuralNetwork:   
    def __init__(self, sizes): 
        np.random.seed(7)
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = np.array([np.zeros((y,1)) for y in sizes[1:]])
        self.weights = np.array([np.random.randn(y, x) * 0.05 for x, y in zip(sizes[:-1], sizes[1:])]) 

    def feedforward(self, a):        
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a
    
    def cost_func(self,a2,y):
        logloss = np.multiply(np.log(a2),y) + np.multiply((1-y),np.log(1-a2))
        cost = -( np.sum(logloss) ) / m
        cost = np.squeeze(cost)
        return cost
    
    def test(self,t):
        test_output = self.feedforward(t)
        t_label = test_output.argmax(axis = 0)
        return t_label

    def cost_derivative(self, output_activations, y):
        return (output_activations-y)    
        
    def backprop(self,x,y,alpha):
        n_b = np.array([np.zeros(b.shape) for b in self.biases])     
        n_w = np.array([np.zeros(w.shape) for w in self.weights])
        m = x.shape[1]
        
        # feedforward pass
        activation = x
        activations = [x] 
        zs = [] 
        for b, w in zip(self.biases, self.weights):            
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
            
        cost = self.cost_func(activations[-1],y)
       
        delta = self.cost_derivative(activations[-1], y)                                     
        n_b[-1] = (1/m) * delta.sum(axis=1,keepdims = True )        
        n_w[-1] = (1/m) * np.dot(delta, activations[-2].transpose())
        
        # l=1 means last layer, l=2 means 2nd layer layer, so on and so forth.
        for l in range(2, self.num_layers):
            z = zs[-l]
            sd = sigmoid_derivative(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sd
            n_b[-l] = (1/m) * delta.sum(axis=1, keepdims= True)
            n_w[-l] = (1/m) * np.dot(delta, activations[-l-1].transpose())

        self.weights = self.weights - alpha * n_w 
        self.biases = self.biases - alpha * n_b 
        
        return cost

# Training the Model

In [13]:
n, m = X_train.shape
print("No of Training Examples: ", m)
print("No of Pixels in each image: ", n)

No of Training Examples:  33600
No of Pixels in each image:  784


In [14]:
sizes = [n,500,10]  # No of units in each layers
learning_rate = 0.1
epochs = 10000

In [15]:
nn = NeuralNetwork(sizes)

In [16]:
for i in range(epochs):
    cost = nn.backprop(X_train, y_train_onehot, learning_rate)
    if (i%10 == 0) or (i == epochs-1):
        print("Iteration: {0} ---> Cost: {1}".format(i,cost))   

Iteration: 0 ---> Cost: 7.814775643575334
Iteration: 10 ---> Cost: 3.1218717215801046
Iteration: 20 ---> Cost: 2.979932475022972
Iteration: 30 ---> Cost: 2.843202139634936
Iteration: 40 ---> Cost: 2.710422967517665
Iteration: 50 ---> Cost: 2.581791068799056
Iteration: 60 ---> Cost: 2.458342814616878
Iteration: 70 ---> Cost: 2.341389246104713
Iteration: 80 ---> Cost: 2.2320924085424063
Iteration: 90 ---> Cost: 2.131221187181326
Iteration: 100 ---> Cost: 2.039070178169018
Iteration: 110 ---> Cost: 1.95549889096356
Iteration: 120 ---> Cost: 1.8800390182934674
Iteration: 130 ---> Cost: 1.8120210185758678
Iteration: 140 ---> Cost: 1.750686171214641
Iteration: 150 ---> Cost: 1.695268728916242
Iteration: 160 ---> Cost: 1.64504697095832
Iteration: 170 ---> Cost: 1.5993692284153718
Iteration: 180 ---> Cost: 1.5576627849316256
Iteration: 190 ---> Cost: 1.5194325387974248
Iteration: 200 ---> Cost: 1.4842543987014711
Iteration: 210 ---> Cost: 1.4517665821218577
Iteration: 220 ---> Cost: 1.42166063

Iteration: 1820 ---> Cost: 0.7076789642754283
Iteration: 1830 ---> Cost: 0.7068570176249789
Iteration: 1840 ---> Cost: 0.7060409868072124
Iteration: 1850 ---> Cost: 0.7052307674994099
Iteration: 1860 ---> Cost: 0.7044262576140597
Iteration: 1870 ---> Cost: 0.7036273572394632
Iteration: 1880 ---> Cost: 0.7028339685822222
Iteration: 1890 ---> Cost: 0.7020459959115282
Iteration: 1900 ---> Cost: 0.7012633455051984
Iteration: 1910 ---> Cost: 0.7004859255973921
Iteration: 1920 ---> Cost: 0.6997136463279443
Iteration: 1930 ---> Cost: 0.6989464196932657
Iteration: 1940 ---> Cost: 0.6981841594987478
Iteration: 1950 ---> Cost: 0.6974267813126239
Iteration: 1960 ---> Cost: 0.6966742024212343
Iteration: 1970 ---> Cost: 0.6959263417856424
Iteration: 1980 ---> Cost: 0.6951831199995661
Iteration: 1990 ---> Cost: 0.6944444592485637
Iteration: 2000 ---> Cost: 0.6937102832704453
Iteration: 2010 ---> Cost: 0.6929805173168591
Iteration: 2020 ---> Cost: 0.6922550881160157
Iteration: 2030 ---> Cost: 0.69153

Iteration: 3610 ---> Cost: 0.6046049795521108
Iteration: 3620 ---> Cost: 0.6041345674399439
Iteration: 3630 ---> Cost: 0.6036645694543629
Iteration: 3640 ---> Cost: 0.6031949807908467
Iteration: 3650 ---> Cost: 0.6027257967405835
Iteration: 3660 ---> Cost: 0.6022570126893554
Iteration: 3670 ---> Cost: 0.6017886241164341
Iteration: 3680 ---> Cost: 0.6013206265934936
Iteration: 3690 ---> Cost: 0.6008530157835305
Iteration: 3700 ---> Cost: 0.6003857874397989
Iteration: 3710 ---> Cost: 0.5999189374047572
Iteration: 3720 ---> Cost: 0.5994524616090254
Iteration: 3730 ---> Cost: 0.5989863560703539
Iteration: 3740 ---> Cost: 0.5985206168926035
Iteration: 3750 ---> Cost: 0.5980552402647379
Iteration: 3760 ---> Cost: 0.5975902224598225
Iteration: 3770 ---> Cost: 0.597125559834038
Iteration: 3780 ---> Cost: 0.5966612488257015
Iteration: 3790 ---> Cost: 0.5961972859542973
Iteration: 3800 ---> Cost: 0.5957336678195198
Iteration: 3810 ---> Cost: 0.595270391100323
Iteration: 3820 ---> Cost: 0.5948074

Iteration: 5400 ---> Cost: 0.5251824905080762
Iteration: 5410 ---> Cost: 0.5247651602969295
Iteration: 5420 ---> Cost: 0.5243481730680419
Iteration: 5430 ---> Cost: 0.5239315299189662
Iteration: 5440 ---> Cost: 0.5235152319425833
Iteration: 5450 ---> Cost: 0.5230992802269389
Iteration: 5460 ---> Cost: 0.5226836758550814
Iteration: 5470 ---> Cost: 0.5222684199049067
Iteration: 5480 ---> Cost: 0.5218535134490024
Iteration: 5490 ---> Cost: 0.5214389575545003
Iteration: 5500 ---> Cost: 0.5210247532829289
Iteration: 5510 ---> Cost: 0.5206109016900702
Iteration: 5520 ---> Cost: 0.5201974038258215
Iteration: 5530 ---> Cost: 0.5197842607340597
Iteration: 5540 ---> Cost: 0.5193714734525072
Iteration: 5550 ---> Cost: 0.5189590430126049
Iteration: 5560 ---> Cost: 0.5185469704393859
Iteration: 5570 ---> Cost: 0.5181352567513526
Iteration: 5580 ---> Cost: 0.5177239029603599
Iteration: 5590 ---> Cost: 0.5173129100714964
Iteration: 5600 ---> Cost: 0.5169022790829754
Iteration: 5610 ---> Cost: 0.51649

Iteration: 7180 ---> Cost: 0.45701195711877235
Iteration: 7190 ---> Cost: 0.45666586849410457
Iteration: 7200 ---> Cost: 0.4563201960541054
Iteration: 7210 ---> Cost: 0.45597493958467505
Iteration: 7220 ---> Cost: 0.45563009886713735
Iteration: 7230 ---> Cost: 0.4552856736782792
Iteration: 7240 ---> Cost: 0.45494166379039397
Iteration: 7250 ---> Cost: 0.4545980689713216
Iteration: 7260 ---> Cost: 0.45425488898448935
Iteration: 7270 ---> Cost: 0.453912123588954
Iteration: 7280 ---> Cost: 0.45356977253944114
Iteration: 7290 ---> Cost: 0.45322783558638546
Iteration: 7300 ---> Cost: 0.45288631247597205
Iteration: 7310 ---> Cost: 0.4525452029501748
Iteration: 7320 ---> Cost: 0.45220450674679696
Iteration: 7330 ---> Cost: 0.4518642235995104
Iteration: 7340 ---> Cost: 0.4515243532378948
Iteration: 7350 ---> Cost: 0.45118489538747636
Iteration: 7360 ---> Cost: 0.4508458497697665
Iteration: 7370 ---> Cost: 0.45050721610230127
Iteration: 7380 ---> Cost: 0.45016899409867817
Iteration: 7390 ---> C

Iteration: 8950 ---> Cost: 0.4019103461804209
Iteration: 8960 ---> Cost: 0.40163180481052496
Iteration: 8970 ---> Cost: 0.4013536007307722
Iteration: 8980 ---> Cost: 0.4010757333997353
Iteration: 8990 ---> Cost: 0.4007982022761062
Iteration: 9000 ---> Cost: 0.40052100681871017
Iteration: 9010 ---> Cost: 0.40024414648651796
Iteration: 9020 ---> Cost: 0.3999676207386601
Iteration: 9030 ---> Cost: 0.39969142903443994
Iteration: 9040 ---> Cost: 0.39941557083334633
Iteration: 9050 ---> Cost: 0.39914004559506716
Iteration: 9060 ---> Cost: 0.3988648527795012
Iteration: 9070 ---> Cost: 0.39858999184677185
Iteration: 9080 ---> Cost: 0.3983154622572384
Iteration: 9090 ---> Cost: 0.3980412634715099
Iteration: 9100 ---> Cost: 0.39776739495045554
Iteration: 9110 ---> Cost: 0.39749385615521843
Iteration: 9120 ---> Cost: 0.3972206465472263
Iteration: 9130 ---> Cost: 0.39694776558820416
Iteration: 9140 ---> Cost: 0.3966752127401852
Iteration: 9150 ---> Cost: 0.39640298746552316
Iteration: 9160 ---> Co

# Getting the Train Accuracy

In [17]:
train_preds = nn.test(X_train)
count = 0
for i in range(X_train.shape[1]):
    if train_preds[i] == y_train[i]:
        count += 1
    
print("Training Accuracy: ", count / X_train.shape[1]) 

Training Accuracy:  0.9507440476190476


# Getting the Validation Accuracy

In [18]:
X_valid = X_valid.to_numpy().transpose()
X_valid = X_valid / 255

print("X_valid: ", X_valid.shape)
print("y_valid: ", y_valid.shape)

X_valid:  (784, 8400)
y_valid:  (8400,)


In [19]:
valid_preds = nn.test(X_valid)
count = 0
for i in range(X_valid.shape[1]):
    if valid_preds[i] == y_valid[i]:
        count += 1

print("Validation Accuracy: ", count / X_valid.shape[1])

Validation Accuracy:  0.945


# Getting the Test Data

In [20]:
test_data = pd.read_csv("./test.csv")
test_data.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
X_test = test_data.to_numpy().transpose()
X_test = X_test / 255
print("X_test: ", X_test.shape)

X_test:  (784, 28000)


In [22]:
test_preds = nn.test(X_test)
test_preds.shape

(28000,)

# Creating the Output CSV File

In [23]:
imageid = np.arange(test_preds.shape[0]) + 1
imageid

array([    1,     2,     3, ..., 27998, 27999, 28000])

In [24]:
df = pd.DataFrame({'ImageId': imageid, 'Label': test_preds})
df.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,7
4,5,3


In [25]:
df.to_csv('output_N_Hidden_Layers.csv', index = False)

**This model gave a score of 0.94171 (~94.17% accuracy) when the output was submitted to Kaggle**