# Setting up the Environment

In [1]:
import numpy as np
import pandas as pd

# Getting the Data

In [2]:
data = pd.read_csv("./train.csv")
data.sample(frac = 1).reset_index(drop = True)
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
print("No of Examples: ", data.shape[0])

No of Examples:  42000


# Splitting the Data into Train and Validation

In [4]:
train_data = data.iloc[:33600]
valid_data = data.iloc[33600:]

In [5]:
print(train_data.shape)
print(valid_data.shape)

(33600, 785)
(8400, 785)


In [6]:
X_train = train_data.iloc[:,1:].reset_index(drop = True)
y_train = train_data.iloc[:,0].reset_index(drop = True)

X_valid = valid_data.iloc[:,1:].reset_index(drop = True)
y_valid = valid_data.iloc[:,0].reset_index(drop = True)

In [7]:
print("X_train : ", X_train.shape)
print("y_train : ", y_train.shape)
print("X_valid : ", X_valid.shape)
print("y_valid : ", y_valid.shape)

X_train :  (33600, 784)
y_train :  (33600,)
X_valid :  (8400, 784)
y_valid :  (8400,)


# Getting the Train Input and labels into Shape

In [8]:
X_train = X_train.to_numpy().transpose()

In [9]:
# One Hot Encoding the Labels
label_mat = np.zeros((10, y_train.shape[0]))

for i in range(y_train.shape[0]):
    label_mat[y_train[i]][i] = 1
    
y_train_onehot = label_mat

In [10]:
print("X_train : ", X_train.shape)
print("y_train : ", y_train_onehot.shape)

X_train :  (784, 33600)
y_train :  (10, 33600)


# Rescaling the Train Input

In [11]:
X_train = X_train / 255

# Modelling the Neural Network and Functions

In [12]:
def sigmoid(x):
    return 1.0/(1+ np.exp(-x))
  
def leaky_relu(x):
    xrr=np.array( [ [ x[i][j] if (x[i][j]>=0) else 0.01 * x[i][j] for i in range(x.shape[0]) ]for j in range(x.shape[1]) ] )
    return xrr.T
  
def leaky_relu_derivative(x):
    xrr_deriv = np.array( [ [ 1 if (x[i][j]>=0) else 0.01 for i in range(x.shape[0]) ]for j in range(x.shape[1]) ] )
    return xrr_deriv.T

  
def init_param(x, h, y):
    np.random.seed(7)
    w1 = np.random.randn(h, x) * 0.05
    b1 = np.zeros(shape=(h, 1))
    w2 = np.random.randn(y, h) * 0.05
    b2 = np.zeros(shape=(y, 1))
    parameters = {"Weight_1": w1,"Bias_1": b1,"Weight_2": w2,"Bias_2": b2}
    return parameters

def feedforward(X, parameters):
    w1 = parameters['Weight_1']
    b1 = parameters['Bias_1']
    w2 = parameters['Weight_2']
    b2 = parameters['Bias_2']
    z1 = np.dot(w1, X) + b1
    a1 = leaky_relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = sigmoid(z2)
    values = {"Z1": z1,"A1": a1,"Z2": z2,"A2": a2}
    return a2, values

def cost_func(a2, Y):
    m = Y.shape[1] 
    logloss = np.multiply(Y,np.log(a2)) + np.multiply((1 - Y), np.log(1 - a2))
    cost = - np.sum(logloss) / m
    cost = np.squeeze(cost)  
    return cost

def backprop(parameters, values, X, Y):
    m = X.shape[1]
    w1 = parameters['Weight_1']
    w2 = parameters['Weight_2']
    a1 = values['A1']
    a2 = values['A2']
    error2 = a2 - Y
    grad_w2 = (1 / m) * np.dot(error2, a1.T)
    grad_b2 = (1 / m) * np.sum(error2, axis=1, keepdims=True)
    error1 = np.multiply(np.dot(w2.T, error2), leaky_relu_derivative(a1))
    grad_w1 = (1 / m) * np.dot(error1, X.T)
    grad_b1 = (1 / m) * np.sum(error1, axis=1, keepdims=True)
    grads = {"GRAD_W1": grad_w1,"GRAD_B1": grad_b1,"GRAD_W2": grad_w2,"GRAD_B2": grad_b2}
    return grads

def update_param(parameters, grads, alpha = 0.05):
    w1 = parameters['Weight_1']
    b1 = parameters['Bias_1']
    w2 = parameters['Weight_2']
    b2 = parameters['Bias_2']
    grad_w1 = grads['GRAD_W1']
    grad_b1 = grads['GRAD_B1']
    grad_w2 = grads['GRAD_W2']
    grad_b2 = grads['GRAD_B2']
    w1 = w1 - alpha * grad_w1
    b1 = b1 - alpha * grad_b1
    w2 = w2 - alpha * grad_w2
    b2 = b2 - alpha * grad_b2
    parameters = {"Weight_1": w1,"Bias_1": b1,"Weight_2": w2,"Bias_2": b2}
    return parameters

# Training the Model

In [13]:
n, m = X_train.shape
print("No of Training Examples: ", m)
print("No of Pixels in each image: ", n)

No of Training Examples:  33600
No of Pixels in each image:  784


In [14]:
hidden_units = 500
output_units = 10
learning_rate = 0.1
epochs = 1500

In [15]:
parameters = init_param(n, hidden_units, output_units)

for i in range(epochs):
    A2,values = feedforward(X_train, parameters)
    cost = cost_func(A2, y_train_onehot)
    
    if (i%10 == 0):
        print("Iteration: ", i,"---> Cost: ", cost)
        
    grads = backprop(parameters, values, X_train, y_train_onehot)
    parameters = update_param(parameters, grads, learning_rate)

Iteration:  0 ---> Cost:  7.004992045159023
Iteration:  10 ---> Cost:  2.5610165115386834
Iteration:  20 ---> Cost:  1.9824854667393503
Iteration:  30 ---> Cost:  1.6453224751450652
Iteration:  40 ---> Cost:  1.440550232597137
Iteration:  50 ---> Cost:  1.3037581486585244
Iteration:  60 ---> Cost:  1.2051056804829217
Iteration:  70 ---> Cost:  1.1299703768414786
Iteration:  80 ---> Cost:  1.0704825949041197
Iteration:  90 ---> Cost:  1.0219560965054715
Iteration:  100 ---> Cost:  0.9814180877008539
Iteration:  110 ---> Cost:  0.9468892315470652
Iteration:  120 ---> Cost:  0.9170042226440303
Iteration:  130 ---> Cost:  0.8907860863012644
Iteration:  140 ---> Cost:  0.867505418865495
Iteration:  150 ---> Cost:  0.846625733813122
Iteration:  160 ---> Cost:  0.8277272799074561
Iteration:  170 ---> Cost:  0.810494300925325
Iteration:  180 ---> Cost:  0.79467552275459
Iteration:  190 ---> Cost:  0.7800614985383578
Iteration:  200 ---> Cost:  0.7664779863799969
Iteration:  210 ---> Cost:  0.7

# Getting the Train Accuracy

In [16]:
train_preds_probs, _ = feedforward(X_train, parameters)
train_preds_labels = train_preds_probs.argmax(axis = 0)

count = 0
for i in range(X_train.shape[1]):
    if train_preds_labels[i] == y_train[i]:
        count += 1
        
print("Training Accuracy: ", count / X_train.shape[1]) 

Training Accuracy:  0.9561607142857143


# Getting the Validation Accuracy

In [17]:
X_valid = X_valid.to_numpy().transpose()
X_valid = X_valid / 255
print("X_valid: ", X_valid.shape)
print("y_valid: ", y_valid.shape)

X_valid:  (784, 8400)
y_valid:  (8400,)


In [18]:
valid_preds_probs, _ = feedforward(X_valid, parameters)
valid_preds_labels = valid_preds_probs.argmax(axis = 0)

count = 0
for i in range(X_valid.shape[1]):
    if valid_preds_labels[i] == y_valid[i]:
        count += 1
    
print("Validation Accuracy: ", count / X_valid.shape[1])

Validation Accuracy:  0.9528571428571428


# Getting the Test Data

In [19]:
test_data = pd.read_csv("./test.csv")
test_data.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
X_test = test_data.to_numpy().transpose()
X_test = X_test / 255
print("X_test: ", X_test.shape)

X_test:  (784, 28000)


In [21]:
test_preds_probs, _ = feedforward(X_test, parameters)
test_preds_labels = test_preds_probs.argmax(axis = 0)
test_preds_labels.shape

(28000,)

# Creating the Output CSV File

In [22]:
imageid = np.arange(test_preds_labels.shape[0]) + 1
imageid

array([    1,     2,     3, ..., 27998, 27999, 28000])

In [23]:
df = pd.DataFrame({'ImageId': imageid, 'Label': test_preds_labels})
df.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3


In [24]:
df.to_csv('output_Leaky_ReLu_1_Hidden_Layers.csv', index = False)

**This model gave a score of 0.94514 (~94.5% accuracy) when the output was submitted to Kaggle**