In [1]:
import numpy as np
import pandas as pd

# Functions

In [2]:
def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    floor = 0
    return np.maximum(floor, z)
def relu_prime(z):
    """Derivative of ReLU: 1.0 where ``z > 0`` and 0.0 elsewhere, as a float array."""
    is_positive = z > 0
    return np.array(is_positive, dtype=np.float64)

def softmax(z):
    """Softmax normalised along axis 0.

    The maximum along axis 0 is subtracted before exponentiating so that
    large inputs do not overflow ``np.exp``; the result is unchanged by
    that shift.
    """
    shifted = z - np.max(z, axis=0)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

In [3]:
def initialize_parameters(layer_dims):
    """Build the initial weights and biases for consecutive layer sizes.

    For every adjacent pair ``(n_in, n_out)`` in ``layer_dims`` this creates
    ``W<k>`` of shape ``(n_out, n_in)`` drawn from ``np.random.randn`` and
    divided by ``sqrt(n_in)``, and ``b<k>`` as a zero column of shape
    ``(n_out, 1)``, with ``k`` counting from 1.

    Returns the dict holding all ``W<k>`` / ``b<k>`` entries.
    """
    parameters = {}
    fan_ins, fan_outs = layer_dims[:-1], layer_dims[1:]
    for idx, (n_in, n_out) in enumerate(zip(fan_ins, fan_outs), start=1):
        suffix = str(idx)
        parameters['W' + suffix] = np.random.randn(n_out, n_in) / np.sqrt(n_in)
        parameters['b' + suffix] = np.zeros((n_out, 1))
    return parameters

In [4]:
def forward_propagation(X, parameters, activation):
    """Run the forward pass and keep every intermediate value.

    ``parameters`` holds ``W<k>`` / ``b<k>`` pairs; the number of layers is
    half the number of entries.  Every layer except the last applies
    ``activation`` to ``W.dot(A_prev) + b``; the last layer applies
    ``softmax``.

    Returns ``(A_last, forward_cache)`` where ``forward_cache`` maps
    ``'A0'`` to ``X`` and ``'Z<k>'`` / ``'A<k>'`` to each layer's
    pre-activation and activation.
    """
    n_layers = len(parameters) // 2
    forward_cache = {'A0': X}
    a_prev = X

    # Hidden layers: affine transform followed by the supplied activation.
    for idx in range(1, n_layers):
        z_hidden = parameters[f'W{idx}'].dot(a_prev) + parameters[f'b{idx}']
        forward_cache[f'Z{idx}'] = z_hidden
        a_prev = activation(z_hidden)
        forward_cache[f'A{idx}'] = a_prev

    # Output layer: affine transform followed by softmax.
    last = str(n_layers)
    z_out = parameters['W' + last].dot(forward_cache['A' + str(n_layers - 1)]) + parameters['b' + last]
    forward_cache['Z' + last] = z_out
    a_out = softmax(z_out)
    forward_cache['A' + last] = a_out

    return a_out, forward_cache

In [5]:
def compute_cost(AL, Y):
    """Cross-entropy between ``AL`` and ``Y``, averaged over ``Y.shape[1]``.

    A small epsilon (1e-8) is added inside the logarithm so that a
    probability of exactly zero does not produce ``-inf``.
    """
    epsilon = 1e-8
    n_samples = Y.shape[1]
    log_probs = np.log(AL + epsilon)
    return -(1 / n_samples) * np.sum(Y * log_probs)

In [6]:
def backward_propagation(AL, Y, parameters, forward_cache, activation):
    """Compute gradients for every layer from the cached forward pass.

    The output-layer error is ``AL - Y``.  For each earlier layer the error
    of the layer above is pushed back through that layer's weights and
    multiplied by ``relu_prime`` of the cached activation.

    NOTE: ``activation`` is accepted but not used here; the ReLU derivative
    is hard-coded.

    Returns a dict with ``dZ<k>``, ``dW<k>`` and ``db<k>`` for every layer;
    ``dW`` and ``db`` are scaled by ``1 / AL.shape[1]``.
    """
    grads = {}
    n_layers = len(parameters) // 2
    n_samples = AL.shape[1]

    def record(idx, dZ):
        # Store this layer's error and the weight/bias gradients derived from it.
        a_prev = forward_cache['A' + str(idx - 1)]
        grads[f'dZ{idx}'] = dZ
        grads[f'dW{idx}'] = 1 / n_samples * np.dot(dZ, a_prev.T)
        grads[f'db{idx}'] = 1 / n_samples * np.sum(dZ, axis=1, keepdims=True)
        return dZ

    dZ = record(n_layers, AL - Y)
    for idx in range(n_layers - 1, 0, -1):
        pushed_back = np.dot(parameters[f'W{idx + 1}'].T, dZ)
        dZ = record(idx, pushed_back * relu_prime(forward_cache[f'A{idx}']))

    return grads

In [7]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every ``W<k>`` / ``b<k>`` entry.

    Each entry is replaced by ``value - learning_rate * grads['d' + key]``.
    The ``parameters`` dict itself is updated and also returned; the arrays
    it previously held are not modified in place.
    """
    n_layers = len(parameters) // 2

    for idx in range(1, n_layers + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(idx)
            parameters[key] = parameters[key] - learning_rate * grads['d' + key]

    return parameters

In [8]:
def acc(X, y, parameters):
    """Predict classes for ``X`` and score them against ``y``.

    Runs ``forward_propagation`` with ``relu`` and takes the arg-max along
    axis 0 of both the network output and ``y``.

    Returns ``(predicted_classes, accuracy)`` with the accuracy rounded to
    four decimals.
    """
    probabilities, _ = forward_propagation(X, parameters, relu)
    predicted = np.argmax(probabilities, 0)
    actual = np.argmax(y, 0)
    hit_rate = np.mean(predicted == actual)
    return predicted, np.round(hit_rate, 4)

In [9]:
def model(X, Y, layers_dims, learning_rate = 0.009, activation = 'relu', num_iterations = 3000, X_val = None, Y_val = None):
    """Train the network with full-batch gradient descent, printing progress.

    Parameters
    ----------
    X, Y : training inputs and targets, passed unchanged to
        ``forward_propagation``, ``compute_cost`` and ``acc``.
    layers_dims : layer sizes handed to ``initialize_parameters``.
    learning_rate : step size used by ``update_parameters``.
    activation : hidden-layer activation, either a callable or the name
        ``'relu'``.
    num_iterations : number of gradient steps.
    X_val, Y_val : optional hold-out data for the ``test_acc`` figure.  When
        both are omitted the notebook-level ``x_cv`` / ``y_cv`` are used if
        they exist; otherwise ``test_acc`` is printed as ``n/a``.

    Returns
    -------
    The trained parameter dict.

    Raises
    ------
    ValueError
        If ``activation`` is an unknown name, or only one of ``X_val`` /
        ``Y_val`` is given.

    Notes
    -----
    Reseeds NumPy's global RNG with 1 so that runs are repeatable.  The
    printed cost is the one computed before that iteration's update, while
    the printed accuracies use the updated parameters.
    """
    # The default is the *name* 'relu'; resolve names to functions so the
    # default no longer fails with "'str' object is not callable".
    if isinstance(activation, str):
        named_activations = {'relu': relu}
        if activation not in named_activations:
            raise ValueError(f"Unknown activation {activation!r}; pass a callable or one of {sorted(named_activations)}")
        activation = named_activations[activation]

    if X_val is None and Y_val is None:
        # Backward compatibility: earlier versions read the hold-out split
        # straight from the notebook globals x_cv / y_cv.
        notebook_ns = globals()
        if 'x_cv' in notebook_ns and 'y_cv' in notebook_ns:
            X_val, Y_val = notebook_ns['x_cv'], notebook_ns['y_cv']
    elif X_val is None or Y_val is None:
        raise ValueError("X_val and Y_val must be provided together")

    def held_out_acc(current_parameters):
        # Accuracy on the hold-out split, or a placeholder when there is none.
        if X_val is None:
            return 'n/a'
        return acc(X_val, Y_val, current_parameters)[1]

    # Integer reporting period (about ten reports per run).  A float modulo
    # on num_iterations/10 misfires when num_iterations is not a multiple of 10.
    report_every = max(1, (num_iterations + 9) // 10)

    np.random.seed(1)
    parameters = initialize_parameters(layers_dims)
    cost = None
    for i in range(0, num_iterations):

        AL, forward_cache = forward_propagation(X, parameters, activation)
        cost = compute_cost(AL, Y)
        grads = backward_propagation(AL, Y, parameters, forward_cache, activation)
        parameters = update_parameters(parameters, grads, learning_rate)

        if i % report_every == 0:
            print(f"\nIterations:{i} \t cost: {cost} \t train_acc:{acc(X,Y,parameters)[1]} \t test_acc:{held_out_acc(parameters)}")
        if i % 10 == 0:
            print('==',end='')

    if cost is None:
        # No training step ran; report the cost of the initial parameters
        # instead of failing on an undefined AL.
        AL, _ = forward_propagation(X, parameters, activation)
        cost = compute_cost(AL, Y)
    print(f"\nfinal_cost:{cost},\t  train_acc:{acc(X,Y,parameters)[1]}\t test_acc:{held_out_acc(parameters)}")

    return parameters

In [10]:
def one_hot_encode(y,num_classes):
    """One-hot encode a vector of class labels.

    Parameters
    ----------
    y : 1-D array-like of labels; a label equal to ``i`` sets column ``i``.
    num_classes : number of columns in the result.

    Returns
    -------
    Float array of shape ``(len(y), num_classes)``.  Labels that match none
    of ``0 .. num_classes-1`` produce an all-zero row.
    """
    # One broadcast comparison replaces a Python loop over every
    # (class, sample) pair.
    labels = np.asarray(y).reshape(-1, 1)
    classes = np.arange(num_classes)
    return (labels == classes).astype(float)

# Load Data

In [16]:
# Labelled data: column 0 is the class label, the remaining columns are features.
df = pd.read_csv('Classification_train.csv')

# First 25000 rows -> training split. Samples become columns and features are
# divided by 255 (presumably 8-bit pixel intensities; confirm against the data).
z = np.array(df.head(25000))
y, x = z[:, 0], z[:, 1:]
x_train = x.T / 255
y_train = one_hot_encode(y, 10).T

# Last 5000 rows -> hold-out split reported as "test_acc" during training.
z_ = np.array(df.tail(5000))
y_cv = one_hot_encode(z_[:, 0], 10).T
x_cv = z_[:, 1:].T / 255

In [17]:
# Layer sizes: input width taken from the data, two hidden layers of 16 units, 10 outputs.
layer_dims = [x_train.shape[0], 16, 16, 10]

# Fit Data

In [18]:
# Train with ReLU hidden layers: 2500 full-batch steps at learning rate 0.1.
params = model(
    x_train,
    y_train,
    layer_dims,
    learning_rate=0.1,
    activation=relu,
    num_iterations=2500,
)


Iterations:0 	 cost: 2.2941250461146985 	 train_acc:0.1395 	 test_acc:0.143
Iterations:250 	 cost: 0.21132851807147224 	 train_acc:0.9427 	 test_acc:0.9406
Iterations:500 	 cost: 0.14234052904416367 	 train_acc:0.961 	 test_acc:0.958
Iterations:750 	 cost: 0.11679970136893345 	 train_acc:0.9677 	 test_acc:0.9624
Iterations:1000 	 cost: 0.10146925610468911 	 train_acc:0.9717 	 test_acc:0.9656
Iterations:1250 	 cost: 0.09071728931391142 	 train_acc:0.9744 	 test_acc:0.9676
Iterations:1500 	 cost: 0.08239837071539026 	 train_acc:0.9775 	 test_acc:0.9696
Iterations:1750 	 cost: 0.0755360492649674 	 train_acc:0.9794 	 test_acc:0.972
Iterations:2000 	 cost: 0.06968432416497312 	 train_acc:0.9808 	 test_acc:0.972
Iterations:2250 	 cost: 0.06458535390424562 	 train_acc:0.9826 	 test_acc:0.9734
final_cost:0.060158217216550514,	  train_acc:0.984	 test_acc:0.9742


# Load Test Data

In [19]:
# Unlabelled test set: column 0 is the sample ID, the remaining columns are
# features, transposed and scaled by 1/255 like the training data.
td = np.array(pd.read_csv('Classification_test.csv'))
ID, x_test = td[:, 0], td[:, 1:].T / 255

In [20]:
def predict(x,parameters):
    """Return the arg-max class (along axis 0) of the network output for ``x``.

    The forward pass is run with ``relu`` as the hidden-layer activation.
    """
    probabilities = forward_propagation(x, parameters, relu)[0]
    return np.argmax(probabilities, axis=0)

# Predict Test Data


In [21]:
# Predicted class for every test sample, paired column-wise with its ID for export.
y_test_pred = predict(x_test, params)
result = pd.DataFrame(np.c_[ID, y_test_pred], columns=['ID', 'Y_Predicted'])

In [22]:
# Write the predictions to disk.
# NOTE(review): with pandas' default index=True the row index is written as an
# extra leading column, and the file name has no .csv extension - confirm both are intended.
result.to_csv(path_or_buf='Neural_pred')