In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the labelled CSV: column 0 is read as the class label and columns
# 1..784 as the input features.
data=pd.read_csv('/content/drive/MyDrive/Training dataset/classification_train.csv')

# Features are stored one sample per column (features x samples) and scaled
# by 1/255 (presumably 8-bit pixel intensities -- confirm against the data).
# Rows 0..23999 form the training split, rows 24000..29999 the held-out split.
X_train=((data.iloc[0:24000,1:785].values).T)/255
X_test=((data.iloc[24000:30000,1:785].values).T)/255

Y=data.iloc[:,0].values
Y2=data.iloc[0:24000,0].values  # raw (non-encoded) labels of the training split

# One-hot encoding: Y1[i, j] is 1 exactly when sample i carries label j.
# Comparing the label column against 0..9 with broadcasting builds the whole
# matrix in one vectorised step and sizes it from the data rather than from a
# hard-coded row count.
Y1=(Y[:,None]==np.arange(10)).astype(float)
# Y1 now holds the target values for the neural network

# Split the targets the same way as the features (classes x samples)
Y_train=(Y1[0:24000,:]).T
Y_test=(Y1[24000:30000,:]).T

print(np.shape(Y_test))

(10, 6000)


In [3]:
def param(dimensions):
    """Build the initial weights and biases for a fully connected network.

    Parameters
    ----------
    dimensions : sequence of int
        Layer sizes, input layer first and output layer last.

    Returns
    -------
    (w, b) : tuple of dict
        Both dicts are keyed by layer number 1..len(dimensions)-1.
        ``w[k]`` has shape (dimensions[k], dimensions[k-1]) and is drawn from
        a standard normal scaled by 1/sqrt(dimensions[k-1]); ``b[k]`` is a
        zero column vector of shape (dimensions[k], 1).
    """
    w, b = {}, {}
    layer_pairs = zip(dimensions[:-1], dimensions[1:])
    for layer, (fan_in, fan_out) in enumerate(layer_pairs, start=1):
        # Scaling by 1/sqrt(fan_in) keeps the initial pre-activations at a
        # comparable magnitude regardless of the width of the previous layer.
        w[layer] = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        b[layer] = np.zeros((fan_out, 1))
    return w, b

In [4]:
# Sanity check of the initialiser: with layer sizes 784 -> 28 -> 10 the
# second weight matrix should come out as (10, 28).
w1, b1 = param([784, 28, 10])
print(w1[2].shape)

(10, 28)


In [5]:
def softmax(z):
    """Column-wise softmax of ``z``.

    Parameters
    ----------
    z : array_like
        Scores; the normalisation runs along axis 0, so each column
        (or the whole vector for 1-D input) sums to 1.

    Returns
    -------
    ndarray
        Array of the same shape as ``z`` with non-negative entries.

    Notes
    -----
    The per-column maximum is subtracted before exponentiating. Softmax is
    invariant to that shift, and it keeps ``np.exp`` from overflowing to
    ``inf`` (which would turn the result into NaN) for large scores.
    """
    shifted = z - np.max(z, axis=0, keepdims=True)
    expZ = np.exp(shifted)
    return expZ / np.sum(expZ, axis=0, keepdims=True)

def relu(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    return np.maximum(0, Z)

def tanh(x):
    """Hyperbolic tangent activation, applied element-wise via NumPy."""
    return np.tanh(x)

def forward_prop(X, w, b, activation):
    """Run one forward pass through the network.

    Parameters
    ----------
    X : ndarray
        Input batch; it is multiplied as ``w[1].dot(X)``, so samples are
        expected along axis 1.
    w, b : dict
        Weights and biases keyed by layer number 1..n.
    activation : str
        ``'tanh'`` selects tanh for the hidden layers; any other value
        selects ReLU. The last layer always uses softmax.

    Returns
    -------
    (output, a, z) : tuple
        ``output`` is the softmax output ``a[n]``; ``a`` maps layer number to
        activations (with ``a[0]`` being ``X``); ``z`` maps layer number to
        pre-activations.
    """
    n = len(w)
    hidden_fn = tanh if activation == 'tanh' else relu

    a = {0: X}
    z = {}
    # Hidden layers 1..n-1 share the selected activation.
    for layer in range(1, n):
        pre = w[layer].dot(a[layer - 1]) + b[layer]
        z[layer] = pre
        a[layer] = hidden_fn(pre)

    # Output layer: affine map followed by softmax.
    z[n] = w[n].dot(a[n - 1]) + b[n]
    a[n] = softmax(z[n])

    return a[n], a, z

In [6]:
def compute_cost(AN, Y, eps=1e-12):
    """Mean cross-entropy between predicted probabilities and targets.

    Parameters
    ----------
    AN : ndarray
        Predicted probabilities, same shape as ``Y``.
    Y : ndarray
        Targets with samples along axis 1 (``Y.shape[1]`` is used as the
        sample count); expected to be one-hot encoded.
    eps : float, optional
        Lower bound applied to ``AN`` before taking the logarithm.

    Returns
    -------
    Scalar cost ``-(1/m) * sum(Y * log(AN))``.

    Notes
    -----
    Probabilities are floored at ``eps`` because a softmax output can
    underflow to exactly 0. Without the floor, ``0 * log(0)`` evaluates to
    NaN and poisons the whole sum even when the corresponding target entry is
    0; with it, a confidently wrong prediction yields a large finite cost
    instead of ``inf``.
    """
    m = Y.shape[1]
    log_probs = np.log(np.maximum(AN, eps))
    cost = -(1./m) * np.sum(Y * log_probs)
    cost = np.squeeze(cost)
    return cost

In [7]:
def d_relu(Z):
    """Derivative of ReLU: 1.0 where ``Z`` is strictly positive, else 0.0."""
    is_positive = Z > 0
    return np.array(is_positive, dtype='float')

def d_tanh(x):
    """Derivative of tanh evaluated at ``x``: ``1 - tanh(x)**2``."""
    t = np.tanh(x)
    return 1 - np.power(t, 2)

def backward_prop(an, Y, w, b, a, z, activation):
    """Back-propagate the softmax/cross-entropy loss through the network.

    Parameters
    ----------
    an : ndarray
        Network output (softmax probabilities), same shape as ``Y``.
    Y : ndarray
        Target values.
    w, b : dict
        Weights and biases keyed by layer number 1..n (``b`` is accepted for
        signature symmetry but is not read here).
    a, z : dict
        Activations (``a[0]`` is the input) and pre-activations as returned
        by the forward pass.
    activation : str
        ``'tanh'`` for tanh hidden layers; any other value means ReLU.

    Returns
    -------
    (dw, db) : tuple of dict
        Gradients of the mean cost w.r.t. each ``w[k]`` and ``b[k]``.
    """
    dw = {}
    db = {}
    dz = {}
    n = len(w)
    m = a[n].shape[1]
    d_act = d_tanh if activation == 'tanh' else d_relu

    # Softmax combined with cross-entropy gives this simple output-layer error.
    dz[n] = an - Y
    dw[n] = 1./m * np.dot(dz[n], a[n-1].T)
    db[n] = 1./m * np.sum(dz[n], axis=1, keepdims=True)

    for i in reversed(range(1, n)):
        # The activation derivative must be evaluated at the pre-activation
        # z[i]. Feeding it the activation a[i] would compute
        # 1 - tanh(tanh(z))**2 for tanh, which is not the true gradient
        # (for ReLU both choices coincide because a > 0 exactly when z > 0).
        dz[i] = np.dot(w[i+1].T, dz[i+1]) * d_act(z[i])
        dw[i] = 1./m * np.dot(dz[i], a[i-1].T)
        db[i] = 1./m * np.sum(dz[i], axis=1, keepdims=True)

    return dw, db

In [8]:
def update_parameters(w, b, dw, db, alpha):
    """Apply one gradient-descent step to every layer.

    The dicts ``w`` and ``b`` are updated in place (each entry is rebound to
    a new array ``value - alpha * gradient``) and also returned.

    Parameters
    ----------
    w, b : dict
        Weights and biases keyed by layer number 1..len(w).
    dw, db : dict
        Matching gradients with the same keys.
    alpha : float
        Learning rate.
    """
    n_layers = len(w)
    for layer in range(1, n_layers + 1):
        w[layer] = w[layer] - alpha * dw[layer]
        b[layer] = b[layer] - alpha * db[layer]
    return w, b

In [9]:
def predict(X, y, w, b, activation):
    """Return the classification accuracy of the network on ``(X, y)``.

    Parameters
    ----------
    X : ndarray
        Inputs with samples along axis 1 (``X.shape[1]`` is the sample count).
    y : ndarray
        Targets; the true class of each sample is taken as the argmax along
        axis 0.
    w, b : dict
        Network parameters passed through to the forward pass.
    activation : str
        Hidden-layer activation name passed through to the forward pass.

    Returns
    -------
    Fraction of samples whose predicted class equals the true class,
    rounded to 4 decimals.
    """
    m = X.shape[1]
    probs, _, _ = forward_prop(X, w, b, activation)
    true_cls = np.argmax(y, 0)
    pred_cls = np.argmax(probs, 0)
    per_sample = (pred_cls == true_cls) / m
    return np.round(np.sum(per_sample), 4)

In [10]:
def model(X, Y, dimensions, alpha, activation, num_iterations, X_eval=None, Y_eval=None):
    """Train the network with full-batch gradient descent and plot the cost.

    Parameters
    ----------
    X, Y : ndarray
        Training inputs and targets, samples along axis 1.
    dimensions : sequence of int
        Layer sizes, input layer first and output layer last.
    alpha : float
        Learning rate.
    activation : str
        Hidden-layer activation (``'tanh'``, anything else means ReLU).
    num_iterations : int
        Number of gradient-descent steps.
    X_eval, Y_eval : ndarray, optional
        Data used for the accuracy printed every 100 iterations. When left as
        ``None`` the notebook-level ``X_test`` / ``Y_test`` are used, so the
        progress report no longer has to depend on hidden global state.

    Returns
    -------
    (w, b) : tuple of dict
        Trained weights and biases keyed by layer number.
    """
    np.random.seed(1)  # fixed seed so every run starts from the same weights
    costs = []
    w, b = param(dimensions)
    for i in range(0, num_iterations):
        an, a, z = forward_prop(X, w, b, activation)
        cost = compute_cost(an, Y)
        costs.append(cost)
        dw, db = backward_prop(an, Y, w, b, a, z, activation)
        w, b = update_parameters(w, b, dw, db, alpha)
        if (i % 100) == 0:
            # Resolve the evaluation set lazily so the globals are only
            # touched when the caller did not supply one.
            eval_X = X_test if X_eval is None else X_eval
            eval_Y = Y_test if Y_eval is None else Y_eval
            print("iterations: ",i, "cost: ",np.round(cost, 4), "test accuracy: ", (predict(eval_X, eval_Y, w, b, activation))*100,"%")
    t = np.arange(0, num_iterations)
    plt.plot(t, costs)
    plt.xlabel("iteration")
    plt.ylabel("cost")
    plt.show()
    return w, b

In [None]:
# Ask for the architecture interactively: the number of hidden layers first,
# then the width of each hidden layer in order.
n = int(input("enter the value of number of hidden layers: "))

# Layer sizes: 784 inputs, n hidden layers (filled in below), 10 outputs.
dim = [0] * (n + 2)
dim[0], dim[-1] = 784, 10
for i in range(n):
    dim[1 + i] = int(input("enter the neurons in hidden layer "))

# Training hyper-parameters
alpha = 0.035   # learning rate
num = 4001      # number of gradient-descent iterations

w, b = model(X_train, Y_train, dim, alpha, 'relu', num)