## Libraries

In [29]:
import numpy as np
import matplotlib.pyplot as plt
import random

## Loading Data

#### Multiclass

In [30]:
# xTrain = np.loadtxt('train_X.csv', delimiter = ',').T
# yTrain = np.loadtxt('train_label.csv', delimiter = ',').T
# xTest = np.loadtxt('test_X.csv', delimiter = ',').T
# yTest = np.loadtxt('test_label.csv', delimiter = ',').T

# print("shape of xTrain :", xTrain.shape)
# print("shape of yTrain :", yTrain.shape)
# print("shape of xTest :", xTest.shape)
# print("shape of yTest :", yTest.shape)

# idx = random.randrange(0, xTrain.shape[1])
# plt.figure(figsize=(1.5, 1.5))
# plt.imshow(xTrain[:, idx].reshape(28, 28), cmap = 'gray')
# plt.show()

#### Binary

In [31]:
# xTrain = np.loadtxt('cat_train_x.csv', delimiter = ',')/255.0
# yTrain = np.loadtxt('cat_train_y.csv', delimiter = ',').reshape(1, xTrain.shape[1])
# xTest = np.loadtxt('cat_test_x.csv', delimiter = ',')/255.0
# yTest = np.loadtxt('cat_test_y.csv', delimiter = ',').reshape(1, xTest.shape[1])

# print("shape of xTrain :", xTrain.shape)
# print("shape of yTrain :", yTrain.shape)
# print("shape of xTest :", xTest.shape)
# print("shape of yTest :", yTest.shape)

# index = random.randrange(0, xTrain.shape[1])
# plt.figure(figsize=(2, 2))
# plt.imshow(xTrain[:, index].reshape(64,64, 3))
# plt.show()

In [32]:
# Synthetic binary-classification data set, one sample per column.
# Features: 10 rows of normally distributed "marks" (mean 50, std 15),
# clamped into [5, 95]. Labels: a single row of random 0/1 values that are
# drawn independently of the features.
marks = np.clip(np.random.normal(50, 15, size=(10, 100000)), 5, 95)
result = np.random.randint(2, size=(1, 100000))

# First 80000 columns are used for training, the remaining 20000 for testing.
nTrain = 80000
xTrain, yTrain = marks[:, :nTrain], result[:, :nTrain]
xTest, yTest = marks[:, nTrain:], result[:, nTrain:]

for caption, dataset in (
    ("shape of xTrain :", xTrain),
    ("shape of yTrain :", yTrain),
    ("shape of xTest :", xTest),
    ("shape of yTest :", yTest),
):
    print(caption, dataset.shape)

shape of xTrain : (10, 80000)
shape of yTrain : (1, 80000)
shape of xTest : (10, 20000)
shape of yTest : (1, 20000)


## Activation Functions

In [33]:
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The naive formula overflows in ``np.exp(-x)`` for large negative ``x``.
    Here only ``exp(-|x|)`` is evaluated, which always lies in (0, 1], so no
    overflow can occur:

    * ``x >= 0``: ``1 / (1 + exp(-x))``
    * ``x <  0``: ``exp(x) / (1 + exp(x))``  (the same value, rewritten)
    """
    z = np.exp(-np.abs(x))
    # Numerator is 1 for non-negative inputs and exp(x) for negative inputs;
    # the denominator 1 + exp(-|x|) is shared by both branches.
    return np.where(x >= 0, 1.0, z) / (1.0 + z)

def softmax(x):
    """Return the softmax of ``x`` taken along axis 0 (down each column).

    The maximum along axis 0 is subtracted before exponentiating. Softmax is
    invariant to that shift, and it keeps every exponent <= 0 so ``np.exp``
    cannot overflow into inf (which would otherwise yield inf / inf = NaN).
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    expX = np.exp(shifted)
    return expX / np.sum(expX, axis=0, keepdims=True)

def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise via NumPy."""
    activated = np.tanh(x)
    return activated

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    rectified = np.maximum(x, 0)
    return rectified

In [34]:
def derivative_tanh(x):
    """Derivative of tanh evaluated at ``x``: 1 - tanh(x)^2.

    Note that ``x`` is the point at which tanh is differentiated (the
    function applies ``np.tanh`` to it), not an already-activated value.
    """
    activated = np.tanh(x)
    return 1 - np.power(activated, 2)

def derivative_relu(x):
    """Derivative of ReLU: 1.0 where ``x`` is strictly positive, else 0.0.

    The result is returned as a float32 array.
    """
    positive = x > 0
    return np.array(positive, dtype=np.float32)

## Initialize Parameters

In [35]:
def initParameters(ldims):
    """Create the initial weights and biases for a fully connected network.

    ``ldims`` lists the layer sizes, input layer first. For every layer
    ``l >= 1`` the returned dict holds:

    * ``w{l}``: standard-normal values of shape (ldims[l], ldims[l-1]),
      scaled by 1 / sqrt(ldims[l-1]);
    * ``b{l}``: a zero column vector of shape (ldims[l], 1).

    Random numbers come from NumPy's global generator.
    """
    params = {}
    layerPairs = zip(ldims[:-1], ldims[1:])

    for layer, (fanIn, fanOut) in enumerate(layerPairs, start=1):
        # Scale by the fan-in so pre-activations keep a comparable variance.
        params[f'w{layer}'] = np.random.randn(fanOut, fanIn) / np.sqrt(fanIn)
        params[f'b{layer}'] = np.zeros((fanOut, 1))

    return params

## Forward Propagation

In [36]:
def forwardPropagation(X, params, aFun):
    """Run one forward pass and return ``(output, cache)``.

    ``params`` holds ``w{l}`` / ``b{l}`` for layers 1..L. Hidden layers use
    tanh when ``aFun == 'tanh'`` and ReLU for any other value. The output
    layer uses sigmoid when it has a single row and softmax otherwise.

    ``cache`` maps ``a0`` to ``X`` and, for every layer ``l``, ``z{l}`` to the
    pre-activation and ``a{l}`` to the activation.
    """
    nLayers = len(params) // 2
    hiddenActivation = tanh if aFun == 'tanh' else relu

    cache = {'a0': X}
    previous = X

    # Hidden layers 1 .. L-1.
    for l in range(1, nLayers):
        z = np.dot(params[f'w{l}'], previous) + params[f'b{l}']
        previous = hiddenActivation(z)
        cache[f'z{l}'] = z
        cache[f'a{l}'] = previous

    # Output layer L: the activation is picked from the number of output rows.
    zOut = np.dot(params[f'w{nLayers}'], previous) + params[f'b{nLayers}']
    cache[f'z{nLayers}'] = zOut

    output = sigmoid(zOut) if zOut.shape[0] == 1 else softmax(zOut)
    cache[f'a{nLayers}'] = output

    return output, cache

## Backpropagation

In [37]:
def backPropagation(aL, y, params, forwardCache, aFun):
    """Compute the gradients of the cost for every layer.

    Parameters:
        aL: output-layer activations; the number of columns is taken as the
            sample count ``m``.
        y: targets with the same shape as ``aL``.
        params: dict with ``w{l}`` / ``b{l}`` for layers 1..L.
        forwardCache: dict with ``a{l}`` and ``z{l}`` from the forward pass.
        aFun: ``'tanh'`` selects the tanh derivative for hidden layers; any
            other value selects the ReLU derivative.

    Returns:
        dict with ``dz{l}``, ``dw{l}`` and ``db{l}`` for layers 1..L.
    """
    grads = {}
    L = len(params) // 2
    m = aL.shape[1]
    derivative = derivative_tanh if aFun == 'tanh' else derivative_relu

    # Output layer: for a sigmoid or softmax output paired with the
    # cross-entropy cost, the gradient w.r.t. z reduces to (aL - y).
    grads[f'dz{L}'] = aL - y
    grads[f'dw{L}'] = 1./m * np.dot(grads[f'dz{L}'], forwardCache[f'a{L-1}'].T)
    grads[f'db{L}'] = 1./m * np.sum(grads[f'dz{L}'], axis=1, keepdims=True)

    for l in reversed(range(1, L)):
        upstream = np.dot(params[f'w{l+1}'].T, grads[f'dz{l+1}'])
        # The derivative helpers differentiate the activation at the value
        # they are given, so they must receive the pre-activation z{l}.
        # Passing a{l} would evaluate 1 - tanh(tanh(z))^2 for tanh layers.
        grads[f'dz{l}'] = upstream * derivative(forwardCache[f'z{l}'])

        # The 1/m averaging is applied once here, not on dz itself.
        grads[f'dw{l}'] = 1./m * np.dot(grads[f'dz{l}'], forwardCache[f'a{l-1}'].T)
        grads[f'db{l}'] = 1./m * np.sum(grads[f'dz{l}'], axis=1, keepdims=True)

    return grads

## Update Parameters

In [38]:
def updateParams(params, grads, eta):
    """Apply one gradient-descent step and return ``params``.

    Each ``w{l}`` / ``b{l}`` entry is replaced by
    ``value - eta * grads['d' + key]``. The dict is updated in place (entries
    are rebound to new arrays) and the same dict object is returned.
    """
    nLayers = len(params) // 2

    for layer in range(1, nLayers + 1):
        for kind in ('w', 'b'):
            key = f'{kind}{layer}'
            params[key] = params[key] - eta * grads[f'd{key}']

    return params

## Cost Function, Accuracy, Prediction

In [39]:
def costFun(aL, y):
    """Return the mean cross-entropy cost of predictions ``aL`` against ``y``.

    When ``y`` has a single row the binary cross-entropy is used; otherwise
    the categorical cross-entropy ``-sum(y * log(aL)) / m`` is used, where
    ``m`` is the number of columns of ``y``.

    Probabilities are clipped into [eps, 1 - eps] before taking logarithms.
    Without the clip a saturated prediction of exactly 0 or 1 produces
    ``log(0) = -inf`` and the products ``0 * -inf`` turn the cost into NaN.
    """
    m = y.shape[1]
    eps = 1e-15
    probs = np.clip(np.asarray(aL, dtype=np.float64), eps, 1 - eps)

    if y.shape[0] == 1:
        c = (1./m) * (-np.dot(y, np.log(probs).T) - np.dot(1-y, np.log(1-probs).T))
    else:
        c = -(1./m) * np.sum(y * np.log(probs))
    return np.squeeze(c)

In [40]:
def accuracy(aL, y):
    """Return the percentage of correct predictions, rounded to a whole percent.

    With a single-row ``y`` the predictions are thresholded at 0.5. Otherwise
    the predicted and true classes are the row indices of the maximum in each
    column of ``aL`` and ``y``.

    The fraction is scaled to a percentage before rounding. Rounding the
    fraction first and multiplying by 100 afterwards leaves floating-point
    noise in the result (for example 56.99999999999999 instead of 57.0).
    """
    if y.shape[0] == 1:
        aL = np.array(aL > 0.5, dtype='float')
    else:
        aL = np.argmax(aL, 0)  # axis 0: index of the largest row in each column
        y = np.argmax(y, 0)

    return np.round(100.0 * np.mean(aL == y))

In [41]:
def predict(X, params, aFun):
    """Return the output-layer activations of a forward pass over ``X``.

    The forward cache is discarded; only the network output is returned.
    """
    return forwardPropagation(X, params, aFun)[0]

## A Model

In [42]:
def model(X, y, ldims, eta, epochs, aFun):
    """Train the network with full-batch gradient descent.

    Seeds NumPy's global generator with 1, initialises parameters for the
    layer sizes in ``ldims``, then runs ``epochs`` iterations of forward
    pass, cost/accuracy measurement, backpropagation and parameter update
    with learning rate ``eta``. Progress is printed whenever the epoch index
    is a multiple of ``epochs / 10``.

    Returns ``(params, costs, accs)`` where ``costs`` and ``accs`` hold the
    value measured at each epoch before that epoch's update.
    """
    np.random.seed(1)
    params = initParameters(ldims)

    costs, accs = [], []
    logInterval = epochs / 10

    for epoch in range(epochs):
        aL, forwardCache = forwardPropagation(X, params, aFun)

        # Metrics describe the parameters as they were before this update.
        costs.append(costFun(aL, y))
        accs.append(accuracy(aL, y))

        grads = backPropagation(aL, y, params, forwardCache, aFun)
        params = updateParams(params, grads, eta)

        if epoch % logInterval == 0:
            print("after", epoch, "epochs :\tCost = ", costs[-1], "\tAccuracy = ", accs[-1])

    return params, costs, accs

### running

In [None]:
# eta = 0.02
# epochs = 100
# ldims = [xTrain.shape[0], 1000, yTrain.shape[0]]
# params, costList, accuracyList = model(xTrain, yTrain, ldims, eta, epochs, 'tanh')

# eta = 0.001
# epochs = 1000
# ldims = [xTrain.shape[0], 20, 7, 5, yTrain.shape[0]]
# params, costList, accuracyList = model(xTrain, yTrain, ldims, eta, epochs, 'relu')

# Active configuration: input layer wired straight to the output layer
# (no hidden layers), trained for 100 epochs with learning rate 0.09.
eta, epochs = 0.09, 100
ldims = [xTrain.shape[0], yTrain.shape[0]]
params, costList, accuracyList = model(
    xTrain, yTrain, ldims, eta, epochs, aFun='relu'
)

In [None]:
# Plot the per-epoch cost (left panel) and accuracy (right panel).
t = np.arange(0, epochs)
plt.figure(figsize=(15,5))
for position, series in enumerate((costList, accuracyList), start=1):
    plt.subplot(1, 2, position)
    plt.plot(t, series)
plt.show()

In [None]:
# Evaluate with the same hidden-layer activation that the active training call
# used ('relu'). Predicting with a different activation than the one used for
# training gives wrong outputs as soon as the network has hidden layers.
evalActivation = 'relu'
print("Accuracy of Train Dataset", accuracy(predict(xTrain, params, evalActivation), yTrain), "%")
print("Accuracy of Test Dataset", accuracy(predict(xTest, params, evalActivation), yTest), "%")

In [None]:
# idx = int(random.randrange(0,xTest.shape[1]))
# plt.figure(figsize=(1.5, 1.5))
# plt.imshow(xTest[:, idx].reshape((28,28)),cmap='gray')
# plt.show()

# yPred = predict(xTest[:, idx].reshape(xTest[:, idx].shape[0], 1), params, 'tanh')
# yPred = np.argmax(yPred, 0)
# print("Our model says it is :", yPred[0])

In [None]:
# idx = int(random.randrange(0, xTest.shape[1]))
# plt.figure(figsize=(2, 2))
# plt.imshow(xTest[:, idx].reshape(64,64, 3))
# plt.show()

# yPred = predict(xTest[:, idx].reshape(xTest[:, idx].shape[0], 1), params, 'tanh')
# yPred = 1 * np.array(yPred > 0.5)
# print("Our model says it is :", np.squeeze(yPred))