In [None]:
import pandas as pd
import numpy as np
import sklearn
# Load the training set from a CSV file resolved relative to the working directory.
df = pd.read_csv('train_data.csv')

In [None]:
# Split the output class label into two target columns:
#   ' class'  - the label as stored in the CSV
#   'class1'  - 1 where ' class' == 0, otherwise 0
df['class1'] = (df[' class'] == 0).astype(int)

# Feature matrix (one row per sample) and the two-column target matrix.
X, y = df[[' x1',' x2']].values, df[[' class','class1']].values

In [None]:
def softmax(z):
    """Return the softmax of ``z`` computed along its last axis.

    Each row (last-axis slice) is normalised independently, so a batch of
    logits with shape ``(n_samples, n_classes)`` yields one probability
    distribution per sample.  For a single row the result equals
    ``exp(z) / sum(exp(z))``.

    Parameters
    ----------
    z : array_like
        Logits; the last axis indexes the classes.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``z`` whose last-axis slices sum to 1.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; avoid reducing over an empty axis.
        return z.astype(float)
    # Subtracting the row maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large logits.
    exp_scores = np.exp(z - np.max(z, axis=-1, keepdims=True))
    return exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)

def dsoftmax(z):
    """Return the element-wise term ``s * (1 - s)`` where ``s = softmax(z)``.

    The softmax is taken along the last axis, so every row of a batch is
    handled independently.  Only the diagonal of the softmax Jacobian is
    returned (d s_i / d z_i); the cross terms between different classes are
    not included.

    Parameters
    ----------
    z : array_like
        Logits (pre-activations); the last axis indexes the classes.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``z``.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; avoid reducing over an empty axis.
        return z.astype(float)
    # Shift by the row maximum so np.exp cannot overflow; the softmax value
    # itself is unaffected by the shift.
    exp_scores = np.exp(z - np.max(z, axis=-1, keepdims=True))
    s = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
    return s * (1 - s)

def loss(y,y_hat):
    """Return the sum of squared differences between ``y`` and ``y_hat``.

    All elements are summed, so the result is a single scalar regardless of
    the shape of the inputs.
    """
    residual = y - y_hat
    return np.sum(residual ** 2)

def loss_derivative(y,y_hat):
    """Return the prediction error ``y_hat - y`` used as the loss gradient.

    This is the derivative of ``0.5 * sum((y - y_hat) ** 2)`` with respect to
    ``y_hat``, i.e. half the gradient of ``loss`` as defined in this file.
    """
    error = y_hat - y
    return error

def sigmoid(Z):
    """Apply the logistic function ``1 / (1 + exp(-Z))`` element-wise."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def dSigmoid(Z):
    """Return the derivative of the logistic function evaluated at ``Z``.

    Computed element-wise as ``sig * (1 - sig)`` with
    ``sig = 1 / (1 + exp(-Z))``.
    """
    sig = 1 / (1 + np.exp(-Z))
    complement = 1 - sig
    return sig * complement

# Forward pass function
def forward_prop(model,a0):
    """Run the three-layer network forward and return every intermediate value.

    The network has no bias terms: each layer is a matrix product with the
    layer's weight matrix followed by an activation (sigmoid for the two
    hidden layers, softmax for the output layer).

    Parameters
    ----------
    model : dict
        Holds the weight matrices under the keys 'W1', 'W2' and 'W3'.
    a0 : numpy.ndarray
        Network input; must support ``a0.dot(W1)``.

    Returns
    -------
    dict
        Cache with the input ('a0'), the pre-activations ('z1', 'z2', 'z3')
        and the activations ('a1', 'a2', 'a3').
    """
    first_w, second_w, output_w = model['W1'], model['W2'], model['W3']

    # Hidden layer 1
    z1 = a0.dot(first_w)
    a1 = sigmoid(z1)

    # Hidden layer 2
    z2 = a1.dot(second_w)
    a2 = sigmoid(z2)

    # Output layer
    z3 = a2.dot(output_w)
    a3 = softmax(z3)

    # Everything the backward pass needs
    return {'a0':a0,'z1':z1,'a1':a1,'z2':z2,'a2':a2,'a3':a3,'z3':z3}

# Backpropagation
def backward_prop(model,cache,y):
    """Compute the weight gradients for one forward pass.

    The activation derivatives are evaluated at the cached pre-activations
    ('z1', 'z2', 'z3').  ``dSigmoid`` and ``dsoftmax`` both apply their
    nonlinearity to the argument before forming ``s * (1 - s)``, so passing
    the activations instead would apply the nonlinearity twice.

    NOTE: ``dsoftmax`` supplies only the element-wise (diagonal) part of the
    softmax Jacobian, so the output-layer delta ignores cross-class terms.

    Parameters
    ----------
    model : dict
        Weight matrices; 'W2' and 'W3' are read here.
    cache : dict
        Output of ``forward_prop`` for the same input
        (keys 'a0', 'a1', 'a2', 'a3', 'z1', 'z2', 'z3').
    y : numpy.ndarray
        Target with the same shape as ``cache['a3']``.

    Returns
    -------
    dict
        Gradients under the keys 'dW3', 'dW2' and 'dW1', each shaped like the
        corresponding weight matrix.
    """
    W2, W3 = model['W2'], model['W3']

    a0, a1, a2, a3 = cache['a0'], cache['a1'], cache['a2'], cache['a3']
    z1, z2, z3 = cache['z1'], cache['z2'], cache['z3']

    # Output layer: error signal times the activation derivative at z3
    dz3 = np.multiply(loss_derivative(y=y, y_hat=a3), dsoftmax(z3))
    dW3 = (a2.T).dot(dz3)

    # Second hidden layer: push the delta back through W3
    dz2 = np.multiply(dz3.dot(W3.T), dSigmoid(z2))
    dW2 = np.dot(a1.T, dz2)

    # First hidden layer: push the delta back through W2
    dz1 = np.multiply(dz2.dot(W2.T), dSigmoid(z1))
    dW1 = np.dot(a0.T, dz1)

    # Store gradients
    grads = {'dW3':dW3, 'dW2':dW2, 'dW1':dW1}
    return grads

# Parameter initialisation
def initialize_parameters(nn_input_dim,nn_hdim,nn_output_dim):
    """Create the three weight matrices of the network.

    Every weight is drawn uniformly from [-1, 1) via ``2 * rand - 1``.  All
    three layers use the same scheme; applying that rescaling to
    standard-normal draws would instead give weights centred on -1 with a
    standard deviation of 2.

    Draws come from NumPy's global random state, so call ``np.random.seed``
    beforehand for reproducible weights.

    Parameters
    ----------
    nn_input_dim : int
        Number of input features.
    nn_hdim : int
        Width of both hidden layers.
    nn_output_dim : int
        Number of output units.

    Returns
    -------
    dict
        'W1' of shape (nn_input_dim, nn_hdim), 'W2' of shape
        (nn_hdim, nn_hdim) and 'W3' of shape (nn_hdim, nn_output_dim).
    """
    W1 = 2 * np.random.rand(nn_input_dim, nn_hdim) - 1

    W2 = 2 * np.random.rand(nn_hdim, nn_hdim) - 1

    W3 = 2 * np.random.rand(nn_hdim, nn_output_dim) - 1

    model = { 'W1': W1, 'W2': W2,'W3':W3}
    return model

def update_parameters(model,grads,learning_rate):
    """Apply one gradient-descent step to every weight matrix.

    Each weight ``W`` becomes ``W - learning_rate * dW``.  The subtraction
    uses ``-=``, so when the weights are NumPy arrays (as produced by
    ``initialize_parameters``) the arrays held by ``model`` are modified in
    place and the returned dict refers to those same arrays.

    Parameters
    ----------
    model : dict
        Weights under 'W1', 'W2', 'W3'.
    grads : dict
        Gradients under 'dW1', 'dW2', 'dW3'.
    learning_rate : float
        Step size.

    Returns
    -------
    dict
        A new dict with the keys 'W1', 'W2', 'W3'.
    """
    names = ('W1', 'W2', 'W3')
    # Fetch all weights before touching any of them.
    weights = [model[name] for name in names]

    updated = {}
    for name, w in zip(names, weights):
        w -= learning_rate * grads['d' + name]
        updated[name] = w
    return updated

def predict(model, x):
    """Return, for each row of ``x``, the index of the largest network output.

    Runs ``forward_prop`` and takes the argmax of the output activations
    ('a3') along axis 1.
    """
    output_activations = forward_prop(model, x)['a3']
    return np.argmax(output_activations, axis=1)

# Module-level list kept from the original notebook; train() does not populate it.
losses = []
def train(model,X_,y_,learning_rate, epochs=50, print_loss=False):
    """Train the network with per-sample stochastic gradient descent.

    Every epoch visits all samples of ``X_`` in order and updates the weights
    after each one.  The number of samples and the feature/target widths are
    taken from the data rather than being fixed.

    Parameters
    ----------
    model : dict
        Initial weights ('W1', 'W2', 'W3').
    X_ : numpy.ndarray
        Inputs, one sample per row.
    y_ : numpy.ndarray
        Targets, one row per sample, aligned with ``X_``.
    learning_rate : float
        Step size passed to ``update_parameters``.
    epochs : int, optional
        Number of passes over the data.
    print_loss : bool, optional
        When true, print one line per epoch with the loss of the last sample
        of that epoch, measured before that sample's weight update (it is a
        single-sample loss, not an average over the epoch).

    Returns
    -------
    dict
        The model returned by the last call to ``update_parameters`` (the
        input ``model`` if there was nothing to train on).

    Raises
    ------
    ValueError
        If ``X_`` and ``y_`` contain a different number of samples.
    """
    n_samples = len(X_)
    if len(y_) != n_samples:
        raise ValueError('X_ and y_ must contain the same number of samples')

    # Stochastic Gradient descent. Loop over epochs
    for i in range(epochs):
        cache = None
        Ysample = None

        # Update weights after every sample
        for k in range(n_samples):
            # Forward propagation on a single row, kept 2-D for the matrix products
            Xsample = np.reshape(X_[k], (1, -1))
            cache = forward_prop(model, Xsample)

            # Backpropagation
            Ysample = np.reshape(y_[k], (1, -1))
            grads = backward_prop(model, cache, Ysample)

            # SGD parameter update
            model = update_parameters(model=model, grads=grads, learning_rate=learning_rate)

        # Print the loss of the epoch's last sample (skipped when there is no data)
        if print_loss and cache is not None:
            print('Loss after epoch',i,':',loss(Ysample,cache['a3']))

    return model

In [None]:
# Seed NumPy's global RNG so the initial weights are the same on every run.
np.random.seed(0)

# Build a 2-input, 3-hidden-unit, 2-output network and train it with SGD.
model = train(
    initialize_parameters(nn_input_dim=2, nn_hdim=3, nn_output_dim=2),
    X,
    y,
    learning_rate=0.01,
    epochs=50,
    print_loss=True,
)

# Show the trained weight matrices.
print(model)

Loss after epoch 0 : 0.5228850135421004
Loss after epoch 1 : 0.4251828402169788
Loss after epoch 2 : 0.37310619409685475
Loss after epoch 3 : 0.326079091975328
Loss after epoch 4 : 0.27311489917870213
Loss after epoch 5 : 0.21554998415882592
Loss after epoch 6 : 0.16066229889834419
Loss after epoch 7 : 0.11517640146437469
Loss after epoch 8 : 0.08121017924353074
Loss after epoch 9 : 0.0572482634838678
Loss after epoch 10 : 0.04082747060848101
Loss after epoch 11 : 0.029702337840705043
Loss after epoch 12 : 0.02209843233694466
Loss after epoch 13 : 0.016805604748765933
Loss after epoch 14 : 0.01304632034364529
Loss after epoch 15 : 0.010322110772377345
Loss after epoch 16 : 0.008308568127657748
Loss after epoch 17 : 0.006791749237836165
Loss after epoch 18 : 0.005628642397660649
Loss after epoch 19 : 0.004722105094944911
Loss after epoch 20 : 0.004004977666636773
Loss after epoch 21 : 0.003429991690826443
Loss after epoch 22 : 0.002963301678659768
Loss after epoch 23 : 0.002580281042841

In [None]:
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
# NOTE(review): absolute, machine-specific path; the training data is loaded
# from a relative path, so consider placing test_data.csv next to it.
df_test = pd.read_csv (r'/Users/arzoo/Desktop/SEM1/Intro to DL/test_data.csv')
X_test = df_test[[' x1',' x2']].values

# Encode the test labels exactly like the training labels:
# 'class1' is 1 where ' class' == 0, otherwise 0.
df_test['class1'] = (df_test[' class'] == 0).astype(int)

# The ground truth must come from the test frame; reading it from the
# training frame `df` would score test predictions against training labels.
y_test = df_test[[' class','class1']].values

# Both vectors are column indices into [' class', 'class1'], so they are
# directly comparable.
y_hat = predict(model,X_test)
y_true = y_test.argmax(axis=1)

# The metric functions are already imported at the top of this cell.
print("precision: ", precision_score(y_true, y_hat , average="macro"))
print("recall: ", recall_score(y_true, y_hat , average="macro"))
print("f1 score: ", f1_score(y_true, y_hat , average="macro"))

precision:  0.999
recall:  0.999
f1 score:  0.998998998998999
