In [27]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
import os
import time
import math
import re
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree

import warnings
warnings.filterwarnings('ignore')

In [4]:
def load_data(filename, values_dict = {}, train_data = False):
    df = np.asarray(pd.read_csv(filename, header=None, dtype=int))
    Y = df[:,-1]
    
    df = df[:,0:df.shape[1]-1]
    x = np.zeros((df.shape[0],85))
    y = np.zeros((df.shape[0],len(list(set(list(Y))))))
            
    if values_dict == {}:
        for i in range(df.shape[1]):
            length = len(list(set(list(df[:,i]))))
            values_dict[i] = length
    
    for j in range(df.shape[0]):
        ohe_encoded = []
        y[j][Y[j]] = 1
        for i in range(df.shape[1]):
            val = df[j][i]
            ohe_mat = np.zeros((values_dict[i]))
            ohe_mat[val-1] = 1
            ohe_encoded.extend(ohe_mat)
        ohe_encoded = np.asarray(ohe_encoded)
        x[j] = ohe_encoded

    if train_data == True:
        return x,y,values_dict
    else:
        return x,y

In [5]:
PART = 'a'
BASE_DIR = '../'

train_path = os.path.join(BASE_DIR, 'data', 'Poker_Hand_dataset', 'poker-hand-training-true.data')
test_path  = os.path.join(BASE_DIR, 'data', 'Poker_Hand_dataset', 'poker-hand-testing.data')

# if PART == 'a':

Xtrain, Ytrain, values_dict = load_data(train_path,train_data=True)
Xtest, Ytest = load_data(test_path, values_dict)

In [87]:
BATCH_SIZE = 100
NUMBER_OF_INPUTS = 85
HIDDEN_LAYERS_UNITS = [5]
NUMBER_OF_OUTPUTS = 10
LEARNING_RATE = 0.2
EPSILON = 0.001
tolerance = 0.001
EPOCHS = 100

In [101]:
class NN_architecture:
    def __init__(self, learning_rate, batch_size, num_of_inputs, hidden_layer_units, num_of_outputs):
        self.learning_rate = learning_rate
        self.init_learning_rate = learning_rate
        self.batch_size = batch_size
        self.num_of_inputs = num_of_inputs
        self.hidden_layer_list = hidden_layer_units
        self.num_of_outputs = num_of_outputs
        self.learning_rate_threshold = 1e-5 # Min possible learning rate
    
    def sigmoid_activation(self,data):
        return (1.0/(1.0 + np.exp(-data)) )
    
    def relu_activation(self,data):
        return np.multiply(data>0,data)
    
    def sigmoid_der(self,data):
        # x = self.sigmoid_activation(data)
        x = data
        return np.multiply(x,1.0-x)
    
    def relu_der(self,data):
        temp = np.ones(data.shape,dtype=float)
        return np.multiply(temp>0,temp)
                    
    def initialize(self):
        neuron_count = [self.num_of_inputs]
        neuron_count.extend(self.hidden_layer_list)
        neuron_count.append(self.num_of_outputs)
        self.neuron_count = neuron_count
        params = {}
        np.random.seed(1)
        
        # xavier initialization
        for i in range(1, len(neuron_count)):
            params["W" + str(i)] = np.random.normal(0,1,(neuron_count[i],neuron_count[i-1]))*np.sqrt(2.0/neuron_count[i-1])
            params["b" + str(i)] = np.zeros((neuron_count[i],1),dtype=float)
        
        self.params = params
        self.num_of_layers = len(neuron_count)
        
        return
    
    def forward_propagation(self, X, activation_function):        
        forward_prop = {}
        data = (X.T).copy()
        forward_prop["a0"] = data # n*m

        for i in range(self.num_of_layers-2):
            data = np.dot(self.params["W"+str(i+1)], data) + self.params["b"+str(i+1)]

            if activation_function == "relu":
                data = self.relu_activation(data)
            elif activation_function == "sigmoid":
                data = self.sigmoid_activation(data)

            forward_prop["a"+str(i+1)] = data.copy()
        
        data = np.dot(self.params["W"+str(self.num_of_layers-1)], data) + self.params["b"+str(self.num_of_layers-1)]
        data = self.sigmoid_activation(data)
        forward_prop["a"+str(self.num_of_layers-1)] = data.copy()

        self.forward_prop = forward_prop
        return
    
    def backward_propagation(self, Y, activation_function):
        self.backward_prop = {}
        dataY = (Y.T).copy()
        
        self.backward_prop["dz"+str(self.num_of_layers-1)] = self.forward_prop["a" + str(self.num_of_layers-1)] - dataY
        
        for i in range(self.num_of_layers-2,0,-1):
            temp_mat = np.dot(self.params["W"+str(i+1)].T, self.backward_prop["dz"+str(i+1)])
            
            if activation_function == "sigmoid":
                temp_mat =np.multiply(temp_mat, self.sigmoid_der(self.forward_prop["a"+str(i)]))
            elif activation_function == "relu":
                temp_mat =np.multiply(temp_mat, self.relu_der(self.forward_prop["a"+str(i)]))
                
            self.backward_prop["dz"+str(i)] = temp_mat
        
        # i = self.num_of_layers-2
        # while i>=0:
        #     temp_mat = np.dot(self.params["W"+str(i+1)].T, backward_prop["dz" + str(i+1)])
            
        #     if activation_function == "sigmoid":
        #         temp_mat = np.multiply(temp_mat, self.sigmoid_der(self.forward_prop["a"+str(i)]))
        #     elif activation_function == "relu":
        #         temp_mat = np.multiply(temp_mat, self.relu_der(self.forward_prop["a"+str(i)]))
                
        #     backward_prop["dz" + str(i)] = temp_mat            
        #     i-=1
        # self.backward_prop = backward_prop
        # return
    
    def backward_propagation2(self, Y, activation_function):
        self.backward_prop = {}
        dataY = (Y.T).copy()
        
        temp_mat = np.multiply(dataY - self.forward_prop["a"+str(self.num_of_layers-1)], self.forward_prop["a"+str(self.num_of_layers-1)])
        temp_mat = np.multiply(temp_mat, (1-self.forward_prop["a"+str(self.num_of_layers-1)]) )
        
        self.backward_prop["dz"+str(self.num_of_layers-1)] = temp_mat
        
        for i in range(self.num_of_layers-2, 0, -1):
            if activation_function == "sigmoid":
                temp_mat = np.multiply(self.params["W"+str(i+1)].T @ self.backward_prop["dz"+str(i+1)], self.forward_prop["a"+str(i)] * (1-self.forward_prop["a"+str(i)]) )
            elif activation_function == "relu":
                temp_mat = temp_mat = np.multiply(self.params["W"+str(i+1)].T @ self.backward_prop["dz"+str(i+1)], np.multiply(self.forward_prop["a"+str(i)] > 0, self.forward_prop["a"+str(i)]) )
        
            self.backward_prop["dz"+str(i)] = temp_mat
        return
                    
    def update_params(self,M):
        new_params = {}
        for i in range(1,self.num_of_layers):
            new_params["W"+str(i)] = self.params["W"+str(i)] - (self.learning_rate/M)*np.dot(self.backward_prop["dz"+str(i)],(self.forward_prop["a"+str(i-1)]).T)
            
            temp = (self.learning_rate/M)*np.sum(self.backward_prop["dz"+str(i)],axis=1)
            temp = temp.reshape((temp.shape[0],1))
            
            new_params["b"+str(i)] = self.params["b"+str(i)] - temp
            
        self.params = new_params
        return
    
    def predict(self,X,activation_function="sigmoid"):
        data_x = (X.T).copy()
        for i in range(1,self.num_of_layers):
            data_x = np.add(np.dot(self.params["W"+str(i)],data_x),self.params["b"+str(i)])
            if activation_function == "sigmoid":
                data_x = self.sigmoid_activation(data_x)
                
            elif activation_function == "relu":
                data_x = self.relu_activation(data_x)    
        
        data_x = self.sigmoid_activation(data_x)
        data_x = data_x.T
        data_x = data_x/(np.sum(data_x,axis=1).reshape(data_x.shape[0],1))
                
        return data_x, np.argmax(data_x,axis=1)
    
    def loss_function(self,y1,y2):
        # print(y1,y2)
        y = np.abs(y1-y2)
        y = np.multiply(y,y)
        return np.sum(y)/(2*y.shape[0])
    
    def print_param(self, i):
        print("Iteration: {}".format(i))
        for i in self.params:
            print(i,np.max(self.params[i]), np.min(self.params[i]), self.params[i].shape)
        print()

        for i in self.backward_prop:
            print(i,np.max(self.backward_prop[i]), np.min(self.backward_prop[i]), self.backward_prop[i].shape)
        print()    

        for i in self.forward_prop:
            print(i,np.max(self.forward_prop[i]), np.min(self.forward_prop[i]), self.forward_prop[i].shape)
        print()
    
    def print_class_param(self):
        print("Batch Size: {}, Learning rate: {}, Num of layers: {}".format(self.batch_size, self.learning_rate, self.num_of_layers))
        print("Neuron count: {}".format(self.neuron_count))

    def batch_loss(self, y1, y2):
        
        # Here y2 is the actual Y
        # y1 is the predicted y
        l1 = np.log(np.multiply(1, y1==0) + y1)
        l1 = np.multiply(l1, y2)
        
        l2 = np.log(np.multiply(1, y1==1) + 1 - y1)
        l2 = np.multiply(l2, 1-y2)
        
        l = np.mean(-1.0*l1-1.0*l2,axis=1)
        l = np.sqrt(np.sum(np.multiply(l,l)))/(2.0*y1.shape[1])
        
        return l
        

    def run(self,epochs,epsilon,X,Y,activation_function,adaptive=False):
        
        self.examples = X.shape[0]
        self.batches = (int)(self.examples/self.batch_size)        
        self.initialize()
        # self.print_param(0)
        # self.print_class_param()
        
        iteration  = 1
        error = float("inf")        
        time_start = time.time()
        print("Training phase ... ")
        
        error_list = []
        
        while iteration <= epochs and error > epsilon and self.learning_rate > self.learning_rate_threshold:
            
            error = 0
            for batch in range(self.batches):
                start = batch*self.batch_size
                end   = min(start + self.batch_size,self.examples)

                X_new = X[start:end,:]
                Y_new = Y[start:end,:]

                self.forward_propagation(X_new,activation_function)

                # self.backward_propagation2(Y_new,activation_function)
                self.backward_propagation(Y_new,activation_function)

                self.update_params(Y_new.shape[0])

                loss_partial = self.batch_loss(self.forward_prop["a"+str(self.num_of_layers-1)], Y_new.T)
                error += (loss_partial)
            
            error_list.append(error)
            
            if iteration%10 == 0:
                print("Epoch: {}, Error: {}".format(iteration, error))            
            iteration += 1
            
            if adaptive:
                self.learning_rate = (self.init_learning_rate)/(np.sqrt(iteration))
        
        time_end = time.time()
        self.training_time = (time_end - time_start)
            

In [102]:
model = NN_architecture(LEARNING_RATE,BATCH_SIZE,NUMBER_OF_INPUTS,[5, 5],NUMBER_OF_OUTPUTS)
model.run(1000, EPSILON, Xtrain, Ytrain,'sigmoid',adaptive=False)

print("Training time: {}s".format(round(model.training_time,3)))

Training phase ... 
Epoch: 10, Error: 1.2530753030433384
Epoch: 20, Error: 1.253038962173449
Epoch: 30, Error: 1.253020327680691
Epoch: 40, Error: 1.253009128543071
Epoch: 50, Error: 1.253001628856403
Epoch: 60, Error: 1.2529962052201846
Epoch: 70, Error: 1.2529920460867472
Epoch: 80, Error: 1.2529887039923056
Epoch: 90, Error: 1.252985913980883
Epoch: 100, Error: 1.2529835104225455
Epoch: 110, Error: 1.252981385254278
Epoch: 120, Error: 1.252979465479124
Epoch: 130, Error: 1.2529777003405835
Epoch: 140, Error: 1.2529760536690588
Epoch: 150, Error: 1.2529744991371412
Epoch: 160, Error: 1.2529730172217388
Epoch: 170, Error: 1.2529715932043946
Epoch: 180, Error: 1.2529702158228901
Epoch: 190, Error: 1.2529688763426126
Epoch: 200, Error: 1.2529675679048597
Epoch: 210, Error: 1.2529662850618093
Epoch: 220, Error: 1.2529650234396164
Epoch: 230, Error: 1.252963779491003
Epoch: 240, Error: 1.2529625503112782
Epoch: 250, Error: 1.252961333499987
Epoch: 260, Error: 1.2529601270557456
Epoch: 270

In [103]:
y_class_train, y_pred_train = model.predict(Xtrain,'sigmoid')
error = model.loss_function(y_class_train, Ytrain)
print("Train Accuracy: {}%, Error: {}".format(round(100*accuracy_score(y_pred_train, np.argmax(Ytrain,axis=1)),3), error))

y_class_test, y_pred_test = model.predict(Xtest,'sigmoid')
error2 = model.loss_function(y_class_test, Ytest)
print("Test Accuracy: {}%, Error: {}".format(round(100*accuracy_score(y_pred_test, np.argmax(Ytest,axis=1)),3), error2))

y_conf = np.argmax(Ytest, axis=1)
confusion_matrix = np.zeros((10,10))
for i in range(Ytest.shape[0]):
    confusion_matrix[y_conf[i]][y_pred_test[i]] += 1

confusion_matrix = confusion_matrix.astype(int)

print(confusion_matrix)

#  5  10  15  20  25
# 51, 55, 55  55  50

Train Accuracy: 49.952%, Error: 0.4348318820233563
Test Accuracy: 50.121%, Error: 0.43482004821381637
[[501209      0      0      0      0      0      0      0      0      0]
 [422498      0      0      0      0      0      0      0      0      0]
 [ 47622      0      0      0      0      0      0      0      0      0]
 [ 21121      0      0      0      0      0      0      0      0      0]
 [  3885      0      0      0      0      0      0      0      0      0]
 [  1996      0      0      0      0      0      0      0      0      0]
 [  1424      0      0      0      0      0      0      0      0      0]
 [   230      0      0      0      0      0      0      0      0      0]
 [    12      0      0      0      0      0      0      0      0      0]
 [     3      0      0      0      0      0      0      0      0      0]]


In [23]:
for i in model.params:
    print(i,np.max(model.params[i]), np.min(model.params[i]), model.params[i].shape)
print()

for i in model.backward_prop:
    print(i,np.max(model.backward_prop[i]), np.min(model.backward_prop[i]), model.backward_prop[i].shape)
print()    

for i in model.forward_prop:
    print(i,np.max(model.forward_prop[i]), np.min(model.forward_prop[i]), model.forward_prop[i].shape)
print()


W1 0.6159433781072531 -0.5049561528689354 (100, 85)
b1 0.03445102346635879 -0.026007391987375665 (100, 1)
W2 0.6012644202711865 -0.5026017293875579 (100, 100)
b2 0.06744211180635681 -0.046595278571981626 (100, 1)
W3 0.5863420821983738 -0.27864917905740544 (10, 100)
b3 0.22454948086129342 0.16349251670983755 (10, 1)

dz3 5.835407169399111e-08 -0.0002621945302480613 (10, 100)
dz2 3.528285792021057e-05 -9.947474680130935e-05 (100, 100)
dz1 3.4465562330752285e-05 -8.680280428545231e-05 (100, 100)

a0 1.0 0.0 (85, 100)
a1 0.8642525617767342 0.13003704924674026 (100, 100)
a2 0.958915189777472 0.17342259559126652 (100, 100)
a3 0.9998281509721083 0.9997376678514929 (10, 100)



1 (5, 25010)
2 (10, 25010)
0.3425029988004798 0.8999999765695527
1 (5, 1000000)
2 (10, 1000000)
0.340879 0.8999999768439552


In [108]:
print(y_class_train[1], Ytrain[1])

[0.09774662 0.05943147 0.13015246 0.06003993 0.06181143 0.06969242
 0.14298412 0.12182448 0.13628063 0.12003645] [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


In [99]:
model2 = NN_architecture(LEARNING_RATE,BATCH_SIZE,NUMBER_OF_INPUTS,[5, 5],NUMBER_OF_OUTPUTS)
model2.run(1000, EPSILON, Xtrain, Ytrain,'relu')
print("Training time: {}s".format(round(model2.training_time,3)))

Training phase ... 
Epoch: 10, Error: 1.3209447245451744
Epoch: 20, Error: 1.309657894758027
Epoch: 30, Error: 1.2698166477909654
Epoch: 40, Error: 1.3050447677498611
Epoch: 50, Error: 3.726864440979116
Epoch: 60, Error: 1.2916355433426845
Epoch: 70, Error: 1.333688563643145
Epoch: 80, Error: 1.268991924825986
Epoch: 90, Error: 1.5195353628179034
Epoch: 100, Error: nan
Training time: 13.821s


In [100]:
y_class_train, y_pred_train = model2.predict(Xtrain,'relu')
error = model2.loss_function(y_class_train, Ytrain)
print("Train Accuracy: {}%, Error: {}".format(round(100*accuracy_score(y_pred_train, np.argmax(Ytrain,axis=1)),3), error))

y_class_test, y_pred_test = model2.predict(Xtest,'relu')
error2 = model2.loss_function(y_class_test, Ytest)
print("Test Accuracy: {}%, Error: {}".format(round(100*accuracy_score(y_pred_test, np.argmax(Ytest,axis=1)),3), error2))

y_conf = np.argmax(Ytest, axis=1)
confusion_matrix = np.zeros((10,10))
for i in range(Ytest.shape[0]):
    confusion_matrix[y_conf[i]][y_pred_test[i]] += 1

confusion_matrix = confusion_matrix.astype(int)

print(confusion_matrix)

#  5  10  15  20  25
# 51, 55, 55  55  50

Train Accuracy: 49.952%, Error: nan
Test Accuracy: 50.121%, Error: nan
[[501209      0      0      0      0      0      0      0      0      0]
 [422498      0      0      0      0      0      0      0      0      0]
 [ 47622      0      0      0      0      0      0      0      0      0]
 [ 21121      0      0      0      0      0      0      0      0      0]
 [  3885      0      0      0      0      0      0      0      0      0]
 [  1996      0      0      0      0      0      0      0      0      0]
 [  1424      0      0      0      0      0      0      0      0      0]
 [   230      0      0      0      0      0      0      0      0      0]
 [    12      0      0      0      0      0      0      0      0      0]
 [     3      0      0      0      0      0      0      0      0      0]]


In [42]:
for i in model2.backward_prop:
    print(i,np.max(model2.backward_prop[i]), np.min(model2.backward_prop[i]), model2.backward_prop[i].shape)
print()

for i in model2.forward_prop:
    print(i,np.max(model2.forward_prop[i]), np.min(model2.forward_prop[i]), model2.forward_prop[i].shape)
print()

for i in model2.params:
    print(i,np.max(model2.params[i]), np.min(model2.params[i]), model2.params[i].shape)

dz4 1.0 -1.0 (10, 100)
dz3 6.435164271970555e+42 -4.404224181042292 (10, 100)
dz2 8.815363775156895e+84 -3.502298235246524e+42 (50, 100)
dz1 1.8566504146402347e+127 -1.0062145051098991e+85 (70, 100)
dz0 2.961153975535918e+169 4.476963190857688e+167 (85, 100)

a0 1.0 0.0 (85, 100)
z1 1.7200192885385603e+42 -1.4085656221727102e+28 (70, 100)
a1 1.7200192885385603e+42 -0.0 (70, 100)
z2 5.719136015978563e+84 -3.3449375096514033e+70 (50, 100)
a2 5.719136015978563e+84 -0.0 (50, 100)
z3 3.0859791960474724e+127 -2.210107472982474e+113 (10, 100)
a3 3.0859791960474724e+127 -0.0 (10, 100)
z4 2.9184023788594952e+169 -3.2059813725977796e+168 (10, 100)
a4 1.0 0.0 (10, 100)

W1 5.187071734070013e+125 -2.811141384889173e+83 (70, 85)
b1 1.6732545734748368e+126 -9.068228511386736e+83 (70, 1)
W2 1.235903635889553e+126 -4.910180944669756e+83 (50, 70)
b2 7.944601545511659e+83 -3.1563489247030058e+41 (50, 1)
W3 2.9998608977374687e+126 -1.698756360041152e+84 (10, 50)
b3 5.799512910039699e+41 -2.27249621385059

In [None]:
print(model.forward_prop["a1"].shape)

In [None]:
print(model.neuron_count)

In [None]:
model.params["W2"].shape

In [None]:
print(model.params["b1"])

In [None]:
print(model.params["b2"])

In [None]:
print(model.params["W1"])

In [None]:
print(model.params["W2"])

In [60]:
for i in range(10,0,-1):
    print(i)

10
9
8
7
6
5
4
3
2
1
