In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
import os
import math
import re
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree

import warnings
warnings.filterwarnings('ignore')

In [2]:
def load_data(filename, values_dict = {}, train_data = False):
    df = np.asarray(pd.read_csv(filename, header=None, dtype=int))
    Y = df[:,-1]
    
    df = df[:,0:df.shape[1]-1]
    x = np.zeros((df.shape[0],85))
    y = np.zeros((df.shape[0],len(list(set(list(Y))))))
            
    if values_dict == {}:
        for i in range(df.shape[1]):
            length = len(list(set(list(df[:,i]))))
            values_dict[i] = length
    
    for j in range(df.shape[0]):
        ohe_encoded = []
        y[j][Y[j]] = 1
        for i in range(df.shape[1]):
            val = df[j][i]
            ohe_mat = np.zeros((values_dict[i]))
            ohe_mat[val-1] = 1
            ohe_encoded.extend(ohe_mat)
        ohe_encoded = np.asarray(ohe_encoded)
        x[j] = ohe_encoded

    if train_data == True:
        return x,y,values_dict
    else:
        return x,y

In [3]:
PART = 'a'
BASE_DIR = '../'

train_path = os.path.join(BASE_DIR, 'data', 'Poker_Hand_dataset', 'poker-hand-training-true.data')
test_path  = os.path.join(BASE_DIR, 'data', 'Poker_Hand_dataset', 'poker-hand-testing.data')

# if PART == 'a':

Xtrain, Ytrain, values_dict = load_data(train_path,train_data=True)
Xtest, Ytest = load_data(test_path, values_dict)

In [150]:
BATCH_SIZE = 1000
NUMBER_OF_INPUTS = 85
HIDDEN_LAYERS_UNITS = [70, 50, 10]
NUMBER_OF_OUTPUTS = 10
LEARNING_RATE = 1
EPSILON = 0.001
EPOCHS = 100

In [151]:
class NN_architecture:
    def __init__(self, learning_rate, batch_size, num_of_inputs, hidden_layer_units, num_of_outputs):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.num_of_inputs = num_of_inputs
        self.hidden_layer_list = hidden_layer_units
        self.num_of_outputs = num_of_outputs
    
    def sigmoid_activation(self,data):
        return (1.0/(1.0 + np.exp(-data)) )
    
    def relu_activation(self,data):
        return np.multiply(data>0,data)
    
    def sigmoid_der(self,data):
        x = self.sigmoid_activation(data)
        return np.multiply(x,1.0-x)
    
    def relu_der(self,data):
        temp = np.ones(data.shape,dtype=float)
        return np.multiply(temp>0,temp)
                    
    def initialize(self):
        neuron_count = [self.num_of_inputs]
        neuron_count.extend(self.hidden_layer_list)
        neuron_count.append(self.num_of_outputs)
        self.neuron_count = neuron_count
        params = {}
        np.random.seed(1)
        
        # xavier initialization
        for i in range(1, len(neuron_count)):
            params["W" + str(i)] = np.random.normal(0,1,(neuron_count[i],neuron_count[i-1]))*np.sqrt(2.0/neuron_count[i-1])
            params["b" + str(i)] = np.zeros((neuron_count[i],1),dtype=float)
        
        self.params = params
        self.num_of_layers = len(neuron_count)        
        return
    
    def forward_propagation(self, X, activation_function):        
        forward_prop = {}
        data = (X.T).copy()
        forward_prop["a0"] = data # n*m

        for i in range(1,self.num_of_layers-1):
            data = np.add(np.dot(self.params["W"+str(i)], data),self.params["b"+str(i)])
            # forward_prop["z"+str(i)] = data

            if activation_function == "relu":
                data = self.relu_activation(data)
            elif activation_function == "sigmoid":
                data = self.sigmoid_activation(data)

            forward_prop["a"+str(i)] = data.copy()
        
        data = np.add(np.dot(self.params["W"+str(self.num_of_layers-1)], data), self.params["b"+str(self.num_of_layers-1)])
        # forward_prop["z"+str(self.num_of_layers-1)] = data
        data = self.sigmoid_activation(data)
        forward_prop["a"+str(self.num_of_layers-1)] = data.copy()

        self.forward_prop = forward_prop
        return
    
    def backward_propagation(self, Y, activation_function):
        backward_prop = {}
        dataY = Y.reshape((self.num_of_outputs,Y.shape[0]))
        backward_prop["dz"+str(self.num_of_layers-1)] = self.forward_prop["a" + str(self.num_of_layers-1)] - dataY
        
        i = self.num_of_layers-2
        while i>=0:
            temp_mat = np.dot(self.params["W"+str(i+1)].T, backward_prop["dz" + str(i+1)])
            
            if activation_function == "sigmoid":
                temp_mat = np.multiply(temp_mat, self.sigmoid_der(self.forward_prop["a"+str(i)]))
            elif activation_function == "relu":
                temp_mat = np.multiply(temp_mat, self.relu_der(self.forward_prop["a"+str(i)]))
                
            backward_prop["dz" + str(i)] = temp_mat            
            i-=1
        self.backward_prop = backward_prop
        return
    
    def backward_propagation2(self, Y, activation_function):
        self.backward_prop = {}
        dataY = Y.reshape((self.num_of_outputs,Y.shape[0]))
        
        temp_mat = np.multiply(dataY - self.forward_prop["a"+str(self.num_of_layers-1)], self.forward_prop["a"+str(self.num_of_layers-1)])
        temp_mat = np.multiply(temp_mat, (1-self.forward_prop["a"+str(self.num_of_layers-1)]) )
        self.backward_prop["dz"+str(self.num_of_layers-1)] = temp_mat
        
        for i in range(self.num_of_layers-2, 0, -1):
            if activation_function == "sigmoid":
                temp_mat = np.multiply(self.params["W"+str(i+1)].T @ self.backward_prop["dz"+str(i+1)], self.forward_prop["a"+str(i)] * (1-self.forward_prop["a"+str(i)]) )
            elif activation_function == "relu":
                temp_mat = temp_mat = np.multiply(self.params["W"+str(i+1)].T @ self.backward_prop["dz"+str(i+1)], np.multiply(self.forward_prop["a"+str(i)] > 0, self.forward_prop["a"+str(i)]) )
        
            self.backward_prop["dz"+str(i)] = temp_mat
        return
                    
    def update_params(self,Y):
        for i in range(1,self.num_of_layers):
            # self.params["W"+str(i)] = self.params["W"+str(i)] + (self.learning_rate/Y.shape[0])*np.dot(self.backward_prop["dz"+str(i)],(self.forward_prop["a"+str(i-1)]).T)
            # self.params["b"+str(i)] = self.params["b"+str(i)] + ( (self.learning_rate/Y.shape[0])*np.sum(self.backward_prop["dz"+str(i)],axis=1) ).reshape((self.params["b"+str(i)].shape[0],1))
            self.params["W"+str(i)] = self.params["W"+str(i)] - (self.learning_rate/1)*np.dot(self.backward_prop["dz"+str(i)],(self.forward_prop["a"+str(i-1)]).T)
            self.params["b"+str(i)] = self.params["b"+str(i)] - ( (self.learning_rate/1)*np.sum(self.backward_prop["dz"+str(i)],axis=1) ).reshape((self.params["b"+str(i)].shape[0],1))
        return
    
    def predict(self,X,activation_function="sigmoid"):
        data_x = (X.T).copy()
        for i in range(1,self.num_of_layers-1):
            data_x = np.add(np.dot(self.params["W"+str(i)],data_x),self.params["b"+str(i)])
            if activation_function == "sigmoid":
                data_x = self.sigmoid_activation(data_x)
                
            elif activation_function == "relu":
                data_x = self.relu_activation(data_x)
        
        if activation_function == "sigmoid":
            data_x = np.exp(data_x)

        data_x = data_x.T
        data_x = data_x/(np.sum(data_x,axis=1).reshape(data_x.shape[0],1))
                
        return data_x, np.argmax(data_x,axis=1)
    
    def loss_function(self,y1,y2):
        # print(y1,y2)
        y = np.abs(y1-y2)
        # y = np.multiply(y,y)
        return np.sum(y)/(2*y.shape[0])
    
    def print_param(self, i):
        print("Iteration: {}".format(i))
        for i in self.params:
            print(i,np.max(self.params[i]), np.min(self.params[i]), self.params[i].shape)
        print()

        for i in self.backward_prop:
            print(i,np.max(self.backward_prop[i]), np.min(self.backward_prop[i]), self.backward_prop[i].shape)
        print()    

        for i in self.forward_prop:
            print(i,np.max(self.forward_prop[i]), np.min(self.forward_prop[i]), self.forward_prop[i].shape)
        print()
            
    def run(self,epochs,epsilon,X,Y,activation_function):
        self.initialize()
        iteration = 1
        error = float("inf")
        start = 0
        end   = self.batch_size
        
        # self.print_param(0)
        
        while iteration <= epochs and error > epsilon:
            X_new = X[start:end,:]
            Y_new = Y[start:end]
            
            self.forward_propagation(X_new,activation_function)
            self.backward_propagation2(Y_new,activation_function)
            
            # if iteration == 1:
            #     self.print_param(0)
            
            self.update_params(Y_new)
            y_class, y_pred = self.predict(X_new,activation_function)
                        
            error = self.loss_function(y_class,Y_new)
            
            if iteration%100 == 0:
                print("Iteration: {}, Error:{}".format(iteration,error))
            
            # self.print_param(iteration)
            iteration += 1            
            
            if end == Y.shape[0]:
                start = 0
                end = self.batch_size
            else:            
                start = end
                end += self.batch_size
                end = min(Y.shape[0],end)
            

In [152]:
model = NN_architecture(LEARNING_RATE,BATCH_SIZE,NUMBER_OF_INPUTS,HIDDEN_LAYERS_UNITS,NUMBER_OF_OUTPUTS)
model.run(1000, EPSILON, Xtrain, Ytrain,'sigmoid')

Iteration: 100, Error:0.8782367340603764
Iteration: 200, Error:0.8783147385977945
Iteration: 300, Error:0.8776907022984505
Iteration: 400, Error:0.8783147385977945
Iteration: 500, Error:0.8779247159107044
Iteration: 600, Error:0.8778467113732865
Iteration: 700, Error:0.8781587295229586
Iteration: 800, Error:0.8786267567474665
Iteration: 900, Error:0.8787047612848845
Iteration: 1000, Error:0.8783927431352125


In [153]:
for i in model.params:
    print(i,np.max(model.params[i]), np.min(model.params[i]), model.params[i].shape)
print()

for i in model.backward_prop:
    print(i,np.max(model.backward_prop[i]), np.min(model.backward_prop[i]), model.backward_prop[i].shape)
print()    

for i in model.forward_prop:
    print(i,np.max(model.forward_prop[i]), np.min(model.forward_prop[i]), model.forward_prop[i].shape)
print()


W1 1.0181244117223098 -0.8755942042176866 (70, 85)
b1 2.156544921861159 -1.7955063232058568 (70, 1)
W2 3.6452028710623905 -3.9473637039877087 (50, 70)
b2 5.753545185251151 -6.244521022220592 (50, 1)
W3 28.255086660428756 -20.20794206594833 (10, 50)
b3 34.79317130766732 -24.507497069388393 (10, 1)
W4 69.05412078300968 16.566654554098307 (10, 10)
b4 125.77147099722076 57.372978141916626 (10, 1)

dz4 -0.0 -0.0 (10, 1000)
dz3 0.0 0.0 (10, 1000)
dz2 0.0 0.0 (50, 1000)
dz1 0.0 0.0 (70, 1000)

a0 1.0 0.0 (85, 1000)
a1 0.9993621249042929 0.0024393745272811365 (70, 1000)
a2 1.0 1.6637267171218848e-54 (50, 1000)
a3 1.0 4.326524138853294e-153 (10, 1000)
a4 1.0 1.0 (10, 1000)



In [154]:
y_class_train, y_pred_train = model.predict(Xtrain,'sigmoid')
error = model.loss_function(y_class_train, Ytrain)
print(accuracy_score(y_pred_train, np.argmax(Ytrain,axis=1)), error)

y_class_test, y_pred_test = model.predict(Xtest,'sigmoid')
error2 = model.loss_function(y_class_test, Ytest)
print(accuracy_score(y_pred_test, np.argmax(Ytest,axis=1)), error2)

0.4995201919232307 0.8783826689786407
0.501209 0.8784028057205402


In [108]:
print(y_class_train[1], Ytrain[1])

[0.09774662 0.05943147 0.13015246 0.06003993 0.06181143 0.06969242
 0.14298412 0.12182448 0.13628063 0.12003645] [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]


In [101]:
model2 = NN_architecture(LEARNING_RATE,BATCH_SIZE,NUMBER_OF_INPUTS,HIDDEN_LAYERS_UNITS,NUMBER_OF_OUTPUTS)
model2.run(2000, EPSILON, Xtrain, Ytrain,'relu')

HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1
HEy1


KeyboardInterrupt: 

In [99]:
_, y_pred_train = model2.predict(Xtrain,'relu')
print(accuracy_score(y_pred_train, np.argmax(Ytrain,axis=1)))

_, y_pred_test = model2.predict(Xtest,'relu')
print(accuracy_score(y_pred_test, np.argmax(Ytest,axis=1)))

0.42379048380647744
0.422498


In [42]:
for i in model2.backward_prop:
    print(i,np.max(model2.backward_prop[i]), np.min(model2.backward_prop[i]), model2.backward_prop[i].shape)
print()

for i in model2.forward_prop:
    print(i,np.max(model2.forward_prop[i]), np.min(model2.forward_prop[i]), model2.forward_prop[i].shape)
print()

for i in model2.params:
    print(i,np.max(model2.params[i]), np.min(model2.params[i]), model2.params[i].shape)

dz4 1.0 -1.0 (10, 100)
dz3 6.435164271970555e+42 -4.404224181042292 (10, 100)
dz2 8.815363775156895e+84 -3.502298235246524e+42 (50, 100)
dz1 1.8566504146402347e+127 -1.0062145051098991e+85 (70, 100)
dz0 2.961153975535918e+169 4.476963190857688e+167 (85, 100)

a0 1.0 0.0 (85, 100)
z1 1.7200192885385603e+42 -1.4085656221727102e+28 (70, 100)
a1 1.7200192885385603e+42 -0.0 (70, 100)
z2 5.719136015978563e+84 -3.3449375096514033e+70 (50, 100)
a2 5.719136015978563e+84 -0.0 (50, 100)
z3 3.0859791960474724e+127 -2.210107472982474e+113 (10, 100)
a3 3.0859791960474724e+127 -0.0 (10, 100)
z4 2.9184023788594952e+169 -3.2059813725977796e+168 (10, 100)
a4 1.0 0.0 (10, 100)

W1 5.187071734070013e+125 -2.811141384889173e+83 (70, 85)
b1 1.6732545734748368e+126 -9.068228511386736e+83 (70, 1)
W2 1.235903635889553e+126 -4.910180944669756e+83 (50, 70)
b2 7.944601545511659e+83 -3.1563489247030058e+41 (50, 1)
W3 2.9998608977374687e+126 -1.698756360041152e+84 (10, 50)
b3 5.799512910039699e+41 -2.27249621385059

In [None]:
print(model.forward_prop["a1"].shape)

In [None]:
print(model.neuron_count)

In [None]:
model.params["W2"].shape

In [None]:
print(model.params["b1"])

In [None]:
print(model.params["b2"])

In [None]:
print(model.params["W1"])

In [None]:
print(model.params["W2"])

In [60]:
for i in range(10,0,-1):
    print(i)

10
9
8
7
6
5
4
3
2
1
