In [1]:
import numpy as np
import pandas as pd

In [6]:
data = pd.read_csv('iris.csv');
catgs = pd.unique(data.iloc[:,-1]);
unq_cat = len(pd.unique(catgs));

In [7]:

#Changing the categorical data entries into integers

for i in range(unq_cat):
    data.iloc[:,-1].replace(catgs[i], float(i), inplace=True)


In [8]:
# Converting pandas dataset into numpy array
# Dimension: [150 X 5]
dataset = data.values

print(type(dataset))

<class 'numpy.ndarray'>


In [152]:
hlayer_dims = [6,6,5,3]



# x_data all the columns before the last column
x_data = dataset[:, :-1];

# just the last column
y_data = dataset[:, -1].astype(int);



def normalize(x):
    means = np.mean(x, axis=0, keepdims=True);
    deviations = x - means;
    return deviations / deviations ** 2

x_data = normalize(x_data)
    
    




def one_hotify(vector):
    """
    args: 
    vector-> (m,1) dimensional categorical vector
    
    output:
    (m,C) dimensional matrix of one hot vectors
          where C is the number of unique values in vector
    
    """
    
    rows = np.arange(vector.shape[0])
    one_hot_v = np.zeros((vector.shape[0], len(np.unique(vector))))

    one_hot_v[rows, vector] = 1

    one_hot_v = np.flip(one_hot_v, axis=1);
    
    return one_hot_v;

y_data = one_hotify(y_data);



INPUT_DIM = x_data.shape[1];
OUTPUT_DIM = y_data.shape[1];

print("input dimensions: ",INPUT_DIM)
print("output dimensions: ",OUTPUT_DIM)


input dimensions:  4
output dimensions:  3


## Layer Class

In [180]:
class NNlayer():
    def __init__(self, i_dim, o_dim, activation='relu'):
        self.i_dim = i_dim;
        self.o_dim = o_dim;
        self.activation_str = activation;
        self.w = np.random.randn(i_dim, o_dim);
        self.b = np.zeros((1, o_dim))
        self.activation = eval("self."+activation);
        
        #Derivatives
        self.dzdw = None;
        self.dadz = None;
        
        self.dw = None;
        self.db = None;
    

    
    
    def sigmoid(self, x, deriv=False):
        s = 1/(1+np.exp(-x));
        if deriv:
            return s*(1-s);
        return s;
    
    def relu(self, x, deriv=False):
        if deriv:
            return 1. * (x > 0);
        return np.maximum(x, 0, x);
    
    
    def tanh(self, x, deriv=False):
        t = (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x));
        if deriv:
            return (1 - np.square(t))
        return t
    
    def forward(self, x):
        z = np.dot(x, self.w) + self.b;
        
        #Calculate intermidiate gradients
        self.dzdw = x.T #dervative of the weights with respect to the linear unit
        self.dadz = self.activation(z, deriv=True) # derivative of activations with respect to the linear unit
        
        a = self.activation(z);
        return a;
    
    def backward(self, grad_A): #grad_A is DJ/DA(where A is the previous activation layer)
        
        m = grad_A.shape[0]
        
        dz = self.dadz*grad_A # derivative of z with respect to the loss
        
        self.dw = np.dot(self.dzdw, dz)
        self.db = np.mean(dz, axis=0, keepdims=True);
        
        return np.dot(dz,self.w.T) # derivative of the next activation layer
    
    def update(self, learning_rate):
        self.w += learning_rate * -self.dw;
        self.b += learning_rate * -self.db
    
    def __call__(self, x):
        return self.forward(x);
    
    def __repr__(self):
        return "NNLayer(input_dim={}, output_dim={}, activation={})".format(self.i_dim, 
                                                                            self.o_dim, 
                                                                            self.activation_str)
    
        

## Model Class
Composed of NNlayer objects

In [181]:
"""
TODO:

    Figure out why the fuck the loss isn't going down
    probably the cost derivative is wrong
    maybe dw calculation is wrong
    
"""


"""
activations = ['r', 'r', 'r', 'r']
layer dims = (4, 5, 7, 4, 3)
            w[4, 5], w[5, 7], w[7, 4], w[4 , 3]
"""


class NNModel():
    def __init__(self, layer_dims, activations):
        self.nlayers = len(layer_dims);
        self.activations = activations; #len(activations) will be nlayers-1 cuz the input layer.
        
        self.params = {"l"+str(i+1):NNlayer(layer_dims[i], layer_dims[i+1], activations[i])
                            for i in range(len(layer_dims)-1)}
        
    
    def forward_propogate(self, x):
        for i in self.params:
            x = self.params[i].forward(x)
        return x;
    
    def backward_propogate(self, loss_derivative): #loss_derivative -> DJ/DAL(Activation of the last layer)
        loss_derivative = loss_derivative
        for i in reversed(range(1, self.nlayers)):
            loss_derivative = self.params['l'+str(i)].backward(loss_derivative)
    
    def update_model(self, learning_rate):
        for i in range(1,self.nlayers):
            self.params['l'+str(i)].update(learning_rate);
    
    def __repr__(self):
        ret = ''
        for i in range(1,self.nlayers):
            ret += self.params['l'+str(i)].__repr__() + '\n';
        return ret
            

In [191]:
#We need to figure out what the how to calculate loss and how to derive the gradient.

activation_keys = {'r': 'relu',
                   's': 'sigmoid',
                   't': 'tanh'}



layer_dims = [INPUT_DIM, 6, 6, 6, 4, 3];
activations_k = list("s"*(len(layer_dims)-2)) + ["s"];
activations = [activation_keys[a] for a in activations_k]




In [192]:
def softmax(x):
    exps = np.exp(x - np.max(x))
    
    return exps / exps.sum()

def cross_entropy_loss(y, y_hat):
    return -np.mean(y*np.log(y_hat + 1e-9))



In [194]:
def train(model, loss_function, learning_rate, x_data, y_data, epochs=200, apply_softmax=True):
    for epoch in range(epochs):
        perm = np.random.permutation(x_data.shape[0])
        y_data = y_data[perm, :]
        x_data = x_data[perm, :]
        
        y_hat = model.forward_propogate(x_data);
        
        if apply_softmax:
            y_hat = softmax(y_hat);
        
        cost = loss_function(y_data, y_hat);
        print(cost);
        cost_derivative = y_hat - y_data;
        
        model.backward_propogate(cost_derivative);
        model.update_model(learning_rate);
    return model;

net = NNModel(layer_dims, activations)


train(net, cross_entropy_loss, 0.01, x_data, y_data, epochs=2000)
    



2.0377919906025124
2.0375529506964685
2.037297690951817
2.037058270323527
2.036855082025893
2.036695881189381
2.036578920719411
2.036497356429096
2.0364428790630758
2.0364078641734387
2.036386232905054
2.036373527684427
2.0363666456911433
2.0363634958703214
2.036362696551007
2.036363345622094
2.036364858553584
2.036366858847689
2.0363691055899005
2.0363714460722884
2.0363737849525063
2.0363760641875186
2.0363782499551437
2.0363803241063168
2.0363822785620402
2.036384111631652
2.0363858255904383
2.0363874250865632
2.036388916096484
2.0363903052444576
2.036391599364352
2.0363928052229094
2.0363939293505093
2.0363949779433046
2.036395956812421
2.036396871363883
2.036397726598216
2.036398527122312
2.036399277168583
2.036399980618056
2.036400641025246
2.036401261643363
2.0364018454489554
2.0364023951654517
2.0364029132852854
2.0364034020904542
2.0364038636714725
2.036404299944736
2.0364047126683644
2.0364051034566115
2.0364054737929354
2.0364058250418426
2.0364061584596063
2.036406475203956

2.036415515182011
2.0364155155708037
2.0364155159580424
2.0364155163437374
2.0364155167278963
2.0364155171105294
2.036415517491644
2.0364155178712497
2.0364155182493544
2.0364155186259674
2.036415519001097
2.0364155193747515
2.036415519746939
2.036415520117669
2.0364155204869485
2.036415520854786
2.03641552122119
2.036415521586168
2.0364155219497286
2.0364155223118794
2.0364155226726286
2.036415523031984
2.0364155233899526
2.0364155237465433
2.036415524101763
2.03641552445562
2.0364155248081217
2.0364155251592746
2.036415525509087
2.036415525857567
2.036415526204721
2.0364155265505564
2.0364155268950803
2.0364155272383
2.0364155275802234
2.0364155279208567
2.0364155282602074
2.036415528598282
2.036415528935088
2.0364155292706325
2.0364155296049216
2.0364155299379623
2.0364155302697613
2.036415530600326
2.0364155309296628
2.0364155312577776
2.0364155315846775
2.036415531910369
2.0364155322348583
2.0364155325581526
2.0364155328802576
2.0364155332011795
2.0364155335209255
2.03641553383950

2.0364156207740116
2.036415620850799
2.0364156209274458
2.036415621003953
2.036415621080321
2.0364156211565496
2.03641562123264
2.036415621308592
2.0364156213844065
2.0364156214600833
2.036415621535623
2.0364156216110256
2.036415621686292
2.036415621761423
2.036415621836418
2.0364156219112783
2.0364156219860035
2.036415622060594
2.036415622135051
2.0364156222093737
2.036415622283563
2.03641562235762
2.0364156224315435
2.0364156225053356
2.0364156225789953
2.0364156226525236
2.036415622725921
2.0364156227991868
2.036415622872323
2.0364156229453285
2.0364156230182044
2.036415623090951
2.036415623163569
2.0364156232360577
2.036415623308418
2.0364156233806505
2.0364156234527555
2.036415623524733
2.0364156235965836
2.0364156236683075
2.0364156237399054
2.036415623811377
2.036415623882723
2.036415623953944
2.03641562402504
2.0364156240960116
2.0364156241668585
2.036415624237582
2.0364156243081815
2.036415624378658
2.036415624449011
2.036415624519242
2.0364156245893508
2.036415624659337
2.036

2.0364156522763404
2.036415652306997
2.03641565233762
2.0364156523682095
2.036415652398765
2.0364156524292865
2.036415652459775
2.0364156524902297
2.0364156525206503
2.036415652551038
2.0364156525813923
2.0364156526117134
2.036415652642001
2.036415652672255
2.036415652702477
2.0364156527326647
2.0364156527628197
2.0364156527929413
2.0364156528230306
2.036415652853087
2.0364156528831105
2.036415652913101
2.0364156529430586
2.036415652972984
2.0364156530028765
2.0364156530327366
2.0364156530625643
2.036415653092359
2.036415653122122
2.0364156531518525
2.0364156531815505
2.0364156532112165
2.03641565324085
2.036415653270452
2.036415653300021
2.0364156533295588
2.036415653359064
2.036415653388538
2.0364156534179796
2.03641565344739
2.0364156534767677
2.0364156535061144
2.0364156535354296
2.036415653564713
2.036415653593965
2.036415653623185
2.0364156536523743
2.0364156536815314
2.036415653710658
2.0364156537397533
2.036415653768817
2.03641565379785
2.036415653826851
2.036415653855822
2.036

NNLayer(input_dim=4, output_dim=6, activation=sigmoid)
NNLayer(input_dim=6, output_dim=6, activation=sigmoid)
NNLayer(input_dim=6, output_dim=6, activation=sigmoid)
NNLayer(input_dim=6, output_dim=4, activation=sigmoid)
NNLayer(input_dim=4, output_dim=3, activation=sigmoid)