In [33]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn.datasets import load_iris

In [34]:
def loadIrisData():
    '''
    Load the Iris dataset and print the shapes of its two arrays.

    Outputs:
        X: the 'data' entry of the loaded dataset (feature matrix)
        t: the 'target' entry of the loaded dataset (class labels)
    '''
    dataset = load_iris()
    features, labels = dataset['data'], dataset['target']
    # Report the feature matrix shape first, then the label vector shape.
    for arr in (features, labels):
        print(arr.shape)
    return features, labels


if __name__ == "__main__":
    loadIrisData()

(150, 4)
(150,)


In [35]:
def one_hot_encoding(t_indices, N):
    '''
    Convert a sequence of class indices into one-hot row vectors.

    Inputs:
        t_indices: list / 1-d array of integer class indices, each in [0, N)
        N: total no. of classes
    Outputs:
        t_1hot: float np array of shape (len(t_indices), N); row k has a 1
                in column t_indices[k] and 0 everywhere else
    Raises:
        AssertionError: if any index is >= N or negative
    '''
    idx = np.asarray(t_indices)

    # An empty label list is valid input: there is simply nothing to encode.
    # (max() on an empty sequence would raise otherwise.)
    if idx.size == 0:
        return np.zeros((0, N))

    assert N > idx.max(), (N, idx.max())
    # Negative indices would silently wrap around to the last columns.
    assert idx.min() >= 0, ("negative class index", idx.min())

    t_1hot = np.zeros((idx.size, N))
    t_1hot[np.arange(idx.size), idx] = 1
    return t_1hot

In [36]:
def test_one_hot_encoding():
    '''Check one_hot_encoding on a two-sample, three-class example.'''
    expected = np.array([[1., 0., 0.],
                         [0., 0., 1.]])
    encoded = one_hot_encoding([0, 2], 3)
    assert np.isclose(encoded, expected).all()
    print('Test passed', '\U0001F44D')


if __name__ == "__main__":
    test_one_hot_encoding()

Test passed 👍


In [37]:
def splitData(X,t,testFraction=0.2, seed=None):
    """
    Randomly split samples and targets into a training and a test part.
    Use numpy functions only
    Inputs:
        X: np array of shape (Nsamples, dim)
        t: np array of len Nsamples; can be one hot vectors or labels
        testFraction: (float in [0, 1]) Nsamples_test = testFraction * Nsamples
        seed: (optional int) if given, the shuffle uses its own generator seeded
              with this value so the split is reproducible; if None the global
              numpy random state is used
    Outputs:
        X_train, t_train, X_test, t_test: np arrays; rows of X and t stay aligned
    Raises:
        ValueError: if X and t disagree on the number of samples or
                    testFraction lies outside [0, 1]
    """
    X = np.asarray(X)
    t = np.asarray(t)
    n_samples = t.shape[0]

    if X.shape[0] != n_samples:
        raise ValueError(
            "X and t must have the same number of samples, got %d and %d"
            % (X.shape[0], n_samples))
    if not 0.0 <= testFraction <= 1.0:
        raise ValueError("testFraction must be in [0, 1], got %r" % (testFraction,))

    order = np.arange(n_samples)
    if seed is None:
        np.random.shuffle(order)
    else:
        np.random.default_rng(seed).shuffle(order)

    # The small tolerance stops float round-off from dropping a sample,
    # e.g. 0.29 * 100 evaluates to 28.999999999999996.
    n_test = int(testFraction * n_samples + 1e-9)

    test_idx, train_idx = order[:n_test], order[n_test:]
    return X[train_idx], t[train_idx], X[test_idx], t[test_idx]

In [38]:
def test_splitData():
    '''Check the train/test shapes produced by splitData for 5 samples and a 0.2 test fraction.'''
    X = np.random.random((5, 2))
    labels = [1, 0, 2, 1, 2]
    t1hot = one_hot_encoding(labels, 3)
    X_train, _, X_test, _ = splitData(X, t1hot, .2)
    assert X_train.shape == (4, 2), ["X_train.shape", X_train.shape]
    assert X_test.shape == (1, 2), ["X_test.shape", X_test.shape]
    print('Test passed', '\U0001F44D')


if __name__ == "__main__":
    test_splitData()

Test passed 👍


In [39]:
### Normalize data to be of zero mean and unit variance
def normalizeX(X_train, X_test):
    '''
    Standardize every feature column to zero mean and unit variance.

    The mean and standard deviation are computed on X_train only and the
    same values are applied to X_test.

    Inputs:
        X_train: np array 2d (Nsamples_train, dim)
        X_test: np array 2d (Nsamples_test, dim)
    Outputs:
        Normalized np arrays 2d (X_train_normalized, X_test_normalized);
        the inputs are not modified
    '''
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)

    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)
    # A feature that is constant in the training set has std 0; dividing by it
    # would give NaN/inf. Such a column is only centred (scale left at 1).
    std = np.where(std == 0, 1.0, std)

    X_train_normalized = (X_train - mean) / std
    X_test_normalized = (X_test - mean) / std
    return X_train_normalized, X_test_normalized

In [40]:
def test_normalizeX():
    '''Check normalizeX on a small fixed train/test example.'''
    train_raw = np.array([[1, 1, 0], [2, 2, 1]])
    test_raw = np.array([[1, 1, 0], [3, 3, 2]])
    # Expected results for the train and the test array respectively.
    a = np.array([[-1., -1., -1.], [1., 1., 1.]])
    b = np.array([[-1., -1., -1.], [3., 3., 3.]])
    X_train_normalized, X_test_normalized = normalizeX(train_raw, test_raw)
    assert np.isclose(X_train_normalized, a).all(), a
    assert np.isclose(X_test_normalized, b).all(), b
    print('Test passed', '\U0001F44D')


if __name__ == "__main__":
    test_normalizeX()

Test passed 👍


In [41]:
def sigmoid(x):
    '''
    Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    Input:
        x: numpy array of any shape (scalars are accepted too)
    Output:
        y: numpy array of same shape as x
    '''
    x = np.asarray(x, dtype=float)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow, whereas exp(-x)
    # overflows for large negative x. The two branches are algebraically
    # identical to 1 / (1 + exp(-x)).
    decay = np.exp(-np.abs(x))
    y = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where turns a scalar input into a 0-d array; hand back a scalar then.
    return y if y.ndim else y[()]

In [42]:
def test_sigmoid():
    '''Check sigmoid at log(4), log(0.25) and 0.'''
    inputs = np.array([np.log(4), np.log(0.25), 0])
    expected = np.array([0.8, 0.2, 0.5])
    y = sigmoid(inputs)
    assert np.isclose(y, expected).all(), y
    print('Test passed', '\U0001F44D')


if __name__ == "__main__":
    test_sigmoid()

Test passed 👍


In [43]:
def softmax(x, axis=None):
    '''
    Numerically stable softmax.

    Input:
        x: numpy array of any shape
        axis: axis along which the outputs sum to 1. The default (None)
              normalizes over all elements of x, i.e. x is treated as one
              vector of scores; pass e.g. axis=1 for a batch of row vectors.
    Output:
        y: numpy array of same shape as x
    '''
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        # Nothing to normalize (np.max would raise on an empty array).
        return x.copy()

    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps every exponent <= 0, so exp() cannot overflow to inf (-> NaN).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    y = np.exp(shifted)
    return y / np.sum(y, axis=axis, keepdims=True)

In [44]:
def test_softmax():
    '''Check softmax on the scores log(2), log(7) and 0.'''
    scores = np.array([np.log(2), np.log(7), 0])
    expected = np.array([0.2, 0.7, 0.1])
    y = softmax(scores)
    assert np.isclose(y, expected).all(), y
    print('Test passed', '\U0001F44D')


if __name__ == "__main__":
    test_softmax()

Test passed 👍


In [45]:
def sigmoid_derivative(x):
    '''
    Derivative of the sigmoid expressed through the sigmoid's own output.

    Input:
        x: numpy array of any shape; it is sigmoid layer's output
    Output:
        y: numpy array of same shape as x; it is the derivative of sigmoid,
           computed element-wise as x * (1 - x)
    '''
    complement = 1 - x
    return x * complement

In [46]:
class NeuralNetwork:
    '''
    Fully connected network with one sigmoid hidden layer and a softmax output
    layer, trained by per-sample gradient descent on the categorical
    cross-entropy loss.

    A constant bias unit (value 1) is prepended to the input and to the hidden
    activations, so weights1 has shape (nh, ni+1) and weights2 has shape
    (no, nh+1); column 0 of each matrix holds the bias weights.
    '''
    def __init__(self, ni, nh, no):
        '''   
        Input:
            ni: int, size of input layer
            nh: int, size of hidden layer
            no: int, size of output layer
        Action:
            Creates instance variables
        NOTE: The bias is handled inside the network (a 1 is prepended to the
              input and to the hidden activations), so x must NOT already
              contain a bias element.
        '''
        self.ni = ni
        self.nh = nh
        self.no = no
        self.weights1 = []
        self.weights2 = []
        # Mean training loss of every epoch of the most recent fit() call.
        self.loss_history = []
        return
    
    def init_weights(self):
        '''
        Action:
            Randomly initialize weights1 and weights2 with values drawn
            uniformly from [-1, 1). The extra column holds the bias weights.
        '''
        self.weights1 = np.random.uniform(-1, 1, (self.nh,self.ni+1))
        self.weights2 = np.random.uniform(-1, 1, (self.no,self.nh+1))

    def _forward(self, x):
        '''
        Forward pass shared by predict() and the gradient computation.
        Output:
            (x with bias, hidden activations, hidden activations with bias,
             softmax output)
        '''
        x_b = np.insert(x,0,1,axis=0) # prepends a 1 (a row of 1s for 2d input): bias
        hidden = sigmoid(self.weights1.dot(x_b))
        hidden_b = np.insert(hidden,0,1,axis=0) # bias unit of the hidden layer
        out = softmax(self.weights2.dot(hidden_b))
        return x_b, hidden, hidden_b, out
    
    def predict(self, x):
        '''
        Input:
            x: one sample, np array of shape (ni,) or (ni,1)
        Output:
            softmax output of the network for x
        '''
        return self._forward(x)[-1]

    def _gradients(self, x1, y1):
        '''
        Gradients of the cross-entropy loss for one sample, plus the loss itself.
        Output:
            (del_weights1, del_weights2, loss)
        '''
        x = np.asarray(x1, dtype=float).reshape(-1, 1)
        y = np.asarray(y1, dtype=float).reshape(-1, 1)

        x_b, hidden, hidden_b, out = self._forward(x)
        if out.shape != y.shape:
            # Without this check a scalar label would silently broadcast.
            raise ValueError(
                "target must be a one-hot vector of length %d, got shape %s"
                % (out.shape[0], np.shape(y1)))

        # Softmax combined with cross-entropy: dL/dh2 = v2 - y
        delta_out = out - y
        del_weights2 = delta_out @ hidden_b.T

        # Back-propagate through weights2 (bias column skipped, it has no
        # incoming connection) and through the sigmoid of the hidden layer.
        delta_hidden = sigmoid_derivative(hidden) * (self.weights2[:, 1:].T @ delta_out)
        del_weights1 = delta_hidden @ x_b.T

        # Clipping avoids log(0) when a probability underflows.
        loss = -float(np.sum(y * np.log(np.clip(out, 1e-12, None))))
        return del_weights1, del_weights2, loss

    def backprop(self,x1,y1,eta):
        '''
        # application of the chain rule to find derivative of the categorical cross entropy loss function with respect to weights2 and weights1
        Input:
            x1: one input sample, numpy array of shape (ni,) or (ni,1)
            y1: its one-hot target, numpy array of shape (no,) or (no,1)
            eta: learning rate; NOT used here, kept only so that existing
                 callers keep working
        Output:
            del_weights1: np array, derivative of the loss with respect to weights1
            del_weights2: np array, derivative of the loss with respect to weights2
        NOTE: this method does not modify the weights. The caller performs
              the descent step:
              weights1 -= eta*del_weights1
              weights2 -= eta*del_weights2
        Raises:
            ValueError: if y1 does not have one entry per output unit
        ''' 
        del_weights1, del_weights2, _ = self._gradients(x1, y1)
        return del_weights1, del_weights2

    def fit(self, X, t, eta, epochs):
        '''
        input:
            X: training input data, np array (Nsamples, ni)
            t: training targets as one-hot rows, np array (Nsamples, no)
            eta: learning rate
            epochs: number of epochs
        Action:
            Re-initializes the weights, then trains them with one gradient
            descent step per sample, visiting the samples in the given order.
            The mean loss of every epoch (measured on each sample just before
            its update) is stored in self.loss_history.
        Raises:
            ValueError: if X and t disagree on the number of samples
        '''
        X = np.asarray(X)
        t = np.asarray(t)
        n_samples = t.shape[0]
        if X.shape[0] != n_samples:
            raise ValueError(
                "X and t must have the same number of samples, got %d and %d"
                % (X.shape[0], n_samples))

        self.init_weights()
        self.loss_history = []
        for _ in range(epochs):
            epoch_loss = 0.0
            for x_i, t_i in zip(X, t):
                del_weights1, del_weights2, loss = self._gradients(x_i, t_i)
                self.weights1 -= eta * del_weights1
                self.weights2 -= eta * del_weights2
                epoch_loss += loss
            self.loss_history.append(epoch_loss / max(n_samples, 1))
        
    def predict_label(self,x):    
        '''
        Input:
            x: np array (Nsamples, ni), one sample per row
        Output:
            y: integer np array of shape (Nsamples, 1) with the index of the
               most probable class of every sample
        '''
        x = np.asarray(x)
        y = np.zeros((x.shape[0], 1), dtype=int)
        for row, sample in enumerate(x):
            y[row] = np.argmax(self.predict(sample))
        return y

In [47]:
### Lastly, report the accuracy of your model and print the Confusion Matrix
#printing the confusion matrix
def getCM(y,t, n_classes=3):
    '''
    Build the confusion matrix of predicted versus true labels.

    Inputs:
        y: estimated labels np array (Nsample,1) or (Nsamples,)
        t: targets np array (Nsamples,1) or (Nsamples,)
        n_classes: number of classes, i.e. the size of the matrix (default 3)
    Outputs:
        CM : np array of confusion matrix, shape (n_classes, n_classes);
             CM[i, j] counts the samples whose true label is i and whose
             estimated label is j
    Raises:
        ValueError: if y and t do not contain the same number of labels
    '''
    # ravel() accepts both (N,) and (N,1) inputs; astype(int) truncates
    # float-valued labels the same way int() does.
    predicted = np.asarray(y).ravel().astype(int)
    actual = np.asarray(t).ravel().astype(int)
    if predicted.size != actual.size:
        raise ValueError(
            "y and t must have the same number of labels, got %d and %d"
            % (predicted.size, actual.size))

    CM = np.zeros((n_classes, n_classes))
    # np.add.at accumulates correctly when the same (row, col) pair repeats,
    # which a plain fancy-indexed "+=" would not.
    np.add.at(CM, (actual, predicted), 1)
    return CM

#### Experiments
Use the above functions to carry out the experiment:
- load iris data and prepare it for NN
- split the data randomly, holding out 20% as test data
- create a NN with 1 hidden layer
- train the network with training data
- Plot loss w.r.t. number of epochs
- Print confusion matrix on test data

In [48]:
def _train_with_loss_history(NN, X, t, eta, epochs):
    '''Train NN with one gradient descent step per sample and return the mean cross-entropy loss after every epoch.'''
    NN.init_weights()
    losses = []
    for _ in range(epochs):
        for x_i, t_i in zip(X, t):
            del_weights1, del_weights2 = NN.backprop(x_i, t_i, eta)
            NN.weights1 -= eta * del_weights1
            NN.weights2 -= eta * del_weights2
        # t_i is one-hot, so the dot product picks the probability the network
        # assigns to the true class of the sample.
        p_true = np.array([np.sum(t_i * NN.predict(x_i)) for x_i, t_i in zip(X, t)])
        # Clipping avoids log(0) when a probability underflows.
        losses.append(float(-np.mean(np.log(np.clip(p_true, 1e-12, None)))))
    return losses


def experiment(eta=0.1, epochs=1000, n_hidden=10, seed=None):
    '''
    Run the complete Iris experiment: load the data, hold out 20% for testing,
    normalize, train a network with one hidden layer, plot the training loss
    against the epoch number and print the confusion matrix and the accuracy
    on the test data.

    Inputs:
        eta: learning rate
        epochs: number of passes over the training data
        n_hidden: number of hidden units
        seed: (optional int) seeds numpy's global random state so that the
              split and the weight initialization are reproducible
    '''
    N_CLASSES = 3  # Iris has three classes; getCM also builds a 3x3 matrix

    if seed is not None:
        np.random.seed(seed)

    X, t = loadIrisData()
    X_train, t_train, X_test, t_test = splitData(X, t)
    X_train, X_test = normalizeX(X_train, X_test)
    t_train = one_hot_encoding(t_train, N_CLASSES)

    NN = NeuralNetwork(X_train.shape[1], n_hidden, N_CLASSES)
    # The training loop is spelled out in the helper so that the loss can be
    # sampled after every epoch.
    losses = _train_with_loss_history(NN, X_train, t_train, eta, epochs)

    fig, ax = plt.subplots()
    ax.plot(np.arange(1, len(losses) + 1), losses)
    ax.set(xlabel='Epoch', ylabel='Mean cross-entropy loss',
           title='Training loss')
    plt.show()

    y = NN.predict_label(X_test)
    CM = getCM(y.T[0], t_test)
    print(CM)
    # Correct predictions sit on the diagonal of the confusion matrix.
    n_test = CM.sum()
    print("Accuracy:", np.trace(CM) / n_test if n_test else float('nan'))


if __name__=="__main__":
    experiment()

(150, 4)
(150,)
[[15.  0.  0.]
 [ 0.  6.  0.]
 [ 0.  1.  8.]]
