In [1]:
import numpy as np
import random

In [2]:
def Preprocess(FILE_NAME = "seeds_dataset.txt", train_fraction=0.8, seed=None):
    """Parse, normalise, shuffle and split the dataset, then save it to disk.

    Each non-blank line of ``FILE_NAME`` is split on whitespace; every field
    but the last is parsed as a float feature and the last field ('1', '2'
    or '3') is turned into a one-hot label.

    Features are z-score normalised using the statistics of the whole file
    (i.e. before the split), shuffled together with their labels, and split
    into a leading training part and a trailing test part.  The four parts
    are written to ``train_feats.txt``, ``test_feats.txt``,
    ``train_labels.txt`` and ``test_labels.txt`` in the current directory.

    Parameters
    ----------
    FILE_NAME : str
        Path of the whitespace-separated data file.
    train_fraction : float, optional
        Fraction of the samples placed in the training split (default 0.8).
    seed : int or None, optional
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        split is reproducible.  When ``None`` (default) the module-level
        ``random`` state is used, exactly as before.

    Raises
    ------
    ValueError
        If the file contains no data rows.
    KeyError
        If a label field is not one of '1', '2', '3'.
    """
    convert2onehot = {'1': [1, 0, 0], '2': [0, 1, 0], '3': [0, 0, 1]}

    features = []
    labels = []
    # `with` guarantees the handle is closed even if parsing raises.
    with open(FILE_NAME, "r") as fo:
        for line in fo:
            fields = line.split()
            if not fields:
                # Blank / whitespace-only lines carry no sample; indexing the
                # label field on them would raise IndexError.
                continue
            features.append([float(item) for item in fields[:-1]])
            labels.append(convert2onehot[fields[-1]])

    if not features:
        raise ValueError("no data rows found in " + repr(FILE_NAME))

    features = np.array(features)

    # Z-score normalisation
    means = np.mean(features, axis=0)
    standardDevs = np.std(features, axis=0)
    # A constant column has zero deviation; dividing by it would give NaN.
    # Dividing by 1 instead maps such a column to all zeros.
    standardDevs[standardDevs == 0] = 1.0
    normalizedFeatures = (features - means) / standardDevs

    print(normalizedFeatures.shape, np.shape(labels))
    print(normalizedFeatures[0:2], labels[0:2])

    # Randomly shuffle features and labels with one shared permutation.
    # Shuffling an index list draws the same random numbers as shuffling the
    # zipped samples would, so a seeded global `random` gives the same split.
    order = list(range(len(labels)))
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(order)
    normalizedFeatures = normalizedFeatures[order]
    labels = np.array(labels)[order]

    # Training / test split
    train_size = int(len(labels) * train_fraction)
    train_feats, train_labels = normalizedFeatures[:train_size], labels[:train_size]
    test_feats, test_labels = normalizedFeatures[train_size:], labels[train_size:]

    # Save the training and test data
    np.savetxt("train_feats.txt", train_feats)
    np.savetxt("test_feats.txt", test_feats)
    np.savetxt("train_labels.txt", train_labels, fmt="%d")
    np.savetxt("test_labels.txt", test_labels, fmt="%d")

# Run the preprocessing once with the default file name; this writes the four
# train/test text files that Data_loader() reads back later.
Preprocess()

(210, 7) (210, 3)
[[ 1.42097769e-01  2.15462437e-01  6.06017918e-05  3.04218199e-01
   1.41701823e-01 -9.86151745e-01 -3.83577423e-01]
 [ 1.11880257e-02  8.22375713e-03  4.28515270e-01 -1.68624664e-01
   1.97432229e-01 -1.78816620e+00 -9.22013487e-01]] [[1, 0, 0], [1, 0, 0]]


In [3]:
def Data_loader(batch_size=32):
    """Load the saved train/test split and cut both parts into mini-batches.

    Reads ``train_feats.txt``, ``test_feats.txt``, ``train_labels.txt`` and
    ``test_labels.txt`` from the current directory.

    Parameters
    ----------
    batch_size : int, optional
        Maximum number of samples per mini-batch (default 32).  The last
        batch of each split holds the remainder and may be smaller.

    Returns
    -------
    tuple
        ``(train_mini_batched, test_mini_batched, train_feats, train_labels,
        test_feats, test_labels)`` where the two ``*_mini_batched`` entries
        are lists of ``(features, labels)`` pairs and the remaining four are
        the full 2-D arrays.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # ndmin=2 keeps a file holding a single sample as a (1, n) matrix instead
    # of collapsing it to a 1-D vector.
    train_feats = np.loadtxt("train_feats.txt", ndmin=2)
    test_feats = np.loadtxt("test_feats.txt", ndmin=2)
    train_labels = np.loadtxt("train_labels.txt", dtype=int, ndmin=2)
    test_labels = np.loadtxt("test_labels.txt", dtype=int, ndmin=2)

    def _batches(feats, labels):
        # Slice by the actual number of rows rather than a hard-coded size;
        # slicing past the end simply yields the shorter final batch.
        return [(feats[start:start + batch_size], labels[start:start + batch_size])
                for start in range(0, len(feats), batch_size)]

    train_mini_batched = _batches(train_feats, train_labels)
    # The test batches must come from the test arrays (they were previously
    # sliced out of the training arrays by mistake).
    test_mini_batched = _batches(test_feats, test_labels)
    return train_mini_batched, test_mini_batched, train_feats, train_labels, test_feats, test_labels

In [4]:
class crossEntropyLoss:
    """Cross-entropy between predicted probabilities and one-hot targets.

    ``forward`` returns the loss summed over every sample and class, while
    ``backward`` returns the gradient with respect to ``p`` divided by the
    number of samples (the first dimension of ``y_true``).
    """

    # Probabilities are floored at this value before taking log / reciprocal.
    # An exact zero would otherwise produce 0 * (-inf) = NaN in `forward` and
    # 0 * inf = NaN in `backward`, even for classes whose target is 0.
    EPS = 1e-12

    def __init__(self):
        pass

    def forward(self, p, y_true):
        """Return ``-sum(y_true * log(p))`` as a scalar.

        ``p`` and ``y_true`` must have the same (or broadcastable) shape.
        """
        p = np.maximum(np.asarray(p, dtype=float), self.EPS)
        return -1 * np.sum(np.asarray(y_true) * np.log(p))

    def backward(self, p, y_true):
        """Return ``-(y_true / p) / N`` where ``N = shape(y_true)[0]``.

        The result has the shape of ``p``.
        """
        p = np.maximum(np.asarray(p, dtype=float), self.EPS)
        return -1 * (np.asarray(y_true) * (1 / p)) / (np.shape(y_true)[0])


In [5]:
class Dense:
    """Fully connected layer ``activation(x @ W + b)`` with manual backprop.

    Attributes
    ----------
    W : ndarray, shape (in_dims, out_dims)
        Weight matrix, initialised from a standard normal distribution.
    b : ndarray, shape (1, out_dims)
        Bias row, initialised from a standard normal distribution.
    activation : None, "sigmoid" or "relu"
        Name of the element-wise activation (``None`` means identity).
    activation_func, activation_grad_func : callable or None
        ReLU and its derivative when ``activation == "relu"``, else ``None``.
    cache : dict
        Values stored by ``forward`` for the next ``backward`` call.
    grads : dict
        ``'dW'`` and ``'db'`` stored by ``backward`` for ``update_weights``.
    """

    _SUPPORTED_ACTIVATIONS = (None, "sigmoid", "relu")

    def __init__(self, in_dims, out_dims, activation=None):
        """Create the layer.

        Raises
        ------
        ValueError
            If ``activation`` is not ``None``, "sigmoid" or "relu".
        """
        # Fail early: an unknown name used to make `forward` return None.
        if activation not in self._SUPPORTED_ACTIVATIONS:
            raise ValueError("unsupported activation: " + repr(activation))

        self.W = np.random.randn(in_dims, out_dims)
        self.b = np.random.randn(1, out_dims)
        self.activation = activation
        self.activation_func = None
        self.activation_grad_func = None
        if activation == "relu":
            # Array-level np.where instead of np.vectorize, which only wraps
            # a per-element Python loop.
            self.activation_func = lambda s: np.where(s < 0, 0.0, s)
            self.activation_grad_func = lambda z: np.where(z <= 0, 0.0, 1.0)
        self.cache = {}
        self.grads = {}

    def forward(self, x):
        """Return the layer output for a batch ``x`` of shape (N, in_dims).

        The input (and, for non-identity activations, the output) is cached
        for the following ``backward`` call.
        """
        x = np.asarray(x)
        self.cache['x'] = x
        s = np.matmul(x, self.W) + self.b

        if self.activation is None:
            return s
        if self.activation == "sigmoid":
            z = 1 / (1 + np.exp(-1 * s))
        elif self.activation == "relu":
            z = self.activation_func(s)
        else:
            raise ValueError("unsupported activation: " + repr(self.activation))
        self.cache['z'] = z
        return z

    def backward(self, grads):
        """Back-propagate ``grads`` (gradient w.r.t. this layer's output).

        Stores ``grads['dW']`` (in_dims, out_dims) and ``grads['db']``
        (1, out_dims), clears the cache, and returns the gradient with
        respect to the input, shape (N, in_dims).
        """
        # Gradient with respect to the pre-activation s.
        if self.activation is None:
            ds = grads
        elif self.activation == "sigmoid":
            z = self.cache['z']
            ds = grads * z * (1 - z)
        elif self.activation == "relu":
            # z > 0 exactly where s > 0, so the cached output is enough.
            z = self.cache['z']
            ds = grads * self.activation_grad_func(z)
        else:
            raise ValueError("unsupported activation: " + repr(self.activation))

        self.grads['db'] = np.sum(ds, axis=0, keepdims=True)

        x = self.cache['x']
        # sum_n x[n, i] * ds[n, j] as one matrix product; avoids building an
        # (N, in_dims, out_dims) temporary.
        self.grads['dW'] = np.matmul(x.T, ds)

        # Plain 2-D product keeps the (N, in_dims) shape.  The earlier
        # matmul + np.squeeze dropped the batch axis when N == 1 (and the
        # feature axis when in_dims == 1).
        dx = np.matmul(ds, self.W.T)

        self.cache = {}
        return dx

    def update_weights(self, learning_rate):
        """Apply one gradient-descent step and clear the stored gradients."""
        self.W = self.W - learning_rate * self.grads['dW']
        self.b = self.b - learning_rate * self.grads['db']
        self.grads = {}


In [6]:
class softmax:
    """Row-wise softmax layer with manual backprop (no trainable weights)."""

    def __init__(self, num_classes):
        # `I` and `transpose_axis` are no longer needed by `backward`; they
        # are kept so code that reads these attributes keeps working.
        self.I = np.identity(num_classes)
        self.cache = {}
        self.transpose_axis = np.array([0, 2, 1])

    def forward(self, z):
        """Return softmax probabilities for scores ``z`` of shape (N, C).

        The row maximum is subtracted before exponentiating so that
        ``np.exp`` cannot overflow.  The probabilities are cached for the
        next ``backward`` call.
        """
        z = np.asarray(z)
        shifted = z - np.max(z, axis=1, keepdims=True)
        exps = np.exp(shifted)
        P = exps / np.sum(exps, axis=1, keepdims=True) # P = probabilities
        self.cache['P'] = P
        return P

    def backward(self, dp):
        """Return the gradient w.r.t. the scores, shape (N, C).

        ``dp`` is the gradient w.r.t. the probabilities returned by the last
        ``forward`` call.  The cache is cleared.

        With Jacobian ``dP_j/dz_i = P_j * (delta_ij - P_i)`` the product
        ``sum_j dp_j * dP_j/dz_i`` simplifies to
        ``P_i * (dp_i - sum_j dp_j * P_j)``, which is computed directly.
        This avoids building an (N, C, C) Jacobian and, unlike the earlier
        matmul + np.squeeze, keeps the batch axis when N == 1.
        """
        P = self.cache['P']
        self.cache = {}
        weighted = np.asarray(dp) * P
        return weighted - P * np.sum(weighted, axis=1, keepdims=True)

    def update_weights(self, learning_rate):
        pass # No weights to update

In [7]:
def Weight_intializer(model): # Takes the model as input
    """Re-draw the parameters of every Dense layer uniformly from [-1, 1).

    Walks the layers of ``model`` in order.  Layers whose type is exactly
    ``Dense`` get a new ``W`` and then a new ``b`` of the same 2-D shape;
    every other layer is left untouched.  Nothing is returned.
    """
    dense_layers = (candidate for candidate in model if type(candidate) is Dense)
    for dense in dense_layers:
        w_shape = np.shape(dense.W)
        b_shape = np.shape(dense.b)
        # rand() samples [0, 1); scaling by 2 and shifting by -1 gives [-1, 1).
        dense.W = np.random.rand(w_shape[0], w_shape[1]) * 2 - 1
        dense.b = np.random.rand(b_shape[0], b_shape[1]) * 2 - 1


In [8]:
def Forward_pass(model, x): # Does the forward pass over all the layers
    """Feed ``x`` through every layer of ``model`` in order.

    Each layer's ``forward`` receives the previous layer's output.  The
    output of the last layer is returned (``x`` itself for an empty model).
    """
    activations = x
    for stage in model:
        activations = stage.forward(activations)
    return activations

In [9]:
def Backward_pass(model, loss_fn, probabilities, y, learning_rate):
    """Run one backward pass and one weight update for a single batch.

    Parameters
    ----------
    model : sequence of layers
        Each layer provides ``backward(grad)`` and
        ``update_weights(learning_rate)``.
    loss_fn : object
        Provides ``forward(probabilities, y)`` and
        ``backward(probabilities, y)``.
    probabilities : array
        Output of the forward pass for this batch.
    y : array
        Targets for this batch.
    learning_rate : float
        Step size handed to every layer's ``update_weights``.

    Returns
    -------
    The loss value for the batch, evaluated before the weights are updated.
    (It used to be computed and thrown away; callers that ignore the return
    value are unaffected.)
    """
    # Compute loss
    loss = loss_fn.forward(probabilities, y)

    # Backpropagate from the last layer to the first; each layer stores its
    # own parameter gradients and hands back the gradient w.r.t. its input.
    dx = loss_fn.backward(probabilities, y)
    for layer in reversed(model):
        dx = layer.backward(dx)

    # Update weights only after every gradient has been computed.
    for layer in model:
        layer.update_weights(learning_rate)

    return loss


In [10]:
def Predict(model, train_x, train_y, test_x, test_y):
    """Return ``(train_accuracy, test_accuracy)`` for ``model``.

    For each split the model output is obtained with ``Forward_pass``; a
    sample counts as correct when the arg-max of its output row equals the
    arg-max of its target row.  Accuracy is the number of correct samples
    divided by the number of rows of the split's input.
    """
    def _accuracy(inputs, onehot_targets):
        predicted_classes = np.argmax(Forward_pass(model, inputs), axis=1)
        target_classes = np.argmax(onehot_targets, axis=1)
        hits = np.sum(predicted_classes == target_classes)
        return hits / np.shape(inputs)[0]

    # Training data first, then testing data.
    train_accuracy = _accuracy(train_x, train_y)
    test_accuracy = _accuracy(test_x, test_y)
    return train_accuracy, test_accuracy

In [11]:
def Training(model, training_data, train_x, train_y, test_x, test_y, learning_rate=0.01, NUM_ITERS=200):
    """Train ``model`` with mini-batch gradient descent and track accuracy.

    The weights are re-initialised with ``Weight_intializer`` first.  Then,
    ``NUM_ITERS`` times, every ``(x, y)`` batch of ``training_data`` is run
    through ``Forward_pass`` followed by ``Backward_pass`` (cross-entropy
    loss, step size ``learning_rate``).  After every 10th pass over the
    batches the accuracy on the full train and test sets is recorded.

    Returns
    -------
    (training_accuracy, testing_accuracy) : two lists with one entry per
    recorded checkpoint.
    """
    criterion = crossEntropyLoss()
    print("Intializing the Weights")
    Weight_intializer(model)

    print("Entering the training loop")
    training_accuracy, testing_accuracy = [], []

    for epoch in range(1, NUM_ITERS + 1):
        for batch_x, batch_y in training_data:
            batch_probabilities = Forward_pass(model, batch_x)
            Backward_pass(model, criterion, batch_probabilities, batch_y, learning_rate)

        # Only every 10th pass is evaluated and reported.
        if epoch % 10:
            continue

        print("Finished", epoch, "th iteration")
        train_score, test_score = Predict(model, train_x, train_y, test_x, test_y)
        training_accuracy.append(train_score)
        testing_accuracy.append(test_score)

    return training_accuracy, testing_accuracy

In [12]:
# Load the saved split. The second return value (the pre-built test
# mini-batches) is not used here, hence the `_`.
training_data, _, train_x, train_y, test_x, test_y = Data_loader()

# Experiment 1: 7 inputs -> 32 sigmoid units -> 3 outputs -> softmax.
model = (Dense(7, 32, "sigmoid"), Dense(32, 3), softmax(3))
train_accu, test_accu = Training(model, training_data, train_x, train_y, test_x, test_y)
# One (train accuracy, test accuracy) pair per 10 training iterations.
list(zip(train_accu, test_accu))

Intializing the Weights
Entering the training loop
Finished 10 th iteration
Finished 20 th iteration
Finished 30 th iteration
Finished 40 th iteration
Finished 50 th iteration
Finished 60 th iteration
Finished 70 th iteration
Finished 80 th iteration
Finished 90 th iteration
Finished 100 th iteration
Finished 110 th iteration
Finished 120 th iteration
Finished 130 th iteration
Finished 140 th iteration
Finished 150 th iteration
Finished 160 th iteration
Finished 170 th iteration
Finished 180 th iteration
Finished 190 th iteration
Finished 200 th iteration


[(0.4642857142857143, 0.5476190476190477),
 (0.6785714285714286, 0.7857142857142857),
 (0.8035714285714286, 0.8333333333333334),
 (0.8452380952380952, 0.9285714285714286),
 (0.8571428571428571, 0.9285714285714286),
 (0.8809523809523809, 0.9047619047619048),
 (0.8809523809523809, 0.9047619047619048),
 (0.8928571428571429, 0.9047619047619048),
 (0.8988095238095238, 0.9047619047619048),
 (0.8988095238095238, 0.9285714285714286),
 (0.9107142857142857, 0.9285714285714286),
 (0.9107142857142857, 0.9285714285714286),
 (0.9166666666666666, 0.9285714285714286),
 (0.9166666666666666, 0.9285714285714286),
 (0.9166666666666666, 0.9285714285714286),
 (0.9166666666666666, 0.9285714285714286),
 (0.9226190476190477, 0.9285714285714286),
 (0.9226190476190477, 0.9285714285714286),
 (0.9285714285714286, 0.9285714285714286),
 (0.9285714285714286, 0.9285714285714286)]

In [13]:
# Experiment 2: 7 inputs -> 64 relu units -> 32 relu units -> 3 outputs -> softmax.
# Reuses training_data / train_x / train_y / test_x / test_y from the previous cell.
model = (Dense(7, 64, "relu"), Dense(64, 32, "relu"), Dense(32, 3), softmax(3))
train_accu, test_accu = Training(model, training_data, train_x, train_y, test_x, test_y)
# One (train accuracy, test accuracy) pair per 10 training iterations.
list(zip(train_accu, test_accu))

Intializing the Weights
Entering the training loop
Finished 10 th iteration
Finished 20 th iteration
Finished 30 th iteration
Finished 40 th iteration
Finished 50 th iteration
Finished 60 th iteration
Finished 70 th iteration
Finished 80 th iteration
Finished 90 th iteration
Finished 100 th iteration
Finished 110 th iteration
Finished 120 th iteration
Finished 130 th iteration
Finished 140 th iteration
Finished 150 th iteration
Finished 160 th iteration
Finished 170 th iteration
Finished 180 th iteration
Finished 190 th iteration
Finished 200 th iteration


[(0.9166666666666666, 0.9285714285714286),
 (0.9404761904761905, 0.9285714285714286),
 (0.9464285714285714, 0.9285714285714286),
 (0.9583333333333334, 0.9285714285714286),
 (0.9702380952380952, 0.9285714285714286),
 (0.9702380952380952, 0.9285714285714286),
 (0.9761904761904762, 0.9285714285714286),
 (0.9821428571428571, 0.9285714285714286),
 (0.9821428571428571, 0.9285714285714286),
 (0.9880952380952381, 0.9285714285714286),
 (0.9880952380952381, 0.9285714285714286),
 (0.9880952380952381, 0.9285714285714286),
 (0.9880952380952381, 0.9523809523809523),
 (0.9880952380952381, 0.9523809523809523),
 (0.9880952380952381, 0.9523809523809523),
 (0.9880952380952381, 0.9523809523809523),
 (0.9940476190476191, 0.9523809523809523),
 (0.9940476190476191, 0.9523809523809523),
 (0.9940476190476191, 0.9523809523809523),
 (0.9940476190476191, 0.9523809523809523)]

In [26]:
def test():
    """Run one forward/backward pass on a small network with fixed weights.

    Builds a 7 -> 8 (relu) -> 6 (relu) -> 3 -> softmax model, overwrites every
    Dense weight and bias with entries ``1 / (row + col + 1)``, prints those
    parameters, pushes two hard-coded samples (both labelled class 0) through
    the network, and back-propagates the cross-entropy gradient while
    printing the gradient shape entering each layer.

    Returns
    -------
    The ``grads`` dictionaries of the three Dense layers, first layer first.
    """
    def _reciprocal_grid(rows, cols):
        # Entry (i, j) equals 1 / (i + j + 1).
        row_idx = np.arange(rows)[:, np.newaxis]
        col_idx = np.arange(cols)[np.newaxis, :]
        return 1 / (row_idx + col_idx + 1)

    model = (Dense(7, 8, "relu"), Dense(8, 6, "relu"), Dense(6, 3), softmax(3))

    w1, w2, w3 = _reciprocal_grid(7, 8), _reciprocal_grid(8, 6), _reciprocal_grid(6, 3)
    # A single-row grid reduces to 1 / (j + 1), which is the bias pattern.
    b1, b2, b3 = _reciprocal_grid(1, 8), _reciprocal_grid(1, 6), _reciprocal_grid(1, 3)

    print(w1, "\n\n", w2, "\n\n", w3, "\n\n", b1, "\n\n", b2, "\n\n", b3, "\n\n")

    # zip stops after the three Dense layers, so the softmax layer is skipped.
    for dense, weights, bias in zip(model, (w1, w2, w3), (b1, b2, b3)):
        dense.W = weights
        dense.b = bias

    x = np.array([
        [1.42097769e-01, 2.15462437e-01, 6.06017918e-05, 3.04218199e-01,
         1.41701823e-01, -9.86151745e-01, -3.83577423e-01],
        [1.11880257e-02, 8.22375713e-03, 4.28515270e-01, -1.68624664e-01,
         1.97432229e-01, -1.78816620e+00, -9.22013487e-01],
    ])
    y = [[1, 0, 0], [1, 0, 0]]

    loss_fn = crossEntropyLoss()
    predicted_probabilities = Forward_pass(model, x)

    upstream = loss_fn.backward(predicted_probabilities, y)
    for layer in reversed(model):
        print(upstream.shape)
        upstream = layer.backward(upstream)

    return model[0].grads, model[1].grads, model[2].grads
