## Question 2

In [25]:
#importing essential libraries

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from keras.datasets import fashion_mnist
from keras.utils import to_categorical

# Load the Fashion-MNIST dataset through Keras. The call returns two
# (images, labels) pairs: the training split first, then the test split.

(x_training_set, y_training_set), (x_testing_set, y_testing_set) = fashion_mnist.load_data()


In [26]:
# Prepare the data: hold out a validation split, flatten the images,
# scale the pixels to [0, 1] and one-hot encode the labels.

NUM_TRAINING_SAMPLES = 50000   # rows kept for training; the rest (10000 here) become the validation set
NUM_PIXELS = 28 * 28           # each 28x28 image is flattened into a 784-vector
NUM_CLASSES = 10               # number of label categories (matches the 10-unit output layer)
MAX_PIXEL_VALUE = 255          # raw pixel intensities range from 0 to 255

# splitting the data for cross validation
#
# The guard keeps this cell safe to re-run: the *_training_set names are
# rebound to their own first NUM_TRAINING_SAMPLES rows, so without it a second
# run would slice the already-shortened arrays and leave the validation set empty.
if x_training_set.shape[0] > NUM_TRAINING_SAMPLES:
    x_validation_set = x_training_set[NUM_TRAINING_SAMPLES:]
    y_validation_set = y_training_set[NUM_TRAINING_SAMPLES:]

    x_training_set = x_training_set[:NUM_TRAINING_SAMPLES]
    y_training_set = y_training_set[:NUM_TRAINING_SAMPLES]


# vectorising the data (reshape is a no-op when the arrays are already flat)

x_training_set = x_training_set.reshape(x_training_set.shape[0], NUM_PIXELS)
x_testing_set = x_testing_set.reshape(x_testing_set.shape[0], NUM_PIXELS)
x_validation_set = x_validation_set.reshape(x_validation_set.shape[0], NUM_PIXELS)

# normalising the data into new names so the integer-valued arrays stay available

x_train = x_training_set / MAX_PIXEL_VALUE
x_test = x_testing_set / MAX_PIXEL_VALUE
x_valid = x_validation_set / MAX_PIXEL_VALUE

# one hot encoding for labels to represent categorical variables as numerical values;
# num_classes is pinned so every split gets the same width even if a split
# happened to lack the highest label

y_train = to_categorical(y_training_set, num_classes=NUM_CLASSES)
y_test = to_categorical(y_testing_set, num_classes=NUM_CLASSES)
y_valid = to_categorical(y_validation_set, num_classes=NUM_CLASSES)


# some useful functions

#for hidden layer
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-x))) through np.logaddexp, so large
    negative inputs underflow smoothly to 0 instead of overflowing inside
    exp(-x) and emitting a RuntimeWarning.

    Parameters
    ----------
    x : scalar or array-like of real numbers

    Returns
    -------
    NumPy scalar or ndarray with the same shape as x, values in [0, 1].
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, np.negative(x)))

def gradient_sigmoid(x):
    """Derivative of the logistic sigmoid at x: s * (1 - s), where s = sigmoid(x)."""
    activation = sigmoid(x)
    return activation * (1 - activation)

#for output layer
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating, which keeps
    np.exp from overflowing without changing the result. Every row of the
    returned array sums to 1.
    """
    row_peaks = np.max(x, axis=1, keepdims=True)
    unnormalised = np.exp(x - row_peaks)
    row_totals = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_totals

In [27]:
#class definition

class Network:
  """Fully connected feed-forward network with sigmoid hidden layers and a softmax output.

  Attributes set by __init__:
    total_layers : number of layers, counting the input layer.
    Weights      : list of arrays; Weights[i] has shape (neuron_sizes[i], neuron_sizes[i+1]).
    biases       : list of arrays; biases[i] has shape (1, neuron_sizes[i+1]).

  Attributes set by forward_prop:
    pre_activations_A : list of length total_layers; entry 0 is None, entry i+1
                        holds the pre-activation of layer i+1.
    activations_H     : list of length total_layers; entry 0 is the input X,
                        entry i+1 the activation of layer i+1.
  """

  def __init__(self, neuron_sizes, seed=None):
    """Create the layers with small random weights and biases.

    Parameters:
      neuron_sizes : sequence of layer widths (input size, hidden sizes..., output size).
      seed         : optional int. When None (default) values are drawn from
                     NumPy's global generator exactly as before, so a prior
                     np.random.seed(...) call is still honoured. When given,
                     an independent RandomState(seed) is used so the
                     initialisation is reproducible on its own.

    Raises:
      ValueError : if fewer than two layer sizes are given (no weights could be built).
    """
    if len(neuron_sizes) < 2:
      raise ValueError("neuron_sizes must contain at least an input size and an output size")

    self.total_layers = len(neuron_sizes)

    # np.random (module) and RandomState both expose randn, so one code path serves both cases
    rng = np.random if seed is None else np.random.RandomState(seed)

    #initialising the weights and biases
    #After initialising storing weights and biases in separate lists
    #(all weights are drawn first, then all biases, to keep the original draw order)
    layer_shapes = [(neuron_sizes[i], neuron_sizes[i+1]) for i in range(self.total_layers-1)]
    self.Weights = [rng.randn(n_in, n_out)*0.05 for n_in, n_out in layer_shapes]
    self.biases = [rng.randn(1, n_out)*0.05 for _, n_out in layer_shapes]


  #defining function for forward propagation
  def forward_prop(self, X):
    """Run a forward pass and return the output-layer activations.

    X is multiplied by Weights[0], so it needs neuron_sizes[0] columns. The
    intermediate pre-activations and activations are stored on the instance
    (overwriting those of any previous call).
    """
    self.pre_activations_A = [None]*(self.total_layers)  #list to store the pre-activations
    self.activations_H = [X]  #list to store the activations

    output_index = self.total_layers-2  #index of the weight matrix feeding the output layer

    for i, (weight, bias) in enumerate(zip(self.Weights, self.biases)):
      pre_activation = np.dot(self.activations_H[i], weight) + bias
      self.pre_activations_A[i+1] = pre_activation

      #output layer uses softmax, every hidden layer uses sigmoid
      activation = softmax if i == output_index else sigmoid
      self.activations_H.append(activation(pre_activation))

    return self.activations_H[-1]



In [28]:
#choosing the parameters

#all of these are hyper-parameters; we can tune them to avoid overfitting
#NOTE(review): learning_rate_eta, total_epochs and batch_size are not used in the
#cells visible here - presumably the training loop further down reads them; confirm
neuron_sizes = [784, 64, 64, 10]   #layer widths: input size, two hidden layers, output size
learning_rate_eta = 0.01
total_epochs = 20
batch_size = 128

#initialization of the neural network by making an object of class Network
#(the constructor draws small random weights and biases for every layer)
my_model = Network(neuron_sizes)

In [29]:
#forward pass over the whole normalised training set; no training step has been
#run on my_model in the cells above, so these are the outputs of the randomly
#initialised network
pred = my_model.forward_prop(x_train)

In [30]:
#show the softmax outputs, one row per sample (NumPy abbreviates the printout with "...")
print(pred)

[[0.08307613 0.07371361 0.123548   ... 0.0983791  0.08373833 0.10773881]
 [0.08252284 0.07437419 0.12525714 ... 0.0977909  0.084644   0.10735657]
 [0.08319421 0.07368614 0.12497619 ... 0.098262   0.08428977 0.10748327]
 ...
 [0.08284599 0.0736776  0.12514944 ... 0.09824595 0.08437049 0.10729375]
 [0.08289171 0.07366876 0.12487186 ... 0.09831357 0.08428044 0.107841  ]
 [0.08322459 0.07360706 0.124295   ... 0.09798421 0.0844034  0.10730283]]


In [33]:
#sanity check: the softmax outputs of the first sample should sum to 1
#(rounded to 6 decimals to hide floating-point error)
print(round(sum(pred[0]),6))

1.0
