<a href="https://colab.research.google.com/github/swapnilmn/Assignment_1-CS6910/blob/master/Question_2ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 2. Implement a feedforward neural network which takes images from the fashion-mnist data as input and outputs a probability distribution over the 10 classes.

Your code should be flexible such that it is easy to change the number of hidden layers and the number of neurons in each hidden layer.

# Libraries 

In [1]:
from keras.datasets import fashion_mnist, mnist
import numpy as np
import matplotlib.pyplot as plt
import math
import copy
from sklearn.model_selection import train_test_split 
import pickle
import pandas as pd
import os
from tqdm import tqdm_notebook 
from sklearn.preprocessing import OneHotEncoder

In [2]:
def preprocess(data):
    """Load a Keras image dataset and prepare it for a fully connected network.

    The official training set is split 90/10 into train/validation subsets
    (fixed ``random_state=42``). All three splits are divided by 255.0,
    flattened to one row per image, and their integer labels are one-hot
    encoded with an encoder fitted on the training labels only.

    Args:
        data: Dataset name, either ``'fashion_mnist'`` or ``'mnist'``.

    Returns:
        Tuple ``(X_train, X_val, X_test, Y_train, Y_val, Y_test)``. The ``X_*``
        entries are 2-D float arrays (one flattened image per row). The ``Y_*``
        entries are whatever ``OneHotEncoder`` produces with its default
        settings (a SciPy sparse matrix in current scikit-learn releases).

    Raises:
        ValueError: If ``data`` is not a supported dataset name.
    """
    if data == 'fashion_mnist':
        (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
    elif data == 'mnist':
        (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
    else:
        raise ValueError('Invalid dataset name')

    X_train, X_val, Y_train, Y_val = train_test_split(train_images, train_labels, test_size=0.1, random_state=42)

    def flatten_images(images):
        # One row per image; a single reshape replaces the per-image Python loop.
        return images.reshape(images.shape[0], int(np.prod(images.shape[1:])))

    # Scale, then flatten, every split the same way. The test split previously
    # flattened the raw `test_images`, so it was returned unnormalised (0-255)
    # while train/val were in 0-1.
    X_train = flatten_images(X_train / 255.0)
    X_val = flatten_images(X_val / 255.0)
    X_test = flatten_images(test_images / 255.0)

    # Fit on the training labels only, then reuse the same mapping for val/test.
    enc = OneHotEncoder()
    Y_train = enc.fit_transform(np.expand_dims(Y_train, 1))
    Y_val = enc.transform(np.expand_dims(Y_val, 1))
    Y_test = enc.transform(np.expand_dims(test_labels, 1))

    return X_train, X_val, X_test, Y_train, Y_val, Y_test


# Build the train / validation / test splits once; the cells below reuse them.
X_train, X_val, X_test, Y_train, Y_val, Y_test = preprocess('fashion_mnist')

# Uncomment the next line to iterate quickly on a tiny subset of every split.
#X_train, X_val, X_test, Y_train, Y_val, Y_test = X_train[0:100, :], X_val[0:10, :], X_test[0:30, :], Y_train[0:100, :], Y_val[0:10, :], Y_test[0:30, :]

# Shape check: labels first, then features, each in train / val / test order.
print(*(split.shape for split in (Y_train, Y_val, Y_test)))
print(*(split.shape for split in (X_train, X_val, X_test)))

(54000, 10) (6000, 10) (10000, 10)
(54000, 784) (6000, 784) (10000, 784)


In [3]:

no_classes = 10

# Default hyperparameters; they are read once, as the default argument values of
# FFNetwork.__init__ below.
config_ = {
    'size_hidden_layers': [32, 64, 128, 256],
    'activation': 'tanh',
    'weight_initializations': "Xavier",
}

# Derived from the size list so the two settings cannot drift apart.
config_['no_hidden_layers'] = len(config_['size_hidden_layers'])


class FFNetwork:
  """Fully connected feedforward classifier with a softmax output layer.

  Layer widths are ``[X.shape[1]] + hidden_layers + [Y.shape[1]]``. Parameters
  live in ``self.params`` under the keys ``"W1".."W{nh+1}"`` (shape
  ``(fan_in, fan_out)``) and ``"B1".."B{nh+1}"`` (shape ``(1, fan_out)``).
  """

  def __init__(self, X, Y,
               num_outputs=no_classes,
               activation=config_['activation'],
               weight_init=config_['weight_initializations'],
               no_hidden_layers = config_['no_hidden_layers'],
               hidden_layers =  config_['size_hidden_layers']):
    """Size the network from the data and draw the initial parameters.

    Args:
      X: 2-D input array; only ``X.shape[1]`` (input width) is read here.
      Y: 2-D target array; only ``Y.shape[1]`` (output width) is read here.
      num_outputs: Stored as ``self.num_classes``. The output layer width is
        taken from ``Y.shape[1]``, not from this value.
      activation: Hidden-layer activation: "sigmoid", "tanh" or "Relu"
        (matched case-insensitively when the activation is applied).
      weight_init: "random" (standard normal weights) or "Xavier" (standard
        normal weights scaled by ``sqrt(1 / fan_in)``); case-insensitive.
        Biases are standard normal in both schemes.
      no_hidden_layers: Number of hidden layers; must equal
        ``len(hidden_layers)``.
      hidden_layers: Sequence with the width of each hidden layer.

    Raises:
      ValueError: If ``no_hidden_layers`` disagrees with ``hidden_layers`` or
        ``weight_init`` is not a supported scheme.
    """
    hidden_layers = list(hidden_layers)  # also accepts tuples; never mutates the caller's list
    if no_hidden_layers != len(hidden_layers):
      # A mismatch used to either raise IndexError or silently build a network
      # whose last layer had a hidden-layer width instead of the class count.
      raise ValueError(
          f"no_hidden_layers={no_hidden_layers} does not match "
          f"len(hidden_layers)={len(hidden_layers)}")

    scheme = str(weight_init).lower()
    if scheme not in ("random", "xavier"):
      # Previously an unknown scheme left self.params empty and only failed
      # later, inside forward_pass, with a KeyError.
      raise ValueError(
          f"Unknown weight_init {weight_init!r}; expected 'random' or 'Xavier'")

    self.nx = X.shape[1] # Number of inputs
    self.ny = Y.shape[1] # Number of outputs
    self.nh = no_hidden_layers  # Number of hidden layers
    self.sizes = [self.nx] + hidden_layers + [self.ny] # input layer + hidden layers + output layers
    self.params={}
    self.params_h = []
    self.num_classes = num_outputs
    self.weight_init = weight_init
    self.activation_function = activation

    # Reseeds NumPy's global RNG so every instance starts from the same weights.
    np.random.seed(0)

    # Draw order (W then B, layer by layer) is kept exactly as before so the
    # seeded initial parameters are unchanged.
    for i in range(1, self.nh + 2):
      fan_in, fan_out = self.sizes[i-1], self.sizes[i]
      weights = np.random.randn(fan_in, fan_out)
      if scheme == "xavier":
        weights = weights*np.sqrt(1/fan_in)
      self.params["W"+str(i)] = weights
      self.params["B"+str(i)] = np.random.randn(1, fan_out)

  def forward_activation(self, X):
    """Apply the configured hidden-layer activation element-wise to ``X``.

    Raises:
      ValueError: If ``self.activation_function`` is not "sigmoid", "tanh" or
        "Relu" (case-insensitive). The method used to return ``None`` here.
    """
    name = str(self.activation_function).lower()
    if name == "sigmoid":
      return 1.0/(1.0 + np.exp(-X))
    if name == "tanh":
      return np.tanh(X)
    if name == "relu":
      return np.maximum(0,X)
    raise ValueError(
        f"Unknown activation {self.activation_function!r}; "
        "expected 'sigmoid', 'tanh' or 'Relu'")

  def softmax(self, X):
    """Row-wise softmax of a 2-D array."""
    # Subtracting each row's maximum leaves the result unchanged but keeps
    # np.exp from overflowing.
    exps = np.exp(X - np.max(X, axis=1, keepdims=True))
    return  exps / np.sum(exps, axis=1, keepdims=True)

  def forward_pass(self, X, params=None):
    """Propagate ``X`` through the network and return the softmax outputs.

    Args:
      X: 2-D input array with ``self.nx`` columns.
      params: Optional parameter dict with the same keys as ``self.params``;
        ``self.params`` is used when omitted.

    Returns:
      Array with one row per input row and ``self.ny`` columns; every row
      sums to 1.

    Side effects:
      Overwrites ``self.A`` (pre-activations, keys ``1..nh+1``) and ``self.H``
      (activations, keys ``0..nh+1`` with ``H[0] = X``).
    """
    if params is None:
      params = self.params
    self.A = {}
    self.H = {}
    self.H[0] = X
    for i in range(self.nh):
      self.A[i+1] = np.matmul(self.H[i], params[f"W{i+1}"]) + params[f"B{i+1}"]
      self.H[i+1] = self.forward_activation(self.A[i+1])
    # Output layer: affine transform followed by softmax instead of the hidden activation.
    self.A[self.nh+1] = np.matmul(self.H[self.nh], params[f"W{self.nh+1}"]) + params[f"B{self.nh+1}"]
    self.H[self.nh+1] = self.softmax(self.A[self.nh+1])
    return self.H[self.nh+1]


# Instantiate the network from the shared hyperparameters in config_.
model = FFNetwork(
    X_train,
    Y_train,
    num_outputs=no_classes,
    activation=config_['activation'],
    weight_init=config_['weight_initializations'],
    no_hidden_layers=config_['no_hidden_layers'],
    hidden_layers=config_['size_hidden_layers'],
)

# Cell output: class probabilities for the first training image. No training
# has run yet, so this reflects the initial weights only.
model.forward_pass(X_train)[0]

array([0.0163483 , 0.06188866, 0.07630141, 0.3604386 , 0.13119604,
       0.03389549, 0.12472596, 0.05018947, 0.10003386, 0.0449822 ])