<a href="https://colab.research.google.com/github/safikhanSoofiyani/CS6910-Assignment1/blob/main/CS6910_Assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing necessary libraries.

In [None]:
import keras
from keras.datasets import fashion_mnist
import numpy as np
import matplotlib.pyplot as plt
import copy 


Installing and importing wandb

In [None]:
!pip install wandb -qqq
import wandb

[K     |████████████████████████████████| 1.7 MB 5.2 MB/s 
[K     |████████████████████████████████| 180 kB 64.9 MB/s 
[K     |████████████████████████████████| 143 kB 68.5 MB/s 
[K     |████████████████████████████████| 63 kB 1.4 MB/s 
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


Preparing dataset

In [None]:
def prepdata():
  """Load Fashion-MNIST and return train / test / validation sets plus labels.

  Every image is flattened to a 1-D vector and divided by 255.0. Each dataset
  is an (N, 2) object array whose rows are [image_vector, label].

  Returns:
    (train_data, test_data, val_data, labels). val_data is a random 10% of
    the original training rows; those rows are removed from train_data so
    that the validation set is genuinely held out.
  """
  # Loading data
  (train_x,train_y),(test_x,test_y)=fashion_mnist.load_data()
  # Defining labels for data
  labels=['T-shirt/top','Trouser','Pullover','Dress','Coat','Sandal','Shirt','Sneaker','Bag','Ankle boot']

  image_shape=train_x.shape[1]*train_x.shape[2]

  train_image_count=len(train_x)
  test_image_count=len(test_x)

  # Flatten every image into one row, e.g. an (N,28,28) tensor becomes (N,784),
  # and rescale the pixel values by 1/255 (this is normalisation, not a
  # grayscale conversion).
  x_train=train_x.reshape(train_image_count,image_shape)/255.0
  x_test=test_x.reshape(test_image_count,image_shape)/255.0

  # Creating training and testing dataset,
  # with each row corresponding to an image and its associated label.
  train_data=_pair_rows(x_train,train_y)
  test_data=_pair_rows(x_test,test_y)

  # Creating validation dataset, with 10% of training data
  train_data_len=len(train_data)
  val_data_len=int(0.1*train_data_len)

  # Choosing random data points
  idx=np.random.choice(train_data_len,val_data_len,replace=False)
  val_data=train_data[idx]

  # Drop the chosen rows from the training set; otherwise the model would be
  # validated on examples it was trained on.
  held_out=np.zeros(train_data_len,dtype=bool)
  held_out[idx]=True
  train_data=train_data[~held_out]

  return(train_data,test_data,val_data,labels)


def _pair_rows(images,targets):
  """Pack image vectors and their labels into an (N, 2) object array."""
  # A pre-allocated object array is filled cell by cell because recent NumPy
  # releases refuse to infer a shape from ragged (vector, scalar) pairs.
  paired=np.empty((len(images),2),dtype=object)
  for i in range(len(images)):
    paired[i,0]=images[i]
    paired[i,1]=targets[i]
  return paired
  

Implementing Feed Forward Neural Network

In [None]:
def nn_init(layer_sizes,w_type):
  """Create the weights and biases of a fully connected network.

  Args:
    layer_sizes: number of neurons per layer, input layer first and output
      layer last (e.g. [784,32,32,10]).
    w_type: "xavier" (scaled normal weights, zero biases) or "random"
      (standard-normal weights and biases).

  Returns:
    dict holding, for every layer i >= 1, "w<i>" of shape
    (layer_sizes[i], layer_sizes[i-1]) and "b<i>" of shape (layer_sizes[i], 1).
    For an unrecognised w_type a message is printed and the dict is empty.
  """
  params={}
  if w_type not in ("xavier","random"):
    print("Enter a valid weight initilization type")
    return params

  for i in range(1,len(layer_sizes)):
    fan_out,fan_in=layer_sizes[i],layer_sizes[i-1]
    if w_type=="xavier":
      # NOTE(review): sqrt(6/(fan_in+fan_out)) is the bound normally used for
      # Glorot *uniform* sampling; here it scales standard-normal draws.
      # Left unchanged to keep existing results - confirm the intent.
      norm_xav=np.sqrt(6)/np.sqrt(fan_out+fan_in)
      params["w"+str(i)]=np.random.randn(fan_out,fan_in)*norm_xav
      params["b"+str(i)]=np.zeros((fan_out,1))
    else:
      params["w"+str(i)]=np.random.randn(fan_out,fan_in)
      # randn takes the dimensions as separate ints; passing a tuple raises.
      params["b"+str(i)]=np.random.randn(fan_out,1)

  return params


In [None]:
# Activation Functions

def sigmoid(pre_act):
  """Logistic function 1/(1+exp(-pre_act)), applied element-wise."""
  denominator=1+np.exp(-pre_act)
  return 1/denominator

def tanh(pre_act):
  """Hyperbolic tangent of the pre-activation, applied element-wise."""
  squashed=np.tanh(pre_act)
  return squashed

def relu(pre_act):
  """Rectified linear unit: element-wise maximum of 0 and the pre-activation."""
  floor=0
  return np.maximum(floor,pre_act)

def softmax(x):
  """Softmax over all elements of x.

  The exponentials are normalised by their sum over the whole input, so the
  returned array has the shape of x and its entries add up to 1.

  The maximum is subtracted before exponentiating. This leaves the result
  mathematically unchanged but keeps np.exp from overflowing to inf (and the
  ratio from becoming NaN) when the inputs are large.
  """
  scores=np.asarray(x)
  if scores.size==0:
    # Nothing to normalise; np.max would raise on an empty array.
    return np.exp(scores)
  exps=np.exp(scores-np.max(scores))
  return exps/np.sum(exps)

In [None]:
def forward_prop(input_data,params,active,layer_sizes):
  """Run one forward pass through the network for a single example.

  Args:
    input_data: indexable whose element [0][0] is the image vector; the label
      stored next to it is ignored (presumably a one-row slice of the arrays
      built by prepdata - confirm against the caller).
    params: dict with "w<i>" / "b<i>" entries for every layer i >= 1.
    active: hidden-layer activation, one of "sigmoid", "tanh" or "relu".
    layer_sizes: neurons per layer, input first and output last.

  Returns:
    (h, a, y_hat) where h lists the input followed by each hidden layer's
    activation, a lists the input followed by each hidden layer's
    pre-activation, and y_hat is the softmax output as a column vector.

  Raises:
    ValueError: if active is not a supported activation name.
  """
  activations={"sigmoid":sigmoid,"tanh":tanh,"relu":relu}
  if active not in activations:
    # Previously this only printed and then failed later on an unbound name.
    raise ValueError("Enter a valid activation function")
  act_fn=activations[active]

  # Extracting only the image data not the label for the image data
  # (np.array copies, so the caller's data is never modified).
  out=np.array(input_data[0][0]).reshape(-1,1)

  h=[out] # To save the activations for each neuron in a layer
  a=[out] # To save the preactivation for each neuron in a layer

  # post_a is what the next layer consumes. It starts as the raw input so a
  # network without hidden layers also works.
  post_a=out
  for i in range(1,len(layer_sizes)-1):
    weights=params["w"+str(i)]
    biases=params["b"+str(i)]

    # Feed the previous layer's *activation* forward. Using its
    # pre-activation would skip the non-linearity between hidden layers.
    pre_a=np.dot(weights,post_a)+biases
    a.append(pre_a)
    post_a=act_fn(pre_a)
    h.append(post_a)

  # Final step for forward propagation, using softmax.
  last=len(layer_sizes)-1
  out=np.dot(params["w"+str(last)],post_a)+params["b"+str(last)]
  y_hat=softmax(out)

  return h,a,y_hat

In [None]:
# Build the initial parameters of a 784-30-30-30-10 network (784 inputs, three
# hidden layers of 30 neurons each, 10 outputs) with the "xavier" scheme.
parameters=nn_init(layer_sizes=[784,30,30,30,10],w_type="xavier")


In [None]:
# Calculating loss 
def loss_calc(name,y_t,y_hat):
  """Compute the loss between a target and a prediction.

  Args:
    name: "sse" for the sum of squared errors, or "cross_entropy" for
      -sum(y_t*log(y_hat)).
    y_t: target values (array-like, same shape as y_hat).
    y_hat: predicted values.

  Returns:
    The scalar loss. Any other name yields 0, as before.
  """
  # Accept plain lists as well as arrays.
  y_t=np.asarray(y_t)
  y_hat=np.asarray(y_hat)

  error=0
  if(name=="sse"):
    error=np.sum((y_t-y_hat)**2)
  elif(name=="cross_entropy"):
    # Keep predictions strictly positive: log(0) is -inf, and 0*-inf is NaN,
    # which would poison the loss whenever a probability is exactly 0.
    safe_y_hat=np.maximum(y_hat,1e-12)
    error=-1*np.sum(np.multiply(y_t,np.log(safe_y_hat)))

  return error


In [None]:
# Calculating derivatives of the activation functions
def sigmoid_der(x):
  """Derivative of the logistic function: s*(1-s) with s = sigmoid(x)."""
  s=sigmoid(x)
  return s*(1-s)

def tanh_der(x):
  """Derivative of the hyperbolic tangent: 1 - tanh(x)^2."""
  squared=tanh(x)**2
  return 1-squared

def relu_der(x):
  """Derivative of ReLU: 1 where x > 0, otherwise 0.

  Works on scalars and on arrays of any shape. ReLU is not differentiable at
  exactly 0; the value 0 is used there instead of printing an error and
  returning None.

  Returns:
    An int (0 or 1) for a scalar input, or a float array with the shape of x
    for an array-like input.
  """
  if np.ndim(x)==0:
    return 1 if x>0 else 0
  # A plain `if x<0` is ambiguous for arrays, so the mask is built
  # element-wise.
  return np.where(np.asarray(x)>0,1.0,0.0)
