In [114]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [258]:
import numpy as np
import wandb
from keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split
import random

In [259]:
# load_data() returns two (features, labels) pairs; the second pair is kept as the test set.
(X, y), (X_test, y_test) = fashion_mnist.load_data()

In [260]:
# Inspect the shape of the raw training images before any reshaping.
X.shape

## X and X_test must be flattened to 2-D arrays of shape (n_samples, 784), where 784 = 28x28

(60000, 28, 28)

In [261]:
# Length of one flattened 28x28 image.
num_features = 784
# Largest label value plus one (assumes class ids start at 0 -- TODO confirm).
num_classes = 1 + np.max(y)

In [262]:
# Flatten every image into one row so both feature sets become 2-D arrays.
X = X.reshape(X.shape[0], 784)
X_test = X_test.reshape(X_test.shape[0], 784)

# Normalize the pixel values by scaling them with 1/255.
X = X / 255
X_test = X_test / 255

In [263]:
### One hot encode the Class_labels (y & y_test)
def one_hot_encode(labels, num_classes=10):
  """One-hot encode a sequence of integer class labels.

  Parameters
  ----------
  labels : sequence of int
      Class index of every sample; used directly as a column index, so each
      value must be a valid index into `num_classes` columns.
  num_classes : int, optional
      Number of columns in the encoding. Defaults to 10, the width that was
      previously hard-coded.

  Returns
  -------
  numpy.ndarray
      Float array of shape (len(labels), num_classes) with a 1 in the column
      given by each label and 0 elsewhere.
  """
  # An explicit integer dtype keeps an empty label list usable as an index.
  labels = np.asarray(labels, dtype=np.intp)
  z = np.zeros((len(labels), num_classes))
  # One fancy-indexed assignment replaces the per-sample Python loop.
  z[np.arange(len(labels)), labels] = 1
  return z

# Replace both integer label vectors with their one-hot matrices.
y = one_hot_encode(labels=y)
y_test = one_hot_encode(labels=y_test)

In [124]:
# Sanity check: the encoded labels have one row per sample and one column per class.
y.shape

(60000, 10)

In [314]:
## Hold out 10% of the training data as a validation set (fixed random_state for a repeatable split)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.1, random_state=123
)

In [304]:
X_train=X_train.T

In [240]:
X_train[:,0].shape

(784,)

In [None]:
## Number of samples in each of training, validation and test set

# Count rows of the label matrices: they hold one row per sample regardless of
# whether a feature matrix is stored as (samples, features) or has been
# transposed to (features, samples), where shape[0] would be the feature count.
no_sample_train=y_train.shape[0]
no_sample_val=y_val.shape[0]
no_sample_test=y_test.shape[0]

In [216]:
#### Activation functions and their derivatives

def sigmoid(a):
  """Logistic sigmoid 1 / (1 + exp(-a)), evaluated element-wise by NumPy."""
  exp_neg = np.exp(-a)
  return 1. / (1. + exp_neg)

def sigmoid_derivative(a):
  """Derivative of the sigmoid at `a`: s * (1 - s) with s = sigmoid(a)."""
  s = sigmoid(a)
  return s * (1 - s)

def tanh(a):
  """Hyperbolic tangent of `a`; a thin wrapper around np.tanh."""
  activated = np.tanh(a)
  return activated

def tanh_derivative(a):
  """Derivative of tanh at `a`: 1 - tanh(a)**2."""
  t = tanh(a)
  return 1 - np.power(t, 2)

def relu(a):
  """Rectified linear unit: element-wise max(0, a).

  Accepts scalars and arrays of any shape and returns a result of the same
  shape. np.maximum compares element by element, whereas np.max([0, a])
  reduces a list to one number and cannot pair the scalar 0 with an array.
  """
  return np.maximum(0, a)

def derivative_relu(a):
  """Element-wise derivative of ReLU: 1 where a > 0, otherwise 0.

  The test is made on the pre-activation `a` itself: a check on relu(a) < 0
  can never hold because ReLU outputs are non-negative, which made every
  gradient 1. The derivative at exactly 0 is taken as 0.

  Returns a float array with the shape of `a` (0-d for scalar input).
  """
  return np.where(np.asarray(a) > 0, 1.0, 0.0)

def identity(a):
  """Linear (identity) activation: hands back its argument unchanged."""
  return a

def derivative_identity(a):
  """Slope of the identity activation: the constant 1, whatever `a` is."""
  unit_slope = 1
  return unit_slope

def softmax(a):
  """Numerically stable softmax along axis 0.

  Parameters
  ----------
  a : array_like
      Scores. Normalisation runs along axis 0, so for a 2-D input every
      column is turned into one distribution.

  Returns
  -------
  numpy.ndarray
      Array of the same shape as `a` whose entries sum to 1 along axis 0.
  """
  a = np.asarray(a)
  # Subtracting the per-column maximum does not change the result mathematically
  # but keeps np.exp from overflowing to inf, which would turn the ratio into nan.
  shifted = a - np.max(a, axis=0)
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=0)

def derivative_softmax(a):
  """Element-wise s * (1 - s) with s = softmax(a)."""
  s = softmax(a)
  return s * (1 - s)


In [128]:
## Loss function
def loss_computation(y_true,y_hat,loss,batch_size,lambda_val,param):
  """Batch-averaged loss plus an L2 penalty on the weight matrices.

  Parameters
  ----------
  y_true : numpy.ndarray
      Target values, same shape as `y_hat`.
  y_hat : numpy.ndarray
      Predicted values.
  loss : {'cross_entropy', 'mse'}
      Which data term to compute.
  batch_size : int
      Number of samples the loss is averaged over.
  lambda_val : float
      L2 regularisation coefficient.
  param : dict
      Network parameters keyed 'W_1', 'b_1', 'W_2', ...; half of its length
      is taken as the number of layers and only the 'W_i' entries are penalised.

  Returns
  -------
  float
      data_loss + lambda_val / (2 * batch_size) * sum of squared weights.

  Raises
  ------
  ValueError
      If `loss` is not one of the supported names.
  """
  if loss=='cross_entropy':
    # Clip exact zeros away so log() stays finite; 0 * log(0) would give nan.
    y_hat_safe=np.clip(y_hat,1e-12,None)
    J=-np.sum(np.multiply(y_true,np.log(y_hat_safe)))/batch_size
  elif loss=='mse':
    # Divide by 2*batch_size; `1/2*(batch_size)` evaluates to batch_size/2.
    J=np.sum((y_true-y_hat)**2)/(2*batch_size)
  else:
    raise ValueError("Unsupported loss '"+str(loss)+"': expected 'cross_entropy' or 'mse'")

  # L2 Regularisation over W_1 ... W_L
  sum_weight=0
  for i in range(1,(len(param)//2)+1):
    sum_weight+=np.sum(np.power(param['W_'+str(i)],2))

  return J+(lambda_val/(2*batch_size))*sum_weight



In [129]:
def weight_bias_initialize(neurons_per_layer,init='Xavier'):
  """Create reproducible weights and zero biases for a fully connected network.

  Parameters
  ----------
  neurons_per_layer : sequence of int
      Layer widths, from the input layer to the output layer.
  init : {'Xavier', 'random'}, optional
      'Xavier' scales standard-normal draws by sqrt(2 / (fan_in + fan_out));
      'random' scales standard-normal draws by 0.01.

  Returns
  -------
  dict
      For every layer i >= 1: 'W_i' of shape
      (neurons_per_layer[i], neurons_per_layer[i-1]) and 'b_i', a zero column
      vector of shape (neurons_per_layer[i], 1).

  Raises
  ------
  ValueError
      If `init` is not a supported scheme (previously no weights were created
      at all in that case).
  """
  if init not in ('Xavier','random'):
    raise ValueError("Unsupported init '"+str(init)+"': expected 'Xavier' or 'random'")

  random.seed(123)  # kept so Python's own RNG is still seeded as before
  # random.seed() has no effect on NumPy draws, so the weights come from a
  # dedicated seeded generator; this also leaves NumPy's global state untouched.
  rng=np.random.RandomState(123)
  parameters={}

  for i in range(1,len(neurons_per_layer)):
    # int() guards against small NumPy integer types overflowing in fan_in+fan_out.
    fan_in,fan_out=int(neurons_per_layer[i-1]),int(neurons_per_layer[i])
    if init=='Xavier':
      scale=np.sqrt(2/(fan_in+fan_out))
    else:
      scale=0.01
    parameters['W_'+str(i)]=rng.randn(fan_out,fan_in)*scale
    parameters['b_'+str(i)]=np.zeros((fan_out,1))
  return parameters

In [309]:
def forward_propagation(data,parameter,activation_function='sigmoid'):
    """Run one forward pass through the fully connected network.

    Parameters
    ----------
    data : numpy.ndarray
        Input batch; it is left-multiplied by W_1, so its first axis must
        match the second axis of W_1 (features along axis 0).
    parameter : dict
        Weights and biases keyed 'W_1', 'b_1', 'W_2', ...; half of its length
        is taken as the number of weight layers.
    activation_function : {'sigmoid', 'relu', 'tanh', 'identity'}, optional
        Activation applied to every hidden layer. The output layer always
        uses softmax.

    Returns
    -------
    tuple
        (y_pred, Activation, Pre_Activation): the output-layer activation and
        the per-layer lists of activations and pre-activations. Index 0 of
        Activation is `data`; index 0 of Pre_Activation stays None.

    Raises
    ------
    ValueError
        If a hidden layer is reached with an unsupported activation name.
    """
    total_layers=len(parameter)//2+1

    Activation = [None]*total_layers # activations
    Pre_Activation = [None]*total_layers # pre-activations

    Activation[0] = data

    for layer in range(1, total_layers):
        Weight = parameter["W_"+str(layer)]
        bias = parameter["b_"+str(layer)]

        Pre_Activation[layer] = np.matmul(Weight,Activation[layer-1]) + bias

        if layer == total_layers-1:
            Activation[layer] = softmax(Pre_Activation[layer]) # activation function for output layer
        elif activation_function == 'sigmoid':
            Activation[layer] = sigmoid(Pre_Activation[layer])
        elif activation_function == 'relu':
            Activation[layer] = relu(Pre_Activation[layer])
        elif activation_function == 'tanh':
            Activation[layer] = tanh(Pre_Activation[layer])
        elif activation_function == 'identity':
            Activation[layer] = identity(Pre_Activation[layer])
        else:
            # Fail fast with a clear message instead of leaving this layer's
            # activation as None and breaking in the next layer's matmul.
            raise ValueError("Unsupported activation_function '"+str(activation_function)+"': expected 'sigmoid', 'relu', 'tanh' or 'identity'")

    y_pred = Activation[total_layers-1]

    return y_pred,Activation,Pre_Activation
    
    




In [310]:
# Small trial network: 784 inputs, two hidden layers (10 and 20 units), 10 outputs.
prem = weight_bias_initialize(neurons_per_layer=[784, 10, 20, 10])

In [315]:
# Stack the first two rows of X_train into a tiny trial batch.
work = np.array([X_train[i] for i in range(2)])

In [317]:
# Only the prediction is kept; work is transposed because the network
# multiplies its weights against a matrix with features along axis 0.
output, _, _ = forward_propagation(data=work.T, parameter=prem)

In [318]:
# The prediction has one column per input sample and one row per output unit.
output.shape

(10, 2)

In [316]:
# The trial batch stores its two samples as rows.
work.shape

(2, 784)

In [305]:
# Peek at the first row of X_train.
X_train[0]

array([0., 0., 0., ..., 0., 0., 0.])