<a href="https://colab.research.google.com/github/peeush-agarwal/ml-projects/blob/master/FeedForwardNN/FeedForwardNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[Source](https://github.com/Niranjankumar-c/Feedforward_NeuralNetworrks/blob/master/FeedForwardNetworks/FeedForwardNeuralNetwork.ipynb)

# Setup

In [0]:
import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score, log_loss, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm_notebook

In [0]:
class SigmoidNeuron:
  """Single sigmoid neuron trained with full-batch gradient descent.

  The neuron computes sigmoid(w.x + b). Gradients are available for a
  squared-error loss ('mse') and a cross-entropy loss ('ce').
  """

  def __init__(self):
    # Parameters are created in fit(), once the number of input features is known.
    self.w = None
    self.b = None

  def perceptron(self, x):
    """Return the pre-activation np.dot(x, w) + b."""
    return np.dot(x, self.w) + self.b

  def sigmoid(self, x):
    """Return the neuron output for the raw input x (not for a pre-activation)."""
    return 1.0/(1.0 + np.exp(-1 * self.perceptron(x)))

  def grad_w_mse(self, x, y):
    """Return the squared-error gradient w.r.t. w for one sample (x, y)."""
    y_pred = self.sigmoid(x)
    return (y_pred - y) * y_pred * (1 - y_pred) * x

  def grad_b_mse(self, X, y):
    """Return the squared-error gradient w.r.t. b for one sample (X, y)."""
    y_pred = self.sigmoid(X)
    return (y_pred - y) * y_pred * (1 - y_pred)

  def grad_w_ce(self, x, y):
    """Return the cross-entropy gradient w.r.t. w for one sample (x, y)."""
    y_pred = self.sigmoid(x)
    return (y_pred - y) * x

  def grad_b_ce(self, x, y):
    """Return the cross-entropy gradient w.r.t. b for one sample (x, y)."""
    y_pred = self.sigmoid(x)
    return y_pred - y

  def fit(self, X, Y, epochs = 1, lr = 0.01, loss_fn='mse', display_loss=False):
    """Train the neuron with full-batch gradient descent.

    Args:
      X: 2-D array, one sample per row.
      Y: target value for each row of X.
      epochs: number of passes over the data.
      lr: learning rate.
      loss_fn: 'mse' or 'ce'; selects the gradients and the plotted loss.
      display_loss: when True, record the loss after every epoch and plot it.

    Raises:
      ValueError: if loss_fn is neither 'mse' nor 'ce'.
    """
    # Validate the loss before touching any state so that a typo fails fast
    # with a clear message instead of an UnboundLocalError further down.
    if loss_fn == 'mse':
      grad_w, grad_b = self.grad_w_mse, self.grad_b_mse
      calc_loss = mean_squared_error
      plt_ylabel = 'Mean Squared Error'
    elif loss_fn == 'ce':
      grad_w, grad_b = self.grad_w_ce, self.grad_b_ce
      calc_loss = log_loss
      plt_ylabel = 'Cross Entropy loss'
    else:
      raise ValueError("loss_fn must be 'mse' or 'ce', got %r" % (loss_fn,))

    # NOTE(review): m is the number of input features. It sizes w (correct) and
    # is also the divisor of the summed gradients below, where the number of
    # samples would be the usual choice. Kept as-is because changing it rescales
    # the effective learning rate for existing callers - TODO confirm intent.
    m = X.shape[1]
    self.w = np.ones(m)
    self.b = 0

    losses = []

    for _ in tqdm_notebook(np.arange(epochs), total=epochs, unit='epoch'):
      dw = 0
      db = 0
      for x, y in zip(X, Y):
        dw += grad_w(x, y)
        db += grad_b(x, y)

      self.w -= lr * dw/m
      self.b -= lr * db/m

      if display_loss:
        losses.append(calc_loss(Y, self.sigmoid(X)))

    if display_loss:
      plt.plot(losses)
      plt.xlabel('Epochs')
      plt.ylabel(plt_ylabel)
      plt.show()

  def predict(self, X):
    """Return the sigmoid output (a value in (0, 1)) for each row of X."""
    return self.sigmoid(X)


In [0]:
# Red -> yellow -> green gradient shared by every scatter plot below.
my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ['red', 'yellow', 'green']
)

# Generate data

In [0]:
# 1000 two-feature points drawn around 4 centres; seeded so reruns give the same data.
data, labels = make_blobs(n_samples=1000, n_features=2, centers=4, random_state=0)
print(data.shape, labels.shape)

In [0]:
# Plot the two features against each other, coloured by label.
plt.scatter(data[:, 0], data[:, 1], cmap=my_cmap, c=labels)
plt.show()

In [0]:
# Binarise labels: keep the original labels and map them to label modulo 2.
labels_orig = labels
labels = labels_orig % 2

In [0]:
# Same scatter as before, now coloured by the binarised labels.
plt.scatter(data[:, 0], data[:, 1], cmap=my_cmap, c=labels)
plt.show()

In [0]:
# Stratified, seeded train/validation split on the binary labels.
X_train, X_val, Y_train, Y_val = train_test_split(
    data, labels, random_state=0, stratify=labels
)
print(*(part.shape for part in (X_train, X_val, Y_train, Y_val)))

# Sigmoid Neuron (SN) classification

In [0]:
# Train a single sigmoid neuron on the binary task and plot its loss curve.
sn = SigmoidNeuron()
sn.fit(X_train, Y_train, lr=1, epochs=1000, display_loss=True)

In [0]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
Y_pred_train = sn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype('int').ravel()
Y_pred_val = sn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype('int').ravel()
# accuracy_score takes (y_true, y_pred); the arguments were previously swapped.
accuracy_train = accuracy_score(Y_train, Y_pred_binarised_train)
accuracy_val = accuracy_score(Y_val, Y_pred_binarised_val)

print('Training accuracy=', round(accuracy_train, 2))
print('Validation accuracy=', round(accuracy_val, 2))

In [0]:
# Colour = predicted class. Marker size = 3 for a correct prediction
# (15 * 0.2) and 18 for a misclassified point (15 * 1.2).
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=Y_pred_binarised_train,
    cmap=my_cmap,
    s=15*(np.abs(Y_pred_binarised_train - Y_train)+.2),
)
plt.show()

# First FeedForward Network

In [0]:
class FirstFFNetwork:
  """Hard-wired 2-2-1 sigmoid network trained with full-batch gradient descent.

  Two inputs feed two hidden sigmoid units (weights w1..w4, biases b1, b2),
  which feed one sigmoid output unit (weights w5, w6, bias b3). Gradients are
  those of the squared error 0.5 * (h3 - y)**2.
  """

  # Every trainable scalar; the matching gradient attribute is 'd' + name.
  _PARAM_NAMES = ('w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'b1', 'b2', 'b3')

  def __init__(self):
    self._init_params()

  def _init_params(self):
    """Draw fresh standard-normal weights (w1..w6, in that order) and zero the biases."""
    (self.w1, self.w2, self.w3,
     self.w4, self.w5, self.w6) = (np.random.randn() for _ in range(6))
    self.b1 = 0
    self.b2 = 0
    self.b3 = 0

  def sigmoid(self, x):
    """Return the logistic function of x."""
    return 1.0 / (1.0 + np.exp(-x))

  def forward_pass(self, x):
    """Run one sample x = (x1, x2) through the network and return the output h3.

    All intermediate values (x1, x2, a1..a3, h1..h3) are stored on self so
    that grad() can reuse them.
    """
    self.x1, self.x2 = x
    self.a1 = self.w1 * self.x1 + self.w2 * self.x2 + self.b1
    self.h1 = self.sigmoid(self.a1)
    self.a2 = self.w3 * self.x1 + self.w4 * self.x2 + self.b2
    self.h2 = self.sigmoid(self.a2)
    self.a3 = self.w5 * self.h1 + self.w6 * self.h2 + self.b3
    self.h3 = self.sigmoid(self.a3)
    return self.h3

  def grad(self, x, y):
    """Compute the gradients for one sample and store them as self.dw1..dw6, db1..db3."""
    self.forward_pass(x)

    # Error signal at the output pre-activation a3, shared by every gradient.
    delta3 = (self.h3-y) * self.h3*(1-self.h3)
    self.dw5 = delta3 * self.h1
    self.dw6 = delta3 * self.h2
    self.db3 = delta3

    # Back-propagate through w5 into hidden unit 1.
    delta1 = delta3 * self.w5 * self.h1*(1-self.h1)
    self.dw1 = delta1 * self.x1
    self.dw2 = delta1 * self.x2
    self.db1 = delta1

    # Back-propagate through w6 into hidden unit 2.
    delta2 = delta3 * self.w6 * self.h2*(1-self.h2)
    self.dw3 = delta2 * self.x1
    self.dw4 = delta2 * self.x2
    self.db2 = delta2

  def fit(self, X, Y, epochs = 1, lr = 1, initialise=True, display_loss = False):
    """Train with full-batch gradient descent.

    Args:
      X: 2-D array with two features per row.
      Y: target value for each row of X.
      epochs: number of passes over the data.
      lr: learning rate.
      initialise: when True, re-draw the weights before training; pass False
        to continue from the current parameters.
      display_loss: when True, record the mean squared error after every
        epoch and plot it.
    """
    if initialise:
      self._init_params()

    # NOTE(review): the summed gradients are divided by the number of input
    # features (X.shape[1]), not the number of samples. Kept as-is because
    # changing it rescales the effective learning rate - TODO confirm intent.
    m = X.shape[1]
    losses = []

    for _ in tqdm_notebook(range(epochs), total=epochs, unit='epoch'):
      totals = dict.fromkeys(self._PARAM_NAMES, 0)
      for x, y in zip(X, Y):
        self.grad(x, y)
        for name in self._PARAM_NAMES:
          totals[name] += getattr(self, 'd' + name)

      for name in self._PARAM_NAMES:
        setattr(self, name, getattr(self, name) - lr * totals[name]/m)

      if display_loss:
        losses.append(mean_squared_error(Y, self.predict(X)))

    if display_loss:
      plt.plot(losses)
      plt.xlabel('Epochs')
      plt.ylabel('Mean Squared error')
      plt.show()

  def predict(self, X):
    """Return the network output for every row of X as a 1-D array."""
    return np.asarray([self.forward_pass(x) for x in X])


# FFNetwork Classification

In [0]:
# Train the 2-2-1 network on the binary task and plot its loss curve.
ffn = FirstFFNetwork()
ffn.fit(X_train, Y_train, lr=0.01, epochs=2000, display_loss=True)

In [0]:
# Network outputs for both splits.
Y_pred_train = ffn.predict(X_train)
Y_pred_val = ffn.predict(X_val)

# Threshold at 0.5 to obtain hard 0/1 predictions.
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype('int').ravel()
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype('int').ravel()

accuracy_train = accuracy_score(y_true=Y_train, y_pred=Y_pred_binarised_train)
accuracy_val = accuracy_score(y_true=Y_val, y_pred=Y_pred_binarised_val)

print('Accuracy train=',round(accuracy_train, 2))
print('Accuracy val=',round(accuracy_val, 2))

In [0]:
# Colour = predicted class. Marker size = 3 for a correct prediction
# and 18 for a misclassified point.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=Y_pred_binarised_train,
    cmap=my_cmap,
    s=15*(np.abs(Y_pred_binarised_train - Y_train)+.2),
)
plt.show()

In [0]:
class FFSNNetwork:
  """Fully connected sigmoid network with a single output unit (binary classifier).

  Layers are numbered from 1; W[k] and B[k] are the weights and bias of layer
  k, and layer nh + 1 is the output layer. Training is full-batch gradient
  descent with the cross-entropy gradient (output - target) at the output.
  """

  def __init__(self, n_inputs, hidden_sizes=None):
    """Create the network.

    Args:
      n_inputs: number of input features.
      hidden_sizes: sequence with the width of each hidden layer; defaults
        to a single hidden layer of 2 units.
    """
    # Copy into a list so tuples are accepted and no default object is shared.
    hidden_sizes = [2] if hidden_sizes is None else list(hidden_sizes)
    self.nx = n_inputs
    self.ny = 1
    self.nh = len(hidden_sizes)
    self.sizes = [self.nx] + hidden_sizes + [self.ny]

    self.W = {}
    self.B = {}
    self._init_params()

  def _init_params(self):
    """Draw standard-normal weights and zero biases for every layer."""
    for i in range(self.nh+1):
      self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
      self.B[i+1] = np.zeros((1, self.sizes[i+1]))

  def sigmoid(self, x):
    """Return the element-wise logistic function of x."""
    return 1.0 / (1.0 + np.exp(-x))

  def forward_pass(self, x):
    """Propagate one sample through the network.

    Stores the pre-activations in self.A and the activations in self.H
    (H[0] is the input reshaped to a row) and returns the (1, 1) output.
    """
    self.A = {}
    self.H = {}
    self.H[0] = x.reshape(1, -1)
    for i in range(self.nh+1):
      self.A[i+1] = np.matmul(self.H[i], self.W[i+1]) + self.B[i+1]
      self.H[i+1] = self.sigmoid(self.A[i+1])
    return self.H[self.nh+1]

  def grad_sigmoid(self, x):
    """Return the sigmoid derivative expressed in terms of the sigmoid output x."""
    return x*(1-x)

  def cross_entropy(self,label,pred):
    """Return the mean binary cross-entropy between 0/1 labels and predictions.

    Both classes contribute: -(y*log(p) + (1 - y)*log(1 - p)). The previous
    implementation multiplied pred by label and discarded zeros, which
    ignored every sample whose label is 0.
    """
    eps = 1e-12  # keeps log() finite when a prediction saturates at 0 or 1
    label = np.asarray(label, dtype=float)
    pred = np.clip(np.asarray(pred, dtype=float), eps, 1 - eps)
    per_sample = -(label * np.log(pred) + (1 - label) * np.log(1 - pred))
    return np.mean(per_sample)

  def grad(self, x, y):
    """Back-propagate one sample; results are stored in self.dW and self.dB."""
    self.forward_pass(x)
    self.dW = {}
    self.dB = {}
    self.dH = {}
    self.dA = {}
    L = self.nh + 1
    # Cross-entropy loss with a sigmoid output gives (output - target) here.
    self.dA[L] = (self.H[L] - y)
    for k in range(L, 0, -1):
      self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
      self.dB[k] = self.dA[k]
      self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
      self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1]))

  def fit(self, X, Y, epochs=1, learning_rate=1, initialize=True, display_loss=False):
    """Train with full-batch gradient descent.

    Args:
      X: 2-D array, one sample per row.
      Y: 0/1 target for each row of X.
      epochs: number of passes over the data.
      learning_rate: step size.
      initialize: when True, re-draw all parameters before training; pass
        False to continue from the current ones.
      display_loss: when True, record the cross-entropy after every epoch
        and plot it.
    """
    if initialize:
      self._init_params()

    layers = range(1, self.nh + 2)
    # NOTE(review): the summed gradients are divided by the number of input
    # features (X.shape[1]), not the number of samples. Kept as-is because
    # changing it rescales the effective learning rate - TODO confirm intent.
    m = X.shape[1]
    losses = []

    for _ in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
      dW = {k: np.zeros((self.sizes[k-1], self.sizes[k])) for k in layers}
      dB = {k: np.zeros((1, self.sizes[k])) for k in layers}
      for x, y in zip(X, Y):
        self.grad(x, y)
        for k in layers:
          dW[k] += self.dW[k]
          dB[k] += self.dB[k]

      for k in layers:
        self.W[k] -= learning_rate * (dW[k]/m)
        self.B[k] -= learning_rate * (dB[k]/m)

      if display_loss:
        losses.append(self.cross_entropy(Y, self.predict(X)))

    if display_loss:
      plt.plot(losses)
      plt.xlabel('Epochs')
      plt.ylabel('CE')
      plt.show()

  def predict(self, X):
    """Return the network output for every row of X, with singleton axes squeezed out."""
    return np.array([self.forward_pass(x) for x in X]).squeeze()
 

In [0]:
# 2 inputs -> hidden layers of 2 and 3 units -> 1 output.
ffsnn = FFSNNetwork(n_inputs=2, hidden_sizes=[2, 3])
ffsnn.fit(X_train, Y_train, learning_rate=.001, epochs=2000, display_loss=True)

# Optional: continue training from the current weights instead of re-initialising.
#ffsnn.fit(X_train, Y_train, epochs=1000, initialize=False, learning_rate=.001, display_loss=True)

In [0]:
# Threshold the network outputs at 0.5 to obtain hard 0/1 predictions.
Y_pred_train = ffsnn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype('int').ravel()
Y_pred_val = ffsnn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype('int').ravel()
# accuracy_score takes (y_true, y_pred); the arguments were previously swapped.
accuracy_train = accuracy_score(Y_train, Y_pred_binarised_train)
accuracy_val = accuracy_score(Y_val, Y_pred_binarised_val)

print('Accuracy train=',round(accuracy_train, 2))
print('Accuracy val=',round(accuracy_val, 2))

In [0]:
# Colour = predicted class. Marker size = 3 for a correct prediction
# and 18 for a misclassified point.
plt.scatter(
    X_train[:, 0],
    X_train[:, 1],
    c=Y_pred_binarised_train,
    cmap=my_cmap,
    s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2),
)
plt.show()

# Multiclass Classification

In [0]:
class FFSN_MultiClass:
  """Fully connected network for multi-class classification.

  Hidden layers use sigmoid activations and the output layer uses softmax,
  so every prediction is a probability distribution over the classes.
  Layers are numbered from 1; layer nh + 1 is the output layer.
  """

  def __init__(self, n_inputs, n_outputs, hidden_layers=None):
    """Create the network.

    Args:
      n_inputs: number of input features.
      n_outputs: number of classes.
      hidden_layers: sequence with the width of each hidden layer; defaults
        to a single hidden layer of 2 units.
    """
    # Copy into a list so tuples are accepted and no default object is shared.
    hidden_layers = [2] if hidden_layers is None else list(hidden_layers)
    self.nx = n_inputs
    self.ny = n_outputs
    self.nh = len(hidden_layers)
    self.sizes = [self.nx] + hidden_layers + [self.ny]

    self.W = {}
    self.B = {}
    self._init_params()

  def _init_params(self):
    """Draw standard-normal weights and zero biases for every layer."""
    for i in range(self.nh+1):
      self.W[i+1] = np.random.randn(self.sizes[i], self.sizes[i+1])
      self.B[i+1] = np.zeros((1, self.sizes[i+1]))

  def sigmoid(self, x):
    """Return the element-wise logistic function of x."""
    return 1.0 / (1.0 + np.exp(-1 * x))

  def softmax(self, x):
    """Return the softmax of x along its last axis.

    The maximum is subtracted first so that large logits do not overflow
    in exp(); this does not change the result mathematically.
    """
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

  def forward_pass(self, x):
    """Propagate one sample through the network.

    Stores the pre-activations in self.A and the activations in self.H
    (H[0] is the input reshaped to a row) and returns the (1, ny) row of
    class probabilities.
    """
    self.A = {}
    self.H = {}

    self.H[0] = x.reshape(1, -1)
    L = self.nh + 1
    for k in range(1, L + 1):
      self.A[k] = np.matmul(self.H[k-1], self.W[k]) + self.B[k]
      # Softmax on the output layer matches the (output - target) gradient
      # used in grad() and the categorical cross-entropy loss.
      activation = self.softmax if k == L else self.sigmoid
      self.H[k] = activation(self.A[k])

    return self.H[L]

  def grad_sigmoid(self, x):
    """Return the sigmoid derivative expressed in terms of the sigmoid output x."""
    return x*(1-x)

  def cross_entropy(self,label,pred):
    """Return the mean categorical cross-entropy for one-hot labels.

    The probability assigned to the true class is picked out per sample by
    summing pred * label over the class axis. It is clipped away from 0 so
    that a saturated prediction yields a large finite loss instead of being
    silently dropped.
    """
    true_class_prob = np.sum(np.multiply(pred, label), axis=-1)
    return np.mean(-np.log(np.clip(true_class_prob, 1e-12, None)))

  def grad(self, x, y):
    """Back-propagate one sample; results are stored in self.dW and self.dB."""
    self.forward_pass(x)
    self.dW = {}
    self.dB = {}
    self.dA = {}
    self.dH = {}
    L = self.nh + 1
    # Softmax output with cross-entropy loss gives (output - target) here.
    self.dA[L] = (self.H[L] - y)
    for k in range(L, 0, -1):
      self.dW[k] = np.matmul(self.H[k-1].T, self.dA[k])
      self.dB[k] = self.dA[k]
      self.dH[k-1] = np.matmul(self.dA[k], self.W[k].T)
      self.dA[k-1] = np.multiply(self.dH[k-1], self.grad_sigmoid(self.H[k-1]))

  def fit(self, X, Y, epochs=100, lr=0.001, initialize=True, display_loss=False):
    """Train with full-batch gradient descent.

    Args:
      X: 2-D array, one sample per row.
      Y: one-hot target row for each row of X.
      epochs: number of passes over the data.
      lr: learning rate.
      initialize: when True, re-draw all parameters before training; pass
        False to continue from the current ones.
      display_loss: when True, record the cross-entropy after every epoch
        and plot it.
    """
    if initialize:
      self._init_params()

    layers = range(1, self.nh + 2)
    # NOTE(review): the summed gradients are divided by the number of input
    # features (X.shape[1]), not the number of samples. Kept as-is because
    # changing it rescales the effective learning rate - TODO confirm intent.
    m = X.shape[1]
    losses = []

    for _ in tqdm_notebook(range(epochs), total=epochs, unit='epoch'):
      dW = {k: np.zeros((self.sizes[k-1], self.sizes[k])) for k in layers}
      dB = {k: np.zeros((1, self.sizes[k])) for k in layers}
      for x, y in zip(X, Y):
        self.grad(x, y)
        for k in layers:
          dW[k] += self.dW[k]
          dB[k] += self.dB[k]

      for k in layers:
        self.W[k] -= lr * (dW[k]/m)
        self.B[k] -= lr * (dB[k]/m)

      if display_loss:
        losses.append(self.cross_entropy(Y, self.predict(X)))

    if display_loss:
      plt.plot(losses)
      plt.xlabel('Epochs')
      plt.ylabel('Cross Entropy')
      plt.show()

  def predict(self, X):
    """Return the class probabilities for every row of X, with singleton axes squeezed out."""
    return np.array([self.forward_pass(x) for x in X]).squeeze()

In [0]:
# Redo the stratified, seeded split, this time on the original 4-class labels.
X_train, X_val, Y_train, Y_val = train_test_split(
    data, labels_orig, random_state=0, stratify=labels_orig
)
print(*(part.shape for part in (X_train, X_val, Y_train, Y_val)))
print(labels_orig.shape)

In [0]:
enc = OneHotEncoder()
# Converts 0 => (1 0 0 0), 1 => (0 1 0 0), 2 => (0 0 1 0), 3 => (0 0 0 1)
y_OH_train = enc.fit_transform(np.expand_dims(Y_train, 1)).toarray()
# Only transform the validation labels: re-fitting the encoder on them could
# produce a different column layout if the validation set lacked a class.
y_OH_val = enc.transform(np.expand_dims(Y_val, 1)).toarray()
print(y_OH_train.shape, y_OH_val.shape)
print(y_OH_train[:5])

In [0]:
# 2 inputs -> hidden layers of 2 and 3 units -> 4 class outputs.
ffsn_multi = FFSN_MultiClass(n_inputs=2, n_outputs=4, hidden_layers=[2, 3])
ffsn_multi.fit(X_train, y_OH_train, lr=0.005, epochs=2000, display_loss=True)

In [0]:
# The predicted class is the index of the largest output in each row.
Y_pred_train = ffsn_multi.predict(X_train)
Y_pred_train = np.argmax(Y_pred_train, axis=1)

Y_pred_val = ffsn_multi.predict(X_val)
Y_pred_val = np.argmax(Y_pred_val, axis=1)

# accuracy_score takes (y_true, y_pred); the arguments were previously swapped.
accuracy_train = accuracy_score(Y_train, Y_pred_train)
accuracy_val = accuracy_score(Y_val, Y_pred_val)

print('Accuracy train=',round(accuracy_train, 2))
print('Accuracy val=',round(accuracy_val, 2))

In [0]:
# Colour = predicted class. Marker size = 3 for a correct prediction and 18
# for a misclassified point. With four classes the label difference can be
# 1, 2 or 3, so compare for inequality instead of using abs(pred - true).
plt.scatter(X_train[:,0],X_train[:,1],c=Y_pred_train, cmap=my_cmap, s=15*((Y_pred_train != Y_train)+.2))
plt.show()