In [27]:

import numpy as np
import matplotlib.pyplot as plt
from scipy.special import xlogy
import h5py
from autograd import grad
from autograd import elementwise_grad as egrad
import autograd.numpy as jnp
from sklearn.metrics import accuracy_score

In [28]:
class NN_Layer:
    def __init__(self, input_size, output_size,activation_fn):
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size[1], output_size[1]) / np.sqrt(input_size[1] + output_size[1])
        self.bias = np.random.randn(output_size[1]) / np.sqrt(output_size[1])
        self.Z = None
        self.A = None
        self.activation_fn = activation_fn
        self.dL_dw = None
        self.dL_db = None

    def forward(self, input):
        self.input = input
        self.Z = jnp.dot(input, self.weights) + jnp.tile(self.bias,(input.shape[0],1))
        # print(self.Z.shape)
        # print(self.weights.shape)
        # print(self.bias.shape)
        self.A = self.get_activation_function(self.activation_fn)(self.Z)
        # print(self.A)

        return self.A

    def sigmoid(self,x):  
        a = -jnp.array(x,dtype=float)
        b = jnp.exp(a)
        return 1/(1+b)

    def relu(self,x):
        return jnp.maximum(x, 0.0)

    def softmax(self,x):
      return jnp.exp(x)/jnp.sum(jnp.exp(x))

    def identity(self,x):
      return x

    def get_activation_function(self,name):

      if name=='relu':
        return self.relu
      elif name=='sigmoid':
        return self.sigmoid
      elif name=='softmax':
        return self.softmax
      elif name=='identity':
        return self.identity
  
    def CrossE_multi(self, A,y):
      CrossE = 0
      for k in range(n_classes):
        CrossE -= jnp.dot((y == k).astype(float),jnp.log(A[:,k]))   
      return CrossE
    
    def rmse(self,A,y):
       return jnp.sum((jnp.square(jnp.subtract(A,y.reshape(-1,1)))))/len(A)


    
    

In [29]:
def load_dig_dataset():
    import numpy as np
    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler
    import pandas as pd
    scaler = MinMaxScaler()
    
    n_labeled = 5
    n_classes = 10
    digits = load_digits(n_class=n_classes)  # consider binary case
    X = digits.data
    X = scaler.fit_transform(X)
    y = digits.target

    X = pd.DataFrame(X)
    y = pd.Series(y)
  
    return X,y,n_classes
X,y,n_classes = load_dig_dataset()

In [30]:
def forplusloss(weights, bias, X_train, y_train,network): #for back propagation using autograd
  input = X_train
  # print("HERE")
  for i in range(len(network)):
    Z = jnp.dot(input, jnp.array(weights[i])) + jnp.array(jnp.tile(bias[i],(input.shape[0],1)))

    A = network[i].get_activation_function(network[i].activation_fn)(Z)
    input = A
  loss = network[-1].CrossE_multi(A,y_train)
  return loss

In [31]:
def predict(network, input):
  output = input
  for layer in network:
      output = layer.forward(output)

  return output

In [32]:

accuracies = []
from sklearn.model_selection import KFold
kf = KFold(n_splits=3,shuffle=False)
kf.split(X)    

for train_index, test_index in kf.split(X):
    # Split train-test
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    X_train, X_test,y_train, y_test = np.array(X_train),np.array(X_test),np.array(y_train),np.array(y_test)
    from tqdm import trange


    network = [
        NN_Layer((X_train.shape[0],X_train.shape[1]), (X_train.shape[0],20),'sigmoid'),
        NN_Layer((X_train.shape[0],20),(X_train.shape[0],n_classes),'softmax'),
    ]
    m =  X_train.shape[0]
    epochs =300
    learning_rate = 2
  
    for i in trange(epochs):
      j= 0
      weights = []
      bias=[]
      for layer in network:
        # print(i,j)
        if j==0:
          A_value = layer.forward(X_train)
          weights.append(layer.weights)
          bias.append(layer.bias)
        else:
          A_value = layer.forward(A_value)
          weights.append(layer.weights)
          bias.append(layer.bias)
        j+=1

      dL_dw = egrad(forplusloss,0)(weights,bias,X_train,y_train,network)
      dL_db = egrad(forplusloss,1)(weights,bias,X_train,y_train,network)
      for i in range(len(network)):
        network[i].weights -= learning_rate * dL_dw[i]/len(X_train)
        network[i].bias -= learning_rate * dL_db[i]/len(X_train)
      
    Z = predict(network,X_test)
    A = np.argmax(Z,axis=1)

    acc = np.mean(A == y_test)
    accuracies.append(acc)
     
print("Accuracies for 3 fold model are ",accuracies)


100%|██████████| 300/300 [00:11<00:00, 26.84it/s]
100%|██████████| 300/300 [00:11<00:00, 27.02it/s]
100%|██████████| 300/300 [00:11<00:00, 26.99it/s]

Accuracies for 3 fold model are  [0.9515859766277128, 0.9415692821368948, 0.9131886477462438]





In [33]:
import pandas as pd
def load_bos_dataset():
    import numpy as np
    from sklearn.datasets import load_boston
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    boston = load_boston()  # consider binary case
    X = boston.data   
    X = scaler.fit_transform(X)
    y = boston.target
    X = pd.DataFrame(X)
    y = pd.Series(y)
  
    return X,y
X,y= load_bos_dataset()


In [34]:
def forplusloss(weights, bias, X_train, y_train,network): #for back propagation using autograd
  input = X_train
  for i in range(len(network)):
    Z = jnp.dot(input, jnp.array(weights[i])) + jnp.array(jnp.tile(bias[i],(input.shape[0],1)))

    A = network[i].get_activation_function(network[i].activation_fn)(Z)
    input = A
  loss = network[-1].rmse(A,y_train)
  return loss

In [35]:
def predict(network, input):
    output = input
    for layer in network:
        output = layer.forward(output)
    return output

In [36]:

errors = []
from sklearn.model_selection import KFold
kf = KFold(n_splits=3,shuffle=False)
kf.split(X)    

for train_index, test_index in kf.split(X):

  from tqdm import trange


  network = [
      NN_Layer((X_train.shape[0],X_train.shape[1]), (X_train.shape[0],20),'relu'),
      NN_Layer((X_train.shape[0],20),(X_train.shape[0],1),'relu'),
  ]
  m =  X_train.shape[0]
  # print(m)
  epochs =300
  learning_rate = 2

  for i in trange(epochs):
    j= 0
    weights = []
    bias=[]
    for layer in network:
      # print(i,j)
      if j==0:
        A_value = layer.forward(X_train)
        weights.append(layer.weights)
        bias.append(layer.bias)

      else:
        A_value = layer.forward(A_value)
        weights.append(layer.weights)
        bias.append(layer.bias)
      j+=1


    dL_dw = egrad(forplusloss,0)(weights,bias,X_train,y_train,network)
    dL_db = egrad(forplusloss,1)(weights,bias,X_train,y_train,network)
    for i in range(len(network)):
      network[i].weights -= learning_rate * dL_dw[i]/len(X_train)
      network[i].bias -= learning_rate * dL_db[i]/len(X_train)
    
  A = predict(network,X_test)
  error = (np.square(np.subtract(A,y_test.reshape(-1,1))).mean())**0.5
  errors.append(error)

print("RMSE for 3 fold model are ",errors)


100%|██████████| 300/300 [00:05<00:00, 51.22it/s]
100%|██████████| 300/300 [00:05<00:00, 51.19it/s]
100%|██████████| 300/300 [00:05<00:00, 51.12it/s]

RMSE for 3 fold model are  [2.5218012750073453, 2.4725650701693174, 2.4923906103031412]





Reference: https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65#:~:text=FC%20layers%20are%20the%20most,connected%20to%20every%20output%20neurons