by Suman Kumar Bera
skbera.iitkgp21@gmail.com
for Tutorial of AMLBS on 04/09/23

Problem taken from: https://www.javatpoint.com/pytorch-backpropagation-process-in-deep-neural-network

In [2]:
## NN

# Import the necessary libraries
import numpy as np

# define the class
class NN:
    """Two-layer sigmoid network used for the worked backpropagation example.

    Data flows as ``X -> sigmoid(X.W1 + b1) -> sigmoid(H.W2 + b2)``.

    With the default 2-2-2 layout the weights are the fixed numbers of the
    worked example, so every intermediate value can be checked by hand.  For
    any other layout the weights are drawn from a standard normal so that
    their shapes match the requested layer sizes.

    The biases ``b1`` and ``b2`` are scalars shared by every unit of a layer;
    ``back_prop`` updates only ``W1`` and ``W2`` and leaves the biases as set
    here.
    """

    def __init__(self, input_layer_size=2, hidden_layer_size=2, output_layer_size=2, lr=0.5, epoches=1000):
        """Store the hyper-parameters and initialize weights and biases.

        Args:
            input_layer_size: number of input features.
            hidden_layer_size: number of hidden units.
            output_layer_size: number of output units.
            lr: learning rate used by ``back_prop``.
            epoches: number of weight updates performed by ``train``.
        """
        self.input_layer_size = input_layer_size
        self.hidden_layer_size = hidden_layer_size
        self.output_layer_size = output_layer_size
        self.lr = lr
        self.number_of_epoches = epoches

        # initialize the weights
        if (input_layer_size, hidden_layer_size, output_layer_size) == (2, 2, 2):
            # specific numbers of the worked example
            self.W1 = np.array([[0.15, 0.25], [0.20, 0.30]])  # ([[w1, w3], [w2, w4]])
            self.W2 = np.array([[0.40, 0.50], [0.45, 0.55]])
        else:
            # The hard-coded 2x2 matrices cannot serve another layout, so fall
            # back to random numbers of the requested shape.
            self.W1 = np.random.randn(self.input_layer_size, self.hidden_layer_size)
            self.W2 = np.random.randn(self.hidden_layer_size, self.output_layer_size)
        # Alternative to try: all-zero weights, e.g.
        # self.W1 = np.zeros((self.input_layer_size, self.hidden_layer_size))
        # self.W2 = np.zeros((self.hidden_layer_size, self.output_layer_size))

        # bias (one scalar per layer)
        self.b1 = 0.35
        self.b2 = 0.60

    # define the activation function
    def activation(self, x):
        """Return the element-wise logistic sigmoid of ``x``."""
        return (1 / (1 + np.exp(-x)))

    # forward pass
    def forward(self, X):
        """Run the forward pass and return the output activations.

        The hidden and output activations are cached on ``self.hidden`` and
        ``self.output`` because ``back_prop`` reads them.
        """
        self.hidden = self.activation(np.dot(X, self.W1) + self.b1)  # H = activation(X*W1 + b1)
        self.output = self.activation(np.dot(self.hidden, self.W2) + self.b2)  # OP = activation(H*W2 + b2)
        return self.output

    # back propagation
    def back_prop(self, X, y, verbose=True):
        """Perform one gradient-descent update of ``W2`` and ``W1``.

        Args:
            X: inputs, 2-D with one row per sample.
            y: targets with the same shape as the network output.
            verbose: when true, print the updated weight matrices.

        Returns:
            The total squared error ``sum((y - OP)**2 / 2)`` measured before
            the update.
        """
        output = self.forward(X)
        error = np.sum((y - output)**2 / 2)

        # dE/d(net_out) = -(y - OP) * OP * (1 - OP)
        output_delta = np.multiply(-(y - output), output * (1 - output))
        # Propagate to the hidden layer through W2 as it was during the forward
        # pass, i.e. before W2 is overwritten below.
        hidden_delta = np.dot(output_delta, self.W2.T) * self.hidden * (1 - self.hidden)

        self.W2 -= self.lr * np.dot(self.hidden.T, output_delta)    # W2 = W2 - alpha * H^T * delta_out
        self.W1 -= self.lr * np.dot(np.transpose(X), hidden_delta)  # W1 = W1 - alpha * X^T * delta_hidden

        if verbose:
            print(f'New W2 Weights: {self.W2}')
            print(f'New W1 Weights: {self.W1}')
        return error

    # training; update the weights for the given number of epoches
    def train(self, X, y):
        """Call ``back_prop`` ``number_of_epoches`` times without printing.

        Returns:
            The error reported by the last ``back_prop`` call, or ``None``
            when ``number_of_epoches`` is 0.
        """
        error = None
        for _ in range(self.number_of_epoches):
            error = self.back_prop(X, y, verbose=False)
        return error


# create an instance of the class with its default arguments (lr = 0.5, epoches = 1000)
model = NN()
# define the input and output
X = np.array([[0.05, 0.10]]) # input: a single sample with two features
y = np.array([[0.01, 0.99]]) # target output for that sample
# Last expression of the cell, so the notebook displays the forward-pass output.
model.forward(X)
# Uncomment to run one weight update instead:
# model.back_prop(X, y)

array([[0.75136507, 0.77292847]])

<h2>Solution with some updated code</h2>

Major updates:
1. Implemented Backpropagation
2. Implemented Scaling

In [27]:
# Importing necessary dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler

# Modified Neural Network class to take user-defined input/output layers
class FlexibleNeuralNetwork:
    """Single-hidden-layer sigmoid network with caller-chosen layer sizes.

    Both the hidden and the output layer apply a sigmoid.  Training is
    full-batch: every call to ``backward`` uses all rows of ``X`` at once and
    sums the per-row contributions.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        """Draw all parameters uniformly from [-0.5, 0.5) and store the learning rate."""

        def centred(rows, cols):
            # np.random.rand samples [0, 1); shifting by 0.5 centres it on zero.
            return np.random.rand(rows, cols) - 0.5

        # Weights first, then biases: the order of the draws determines the
        # values obtained under a seeded global RNG.
        self.weights_input_hidden = centred(input_size, hidden_size)
        self.weights_hidden_output = centred(hidden_size, output_size)
        self.bias_hidden = centred(1, hidden_size)
        self.bias_output = centred(1, output_size)

        self.learning_rate = learning_rate

    def sigmoid(self, x):
        """Element-wise logistic function."""
        return 1 / (np.exp(-x) + 1)

    def sigmoid_derivative(self, x):
        """Sigmoid slope expressed through the sigmoid's own output ``x``."""
        return (1 - x) * x

    def _affine(self, inputs, weights, bias):
        """Return ``inputs . weights + bias`` for one layer."""
        return np.dot(inputs, weights) + bias

    def forward(self, X):
        """Compute the network output for ``X``, caching every intermediate."""
        self.hidden_input = self._affine(X, self.weights_input_hidden, self.bias_hidden)
        self.hidden_output = self.sigmoid(self.hidden_input)

        self.output_input = self._affine(
            self.hidden_output, self.weights_hidden_output, self.bias_output
        )
        self.output = self.sigmoid(self.output_input)
        return self.output

    def backward(self, X, y, output):
        """Apply one update step given the ``output`` of a preceding ``forward(X)``."""
        step = self.learning_rate

        # Error signals, output layer first, then pushed back to the hidden layer.
        delta_out = (y - output) * self.sigmoid_derivative(output)
        delta_hid = (
            np.dot(delta_out, self.weights_hidden_output.T)
            * self.sigmoid_derivative(self.hidden_output)
        )

        # Every adjustment is derived from the pre-update parameters and then
        # added in place, in the order output layer -> hidden layer.
        adjustments = (
            (self.weights_hidden_output, np.dot(self.hidden_output.T, delta_out)),
            (self.bias_output, np.sum(delta_out, axis=0, keepdims=True)),
            (self.weights_input_hidden, np.dot(X.T, delta_hid)),
            (self.bias_hidden, np.sum(delta_hid, axis=0, keepdims=True)),
        )
        for parameter, direction in adjustments:
            parameter += step * direction

    def train(self, X, y, epochs):
        """Repeat forward + backward on the full data ``epochs`` times."""
        for _ in range(epochs):
            self.backward(X, y, self.forward(X))

    def predict(self, X):
        """Return the forward-pass output for ``X``."""
        return self.forward(X)


# Parameters for training
# NOTE(review): X_train is first assigned by the train_test_split call further
# down in this cell, so running the cell top to bottom in a fresh kernel raises
# NameError on the next line -- confirm the intended execution order.
# NOTE(review): none of these three names is read by the code visible in this
# cell (configure_and_train_nn has its own `epochs` default) -- possibly leftovers.
input_size = X_train.shape[1]
output_size = 1  # Single output for regression (house price)
epochs = 1000

# Function to configure and train the model based on user input and calculate MSE for training and validation
def configure_and_train_nn(input_neurons, hidden_neurons, output_neurons, learning_rate, k_folds=5, epochs=1000, X=None, y=None):
    """Cross-validate a FlexibleNeuralNetwork and report its MSE.

    A fresh network is created and trained for every fold, so no weights are
    shared between folds.

    Args:
        input_neurons: size of the input layer.
        hidden_neurons: size of the hidden layer.
        output_neurons: size of the output layer.
        learning_rate: learning rate handed to the network.
        k_folds: number of cross-validation folds.
        epochs: number of training epochs per fold.
        X: feature array to split into folds; it is indexed with the integer
            index arrays produced by KFold.  Defaults to the notebook-level
            ``X_train``.
        y: target array aligned row by row with ``X``.  Defaults to the
            notebook-level ``y_train``.

    Returns:
        A tuple ``(fold_validation_losses, avg_train_loss, avg_val_loss)``:
        the list of per-fold validation MSEs, followed by the training and
        validation MSE averaged over all folds.
    """
    # Fall back to the notebook globals only when no data is passed, which is
    # what the existing call sites rely on.
    if X is None:
        X = X_train
    if y is None:
        y = y_train

    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
    training_losses = []
    fold_validation_losses = []  # One validation MSE per fold

    for train_idx, val_idx in kfold.split(X):
        X_fold_train, X_fold_val = X[train_idx], X[val_idx]
        y_fold_train, y_fold_val = y[train_idx], y[val_idx]

        # Initializing the neural network with user-defined parameters
        nn = FlexibleNeuralNetwork(input_neurons, hidden_neurons, output_neurons, learning_rate)

        # Training the model
        nn.train(X_fold_train, y_fold_train, epochs)

        # MSE on the rows the network was trained on
        train_predictions = nn.predict(X_fold_train)
        training_losses.append(np.mean((y_fold_train - train_predictions)**2))

        # MSE on the held-out rows of this fold
        val_predictions = nn.predict(X_fold_val)
        fold_validation_losses.append(np.mean((y_fold_val - val_predictions)**2))

    # Calculating the average training and validation losses across all folds
    avg_train_loss = np.mean(training_losses)
    avg_val_loss = np.mean(fold_validation_losses)

    # Returning the fold-specific validation losses, then the two averages
    return fold_validation_losses, avg_train_loss, avg_val_loss


# Importing the data (path is relative to the current working directory)
housing_data = pd.read_csv('housing.csv')

# Preparing data for training
X = housing_data[['RM', 'LSTAT', 'PTRATIO']].values
# Double brackets keep the target two-dimensional (one column).
# NOTE(review): dividing by 100000 maps a MEDV of 504000.0 to 5.04, while the
# sigmoid output layer of FlexibleNeuralNetwork cannot exceed 1 -- confirm that
# this target scaling is intended.
y = housing_data[['MEDV']].values / 100000  # Scaling the output target

# Scaling the input features
# NOTE(review): the scaler is fitted on all rows before the split below, so its
# statistics include the rows that end up in X_test -- confirm this is acceptable.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and test sets (80 % / 20 %)
# NOTE(review): X_test and y_test are not used by the code visible in this cell.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=52)

# Example of how the user might input parameters to configure the network
input_neurons = X_train.shape[1]  # 3 input features (RM, LSTAT, PTRATIO)
output_neurons = 1  # Single output for regression


# Each case below is evaluated twice: with the default 5 folds and with 10 folds.
# Every call returns (per-fold validation MSEs, mean training MSE, mean validation MSE).

# Parameters for case (a)
hidden_neurons_a = 3
learning_rate_a = 0.01

# 5-fold cross-validation for case (a)
fold_val_losses_a_5folds, avg_train_loss_a_5folds, avg_val_loss_a_5folds = configure_and_train_nn(
    input_neurons, hidden_neurons_a, output_neurons, learning_rate_a)

# 10-fold cross-validation for case (a)
fold_val_losses_a_10folds, avg_train_loss_a_10folds, avg_val_loss_a_10folds = configure_and_train_nn(
    input_neurons, hidden_neurons_a, output_neurons, learning_rate_a, k_folds=10)


# Parameters for case (b)
hidden_neurons_b = 4
learning_rate_b = 0.001

# 5-fold cross-validation for case (b)
fold_val_losses_b_5folds, avg_train_loss_b_5folds, avg_val_loss_b_5folds = configure_and_train_nn(
    input_neurons, hidden_neurons_b, output_neurons, learning_rate_b)

# 10-fold cross-validation for case (b)
fold_val_losses_b_10folds, avg_train_loss_b_10folds, avg_val_loss_b_10folds = configure_and_train_nn(
    input_neurons, hidden_neurons_b, output_neurons, learning_rate_b, k_folds=10)


# Parameters for case (c)
hidden_neurons_c = 5
learning_rate_c = 0.0001

# 5-fold cross-validation for case (c)
fold_val_losses_c_5folds, avg_train_loss_c_5folds, avg_val_loss_c_5folds = configure_and_train_nn(
    input_neurons, hidden_neurons_c, output_neurons, learning_rate_c)

# 10-fold cross-validation for case (c)
fold_val_losses_c_10folds, avg_train_loss_c_10folds, avg_val_loss_c_10folds = configure_and_train_nn(
    input_neurons, hidden_neurons_c, output_neurons, learning_rate_c, k_folds=10)


# Printing the 5-fold cross-validation results (averages over the folds)
print('5-fold cross-validation results:')
print(f'Case (a) - Training MSE: {avg_train_loss_a_5folds}, Validation MSE: {avg_val_loss_a_5folds}')
print(f'Case (b) - Training MSE: {avg_train_loss_b_5folds}, Validation MSE: {avg_val_loss_b_5folds}')
print(f'Case (c) - Training MSE: {avg_train_loss_c_5folds}, Validation MSE: {avg_val_loss_c_5folds}')

print('\n')

# Printing the 10-fold cross-validation results (averages over the folds)
print('10-fold cross-validation results:')
print(f'Case (a) - Training MSE: {avg_train_loss_a_10folds}, Validation MSE: {avg_val_loss_a_10folds}')
print(f'Case (b) - Training MSE: {avg_train_loss_b_10folds}, Validation MSE: {avg_val_loss_b_10folds}')
print(f'Case (c) - Training MSE: {avg_train_loss_c_10folds}, Validation MSE: {avg_val_loss_c_10folds}')


5-fold cross-validation results:
Case (a) - Training MSE: 15.57576277056953, Validation MSE: 15.572468716645304
Case (b) - Training MSE: 15.577722421522335, Validation MSE: 15.574440397281316
Case (c) - Training MSE: 15.598655449682076, Validation MSE: 15.595456408730433


10-fold cross-validation results:
Case (a) - Training MSE: 15.57592712368706, Validation MSE: 15.572950108195789
Case (b) - Training MSE: 15.577612566030677, Validation MSE: 15.574674562404576
Case (c) - Training MSE: 15.595804760749497, Validation MSE: 15.592842765086981


# Potential reasons for not converging:
1. Inadequate learning rate (as asked in the problem statement)
2. Insufficient epochs
3. Network Structure (as asked in the problem statement)
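4. Sigmoid activation function on the output layer: a sigmoid only produces values in (0, 1), but the targets are MEDV / 100000 (e.g. 504000 / 100000 = 5.04), so the predictions can never reach them; this is consistent with the MSE staying near 15.6 for every configuration (as advised in the code above)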
5. Poor weights initialization (as advised in the code above)

In [2]:
# difference between np.dot and np.multiply

a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

# For these two 2-D arrays np.dot is the matrix product (rows of a times
# columns of b, e.g. 1*5 + 2*7 = 19) ...
print(f'np.dot = \n', np.dot(a, b))
# ... whereas np.multiply multiplies matching positions (e.g. 1*5 = 5).
print(f'np.multiply = \n', np.multiply(a, b))

np.dot = 
 [[19 22]
 [43 50]]
np.multiply = 
 [[ 5 12]
 [21 32]]


In [3]:
# Dataset
import pandas as pd
import numpy as np


# Load the housing table; the path is relative to the current working directory.
df = pd.read_csv('housing.csv')
# Bare expression as the last line of the cell so the notebook renders the table.
df

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0
...,...,...,...,...
484,6.593,9.67,21.0,470400.0
485,6.120,9.08,21.0,432600.0
486,6.976,5.64,21.0,501900.0
487,6.794,6.48,21.0,462000.0


In [4]:
# Separate the features (every column except MEDV) from the target column.
X_data = df.drop(columns=['MEDV']).to_numpy()
y_data = df['MEDV'].to_numpy()

# Min-max scale every feature column on its own range (axis=0).  Reducing over
# the whole matrix would give all features one shared minimum and maximum, so
# the column with the widest range would dictate the scale of the others.
X_min = X_data.min(axis=0)
X_max = X_data.max(axis=0)
X_data = (X_data - X_min) / (X_max - X_min)

# The target is a single column, so a scalar minimum and maximum are correct.
y_min = y_data.min()
y_max = y_data.max()
y_data = (y_data - y_min) / (y_max - y_min)

print(X_data.shape)
print(y_data.shape)

(489, 3)
(489,)
