# Name - SOHIT PATHAK

# Roll No: MA22M019

Neural network with BP and FP

Implement the two layer network for m-samples, n-features as we discussed in class (both FP and BP) and for N layers in the hidden layer. Split the data (you can use the log. reg. data or any other one) and train your network with 80% of the data. Test your network with the test data. Report the evaluation metrics for varying number of layers in the network. Also evaluate one more activation function (ReLU/tanh) other than sigmoid function.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
X = iris.data
y = iris.target

In [2]:
# Convert targets to binary classification problem
y = (y > 0).astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def tanh(x):
    return np.tanh(x)


In [4]:
def forward_propagation(X, weights, biases, activation):
    Z1 = np.dot(X, weights[0]) + biases[0]
    A1 = activation(Z1)
    Z2 = np.dot(A1, weights[1]) + biases[1]
    A2 = sigmoid(Z2)
    return Z1, A1, Z2, A2


In [5]:
def compute_cost(A2, Y):
    m = Y.shape[0]
    cost = -(1/m) * np.sum(Y * np.log(A2) + (1-Y) * np.log(1-A2))
    return cost


In [6]:
def backward_propagation(X, Y, Z1, A1, Z2, A2, weights, activation):
    m = X.shape[0]
    dZ2 = A2 - Y
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m
    
    if activation == sigmoid:
        dZ1 = np.dot(dZ2, weights[1].T) * A1 * (1 - A1)
    elif activation == relu:
        dZ1 = np.dot(weights[1].T, dZ2.T) * np.int64(A1 > 0)
    
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m
    
    return [dW1, dW2], [db1, db2]


In [9]:
def train_and_evaluate(X_train, X_test, y_train, y_test, num_hidden_layers=0,
                        hidden_layer_size=10,
                        learning_rate=0.01,
                        num_iterations=10000,
                        print_cost=False,
                        activation=sigmoid):
    
    # Convert targets to binary classification problem
    y_train = (y_train > 0).astype(int)
    y_test = (y_test > 0).astype(int)

    # Initialize network architecture
    input_size = X_train.shape[1]
    output_size = 1
    hidden_layer_sizes = [hidden_layer_size] * num_hidden_layers
    layer_sizes = [input_size] + hidden_layer_sizes + [output_size]

    # Initialize weights and biases
    weights = []
    biases = []
    
    for i in range(len(layer_sizes)-1):
        w_i = np.random.randn(layer_sizes[i], layer_sizes[i+1]) * 0.01
        b_i = np.zeros((1, layer_sizes[i+1]))
        weights.append(w_i)
        biases.append(b_i)

    # Train network using gradient descent
    costs = []
    
    for i in range(num_iterations):
        # Forward propagation
        Zs = []
        As = []
        
        A_prev = X_train
        
        for j in range(len(layer_sizes)-2):
            W_j = weights[j]
            b_j = biases[j]
            Z_j = np.dot(A_prev,W_j) + b_j
            
            if activation == sigmoid:
                A_j = sigmoid(Z_j)
            elif activation == relu:
                A_j = relu(Z_j)
            elif activation == tanh:
                A_j = tanh(Z_j)
            
            Zs.append(Z_j)
            As.append(A_j)
            
            A_prev = A_j
        
        W_last_layer_index = len(layer_sizes)-2
        
        W_last_layer = weights[W_last_layer_index]
        b_last_layer = biases[W_last_layer_index]
        
        Z_last_layer = np.dot(A_prev, W_last_layer) + b_last_layer
        A_last_layer = sigmoid(Z_last_layer)
        
        # Compute cost
        cost = compute_cost(A_last_layer, y_train)
        costs.append(cost)
        

        

        # Print cost
        if i % 1000 == 0 and print_cost:
            print(f"Cost after iteration {i}: {cost}")
    
    # Predict on test set
    _, _, _, A_test = forward_propagation(X_test, weights, biases, activation)
    y_pred = (A_test > 0.5).astype(int)
    
    # Compute evaluation metrics
    accuracy = np.mean(y_pred == y_test)
    precision = np.sum((y_pred == 1) & (y_test == 1)) / np.sum(y_pred == 1)
    recall = np.sum((y_pred == 1) & (y_test == 1)) / np.sum(y_test == 1)
    f1_score = 2 * precision * recall / (precision + recall)
    
    # Print evaluation metrics
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1_score}")
    
    return {
        "weights": weights,
        "biases": biases,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score
    }



In [10]:
num_hidden_layers_list = [1, 2, 3]
activation_list = [sigmoid, relu, tanh]

for num_hidden_layers in num_hidden_layers_list:
    for activation in activation_list:
        accuracy, precision, recall = train_and_evaluate(X_train, X_test, y_train, y_test, num_hidden_layers=num_hidden_layers, activation=activation) , train_and_evaluate(X_train, X_test, y_train, y_test, num_hidden_layers=num_hidden_layers, activation=activation), train_and_evaluate(X_train, X_test, y_train, y_test, num_hidden_layers=num_hidden_layers, activation=activation)
        print(f"Hidden Layers: {num_hidden_layers}, Activation: {activation.__name__}")
        print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}")


  precision = np.sum((y_pred == 1) & (y_test == 1)) / np.sum(y_pred == 1)


Accuracy: 0.3333333333333333
Precision: nan
Recall: 0.0
F1 Score: nan
Accuracy: 0.6666666666666666
Precision: 20.0
Recall: 30.0
F1 Score: 24.0
Accuracy: 0.3333333333333333
Precision: nan
Recall: 0.0
F1 Score: nan
Hidden Layers: 1, Activation: sigmoid
Accuracy: {'weights': [array([[-0.01453912,  0.0091768 ,  0.00549071, -0.00110845, -0.00685835,
         0.00855496,  0.00504963, -0.00154029,  0.00140594,  0.00039532],
       [ 0.00287803, -0.00922575,  0.00218635, -0.00050692,  0.0177393 ,
        -0.00392756, -0.00640955, -0.0190437 ,  0.00378115,  0.02187042],
       [-0.00212676,  0.00409184, -0.01207853, -0.00210428, -0.0051991 ,
         0.00431893, -0.02215979, -0.00820454,  0.00564126, -0.00489774],
       [-0.00476246,  0.00046182, -0.00040207,  0.01181775,  0.00382744,
         0.00962202,  0.00061638,  0.02187565,  0.00199143,  0.00474243]]), array([[ 0.0055687 ],
       [ 0.00656888],
       [ 0.00132318],
       [-0.00859259],
       [ 0.00204927],
       [-0.00488822],
    

  accuracy = np.mean(y_pred == y_test)


ValueError: operands could not be broadcast together with shapes (30,10) (30,) 