In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

def load_data(file_path):
    """Read a header-less CSV and split it into features and labels.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds every column except the last and
        ``y`` holds the last column as a 2-D array with a single column.
    """
    frame = pd.read_csv(file_path, header=None)
    features = frame.iloc[:, :-1].values
    # Selecting the last column with a list keeps it 2-D: shape (n_rows, 1).
    labels = frame.iloc[:, [-1]].values
    return features, labels

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The naive formula overflows in ``np.exp(-x)`` for large negative ``x``
    (emitting a RuntimeWarning). Here the exponential is only ever taken of a
    non-positive number, so it stays in (0, 1] and cannot overflow.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The sigmoid of ``x``; a NumPy scalar for scalar input, otherwise an
        array of the same shape as ``x``.
    """
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x))          (identical to the naive formula)
    # x <  0: exp(x) / (1 + exp(x))      (algebraically the same value)
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the derivative of the sigmoid expressed in terms of the sigmoid's
    *output*: callers in this file pass activations, not pre-activations.
    """
    complement = 1 - x
    return x * complement

def initialize_weights_zeros(input_size, hidden_size, output_size):
    """Build the three weight matrices of the network, all filled with zeros.

    Args:
        input_size: Number of input features.
        hidden_size: Width of each of the two hidden layers.
        output_size: Number of output units.

    Returns:
        A dict with keys ``'W1'`` (input -> hidden), ``'W2'`` (hidden ->
        hidden) and ``'W3'`` (hidden -> output).
    """
    layer_shapes = (
        ('W1', (input_size, hidden_size)),
        ('W2', (hidden_size, hidden_size)),
        ('W3', (hidden_size, output_size)),
    )
    return {name: np.zeros(shape) for name, shape in layer_shapes}

def learning_rate_schedule(gamma0, d, t):
    """Return the decayed learning rate ``gamma0 / (1 + (gamma0 / d) * t)``.

    Args:
        gamma0: Initial learning rate (the value returned at ``t == 0``).
        d: Decay parameter; a smaller ``d`` makes the rate shrink faster.
        t: Step counter.
    """
    decay = gamma0 / d
    denominator = 1 + decay * t
    return gamma0 / denominator

def forward_propagation(X, weights):
    """Run the input through the three sigmoid layers (no bias terms).

    Args:
        X: Input sample(s); its last dimension must match ``weights['W1']``.
        weights: Dict holding the matrices ``'W1'``, ``'W2'`` and ``'W3'``.

    Returns:
        A tuple ``(a1, a2, a3)`` with the activation of each layer in order;
        ``a3`` is the network output.
    """
    activations = []
    signal = X
    for layer in ('W1', 'W2', 'W3'):
        # Linear map followed by the sigmoid non-linearity.
        signal = sigmoid(np.dot(signal, weights[layer]))
        activations.append(signal)
    return tuple(activations)

def backward_propagation(X, y, a1, a2, a3, weights, learning_rate):
    """Apply one gradient-descent step for a single training sample.

    Gradients are formed with ``np.outer``, so the inputs are treated as one
    sample rather than a batch.

    Args:
        X: The input sample.
        y: The target for that sample.
        a1, a2, a3: Layer activations returned by ``forward_propagation``.
        weights: Dict with ``'W1'``, ``'W2'``, ``'W3'``; updated in place.
        learning_rate: Step size for this update.

    Returns:
        The same ``weights`` dict, after the update.
    """
    # Error signals, propagated from the output layer back to the first
    # hidden layer. All of them are computed before any weight is modified.
    delta_out = (a3 - y) * sigmoid_derivative(a3)
    delta_hidden2 = np.dot(delta_out, weights['W3'].T) * sigmoid_derivative(a2)
    delta_hidden1 = np.dot(delta_hidden2, weights['W2'].T) * sigmoid_derivative(a1)

    # Gradient of each weight matrix: (layer input) outer (layer delta).
    gradients = {
        'W1': np.outer(X, delta_hidden1),
        'W2': np.outer(a1, delta_hidden2),
        'W3': np.outer(a2, delta_out),
    }

    for layer, gradient in gradients.items():
        weights[layer] -= learning_rate * gradient

    return weights

def objective_function(X, y, weights):
    """Return half the mean squared error of the network on ``(X, y)``.

    Args:
        X: Input samples.
        y: Targets corresponding to ``X``.
        weights: Dict of weight matrices passed to ``forward_propagation``.
    """
    # Only the last activation (the network output) is needed here.
    outputs = forward_propagation(X, weights)[-1]
    residual = y - outputs
    return np.mean(residual ** 2) / 2

def train_neural_network_sgd_zeros_init(X_train, y_train, X_test, y_test, hidden_size, gamma0, d, epochs):
    """Train the network with per-sample SGD, starting from all-zero weights.

    Each epoch visits the training samples in a freshly shuffled order (using
    NumPy's global random state). The learning rate for every update comes
    from ``learning_rate_schedule(gamma0, d, t)``, where ``t`` counts updates
    from 1 across all epochs.

    Args:
        X_train: Training inputs, one sample per row.
        y_train: Training targets, aligned with ``X_train``.
        X_test: Test inputs.
        y_test: Test targets.
        hidden_size: Width of both hidden layers.
        gamma0: Initial learning rate for the schedule.
        d: Decay parameter for the schedule.
        epochs: Number of passes over the training data.

    Returns:
        A tuple ``(weights, training_errors, test_errors)``; the two lists
        hold the objective value after each epoch (empty if ``epochs`` is 0).
    """
    input_size = X_train.shape[1]
    output_size = 1

    weights = initialize_weights_zeros(input_size, hidden_size, output_size)

    training_errors = []
    test_errors = []

    n_samples = len(X_train)
    t = 0  # global update counter driving the learning-rate schedule

    for _ in range(epochs):
        # Shuffle training data
        order = np.arange(n_samples)
        np.random.shuffle(order)

        for sample, target in zip(X_train[order], y_train[order]):
            t += 1
            learning_rate = learning_rate_schedule(gamma0, d, t)

            a1, a2, a3 = forward_propagation(sample, weights)
            weights = backward_propagation(sample, target, a1, a2, a3, weights, learning_rate)

        # Record the objective on both sets after each epoch.
        training_errors.append(objective_function(X_train, y_train, weights))
        test_errors.append(objective_function(X_test, y_test, weights))

    if training_errors:
        training_error = training_errors[-1]
        test_error = test_errors[-1]
    else:
        # No epoch ran: report the error of the untrained weights instead of
        # failing on unbound variables.
        training_error = objective_function(X_train, y_train, weights)
        test_error = objective_function(X_test, y_test, weights)

    # Print the final training and test errors
    print(f"Final Training Error for Hidden Size {hidden_size}: {training_error}, Final Test Error: {test_error}")

    return weights, training_errors, test_errors

# Load data
# NOTE: these are absolute, machine-specific paths; adjust them to wherever
# the bank-note train/test CSV files live on your machine.

train_file_path = "C:\\Users\\santhosh\\Downloads\\project\\santhosh\\Neural-Networks\\dataset\\bank-note\\train.csv"
test_file_path = "C:\\Users\\santhosh\\Downloads\\project\\santhosh\\Neural-Networks\\dataset\\bank-note\\test.csv"


X_train, y_train = load_data(train_file_path)
X_test, y_test = load_data(test_file_path)

# Specify hyperparameters
hidden_sizes = [5, 10, 25, 50, 100]
gamma0 = 0.1   # initial learning rate for the schedule
d = 0.009      # decay parameter for the schedule
epochs = 100

# Train the neural network for different hidden sizes
for hidden_size in hidden_sizes:
    weights_zeros_init, training_errors, test_errors = train_neural_network_sgd_zeros_init(X_train, y_train, X_test, y_test, hidden_size, gamma0, d, epochs)
    # Report inside the loop so every hidden size gets its own summary,
    # not only the last one.
    print(f"Results for Hidden Size {hidden_size}:\nTraining Error: {training_errors[-1]}, Test Error: {test_errors[-1]}\n")


Final Training Error for Hidden Size 5: 0.12493893439206727, Final Test Error: 0.1249342383230362
Final Training Error for Hidden Size 10: 0.12491532206757816, Final Test Error: 0.12490878260116663
Final Training Error for Hidden Size 25: 0.12497751571795877, Final Test Error: 0.12497579832712365
Final Training Error for Hidden Size 50: 0.12495037045820206, Final Test Error: 0.12494656157187552
Final Training Error for Hidden Size 100: 0.12516020842633405, Final Test Error: 0.1251720791431973
Results for Hidden Size 100:
Training Error: 0.12516020842633405, Test Error: 0.1251720791431973

