# CS 559  Project 2: Neural Network GridSearch Implementation
### Name : Shrey Shah
### CWID : 20009523

Task 1 [80]: Neural Network GridSearch Implementation
Implement neural network grid search from scratch that tunes the hyperparameters (Number of
neurons, hidden layers, batch size, etc.).
 Other packages than NumPy are allowed but not in Neural Network Algorithm



Task 2 [20]: Train Model
*   Train your Neural Network GridSearch algorithm and report the result.





In [1]:
import numpy as np
#Define the neural network architecture
class NeuralNetwork:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate):
        # Initialize hyperparameters
        self.input_size = input_size
        self.hidden_sizes = list(hidden_sizes)  # Convert tuple to list
        self.output_size = output_size
        self.learning_rate = learning_rate
        
        # Initialize weights and biases for each layer
        sizes = [input_size] + self.hidden_sizes + [output_size]
        self.weights = [np.random.randn(sizes[i], sizes[i+1]) for i in range(len(sizes)-1)]
        self.biases = [np.random.randn(sizes[i+1]) for i in range(len(sizes)-1)]
        
    def train(self, X, y, epochs, batch_size):
        # Train the neural network using mini-batch stochastic gradient descent
        for epoch in range(epochs):
            indices = np.arange(X.shape[0])
            np.random.shuffle(indices)
            for i in range(0, X.shape[0], batch_size):
                batch_indices = indices[i:i+batch_size]
                X_batch = X[batch_indices]
                y_batch = y[batch_indices]
                self._update_weights(X_batch, y_batch)
                
    def _update_weights(self, X, y):
        # Compute gradients of loss w.r.t. weights and biases using backpropagation
        activations = [X]
        for i in range(len(self.weights)):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            a = self._sigmoid(z)
            activations.append(a)
        error = (activations[-1] - y) / y.shape[0]
        deltas = [error * self._sigmoid_derivative(activations[-1])]
        for i in range(len(self.weights)-1, 0, -1):
            delta = np.dot(deltas[-1], self.weights[i].T) * self._sigmoid_derivative(activations[i])
            deltas.append(delta)
        deltas.reverse()
        
        # Update weights and biases using gradients
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * np.dot(activations[i].T, deltas[i])
            self.biases[i] -= self.learning_rate * np.sum(deltas[i], axis=0)
        
    def predict(self, X):
        # Use the trained neural network to predict classes for new data
        activations = [X]
        for i in range(len(self.weights)):
            z = np.dot(activations[-1], self.weights[i]) + self.biases[i]
            a = self._sigmoid(z)
            activations.append(a)
        y_pred = activations[-1]
        return y_pred
    
    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def _sigmoid_derivative(self, a):
        return a * (1 - a)




In [2]:
# Define the hyperparameters to tune
input_size = 784
hidden_sizes = [(3,), (5,), (10,),(15,),(100,)]
output_size = 10
epochs = 10



from keras.datasets import mnist

# Load the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Preprocess the data
X_train = X_train.reshape(-1, 784) / 255.0
X_test = X_test.reshape(-1, 784) / 255.0
y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

X = X_train
y = y_train


# Define the grid of hyperparameters to search over
grid = {'hidden_sizes': hidden_sizes, 'batch_size': [16, 32,64,128,256,512,1024], 'learning_rate': [1,0.1, 0.01,0.001,0.0001]}

from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Define a function to perform grid search
def grid_search(X, y, X_val,y_val,grid, epochs):
    best_score = 0.0
    best_params = None
    for hidden_sizes in grid['hidden_sizes']:
        for batch_size in grid['batch_size']:
            for learning_rate in grid['learning_rate']:
                nn = NeuralNetwork(input_size, hidden_sizes, output_size, learning_rate)
                nn.train(X, y, epochs, batch_size)
                y_pred = nn.predict(X_val)
                score = np.mean(np.argmax(y_val ,axis=1) == np.argmax(y_pred, axis=1))
                print(f'hidden_sizes={hidden_sizes}, batch_size={batch_size}, learning_rate={learning_rate}, score={score}')
                if score > best_score:
                    best_score = score
                    best_params = {'hidden_sizes': hidden_sizes, 'batch_size': batch_size, 'learning_rate': learning_rate}
    return best_params, best_score

# Perform grid search
best_params, best_score = grid_search(X, y,X_val,y_val ,grid, epochs)
print(f'Best score: {best_score}, Best params: {best_params}')



hidden_sizes=(3,), batch_size=16, learning_rate=1, score=0.30525
hidden_sizes=(3,), batch_size=16, learning_rate=0.1, score=0.3485
hidden_sizes=(3,), batch_size=16, learning_rate=0.01, score=0.15066666666666667
hidden_sizes=(3,), batch_size=16, learning_rate=0.001, score=0.10025
hidden_sizes=(3,), batch_size=16, learning_rate=0.0001, score=0.098
hidden_sizes=(3,), batch_size=32, learning_rate=1, score=0.29875
hidden_sizes=(3,), batch_size=32, learning_rate=0.1, score=0.3739166666666667
hidden_sizes=(3,), batch_size=32, learning_rate=0.01, score=0.14783333333333334
hidden_sizes=(3,), batch_size=32, learning_rate=0.001, score=0.10475
hidden_sizes=(3,), batch_size=32, learning_rate=0.0001, score=0.09716666666666667
hidden_sizes=(3,), batch_size=64, learning_rate=1, score=0.5809166666666666
hidden_sizes=(3,), batch_size=64, learning_rate=0.1, score=0.3175
hidden_sizes=(3,), batch_size=64, learning_rate=0.01, score=0.2135
hidden_sizes=(3,), batch_size=64, learning_rate=0.001, score=0.103833

In [4]:
# Create a new instance of NeuralNetwork with the best parameters
nn = NeuralNetwork(input_size, best_params['hidden_sizes'], output_size, best_params['learning_rate'])

# Train the model on the entire training set
nn.train(X_train, y_train, epochs, best_params['batch_size'])

# Use the trained model to predict classes for the test set
y_pred = nn.predict(X_test)

# Compute the accuracy of the model on the test set
accuracy = np.mean(np.argmax(y_test, axis=1) == np.argmax(y_pred, axis=1))
print(f'Test set accuracy: {accuracy}')


Test set accuracy: 0.9202
