This notebook test the NN from scratch implementation of our group on two dataset. 
- We test on the Student Academic Success and 
- Congressional Voting
 
1. `build_nn` builds our models with the same architectures across all implementations. 
2. `analyze_model` gives memory statistics as well as info on wights, biases, activation.
3. `train_model` trains and tests the model and gives statistics. 

In [1]:
def build_nn(model_type, input_dim, output_dim):
    nn = NeuralNetwork()

    if model_type == "1_relu":
        nn.add_layer(input_dim, 6, "relu")
        nn.add_layer(6, output_dim, "softmax")

    elif model_type == "1_sigmoid":
        nn.add_layer(input_dim, 6, "sigmoid")
        nn.add_layer(6, output_dim, "softmax")

    elif model_type == "4_relu":
        nn.add_layer(input_dim, 32, "relu")
        nn.add_layer(32, 16, "relu")
        nn.add_layer(16, 8, "relu")
        nn.add_layer(8, 4, "relu")
        nn.add_layer(4, output_dim, "softmax")

    elif model_type == "4_sigmoid":
        nn.add_layer(input_dim, 32, "sigmoid")
        nn.add_layer(32, 16, "sigmoid")
        nn.add_layer(16, 8, "sigmoid")
        nn.add_layer(8, 4, "sigmoid")
        nn.add_layer(4, output_dim, "softmax")

    return nn

In [2]:
def train_model(model_name, X_train, y_train, X_test, y_test, epochs=100, lr=0.05):
    nn = build_nn(model_name, input_dim=X_train.shape[1], output_dim=len(np.unique(y_train)))
    
    total_params, ram_usage = analyze_model(nn, input_dim=X_train.shape[1], batch_size=X_train.shape[0])
    print(f"[{model_name.upper()}] Total Learnable Parameters: {total_params}")
    print(f"[{model_name.upper()}] Estimated Virtual RAM Usage: {ram_usage:.2f} MB\n")


    for epoch in range(epochs):
        y_pred = nn.forward(X_train)
        loss = LossFunction.categorical_cross_entropy(y_pred, y_train)
        grad = LossFunction.categorical_cross_entropy_derivative(y_pred, y_train)
        nn.backward(grad, learning_rate=lr)

        if epoch % 10 == 0 or epoch == epochs - 1:
            acc = np.mean(np.argmax(y_pred, axis=1) == y_train)
            print(f"[{model_name.upper()}] Epoch {epoch}: Loss = {loss:.4f}, Accuracy = {acc:.4f}")

    y_test_pred = nn.forward(X_test)
    acc_test = np.mean(np.argmax(y_test_pred, axis=1) == y_test)
    print(f"[{model_name.upper()}] Final Test Accuracy: {acc_test:.4f}")

In [3]:
def analyze_model(nn, input_dim, batch_size=1):
    total_params = 0
    total_memory_bytes = 0
    current_input_dim = input_dim

    for layer in nn.layers:
        W_shape = layer.W.shape
        b_shape = layer.b.shape

        # Count parameters
        num_weights = np.prod(W_shape)
        num_biases = np.prod(b_shape)
        layer_params = num_weights + num_biases
        total_params += layer_params

        # Estimate memory: weights, biases, activations, gradients
        weights_bytes = num_weights * 8
        biases_bytes = num_biases * 8
        activations_bytes = np.prod(layer.a.shape) * 8 * batch_size if layer.a is not None else layer.b.shape[1] * 8 * batch_size
        gradients_bytes = (num_weights + num_biases + current_input_dim * layer.W.shape[1]) * 8

        layer_memory = weights_bytes + biases_bytes + activations_bytes + gradients_bytes
        total_memory_bytes += layer_memory

        current_input_dim = layer.W.shape[1]  # Update input for next layer

    total_memory_mb = total_memory_bytes / (1024 ** 2)

    return total_params, total_memory_mb


## Student Dropout Dataset

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from nn_mir0 import NeuralNetwork, LossFunction

In [5]:
students_success_data_path = 'student_droupout_data.csv'
df = pd.read_csv(students_success_data_path, sep=';')
label_map = {'Dropout': 0, 'Enrolled': 1, 'Graduate': 2}

In [6]:
X = df.drop("Target", axis=1).values
y = np.array([label_map[label] for label in df["Target"]])

In [7]:
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [20]:
train_model("1_relu", X_train, y_train, X_test, y_test)

[1_RELU] Total Learnable Parameters: 116
[1_RELU] Estimated Virtual RAM Usage: 0.01 MB

[1_RELU] Epoch 0: Loss = 0.8053, Accuracy = 0.4172
[1_RELU] Epoch 10: Loss = 0.7301, Accuracy = 0.4371
[1_RELU] Epoch 20: Loss = 0.7075, Accuracy = 0.4305
[1_RELU] Epoch 30: Loss = 0.6850, Accuracy = 0.5629
[1_RELU] Epoch 40: Loss = 0.6568, Accuracy = 0.6689
[1_RELU] Epoch 50: Loss = 0.6193, Accuracy = 0.7020
[1_RELU] Epoch 60: Loss = 0.5705, Accuracy = 0.7616
[1_RELU] Epoch 70: Loss = 0.5108, Accuracy = 0.8411
[1_RELU] Epoch 80: Loss = 0.4466, Accuracy = 0.8477
[1_RELU] Epoch 90: Loss = 0.3872, Accuracy = 0.8543
[1_RELU] Epoch 99: Loss = 0.3434, Accuracy = 0.8675
[1_RELU] Final Test Accuracy: 0.9385


In [21]:
train_model("1_sigmoid", X_train, y_train, X_test, y_test)

[1_SIGMOID] Total Learnable Parameters: 116
[1_SIGMOID] Estimated Virtual RAM Usage: 0.01 MB

[1_SIGMOID] Epoch 0: Loss = 0.7031, Accuracy = 0.4172
[1_SIGMOID] Epoch 10: Loss = 0.6791, Accuracy = 0.5828
[1_SIGMOID] Epoch 20: Loss = 0.6703, Accuracy = 0.5828
[1_SIGMOID] Epoch 30: Loss = 0.6643, Accuracy = 0.5828
[1_SIGMOID] Epoch 40: Loss = 0.6585, Accuracy = 0.5828
[1_SIGMOID] Epoch 50: Loss = 0.6525, Accuracy = 0.5828
[1_SIGMOID] Epoch 60: Loss = 0.6458, Accuracy = 0.5828
[1_SIGMOID] Epoch 70: Loss = 0.6385, Accuracy = 0.5828
[1_SIGMOID] Epoch 80: Loss = 0.6305, Accuracy = 0.5828
[1_SIGMOID] Epoch 90: Loss = 0.6216, Accuracy = 0.5828
[1_SIGMOID] Epoch 99: Loss = 0.6128, Accuracy = 0.5828
[1_SIGMOID] Final Test Accuracy: 0.5846


In [22]:
train_model("4_relu", X_train, y_train, X_test, y_test)

[4_RELU] Total Learnable Parameters: 1254
[4_RELU] Estimated Virtual RAM Usage: 0.10 MB

[4_RELU] Epoch 0: Loss = 0.8034, Accuracy = 0.4172
[4_RELU] Epoch 10: Loss = 0.7342, Accuracy = 0.4172
[4_RELU] Epoch 20: Loss = 0.6922, Accuracy = 0.5364
[4_RELU] Epoch 30: Loss = 0.6444, Accuracy = 0.5828
[4_RELU] Epoch 40: Loss = 0.5833, Accuracy = 0.5894
[4_RELU] Epoch 50: Loss = 0.5180, Accuracy = 0.7152
[4_RELU] Epoch 60: Loss = 0.4262, Accuracy = 0.8609
[4_RELU] Epoch 70: Loss = 0.3567, Accuracy = 0.9007
[4_RELU] Epoch 80: Loss = 0.3272, Accuracy = 0.9205
[4_RELU] Epoch 90: Loss = 0.3045, Accuracy = 0.9139
[4_RELU] Epoch 99: Loss = 0.2854, Accuracy = 0.9139
[4_RELU] Final Test Accuracy: 0.9231


In [23]:
train_model("4_sigmoid", X_train, y_train, X_test, y_test)

[4_SIGMOID] Total Learnable Parameters: 1254
[4_SIGMOID] Estimated Virtual RAM Usage: 0.10 MB

[4_SIGMOID] Epoch 0: Loss = 0.7140, Accuracy = 0.4172
[4_SIGMOID] Epoch 10: Loss = 0.6900, Accuracy = 0.5828
[4_SIGMOID] Epoch 20: Loss = 0.6827, Accuracy = 0.5828
[4_SIGMOID] Epoch 30: Loss = 0.6804, Accuracy = 0.5828
[4_SIGMOID] Epoch 40: Loss = 0.6797, Accuracy = 0.5828
[4_SIGMOID] Epoch 50: Loss = 0.6795, Accuracy = 0.5828
[4_SIGMOID] Epoch 60: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 70: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 80: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 90: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 99: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Final Test Accuracy: 0.5846


## Congressional Voting Dataset

In [12]:
from sklearn.impute import SimpleImputer

congressional_voting_training_data_path = 'CongressionalVotingID.csv'
df = pd.read_csv(congressional_voting_training_data_path)
df.drop(df[np.isin(df['ID'], [184, 249])].index, inplace=True)
df.drop(columns=['ID'], inplace=True)

In [13]:
label_map = {'democrat': 0, 'republican': 1}
y = np.array([label_map[val] for val in df["class"]])

In [14]:
X = df.drop("class", axis=1).values
X = np.where(X == 'unknown', np.nan, X)
X = np.where(X == 'y', 1, X)
X = np.where(X == 'n', 0, X)

In [15]:
X = SimpleImputer(strategy='most_frequent').fit_transform(X).astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [24]:
train_model("1_relu", X_train, y_train, X_test, y_test)

[1_RELU] Total Learnable Parameters: 116
[1_RELU] Estimated Virtual RAM Usage: 0.01 MB

[1_RELU] Epoch 0: Loss = 0.6847, Accuracy = 0.5828
[1_RELU] Epoch 10: Loss = 0.6281, Accuracy = 0.7152
[1_RELU] Epoch 20: Loss = 0.5670, Accuracy = 0.8146
[1_RELU] Epoch 30: Loss = 0.4947, Accuracy = 0.9205
[1_RELU] Epoch 40: Loss = 0.4193, Accuracy = 0.9404
[1_RELU] Epoch 50: Loss = 0.3513, Accuracy = 0.9338
[1_RELU] Epoch 60: Loss = 0.2977, Accuracy = 0.9338
[1_RELU] Epoch 70: Loss = 0.2594, Accuracy = 0.9338
[1_RELU] Epoch 80: Loss = 0.2326, Accuracy = 0.9338
[1_RELU] Epoch 90: Loss = 0.2134, Accuracy = 0.9338
[1_RELU] Epoch 99: Loss = 0.2007, Accuracy = 0.9404
[1_RELU] Final Test Accuracy: 0.9231


In [25]:
train_model("1_sigmoid", X_train, y_train, X_test, y_test)

[1_SIGMOID] Total Learnable Parameters: 116
[1_SIGMOID] Estimated Virtual RAM Usage: 0.01 MB

[1_SIGMOID] Epoch 0: Loss = 0.6838, Accuracy = 0.5828
[1_SIGMOID] Epoch 10: Loss = 0.6771, Accuracy = 0.5828
[1_SIGMOID] Epoch 20: Loss = 0.6735, Accuracy = 0.5828
[1_SIGMOID] Epoch 30: Loss = 0.6703, Accuracy = 0.5828
[1_SIGMOID] Epoch 40: Loss = 0.6669, Accuracy = 0.5828
[1_SIGMOID] Epoch 50: Loss = 0.6634, Accuracy = 0.5828
[1_SIGMOID] Epoch 60: Loss = 0.6596, Accuracy = 0.5828
[1_SIGMOID] Epoch 70: Loss = 0.6554, Accuracy = 0.5828
[1_SIGMOID] Epoch 80: Loss = 0.6508, Accuracy = 0.5828
[1_SIGMOID] Epoch 90: Loss = 0.6457, Accuracy = 0.5828
[1_SIGMOID] Epoch 99: Loss = 0.6405, Accuracy = 0.5828
[1_SIGMOID] Final Test Accuracy: 0.5846


In [26]:
train_model("4_relu", X_train, y_train, X_test, y_test)

[4_RELU] Total Learnable Parameters: 1254
[4_RELU] Estimated Virtual RAM Usage: 0.10 MB

[4_RELU] Epoch 0: Loss = 0.6860, Accuracy = 0.5828
[4_RELU] Epoch 10: Loss = 0.6815, Accuracy = 0.5828
[4_RELU] Epoch 20: Loss = 0.6699, Accuracy = 0.5828
[4_RELU] Epoch 30: Loss = 0.6120, Accuracy = 0.7616
[4_RELU] Epoch 40: Loss = 0.4724, Accuracy = 0.8874
[4_RELU] Epoch 50: Loss = 0.3294, Accuracy = 0.9272
[4_RELU] Epoch 60: Loss = 0.2139, Accuracy = 0.9404
[4_RELU] Epoch 70: Loss = 0.1592, Accuracy = 0.9470
[4_RELU] Epoch 80: Loss = 0.1304, Accuracy = 0.9470
[4_RELU] Epoch 90: Loss = 0.1120, Accuracy = 0.9603
[4_RELU] Epoch 99: Loss = 0.0995, Accuracy = 0.9669
[4_RELU] Final Test Accuracy: 0.9692


In [27]:
train_model("4_sigmoid", X_train, y_train, X_test, y_test)

[4_SIGMOID] Total Learnable Parameters: 1254
[4_SIGMOID] Estimated Virtual RAM Usage: 0.10 MB

[4_SIGMOID] Epoch 0: Loss = 0.6803, Accuracy = 0.5828
[4_SIGMOID] Epoch 10: Loss = 0.6796, Accuracy = 0.5828
[4_SIGMOID] Epoch 20: Loss = 0.6795, Accuracy = 0.5828
[4_SIGMOID] Epoch 30: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 40: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 50: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 60: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 70: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 80: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 90: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Epoch 99: Loss = 0.6794, Accuracy = 0.5828
[4_SIGMOID] Final Test Accuracy: 0.5846


## Lastly we'll look at grid-search for the best parameters for the congressional voting dataset. 

In [28]:
def build_custom_nn(layer_config, input_dim):
    nn = NeuralNetwork()
    current_input_dim = input_dim
    for out_dim, activation in layer_config:
        nn.add_layer(current_input_dim, out_dim, activation)
        current_input_dim = out_dim
    return nn

In [32]:
from itertools import product

def grid_search_custom(X_train, y_train, X_test, y_test):
    architectures = {
        "shallow_relu": [(16, "relu"), (2, "softmax")],
        "deep_relu": [(64, "relu"), (32, "relu"), (16, "relu"), (8, "relu"), (2, "softmax")],
        "wide_sigmoid": [(128, "sigmoid"), (64, "sigmoid"), (2, "softmax")],
        "bottleneck": [(32, "relu"), (8, "relu"), (32, "relu"), (2, "softmax")],
        "minimal": [(2, "sigmoid"), (2, "softmax")]
    }
    learning_rates = [0.01, 0.05]
    epochs_list = [50, 100]

    results = []

    for (arch_name, layers), lr, epochs in product(architectures.items(), learning_rates, epochs_list):
        print(f"\nRunning model={arch_name}, lr={lr}, epochs={epochs}")
        nn = build_custom_nn(layers, input_dim=X_train.shape[1])

        for epoch in range(epochs):
            y_pred = nn.forward(X_train)
            loss = LossFunction.categorical_cross_entropy(y_pred, y_train)
            grad = LossFunction.categorical_cross_entropy_derivative(y_pred, y_train)
            nn.backward(grad, learning_rate=lr)

        y_test_pred = nn.forward(X_test)
        acc_test = np.mean(np.argmax(y_test_pred, axis=1) == y_test)
        print(f"[{arch_name}] Test Accuracy = {acc_test:.4f}")

        total_params, ram_mb = analyze_model(nn, input_dim=X_train.shape[1], batch_size=X_train.shape[0])
        results.append({
            'model': arch_name,
            'learning_rate': lr,
            'epochs': epochs,
            'test_accuracy': acc_test,
            'total_params': total_params,
            'estimated_ram_mb': round(ram_mb, 2)
        })

    results.sort(key=lambda x: x['test_accuracy'], reverse=True)
    print("\nTop 3 Configurations:")
    for r in results[:3]:
        print(f"Model: {r['model']:<12} | LR: {r['learning_rate']:.3f} | Epochs: {r['epochs']:>3} | "
              f"Accuracy: {r['test_accuracy']*100:.2f}% | Params: {r['total_params']:>5} | "
              f"RAM: {r['estimated_ram_mb']:.2f} MB")

    return results

In [33]:
architectures = {
    "shallow_relu": [(16, "relu"), (2, "softmax")],
    "deep_relu": [(64, "relu"), (32, "relu"), (16, "relu"), (8, "relu"), (2, "softmax")],
    "wide_sigmoid": [(128, "sigmoid"), (64, "sigmoid"), (2, "softmax")],
    "bottleneck": [(32, "relu"), (8, "relu"), (32, "relu"), (2, "softmax")],
    "minimal": [(2, "sigmoid"), (2, "softmax")]
}
learning_rates = [0.01, 0.05]
epochs_list = [50, 100]

In [34]:
results = grid_search_custom(X_train, y_train, X_test, y_test)


Running model=shallow_relu, lr=0.01, epochs=50
[shallow_relu] Test Accuracy = 0.9538

Running model=shallow_relu, lr=0.01, epochs=100
[shallow_relu] Test Accuracy = 0.9231

Running model=shallow_relu, lr=0.05, epochs=50
[shallow_relu] Test Accuracy = 0.9385

Running model=shallow_relu, lr=0.05, epochs=100
[shallow_relu] Test Accuracy = 0.9231

Running model=deep_relu, lr=0.01, epochs=50
[deep_relu] Test Accuracy = 0.8923

Running model=deep_relu, lr=0.01, epochs=100
[deep_relu] Test Accuracy = 0.7692

Running model=deep_relu, lr=0.05, epochs=50
[deep_relu] Test Accuracy = 0.9692

Running model=deep_relu, lr=0.05, epochs=100
[deep_relu] Test Accuracy = 0.9846

Running model=wide_sigmoid, lr=0.01, epochs=50
[wide_sigmoid] Test Accuracy = 0.5846

Running model=wide_sigmoid, lr=0.01, epochs=100
[wide_sigmoid] Test Accuracy = 0.5846

Running model=wide_sigmoid, lr=0.05, epochs=50
[wide_sigmoid] Test Accuracy = 0.5846

Running model=wide_sigmoid, lr=0.05, epochs=100
[wide_sigmoid] Test Accu