In [None]:
import sys, os
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import PolynomialFeatures
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_root)

from functions import *

%reload_ext autoreload
%autoreload 2

In [3]:
# Fetch the MNIST dataset
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')

# Extract data (features) and target (labels)
X = mnist.data
y = mnist.target.astype(int)


X = X / 255.0


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

encoder = OneHotEncoder(sparse_output=False)
y_train_oh = encoder.fit_transform(y_train.reshape(-1, 1))
y_test_oh  = encoder.transform(y_test.reshape(-1, 1))


In [None]:
depth = [1,2,3,4]
width = [2, 4, 8, 16, 32, 64, 128]
lr = [0.001, 0.01, 0.1]
information = {
    'depth' : [],
    'width' : [],
    'lr' : [],

    'acc' : []
}


Model = 1
for d in depth:
    for w in width:
        for r in lr:
            Reg_nn = NeuralNetwork(X_train.shape[1], [w]*d + [10], [sigmoid]*d + [softmax], [derivate(sigmoid)]*d + [derivate(softmax)], cross_entropy, cross_entropy_der)
            Reg_nn.train_SGD(X_train, y_train_oh, epochs=30, batch_size=1000, learning_rate=r, optimizer="Adam")
            y_pred_train = Reg_nn._feed_forward(X_train)
            y_pred_test = Reg_nn._feed_forward(X_test)
            y_pred_label_train = np.argmax(y_pred_train, axis=1)
            y_pred_label = np.argmax(y_pred_test, axis=1)
            train_accuracy = np.mean(y_train == y_pred_label_train)
            test_accuracy = np.mean(y_test == y_pred_label)

            information['depth'].append(d)
            information['width'].append(w)
            information['lr'].append(r)
            information['acc'].append((train_accuracy,test_accuracy))
              

            print(f"Model {Model} done. Train accuracy: {train_accuracy:.4f}. Test accuracy: {test_accuracy:.4f}")
            Model += 1


TypeError: 'float' object cannot be interpreted as an integer

In [None]:
information_Relu = {
    'depth' : [],
    'width' : [],
    'lr' : [],

    'acc' : []
}


Model = 1
for d in depth:
    for w in width:
        for r in lr:
            Reg_nn = NeuralNetwork(X_train.shape[1], [w]*d + [10], [RELU]*d + [softmax], [derivate(RELU)]*d + [derivate(softmax)], cross_entropy, cross_entropy_der)
            Reg_nn.train_SGD(X_train, y_train_oh, epochs=30, batch_size=1000, learning_rate=r, optimizer="Adam")
            y_pred_train = Reg_nn._feed_forward(X_train)
            y_pred_test = Reg_nn._feed_forward(X_test)
            y_pred_label_train = np.argmax(y_pred_train, axis=1)
            y_pred_label = np.argmax(y_pred_test, axis=1)
            train_accuracy = np.mean(y_train == y_pred_label_train)
            test_accuracy = np.mean(y_test == y_pred_label)

            information_Relu['depth'].append(d)
            information_Relu['width'].append(w)
            information_Relu['lr'].append(r)
            information_Relu['acc'].append((train_accuracy,test_accuracy))
              

            print(f"Model {Model} done. Train accuracy: {train_accuracy:.4f}. Test accuracy: {test_accuracy:.4f}")
            Model += 1

[5, 5, 5, 5, 5]


In [None]:
information_lRelu = {
    'depth' : [],
    'width' : [],
    'lr' : [],

    'acc' : []
}


Model = 1
for d in depth:
    for w in width:
        for r in lr:
            Reg_nn = NeuralNetwork(X_train.shape[1], [w]*d + [10], [LRELU]*d + [softmax], [derivate(LRELU)]*d + [derivate(softmax)], cross_entropy, cross_entropy_der)
            Reg_nn.train_SGD(X_train, y_train_oh, epochs=30, batch_size=1000, learning_rate=r, optimizer="Adam")
            y_pred_train = Reg_nn._feed_forward(X_train)
            y_pred_test = Reg_nn._feed_forward(X_test)
            y_pred_label_train = np.argmax(y_pred_train, axis=1)
            y_pred_label = np.argmax(y_pred_test, axis=1)
            train_accuracy = np.mean(y_train == y_pred_label_train)
            test_accuracy = np.mean(y_test == y_pred_label)

            information_lRelu['depth'].append(d)
            information_lRelu['width'].append(w)
            information_lRelu['lr'].append(r)
            information_lRelu['acc'].append((train_accuracy,test_accuracy))
              

            print(f"Model {Model} done. Train accuracy: {train_accuracy:.4f}. Test accuracy: {test_accuracy:.4f}")
            Model += 1

In [None]:
# Convert dict to DataFrame
df_sig = pd.DataFrame(information)
df_sig[['train_acc', 'test_acc']] = pd.DataFrame(df_sig['acc'].tolist(), index=df_sig.index)

# Compute mean accuracy across learning rates for each depth–width pair
df_sig_grouped = df_sig.groupby(['depth', 'width'], as_index=False)['train_acc'].mean()

# Pivot into matrix form for heatmap
heatmap_data = df_sig_grouped.pivot(index='depth', columns='width', values='train_acc')

# Plot heatmap
plt.figure(figsize=(8,6))
sns.heatmap(heatmap_data, annot=True, cmap="viridis")
plt.title("Mean Accuracy Heatmap for sigmoid (averaged over learning rates)")
plt.xlabel("Depth (# layers)")
plt.ylabel("Width (# neurons)")
plt.show()

In [None]:
# Convert dict to DataFrame
df_relu = pd.DataFrame(information_Relu)
df_relu[['train_acc', 'test_acc']] = pd.DataFrame(df_relu['acc'].tolist(), index=df_relu.index)

# Compute mean accuracy across learning rates for each depth–width pair
df_sig_grouped = df_relu.groupby(['depth', 'width'], as_index=False)['train_acc'].mean()

# Pivot into matrix form for heatmap
heatmap_data = df_sig_grouped.pivot(index='depth', columns='width', values='train_acc')

# Plot heatmap
plt.figure(figsize=(8,6))
sns.heatmap(heatmap_data, annot=True, cmap="viridis")
plt.title("Mean Accuracy Heatmap for sigmoid (averaged over learning rates)")
plt.xlabel("Depth (# layers)")
plt.ylabel("Width (# neurons)")
plt.show()

In [None]:
# Convert dict to DataFrame
df_lrelu = pd.DataFrame(information_lRelu)
df_lrelu[['train_acc', 'test_acc']] = pd.DataFrame(df_lrelu['acc'].tolist(), index=df_lrelu.index)

# Compute mean accuracy across learning rates for each depth–width pair
df_sig_grouped = df_lrelu.groupby(['depth', 'width'], as_index=False)['train_acc'].mean()

# Pivot into matrix form for heatmap
heatmap_data = df_sig_grouped.pivot(index='depth', columns='width', values='train_acc')

# Plot heatmap
plt.figure(figsize=(8,6))
sns.heatmap(heatmap_data, annot=True, cmap="viridis")
plt.title("Mean Accuracy Heatmap for sigmoid (averaged over learning rates)")
plt.xlabel("Depth (# layers)")
plt.ylabel("Width (# neurons)")
plt.show()

In [None]:
reg_type = ['L1', 'L2']
lamb = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
for i in reg_type:
    for j in lamb:
        Reg_nn = NeuralNetwork(X_train.shape[1], [128,10], [sigmoid,  softmax], [derivate(sigmoid), derivate(softmax)], cross_entropy, cross_entropy_der, lamb=j, cost_fun_type=i)
        Reg_nn.train_SGD(X_train, y_train_oh, epochs=30, batch_size=1000, learning_rate=0.01, optimizer='Adam')
        y_pred_train = Reg_nn._feed_forward(X_train)
        y_pred_test = Reg_nn._feed_forward(X_test)
        y_pred_label_train = np.argmax(y_pred_train, axis=1)
        y_pred_label = np.argmax(y_pred_test, axis=1)

        train_accuracy = np.mean(y_train == y_pred_label_train)
        test_accuracy = np.mean(y_test == y_pred_label)
        print(train_accuracy, test_accuracy)


0.996375 0.9699285714285715


In [None]:

lamb = [2e-2, 3e-2, 4e-2, 5e-2]
for i in reg_type:
    for j in lamb:
        Reg_nn = NeuralNetwork(X_train.shape[1], [128,10], [sigmoid,  softmax], [derivate(sigmoid), derivate(softmax)], cross_entropy, cross_entropy_der, lamb=j, cost_fun_type='L2')
        Reg_nn.train_SGD(X_train, y_train_oh, epochs=30, batch_size=1000, learning_rate=0.01, optimizer='Adam')
        y_pred_train = Reg_nn._feed_forward(X_train)
        y_pred_test = Reg_nn._feed_forward(X_test)
        y_pred_label_train = np.argmax(y_pred_train, axis=1)
        y_pred_label = np.argmax(y_pred_test, axis=1)

        train_accuracy = np.mean(y_train == y_pred_label_train)
        test_accuracy = np.mean(y_test == y_pred_label)
        print(train_accuracy, test_accuracy)