# Import Dataset

In [None]:
from lib.MLP import *
from lib.Graph import *
import random

import time
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_openml
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML. This call lives in its
# own cell, so re-running the preprocessing cell below does not re-fetch it.
mnist = fetch_openml('mnist_784', version=1, parser='auto')

In [None]:
# Convert the OpenML frames to NumPy arrays.
# Features are divided by 255.0 (presumably mapping 0-255 pixel intensities
# into [0, 1] -- confirm against the dataset); labels are cast to integers so
# they can be used as column indices by one_hot() below.
X = mnist.data.to_numpy() / 255.0
y = mnist.target.astype(int).to_numpy()
def one_hot(y, num_classes=10):
    """Return the one-hot encoding of a 1-D array of integer class labels.

    Args:
        y: 1-D NumPy array of integer labels, used as row indices into an
            identity matrix (so each label must be a valid index for
            ``num_classes`` rows).
        num_classes: Number of columns in the encoding (default 10).

    Returns:
        Float array of shape ``(len(y), num_classes)`` with a single 1 per row,
        in the column given by that row's label.
    """
    # Row k of the identity matrix is exactly the one-hot vector for class k.
    return np.eye(num_classes)[y]

# Replace the integer labels with their one-hot rows.
y = one_hot(y)

# Hold out 30% of the samples. The split is stratified on the one-hot rows and
# uses a fixed random_state so it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)
print(f"Dataset loaded: {X_train.shape[0]} training samples, {X_test.shape[0]} test samples.")


# Training And Plotting

In [None]:
# Build the model: 784 inputs -> 16 (relu) -> 8 (relu) -> 10 (softmax).
# All three layers share the same bias/weight initialisation settings and seed.
layer_options = dict(bias_init='zeros', seed=42, weight_init='he_uniform')
hidden_layer_1 = Layer(input_size=784, n_neurons=16, activation='relu', **layer_options)
hidden_layer_2 = Layer(input_size=16, n_neurons=8, activation='relu', **layer_options)
output_layer = Layer(input_size=8, n_neurons=10, activation='softmax', **layer_options)

layers = [hidden_layer_1, hidden_layer_2, output_layer]

mlp = MLP(layers=layers, loss_function='mse', lr=0.1)

# Train the model for a single epoch; the held-out test split doubles as the
# validation set here.
# NOTE(review): `verbose` is passed to train() in this cell, while the
# experiment cells further down pass it to the MLP constructor instead --
# confirm which of the two lib.MLP actually accepts.
mlp.train(
    X_train,
    y_train,
    X_val=X_test,
    y_val=y_test,
    epochs=1,
    batch_size=1000,
    verbose=1,
)

# Evaluate on the test set.
# NOTE(review): the '%' in the message assumes accuracy() returns a
# percentage rather than a fraction -- confirm.
test_acc = mlp.accuracy(X_test, y_test)
print(f"Test Accuracy: {test_acc:.2f}%")


In [None]:
# Round-trip check: predictions and parameters of the in-memory model must be
# identical to those of the same model after save() + load().

print("\nEvaluating original model...")
original_predictions = mlp.predict(X_test[:100])
# y_test is one-hot, so argmax recovers the integer class of each sample.
# NOTE(review): this comparison assumes predict() returns class indices -- confirm.
y_test_indices = np.argmax(y_test[:100], axis=1)
original_accuracy = accuracy_score(y_test_indices, original_predictions)
print(f"Original model accuracy: {original_accuracy:.4f}")

print("\nSaving model...")
# NOTE(review): the .pkl extension suggests pickle; only load files that this
# notebook wrote itself.
save_path = 'mnist_model.pkl'
mlp.save(save_path)
print(f"Model saved to {save_path}")

print("\nLoading model...")
loaded_model = MLP.load(save_path)

print("\nValidating loaded model...")
loaded_predictions = loaded_model.predict(X_test[:100])
loaded_accuracy = accuracy_score(y_test_indices, loaded_predictions)
print(f"Loaded model accuracy: {loaded_accuracy:.4f}")

is_identical = np.array_equal(original_predictions, loaded_predictions)
print(f"\nPredictions identical: {is_identical}")

print("\nComparing model parameters:")
all_params_match = True

if len(mlp.layers) != len(loaded_model.layers):
    # Layers cannot be paired up, so no per-layer comparison is attempted.
    print(f"Different number of layers: {len(mlp.layers)} vs {len(loaded_model.layers)}")
    all_params_match = False
else:
    for i, (orig_layer, loaded_layer) in enumerate(zip(mlp.layers, loaded_model.layers)):
        # Exact (bitwise) equality is expected after a lossless round trip.
        parameter_checks = (
            ("weights", np.array_equal(orig_layer.weights, loaded_layer.weights)),
            ("biases", np.array_equal(orig_layer.biases, loaded_layer.biases)),
        )
        which_diff = [label for label, same in parameter_checks if not same]

        if which_diff:
            print(f"Layer {i}: Parameters differ ({', '.join(which_diff)}) ✗")
            all_params_match = False
        else:
            print(f"Layer {i}: All parameters match ✓")

print(f"\nOverall parameter comparison: {'PASSED' if all_params_match else 'FAILED'}")

In [None]:
# Plot the loss history of the model currently bound to `mlp`.
mlp.plot_loss()

In [None]:
# Plot the weight distribution of the model currently bound to `mlp`.
mlp.plot_weight_distribution()


In [None]:
# Plot the gradient distribution of the model currently bound to `mlp`.
mlp.plot_gradient_distribution()

## Graph Testing

In [None]:

# Random toy data used only to exercise training and the graph drawing.
# It is bound to *_toy_* names on purpose: the experiment cells further down
# train 784-input networks on X_train / y_train / X_test / y_test, so those
# names must keep pointing at the MNIST split. Reusing them here would break
# a top-to-bottom run of the notebook.
np.random.seed(42)
X_toy_train = np.random.rand(500, 5)  # 500 samples, 5 features (input layer)
y_toy_train = np.eye(3)[np.random.choice(3, 500)]  # 500 one-hot labels, 3 classes

X_toy_val = np.random.rand(100, 5)  # 100 samples for validation
y_toy_val = np.eye(3)[np.random.choice(3, 100)]  # 100 one-hot validation labels

# Build a small MLP: 5 -> 8 -> 8 -> 4 -> 3 (softmax)
input_layer = Layer(input_size=5, n_neurons=8, activation='relu', bias_init='zeros', seed=42, weight_init='he_uniform')
hidden_layer_1 = Layer(input_size=8, n_neurons=8, activation='relu', bias_init='zeros', seed=42, weight_init='he_uniform')
hidden_layer_2 = Layer(input_size=8, n_neurons=4, activation='relu', bias_init='zeros', seed=42, weight_init='he_uniform')
output_layer = Layer(input_size=4, n_neurons=3, activation='softmax', bias_init='zeros', seed=42, weight_init='he_uniform')

layers = [input_layer, hidden_layer_1, hidden_layer_2, output_layer]

mlp = MLP(layers=layers, loss_function='cce', lr=0.1)

# Train the model on the toy data
mlp.train(X_toy_train, y_toy_train, X_val=X_toy_val, y_val=y_toy_val, epochs=5, batch_size=50)

# Evaluate on the toy validation set
test_acc = mlp.accuracy(X_toy_val, y_toy_val)
print(f"Test Accuracy: {test_acc:.2f}%")

# Plot loss
mlp.plot_loss()

# Visualise the model architecture; `dot` is displayed by the next cell
dot = draw_mlp(mlp)

In [None]:
# Bare expression as the last line of the cell: the notebook displays the
# object returned by draw_mlp(mlp) in the previous cell.
dot

# Analysis Experiment

#### 2.2.1 Pengaruh Depth dan Width

In [None]:
import pandas as pd
import time
import matplotlib.pyplot as plt
import numpy as np

# Grid search over hidden-layer width and depth (number of hidden layers).
width_variations = [8, 16, 32]
depth_variations = [1, 2, 3]

results = []   # one summary record per configuration
all_mlps = {}  # trained models keyed by "width=<w>_depth=<d>", reused for the loss grid below

for width in width_variations:
    for depth in depth_variations:
        model_name = f"width={width}_depth={depth}"
        print(f"\n\nTraining network with {model_name}")

        # First hidden layer maps the 784 inputs to `width` units ...
        layers = [Layer(input_size=784, n_neurons=width, activation='relu',
                        bias_init='zeros', seed=42, weight_init='he_uniform')]

        # ... followed by depth-1 further width -> width hidden layers ...
        layers.extend(
            Layer(input_size=width, n_neurons=width, activation='relu',
                  bias_init='zeros', seed=42, weight_init='he_uniform')
            for _ in range(depth - 1)
        )

        # ... and a 10-way softmax output layer.
        layers.append(Layer(input_size=width, n_neurons=10, activation='softmax',
                            bias_init='zeros', seed=42, weight_init='he_uniform'))

        mlp = MLP(layers=layers, loss_function='cce', lr=0.1)

        # Wall-clock time of the training call only.
        start_time = time.time()
        mlp.train(X_train, y_train, X_val=X_test, y_val=y_test,
                  epochs=10, batch_size=64)
        training_time = time.time() - start_time

        all_mlps[model_name] = mlp

        test_acc = mlp.accuracy(X_test, y_test)
        print(f"Test Accuracy: {test_acc:.2f}%")

        results.append({
            'Width': width,
            'Depth': depth,
            'Test Accuracy': test_acc,
            'Training Time': training_time,
        })


results_df = pd.DataFrame(results)
print("\n\nResults Summary:")
print(results_df)

# Selecting a single row of a frame that mixes int and float columns yields a
# float Series, so Width/Depth are cast back to int to print "32" rather than "32.0".
best_model = results_df.loc[results_df['Test Accuracy'].idxmax()]
print(f"\nBest Model Configuration:")
print(f"Width: {int(best_model['Width'])}, Depth: {int(best_model['Depth'])}")
print(f"Accuracy: {best_model['Test Accuracy']:.2f}%")
print(f"Training Time: {best_model['Training Time']:.2f} seconds")

# Figure 1: test accuracy against width (one line per depth) and against depth
# (one line per width). Each panel is described by:
#   (axis, column that defines the lines, column on the x-axis, line values, title)
acc_fig, acc_axes = plt.subplots(1, 2, figsize=(14, 6))

accuracy_panels = [
    (acc_axes[0], 'Depth', 'Width', depth_variations, 'Width vs Accuracy'),
    (acc_axes[1], 'Width', 'Depth', width_variations, 'Depth vs Accuracy'),
]

for panel_ax, line_col, x_col, line_values, panel_title in accuracy_panels:
    for line_value in line_values:
        subset = results_df[results_df[line_col] == line_value]
        panel_ax.plot(subset[x_col], subset['Test Accuracy'], marker='o',
                      label=f'{line_col}={line_value}')

    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(x_col)
    panel_ax.set_ylabel('Test Accuracy (%)')
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()
plt.show()

# Figure 2: grid of loss curves, one row per depth and one column per width.
# The grid shape is derived from the variation lists (instead of a fixed 3x3)
# so it stays in sync with them; squeeze=False keeps `axes` two-dimensional
# even when a list has a single entry, which the axes[i, j] indexing below needs.
fig, axes = plt.subplots(len(depth_variations), len(width_variations),
                         figsize=(18, 15), squeeze=False)
fig.suptitle('Loss Curves for All Model Configurations', fontsize=16)

def plot_loss_on_axis(mlp, ax):
    """Draw a model's training and validation loss curves on an existing axis.

    Args:
        mlp: Object exposing ``loss_graph`` and ``valid_graph`` sequences.
        ax: Matplotlib-style axis to draw on; it is modified in place.

    Returns:
        None.
    """
    # (attribute on the model, legend label, line colour)
    curve_specs = (
        ("loss_graph", "Training Loss", "red"),
        ("valid_graph", "Validation Loss", "blue"),
    )
    for attribute, legend_label, line_color in curve_specs:
        ax.plot(getattr(mlp, attribute), label=legend_label, color=line_color)

    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()

# Fill the loss-curve grid: row index follows depth, column index follows width.
for i, depth in enumerate(depth_variations):
    for j, width in enumerate(width_variations):
        ax = axes[i, j]
        # Models were stored under this key by the training loop above.
        plot_loss_on_axis(all_mlps[f"width={width}_depth={depth}"], ax)
        ax.set_title(f"Width={width}, Depth={depth}")
        ax.grid(True)

# Leave the top 5% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

#### 2.2.2 Pengaruh Aktivasi Hidden Layer

In [None]:
# Compare hidden-layer activation functions on a fixed 784 -> 16 -> 8 -> 10
# architecture; only the two hidden layers vary, the output layer stays sigmoid.
activation_functions = ["sigmoid", "relu", "tanh", "linear"]
results = {}  # activation name -> histories recorded by the trained model

for activation in activation_functions:
    print(f"aktivasi {activation}")

    layers = [
        Layer(input_size=784, n_neurons=16, activation=activation,
              bias_init='zeros', seed=42, weight_init='random_uniform'),
        Layer(input_size=16, n_neurons=8, activation=activation,
              bias_init='zeros', seed=42, weight_init='random_uniform'),
        Layer(input_size=8, n_neurons=10, activation='sigmoid',
              bias_init='zeros', seed=42, weight_init='random_uniform'),
    ]
    # NOTE(review): `verbose` goes to the MLP constructor here, but to train()
    # in the first training cell -- confirm which one lib.MLP accepts.
    mlp = MLP(layers=layers, loss_function='mse', lr=0.1, verbose=1)
    mlp.train(X_train, y_train, X_val=X_test, y_val=y_test, epochs=50, batch_size=1000)

    test_acc = mlp.accuracy(X_test, y_test)
    print(f"Test Accuracy ({activation}): {test_acc:.2f}%")

    # Keep only the histories needed for the comparison plots.
    results[activation] = {
        history_name: getattr(mlp, history_name)
        for history_name in ("loss_graph", "valid_graph", "weights_history", "gradients_history")
    }


def plot_comparison(results, figsize=(15, 20)):
    """Plot training loss, final weights and final gradients for each run.

    One row of three panels is drawn per entry of ``results``: the training
    loss curve, a histogram of the last recorded weights and a histogram of
    the last recorded gradients. The number of rows follows ``len(results)``
    instead of being hard-coded, so the function works for any number of runs.

    Args:
        results: Mapping from a run label (here: activation name) to a dict
            with the keys ``"loss_graph"``, ``"weights_history"`` and
            ``"gradients_history"``. Each history is itself a mapping whose
            values are indexable sequences; only the last element of each is
            plotted.
        figsize: Figure size passed to ``plt.subplots`` (default matches the
            previous fixed size).

    Returns:
        None. The figure is shown with ``plt.show()``. Nothing is drawn when
        ``results`` is empty.
    """
    n_runs = len(results)
    if n_runs == 0:
        # A grid with zero rows cannot be created; there is nothing to plot.
        return

    # squeeze=False keeps `axes` 2-D even for a single run, so axes[row, col]
    # indexing is always valid.
    fig, axes = plt.subplots(n_runs, 3, figsize=figsize, squeeze=False)

    # (key in the run dict, x-axis label, title prefix) for columns 1 and 2.
    histogram_panels = (
        ("weights_history", "Weight Values", "Weight Distribution"),
        ("gradients_history", "Gradient Values", "Gradient Distribution"),
    )

    for row, (run_label, res) in enumerate(results.items()):
        loss_ax = axes[row, 0]
        loss_ax.plot(res["loss_graph"], label=f"{run_label}")
        loss_ax.set_xlabel("Epochs")
        loss_ax.set_ylabel("Training Loss")
        loss_ax.set_title(f"Training Loss - {run_label}")
        loss_ax.legend()
        loss_ax.grid(True)

        for col, (history_key, x_label, title_prefix) in enumerate(histogram_panels, start=1):
            hist_ax = axes[row, col]
            # Flatten the last snapshot of every entry into one 1-D sample.
            values = np.concatenate(
                [np.ravel(snapshots[-1]) for snapshots in res[history_key].values()]
            )
            hist_ax.hist(values, bins=30, alpha=0.5, density=True)
            hist_ax.set_xlabel(x_label)
            hist_ax.set_ylabel("Density")
            hist_ax.set_title(f"{title_prefix} - {run_label}")
            hist_ax.grid(True)

    plt.tight_layout()
    plt.show()

# Draw one row of panels per activation function collected in `results` above.
plot_comparison(results)


#### 2.2.3 Pengaruh Learning Rate

In [None]:
# Compare learning rates on a fixed 784 -> 16 -> 8 -> 10 relu/softmax network.
LR = [1, 0.1, 0.01]
results = {}  # learning rate -> histories recorded by the trained model

for a in LR:
    print(f"LR:  {a}")

    layers = [
        Layer(input_size=784, n_neurons=16, activation='relu',
              bias_init='zeros', seed=42, weight_init='he_uniform'),
        Layer(input_size=16, n_neurons=8, activation='relu',
              bias_init='zeros', seed=42, weight_init='he_uniform'),
        Layer(input_size=8, n_neurons=10, activation='softmax',
              bias_init='zeros', seed=42, weight_init='he_uniform'),
    ]
    # Only the learning rate changes between runs.
    mlp = MLP(layers=layers, loss_function='cce', lr=a, verbose=1)
    mlp.train(X_train, y_train, X_val=X_test, y_val=y_test, epochs=50, batch_size=1000)

    test_acc = mlp.accuracy(X_test, y_test)
    print(f"Test Accuracy ({a}): {test_acc:.2f}%")

    # Keep only the histories needed for the comparison plots.
    results[a] = {
        history_name: getattr(mlp, history_name)
        for history_name in ("loss_graph", "valid_graph", "weights_history", "gradients_history")
    }


def plot_comparison(results, figsize=(15, 20)):
    """Plot training loss, final weights and final gradients for each run.

    One row of three panels is drawn per entry of ``results``: the training
    loss curve, a histogram of the last recorded weights and a histogram of
    the last recorded gradients. The number of rows follows ``len(results)``
    instead of being hard-coded, so the function works for any number of runs.

    NOTE: a function with the same name is also defined in the
    activation-function cell earlier in the notebook; this later definition
    shadows it once this cell has run.

    Args:
        results: Mapping from a run label (here: the learning rate) to a dict
            with the keys ``"loss_graph"``, ``"weights_history"`` and
            ``"gradients_history"``. Each history is itself a mapping whose
            values are indexable sequences; only the last element of each is
            plotted.
        figsize: Figure size passed to ``plt.subplots`` (default matches the
            previous fixed size).

    Returns:
        None. The figure is shown with ``plt.show()``. Nothing is drawn when
        ``results`` is empty.
    """
    n_runs = len(results)
    if n_runs == 0:
        # A grid with zero rows cannot be created; there is nothing to plot.
        return

    # squeeze=False keeps `axes` 2-D even for a single run, so axes[row, col]
    # indexing is always valid.
    fig, axes = plt.subplots(n_runs, 3, figsize=figsize, squeeze=False)

    # (key in the run dict, x-axis label, title prefix) for columns 1 and 2.
    histogram_panels = (
        ("weights_history", "Weight Values", "Weight Distribution"),
        ("gradients_history", "Gradient Values", "Gradient Distribution"),
    )

    for row, (run_label, res) in enumerate(results.items()):
        loss_ax = axes[row, 0]
        loss_ax.plot(res["loss_graph"], label=f"{run_label}")
        loss_ax.set_xlabel("Epochs")
        loss_ax.set_ylabel("Training Loss")
        loss_ax.set_title(f"Training Loss - {run_label}")
        loss_ax.legend()
        loss_ax.grid(True)

        for col, (history_key, x_label, title_prefix) in enumerate(histogram_panels, start=1):
            hist_ax = axes[row, col]
            # Flatten the last snapshot of every entry into one 1-D sample.
            values = np.concatenate(
                [np.ravel(snapshots[-1]) for snapshots in res[history_key].values()]
            )
            hist_ax.hist(values, bins=30, alpha=0.5, density=True)
            hist_ax.set_xlabel(x_label)
            hist_ax.set_ylabel("Density")
            hist_ax.set_title(f"{title_prefix} - {run_label}")
            hist_ax.grid(True)

    plt.tight_layout()
    plt.show()

# Draw one row of panels per learning rate collected in `results` above.
plot_comparison(results)


#### 2.2.4 Pengaruh Bobot

In [None]:
import pandas as pd
import time
import matplotlib.pyplot as plt
import numpy as np
from lib.MLP import MLP, Layer  

# Compare weight-initialisation schemes on a fixed 784 -> 16 -> 8 -> 10 network.
weight_init_methods = ['zeros', 'random_uniform', 'random_normal', 'xavier_uniform', 'xavier_normal', 'he_uniform', 'he_normal']

results = []                      # one summary record per initialisation
all_mlps = {}                     # trained models keyed by "init=<method>"
weight_distributions_before = {}  # per-layer weight copies taken before training
weight_distributions_after = {}   # per-layer weight copies taken after training

for init_method in weight_init_methods:
    model_name = f"init={init_method}"
    print(f"\n\nTraining network with {model_name}")

    # Only the weight initialisation differs between runs.
    shared_options = dict(bias_init='zeros', seed=42, weight_init=init_method)
    layers = [
        Layer(input_size=784, n_neurons=16, activation='relu', **shared_options),
        Layer(input_size=16, n_neurons=8, activation='relu', **shared_options),
        Layer(input_size=8, n_neurons=10, activation='softmax', **shared_options),
    ]

    # Snapshot the initial weights; .copy() detaches them from the live arrays.
    weight_distributions_before[model_name] = {
        f'layer_{idx}': lyr.weights.copy() for idx, lyr in enumerate(layers)
    }

    # Create and train the model, timing the training call only
    mlp = MLP(layers=layers, loss_function='cce', lr=0.1, verbose=1)

    start_time = time.time()
    mlp.train(X_train, y_train, X_val=X_test, y_val=y_test, epochs=10, batch_size=1000)
    training_time = time.time() - start_time

    all_mlps[model_name] = mlp

    # Snapshot the trained weights of the same layer objects
    weight_distributions_after[model_name] = {
        f'layer_{idx}': lyr.weights.copy() for idx, lyr in enumerate(layers)
    }

    # Evaluate the model
    test_acc = mlp.accuracy(X_test, y_test)
    print(f"Test Accuracy: {test_acc:.2f}%")

    # Store the summary record
    results.append({
        'Initialization': init_method,
        'Test Accuracy': test_acc,
        'Training Time': training_time,
    })

    print(f"\nGradient Distribution for {init_method}:")
    mlp.plot_gradient_distribution()


results_df = pd.DataFrame(results)
print("\n\nResults Summary:")
print(results_df)

# Row with the highest test accuracy (idxmax returns the first one on ties).
best_model = results_df.loc[results_df['Test Accuracy'].idxmax()]
print(f"\nBest Model Configuration:")
print(f"Initialization: {best_model['Initialization']}")
print(f"Accuracy: {best_model['Test Accuracy']:.2f}%")
print(f"Training Time: {best_model['Training Time']:.2f} seconds")


# Initial weights: one row per layer, one column per initialisation method.
fig, axes = plt.subplots(3, len(weight_init_methods), figsize=(20, 12))
fig.suptitle('Initial Weight Distributions by Layer and Initialization Method', fontsize=16)

# Row labels, in the order the layers were built in the training loop.
layer_row_labels = ["Hidden Layer 1", "Hidden Layer 2", "Output Layer"]

for layer_idx, row_label in enumerate(layer_row_labels):
    for i, init_method in enumerate(weight_init_methods):
        ax = axes[layer_idx, i]

        snapshot = weight_distributions_before[f"init={init_method}"]
        ax.hist(snapshot[f'layer_{layer_idx}'].flatten(), bins=50, alpha=0.7)

        # Column headers only on the top row, row labels only on the first column.
        if layer_idx == 0:
            ax.set_title(init_method)
        if i == 0:
            ax.set_ylabel(row_label)

# Leave the top 5% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()

# Loss curves per initialisation method, laid out three per row.
# Ceiling division: enough rows to hold every method.
n_rows = (len(weight_init_methods) + 2) // 3

# squeeze=False always returns a 2-D array of axes, so a single code path
# (flatten, then index) works whether there is one row or several.
fig, axes = plt.subplots(n_rows, 3, figsize=(18, 5*n_rows), squeeze=False)
fig.suptitle('Loss Curves by Initialization Method', fontsize=16)
axes = axes.flatten()

# The grid holds at least len(weight_init_methods) axes, so zip never drops a method.
for ax, init_method in zip(axes, weight_init_methods):
    trained_model = all_mlps[f"init={init_method}"]
    ax.plot(trained_model.loss_graph, label="Training", color="red")
    ax.plot(trained_model.valid_graph, label="Validation", color="blue")
    ax.set_title(f"Init: {init_method}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()
    ax.grid(True)

# Remove the unused trailing axes of the last row, for any number of rows.
for unused_ax in axes[len(weight_init_methods):]:
    fig.delaxes(unused_ax)

# Leave the top 5% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()