# 🔬 MyTorch Master Benchmark: Complete Analysis

This notebook performs a comprehensive visual and numerical analysis of the **MyTorch** framework.

## 🎯 Analytical Objectives
1. **Softmax Impact**: Compare CrossEntropy (with Softmax) vs MSE (without Softmax) on classification.
2. **Optimization Benchmark**: Performance of SGD, Momentum, and Adam.
3. **Topological Resilience**: Testing on 5 diverse 2D datasets.
4. **Convergence Metrics**: Which combination is the fastest and most accurate?

---

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time, sys, os
from sklearn.datasets import make_moons, make_circles, make_blobs

# Single seed for the whole notebook: it seeds NumPy's global RNG (which
# generate_spiral draws from) and is passed as random_state to the
# scikit-learn dataset generators.
SEED = 245573
np.random.seed(SEED)

# Make the local `mytorch` package importable from the current working
# directory. Guarded so that re-running this cell does not keep appending
# duplicate entries to sys.path.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

# NOTE(review): the saved output of this cell records
# "ImportError: cannot import name 'MSELoss' from 'mytorch.nn'". Every
# "MSE (No Softmax)" experiment below depends on that name, so MSELoss must be
# exported from mytorch/nn/__init__.py before the notebook can run end to end.
from mytorch.nn import Sequential, Linear, GeLU, ReLU, Sigmoid, Tanh, CrossEntropyLoss, MSELoss
from mytorch.optim import SGD, Adam

# Plain string literal: the message has no placeholders, so no f-prefix.
print("Environment Ready. Analyzing 120 unique experiments (60 Softmax vs 60 No-Softmax).")

ImportError: cannot import name 'MSELoss' from 'mytorch.nn' (/Users/juanmanuelprieto/Documents/fundamentos-redes-neuronales/tarea-1/HW1P1/mytorch/nn/__init__.py)

In [None]:
def generate_spiral(n=300, noise=0.3):
    """Build a noisy two-arm spiral for binary classification.

    The first arm holds ``n`` points whose angle is the square root of a
    uniform draw scaled up to 780 degrees (expressed in radians); the radius
    equals the angle. Gaussian jitter scaled by ``noise`` is added to both
    coordinates. The second arm is the point reflection of the first through
    the origin.

    Randomness comes from NumPy's global RNG, so results are reproducible only
    if ``np.random.seed`` was called beforehand.

    Args:
        n: Number of points per arm (the result has ``2 * n`` points).
        noise: Standard deviation of the Gaussian jitter.

    Returns:
        Tuple ``(X, y)``: ``X`` has shape ``(2 * n, 2)``; ``y`` is an integer
        vector of length ``2 * n`` with 0 for the first arm and 1 for the
        second.
    """
    # Draw order (uniform, x-jitter, y-jitter) is kept fixed so a given seed
    # always yields the same dataset.
    uniform_draw = np.random.rand(n, 1)
    theta = np.sqrt(uniform_draw) * 780 * (2 * np.pi) / 360
    x_jitter = np.random.randn(n, 1)
    y_jitter = np.random.randn(n, 1)

    arm_x = -np.cos(theta) * theta + x_jitter * noise
    arm_y = np.sin(theta) * theta + y_jitter * noise

    first_arm = np.concatenate([arm_x, arm_y], axis=1)
    points = np.concatenate([first_arm, -first_arm], axis=0)
    labels = np.concatenate([np.zeros(n, dtype=int), np.ones(n, dtype=int)])
    return points, labels

# Benchmark datasets keyed by display name; each value is unpacked as an
# (X, y) pair by the cells below. Insertion order is the order in which the
# experiment loop visits them.
datasets = dict(
    Moons=make_moons(n_samples=300, noise=0.15, random_state=SEED),
    Circles=make_circles(n_samples=300, noise=0.1, factor=0.5, random_state=SEED),
    Blobs=make_blobs(n_samples=300, centers=2, cluster_std=1.2, random_state=SEED),
    # Hand-rolled generator: draws from NumPy's global RNG instead of taking a
    # random_state, and returns 2 * n = 400 points.
    Spiral=generate_spiral(n=200),
    # Same generator as "Moons" with twice the noise level.
    Noisy=make_moons(n_samples=300, noise=0.3, random_state=SEED),
)

def train_model(model, optimizer, criterion, X, y, epochs=250, n_classes=2, loss_tol=0.01):
    """Train ``model`` full-batch on ``(X, y)`` and report summary metrics.

    Each epoch runs one forward pass over all of ``X``, evaluates
    ``criterion`` against one-hot targets, back-propagates the criterion's
    gradient through the model and applies one optimizer step. Training stops
    early once the loss measured in an epoch falls below ``loss_tol``.

    Args:
        model: Object exposing ``forward(X)`` and ``backward(grad)``.
        optimizer: Object exposing ``step()`` and ``zero_grad()``.
        criterion: Object exposing ``forward(out, targets)`` (returns the
            loss) and ``backward()`` (returns the gradient fed to the model).
        X: Input samples, passed to ``model.forward`` unchanged.
        y: Class labels in ``[0, n_classes)``, one per sample; cast to int.
        epochs: Maximum number of training epochs.
        n_classes: Width of the one-hot targets (defaults to 2).
        loss_tol: Early-stopping threshold on the loss (defaults to 0.01).

    Returns:
        Tuple ``(accuracy, final_loss, epochs_run, seconds)``. ``final_loss``
        is the loss measured in the last epoch *before* that epoch's update,
        whereas ``accuracy`` is evaluated after the last update. When
        ``epochs <= 0`` no training happens: ``final_loss`` is ``nan`` and
        ``epochs_run`` is 0.
    """
    labels = np.asarray(y).astype(int)
    targets = np.eye(n_classes)[labels]

    # Defaults cover the zero-epoch case, where the loop body never runs.
    loss = float("nan")
    epochs_run = 0

    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    for epochs_run in range(1, epochs + 1):
        out = model.forward(X)
        loss = criterion.forward(out, targets)
        model.backward(criterion.backward())
        optimizer.step()
        optimizer.zero_grad()
        # Early-stopping proxy: a very low loss is treated as converged.
        if loss < loss_tol:
            break
    duration = time.perf_counter() - start

    preds = np.argmax(model.forward(X), axis=1)
    acc = np.mean(preds == labels)
    return acc, loss, epochs_run, duration

def get_boundary(model, X, margin=0.5, step=0.1):
    """Evaluate ``model``'s predicted class on a regular grid around ``X``.

    The grid spans the bounding box of the first two columns of ``X``,
    widened by ``margin`` on every side and sampled every ``step`` units. The
    predicted class at each grid point is the argmax over axis 1 of
    ``model.forward``.

    Args:
        model: Object exposing ``forward(points)``; called with an
            ``(n_points, 2)`` array.
        X: 2-D array whose columns 0 and 1 are the plotted coordinates.
        margin: Padding added around the data's bounding box (default 0.5).
        step: Grid spacing along both axes (default 0.1); must be positive.

    Returns:
        Tuple ``(xx, yy, Z)`` of equally shaped 2-D arrays: the meshgrid
        coordinates and the predicted class index at each grid point.

    Raises:
        ValueError: If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step!r}")

    x_lo, x_hi = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_lo, y_hi = X[:, 1].min() - margin, X[:, 1].max() + margin
    xx, yy = np.meshgrid(np.arange(x_lo, x_hi, step), np.arange(y_lo, y_hi, step))

    # Flatten the grid into one (x, y) row per point for the forward pass,
    # then fold the predictions back into the grid's shape.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    Z = np.argmax(model.forward(grid_points), axis=1).reshape(xx.shape)
    return xx, yy, Z

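## 🧪 Master Experiment: Softmax vs No-Softmax
We run 120 experiments (5 datasets × 2 loss functions × 3 optimizers × 4 activations) to compare results. For each run we record accuracy, final loss, number of epochs until training stops, and wall-clock training time.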

In [None]:
results = []

# Optimizer factories: each receives the freshly built model and returns an
# optimizer bound to it.
opts_def = [
    ("SGD", lambda m: SGD(m, lr=0.1)),
    ("Momentum", lambda m: SGD(m, lr=0.1, momentum=0.9)),
    ("Adam", lambda m: Adam(m, lr=0.01)),
]
# (display name, class) pairs; classes are instantiated once per run.
acts_def = [("ReLU", ReLU), ("GeLU", GeLU), ("Sigmoid", Sigmoid), ("Tanh", Tanh)]
criteria_def = [("CrossEntropy (Softmax)", CrossEntropyLoss), ("MSE (No Softmax)", MSELoss)]

# Full grid: datasets x loss functions x optimizers x activations.
for d_name, (X, y) in datasets.items():
    print(f"Processing Dataset: {d_name}...")
    for c_name, c_cls in criteria_def:
        for o_name, o_fn in opts_def:
            for a_name, a_cls in acts_def:
                # New model, optimizer and criterion for every run.
                model = Sequential(Linear(2, 16), a_cls(), Linear(16, 2))
                opt = o_fn(model)
                crit = c_cls()
                acc, loss, epochs, dur = train_model(model, opt, crit, X, y)
                results.append({
                    "Dataset": d_name, "Loss Function": c_name, "Optimizer": o_name,
                    "Activation": a_name, "Accuracy": acc, "Final Loss": loss,
                    "Epochs": epochs, "Time (s)": dur
                })

# Sanity check: exactly one result row per grid cell.
assert len(results) == len(datasets) * len(criteria_def) * len(opts_def) * len(acts_def)

df = pd.DataFrame(results)
# The check-mark emoji was stored as mojibake ("âœ…"); restored to the intended character.
print("✅ Benchmark Complete.")

## 📊 Summary Metrics Table
The 10 best-performing (loss function, optimizer, activation) combinations, ranked by mean accuracy across all datasets.

In [None]:
# Average each metric over the datasets for every
# (loss function, optimizer, activation) combination.
summary = (
    df.groupby(["Loss Function", "Optimizer", "Activation"])[["Accuracy", "Epochs", "Time (s)"]]
    .mean()
)
# Show the ten combinations with the highest mean accuracy.
display(summary.sort_values("Accuracy", ascending=False).head(10))

## 🌀 Convergence Speed Analysis
How many epochs does each optimizer/loss combination need before training stops, either because the loss drops below 0.01 or because the 250-epoch cap is reached?

In [None]:
# Mean number of epochs run per dataset, with one column (one bar) per
# (loss function, optimizer) pair; the mean is taken over activations.
pivot_speed = df.pivot_table(
    index="Dataset",
    columns=["Loss Function", "Optimizer"],
    values="Epochs",
    aggfunc="mean",
)

fig, ax = plt.subplots(figsize=(12, 6))
pivot_speed.plot.bar(ax=ax)
ax.set_title("Convergence Speed (Epochs) per Configuration")
ax.set_ylabel("Epochs")
# Anchor the legend outside the axes so it does not cover the bars.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
fig.tight_layout()
plt.show()

## 🎨 Visual Atlas: Softmax Impact (CrossEntropy vs MSE)
Visualizing the spiral dataset specifically.

In [None]:
d_name = "Spiral"
X, y = datasets[d_name]

# Size the grid from the configuration lists (rows = loss functions,
# columns = activations) so adding or removing an entry cannot mis-index
# `axes`. squeeze=False keeps `axes` 2-D even for a single row or column.
n_rows, n_cols = len(criteria_def), len(acts_def)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
fig.suptitle(f"Impact of Softmax on Decision Boundaries (Adam Optimizer) - Dataset: {d_name}", fontsize=20, y=1.02)

for r, (c_name, c_cls) in enumerate(criteria_def):
    for c, (a_name, a_cls) in enumerate(acts_def):
        # Train a fresh model for this (loss, activation) pair with Adam.
        model = Sequential(Linear(2, 16), a_cls(), Linear(16, 2))
        train_model(model, Adam(model, lr=0.01), c_cls(), X, y)

        # Shade the predicted class regions and overlay the training points.
        xx, yy, Z = get_boundary(model, X)
        ax = axes[r, c]
        ax.contourf(xx, yy, Z, alpha=0.5, cmap='Spectral')
        ax.scatter(X[:, 0], X[:, 1], c=y, s=10, cmap='Spectral', edgecolors='k')
        ax.set_title(f"{c_name}\nAct: {a_name}")
        ax.set_xticks([])
        ax.set_yticks([])

fig.tight_layout()
plt.show()