In [None]:
import numpy as np
import torch
import torch.nn as nn
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

# --- Helper functions (per Chapter 11) ---
def sigmoid(z):
    """Compute the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The input is clipped to ``[-500, 500]`` before exponentiation so that
    ``np.exp`` cannot overflow float64 (the unclipped formula overflows for
    ``z`` below roughly -709 and emits a RuntimeWarning). Inside the clipping
    range the result is identical to the unclipped formula; outside it the
    output differs from the exact value by less than 1e-200.

    Parameters
    ----------
    z : float or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Sigmoid of ``z``, with the same shape as ``z``.
    """
    return 1. / (1. + np.exp(-np.clip(z, -500., 500.)))

def int_to_onehot(y, num_labels):
    """Convert integer class labels into a one-hot encoded float matrix.

    Parameters
    ----------
    y : numpy.ndarray
        1-D array of integer class labels.
    num_labels : int
        Number of columns of the result (one per class).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(y.shape[0], num_labels)`` filled with zeros except
        for a single 1 per row, placed in the column given by that row's label.

    Raises
    ------
    IndexError
        If a label is ``>= num_labels`` (or below ``-num_labels``).
    """
    ary = np.zeros((y.shape[0], num_labels))
    # One vectorized assignment: row i gets a 1 in column y[i].
    ary[np.arange(y.shape[0]), y] = 1
    return ary

def minibatch_generator(X, y, minibatch_size, rng=None):
    """Yield shuffled ``(X_batch, y_batch)`` minibatches covering the data once.

    Parameters
    ----------
    X : numpy.ndarray
        Examples, indexed along the first axis.
    y : numpy.ndarray
        Targets aligned with ``X`` along the first axis.
    minibatch_size : int
        Maximum number of examples per batch; the last batch is smaller when
        ``X.shape[0]`` is not a multiple of it.
    rng : numpy.random.RandomState or numpy.random.Generator, optional
        Source of randomness for the shuffle. When omitted, the global
        ``np.random`` state is used, exactly as before this parameter
        existed. Pass a seeded instance for reproducible batch order.

    Yields
    ------
    tuple of numpy.ndarray
        ``(X[batch_idx], y[batch_idx])`` for consecutive slices of one random
        permutation of the row indices.
    """
    shuffler = np.random if rng is None else rng
    indices = np.arange(X.shape[0])
    shuffler.shuffle(indices)
    for start_idx in range(0, indices.shape[0], minibatch_size):
        batch_idx = indices[start_idx:start_idx + minibatch_size]
        yield X[batch_idx], y[batch_idx]

# --- 1. The original model (1 HL) - Section 3 ---
class NeuralNetMLP_1HL:
    """Multilayer perceptron with one sigmoid hidden layer and sigmoid outputs.

    Weight matrices are stored as ``(units_out, units_in)``, so the forward
    pass multiplies by their transpose. Training code updates the public
    attributes ``weight_h``, ``bias_h``, ``weight_out`` and ``bias_out``
    directly with the gradients returned by :meth:`backward`.
    """

    def __init__(self, num_features, num_hidden, num_classes, random_seed=123):
        """Initialise weights from N(0, 0.1) and biases to zero.

        Parameters
        ----------
        num_features : int
            Size of each input example.
        num_hidden : int
            Number of units in the hidden layer.
        num_classes : int
            Number of output units.
        random_seed : int, optional
            Seed of the ``RandomState`` used to draw the initial weights.
            The default (123) reproduces the previously hard-coded seed.
        """
        rng = np.random.RandomState(random_seed)
        self.num_classes = num_classes
        self.weight_h = rng.normal(0., 0.1, (num_hidden, num_features))
        self.bias_h = np.zeros(num_hidden)
        self.weight_out = rng.normal(0., 0.1, (num_classes, num_hidden))
        self.bias_out = np.zeros(num_classes)

    def forward(self, x):
        """Run the forward pass.

        Parameters
        ----------
        x : numpy.ndarray
            Batch of inputs, shape ``(n_examples, num_features)``.

        Returns
        -------
        tuple of numpy.ndarray
            ``(a_h, a_out)``: hidden-layer activations of shape
            ``(n_examples, num_hidden)`` and output activations of shape
            ``(n_examples, num_classes)``.
        """
        z_h = np.dot(x, self.weight_h.T) + self.bias_h
        a_h = sigmoid(z_h)
        z_out = np.dot(a_h, self.weight_out.T) + self.bias_out
        a_out = sigmoid(z_out)
        return a_h, a_out

    def backward(self, x, a_h, a_out, y):
        """Backpropagate the squared-error loss and return parameter gradients.

        Parameters
        ----------
        x : numpy.ndarray
            The inputs that were passed to :meth:`forward`.
        a_h, a_out : numpy.ndarray
            The activations returned by :meth:`forward` for ``x``.
        y : numpy.ndarray
            Integer class labels, one per example.

        Returns
        -------
        tuple of numpy.ndarray
            ``(grad_w_out, grad_b_out, grad_w_h, grad_b_h)``, each with the
            shape of the parameter it belongs to.
        """
        y_onehot = int_to_onehot(y, self.num_classes)
        # d(loss)/d(a_out): squared error averaged over the examples in the batch.
        d_loss_a_out = 2.*(a_out - y_onehot) / y.shape[0]
        # Chain through the output sigmoid: sigmoid'(z) = a * (1 - a).
        delta_out = d_loss_a_out * a_out * (1. - a_out)
        grad_w_out = np.dot(delta_out.T, a_h)
        grad_b_out = np.sum(delta_out, axis=0)
        # Push the error back through the output weights and the hidden sigmoid.
        delta_h = np.dot(delta_out, self.weight_out) * a_h * (1. - a_h)
        grad_w_h = np.dot(delta_h.T, x)
        grad_b_h = np.sum(delta_h, axis=0)
        return grad_w_out, grad_b_out, grad_w_h, grad_b_h

# --- 2. The extended model (2 HL) - the solution required by the assignment ---
class NeuralNetMLP_2HL:
    """Multilayer perceptron with two sigmoid hidden layers and sigmoid outputs.

    Both hidden layers have ``num_hidden`` units. Weight matrices are stored
    as ``(units_out, units_in)``, so the forward pass multiplies by their
    transpose. Training code updates the public attributes ``w1``, ``b1``,
    ``w2``, ``b2``, ``w_out`` and ``b_out`` directly with the gradients
    returned by :meth:`backward`.
    """

    def __init__(self, num_features, num_hidden, num_classes, random_seed=123):
        """Initialise weights from N(0, 0.1) and biases to zero.

        Parameters
        ----------
        num_features : int
            Size of each input example.
        num_hidden : int
            Number of units in each of the two hidden layers.
        num_classes : int
            Number of output units.
        random_seed : int, optional
            Seed of the ``RandomState`` used to draw the initial weights.
            The default (123) reproduces the previously hard-coded seed.
        """
        rng = np.random.RandomState(random_seed)
        self.num_classes = num_classes
        self.w1 = rng.normal(0, 0.1, (num_hidden, num_features))
        self.b1 = np.zeros(num_hidden)
        self.w2 = rng.normal(0, 0.1, (num_hidden, num_hidden))
        self.b2 = np.zeros(num_hidden)
        self.w_out = rng.normal(0, 0.1, (num_classes, num_hidden))
        self.b_out = np.zeros(num_classes)

    def forward(self, x):
        """Run the forward pass.

        Parameters
        ----------
        x : numpy.ndarray
            Batch of inputs, shape ``(n_examples, num_features)``.

        Returns
        -------
        tuple of numpy.ndarray
            ``(a1, a2, out)``: activations of the first hidden layer, the
            second hidden layer and the output layer.
        """
        a1 = sigmoid(np.dot(x, self.w1.T) + self.b1)
        a2 = sigmoid(np.dot(a1, self.w2.T) + self.b2)
        out = sigmoid(np.dot(a2, self.w_out.T) + self.b_out)
        return a1, a2, out

    def backward(self, x, a1, a2, out, y):
        """Backpropagate the squared-error loss and return parameter gradients.

        Parameters
        ----------
        x : numpy.ndarray
            The inputs that were passed to :meth:`forward`.
        a1, a2, out : numpy.ndarray
            The activations returned by :meth:`forward` for ``x``.
        y : numpy.ndarray
            Integer class labels, one per example.

        Returns
        -------
        tuple of numpy.ndarray
            ``(g_w_out, g_b_out, g_w_h2, g_b_h2, g_w_h1, g_b_h1)``: gradients
            for ``w_out``, ``b_out``, ``w2``, ``b2``, ``w1`` and ``b1``, in
            that order.
        """
        y_onehot = int_to_onehot(y, self.num_classes)
        # Output delta: squared-error derivative (averaged over the batch)
        # times the sigmoid derivative a * (1 - a).
        d_out = (2.*(out - y_onehot) / y.shape[0]) * (out * (1. - out))
        g_w_out, g_b_out = np.dot(d_out.T, a2), np.sum(d_out, axis=0)
        # Second hidden layer: propagate through w_out, then its sigmoid.
        d_h2 = np.dot(d_out, self.w_out) * (a2 * (1. - a2))
        g_w_h2, g_b_h2 = np.dot(d_h2.T, a1), np.sum(d_h2, axis=0)
        # First hidden layer: propagate through w2, then its sigmoid.
        d_h1 = np.dot(d_h2, self.w2) * (a1 * (1. - a1))
        g_w_h1, g_b_h1 = np.dot(d_h1.T, x), np.sum(d_h1, axis=0)
        return g_w_out, g_b_out, g_w_h2, g_b_h2, g_w_h1, g_b_h1

# --- Step 1: Loading and preprocessing the data (70/30 split) ---
print("--- Step 1: Loading MNIST Dataset ---")
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

# Rescale the pixel values from [0, 255] to [-1, 1]; the labels are converted
# to integers so they can be used as column indices for one-hot encoding.
X = ((X / 255.0) - 0.5) * 2.0
y = y.astype(int)

# Stratified 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
    stratify=y,
)
print(f"Train size: {X_train.shape[0]}, Test size: {X_test.shape[0]}")

# --- Training-run function ---
def run_training(model, name, is_2hl=False, *, num_epochs=20, lr=0.1,
                 minibatch_size=100):
    """Train ``model`` with minibatch SGD and return its macro test ROC AUC.

    Training data is read from the module-level ``X_train`` / ``y_train``
    arrays and the evaluation uses ``X_test`` / ``y_test``. The average
    minibatch MSE is printed every fifth epoch.

    Parameters
    ----------
    model : NeuralNetMLP_1HL or NeuralNetMLP_2HL
        Network to train; its parameter arrays are updated in place.
    name : str
        Label printed in the training banner.
    is_2hl : bool, optional
        ``True`` when ``model`` is the two-hidden-layer network. It selects
        which parameter attributes receive the gradients.
    num_epochs : int, optional
        Number of passes over the training set (default 20).
    lr : float, optional
        SGD learning rate (default 0.1).
    minibatch_size : int, optional
        Number of examples per minibatch (default 100).

    Returns
    -------
    float
        Macro-averaged one-vs-rest ROC AUC on the test set.
    """
    print(f"\n--- Training {name} ---")

    # Parameter attribute names, listed in the same order as the gradients
    # returned by the matching model's backward().
    if is_2hl:
        param_names = ("w_out", "b_out", "w2", "b2", "w1", "b1")
    else:
        param_names = ("weight_out", "bias_out", "weight_h", "bias_h")

    for epoch in range(1, num_epochs + 1):
        costs = []
        for xi, yi in minibatch_generator(X_train, y_train, minibatch_size):
            # forward() returns the hidden activations followed by the output.
            *hidden, out = model.forward(xi)
            grads = model.backward(xi, *hidden, out, yi)
            for attr, grad in zip(param_names, grads):
                param = getattr(model, attr)
                param -= lr * grad  # in-place update of the model's ndarray
            # Cost of the pre-update prediction; the one-hot width follows the
            # model rather than a hard-coded class count.
            target = int_to_onehot(yi, model.num_classes)
            costs.append(np.mean((target - out)**2))

        if epoch % 5 == 0:
            print(f"Epoch {epoch:03d}/{num_epochs:03d} | Avg MSE: {np.mean(costs):.4f}")

    # The last element returned by forward() is the output activation.
    *_, probas = model.forward(X_test)
    # The sigmoid outputs are scored against binarized labels, one ROC curve
    # per class, then macro-averaged.
    auc = roc_auc_score(LabelBinarizer().fit_transform(y_test), probas, average='macro', multi_class='ovr')
    return auc

# Run the training sessions
# Train and evaluate both NumPy networks on identical layer sizes
# (784 inputs, 50 hidden units per hidden layer, 10 classes).
auc_1hl = run_training(
    model=NeuralNetMLP_1HL(num_features=784, num_hidden=50, num_classes=10),
    name="Original (Section 3) 1-HL",
)
auc_2hl = run_training(
    model=NeuralNetMLP_2HL(num_features=784, num_hidden=50, num_classes=10),
    name="Extended (Assignment) 2-HL",
    is_2hl=True,
)

# --- PyTorch (framework) model for comparison ---
print("\n--- Training PyTorch (Framework) ---")
# Same 784-50-50-10 all-sigmoid architecture as NeuralNetMLP_2HL. The weights
# use nn.Linear's default initialisation, not the normal(0, 0.1) draw of the
# NumPy models, so the results are comparable but not identical.
pt_model = nn.Sequential(
    nn.Linear(784, 50), nn.Sigmoid(),
    nn.Linear(50, 50), nn.Sigmoid(),
    nn.Linear(50, 10), nn.Sigmoid(),
)
opt = torch.optim.SGD(pt_model.parameters(), lr=0.1)
# Sum the squared errors here and divide by the batch size below, so the
# gradient w.r.t. the outputs is 2 * (out - target) / n -- the scaling the
# NumPy models use in backward(). The default reduction='mean' would also
# divide by the 10 classes, making every SGD step 10x smaller at the same
# learning rate and skewing the comparison.
loss_fn = nn.MSELoss(reduction='sum')
for e in range(20):
    gen = minibatch_generator(X_train, y_train, 100)
    for xi, yi in gen:
        inputs = torch.as_tensor(xi, dtype=torch.float32)
        targets = torch.as_tensor(int_to_onehot(yi, 10), dtype=torch.float32)
        opt.zero_grad()
        loss = loss_fn(pt_model(inputs), targets) / inputs.shape[0]
        loss.backward()
        opt.step()
    if (e+1) % 5 == 0:
        print(f"PyTorch Epoch {e+1:03d}/020 complete.")

# Inference only: no autograd graph is needed for the test-set predictions.
with torch.no_grad():
    pt_p = pt_model(torch.as_tensor(X_test, dtype=torch.float32)).numpy()
# Same metric as the NumPy models: per-class ROC AUC on binarized labels,
# macro-averaged.
auc_pt = roc_auc_score(LabelBinarizer().fit_transform(y_test), pt_p, average='macro', multi_class='ovr')

# Summary: test-set macro ROC AUC of the two NumPy networks and the PyTorch
# model trained above, printed to four decimal places.
print(f"\n{'='*30}\nFINAL COMPARISON RESULTS\n{'='*30}")
print(f"1. Original Code (1-HL):   AUC = {auc_1hl:.4f}")
print(f"2. Extended Code (2-HL):   AUC = {auc_2hl:.4f}")
print(f"3. PyTorch Implementation: AUC = {auc_pt:.4f}")

--- Step 1: Loading MNIST Dataset ---
Train size: 49000, Test size: 21000

--- Training Original (Section 3) 1-HL ---
Epoch 005/020 | Avg MSE: 0.0208
Epoch 010/020 | Avg MSE: 0.0152
Epoch 015/020 | Avg MSE: 0.0131
