# <center>Lab Sheet 7</center> <center>Generalization, Pruning, and Cross-Validation</center>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim

np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x797ae9d76330>

# Dataset: 2D Moons (small + non-linear, good for demo)

In [2]:
X, y = make_moons(n_samples=500, noise=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.long)
X_val_t   = torch.tensor(X_val, dtype=torch.float32)
y_val_t   = torch.tensor(y_val, dtype=torch.long)

**Q1.Train MLP on a small dataset with 5-fold cross-validation.**

In [3]:
class SimpleMLP(nn.Module):
    def __init__(self, hidden=16, dropout=0.0):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(2, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, 2)
        )
    def forward(self,x):
        return self.net(x)

def train_eval(model, Xtr, ytr, Xval, yval, epochs=100, lr=0.01):
    opt = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    train_losses, val_accs = [], []
    for ep in range(epochs):
        # train
        opt.zero_grad()
        out = model(Xtr)
        loss = loss_fn(out, ytr)
        loss.backward()
        opt.step()
        train_losses.append(loss.item())

        # validation accuracy
        with torch.no_grad():
            val_pred = model(Xval).argmax(1)
            acc = accuracy_score(yval, val_pred.numpy())
            val_accs.append(acc)
    return train_losses, val_accs

kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_accs = []
for fold,(tr,val) in enumerate(kf.split(X,y)):
    Xtr, Xv = torch.tensor(X[tr],dtype=torch.float32), torch.tensor(X[val],dtype=torch.float32)
    ytr, yv = torch.tensor(y[tr],dtype=torch.long), torch.tensor(y[val],dtype=torch.long)
    model = SimpleMLP()
    _, val_acc = train_eval(model,Xtr,ytr,Xv,yv,epochs=200)
    cv_accs.append(val_acc[-1])
    print(f"Fold {fold+1}: final val acc={val_acc[-1]:.3f}")
print("Q1: 5-fold CV mean accuracy:", np.mean(cv_accs))

Fold 1: final val acc=0.970
Fold 2: final val acc=0.980
Fold 3: final val acc=0.980
Fold 4: final val acc=0.990
Fold 5: final val acc=0.930
Q1: 5-fold CV mean accuracy: 0.97


**2. Add dropout layers to improve generalization — compare accuracy.**

In [4]:
drop_model = SimpleMLP(dropout=0.3)
train_loss, val_acc = train_eval(drop_model, X_train_t,y_train_t,X_val_t,y_val_t,epochs=200)
print("Q2: Final validation acc with dropout:", val_acc[-1])

plt.figure()
plt.plot(val_acc,label="Dropout=0.3")
plt.xlabel("Epoch"); plt.ylabel("Validation Accuracy")
plt.title("Q2: Dropout Effect")
plt.legend()
plt.savefig("lab7_q2_dropout.png",dpi=150,bbox_inches="tight"); plt.close()

Q2: Final validation acc with dropout: 0.88


**3. Implement weight pruning (zeroing low-magnitude weights) and re-train.**

In [5]:
prune_model = SimpleMLP()
_ , base_acc = train_eval(prune_model,X_train_t,y_train_t,X_val_t,y_val_t,epochs=200)
print("Before pruning val acc:", base_acc[-1])

# prune small weights
with torch.no_grad():
    for p in prune_model.parameters():
        mask = torch.abs(p) < 0.05
        p[mask] = 0.0

_ , pruned_acc = train_eval(prune_model,X_train_t,y_train_t,X_val_t,y_val_t,epochs=200)
print("After pruning + retrain val acc:", pruned_acc[-1])

Before pruning val acc: 0.96
After pruning + retrain val acc: 0.99


**4. Use a validation set to choose model hyperparameters (early stopping).**

In [6]:
early_model = SimpleMLP()
opt = optim.Adam(early_model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()
best_acc, patience, counter = 0, 20, 0
history = []
for ep in range(300):
    opt.zero_grad()
    out = early_model(X_train_t)
    loss = loss_fn(out,y_train_t)
    loss.backward()
    opt.step()

    with torch.no_grad():
        acc = accuracy_score(y_val, early_model(X_val_t).argmax(1).numpy())
        history.append(acc)
        if acc > best_acc:
            best_acc, counter = acc, 0
        else:
            counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {ep}, best val acc={best_acc:.3f}")
            break

plt.figure()
plt.plot(history)
plt.title("Q4: Early Stopping (Val Accuracy)")
plt.xlabel("Epoch"); plt.ylabel("Accuracy")
plt.savefig("lab7_q4_early_stopping.png",dpi=150,bbox_inches="tight"); plt.close()

Early stopping at epoch 30, best val acc=0.870


**5. Visualize overfitting vs generalization with increasing model size.**

In [7]:
hidden_sizes = [4,16,64,128]
train_accs, val_accs = [], []
for h in hidden_sizes:
    model = SimpleMLP(hidden=h)
    _, val_acc = train_eval(model,X_train_t,y_train_t,X_val_t,y_val_t,epochs=300)
    with torch.no_grad():
        tr_pred = model(X_train_t).argmax(1)
        train_acc = accuracy_score(y_train,tr_pred.numpy())
    train_accs.append(train_acc)
    val_accs.append(val_acc[-1])

plt.figure()
plt.plot(hidden_sizes,train_accs,"-o",label="Train Acc")
plt.plot(hidden_sizes,val_accs,"-o",label="Val Acc")
plt.xlabel("Hidden Layer Size")
plt.ylabel("Accuracy")
plt.title("Q5: Overfitting vs Generalization")
plt.legend()
plt.savefig("lab7_q5_overfitting.png",dpi=150,bbox_inches="tight"); plt.close()

print("Q5 Results: Train accs:", train_accs)
print("Q5 Results: Val accs:", val_accs)

Q5 Results: Train accs: [0.865, 0.98, 0.9875, 0.99]
Q5 Results: Val accs: [0.86, 0.98, 0.98, 0.98]
