### Overfitting / Underfitting

In [15]:
import torch

def default_device():
    """Return the preferred torch device: CUDA, then Apple MPS, then CPU."""
    if torch.cuda.is_available():
        backend = 'cuda'
    elif torch.backends.mps.is_available():
        backend = 'mps'
    else:
        backend = 'cpu'
    return torch.device(backend)


# Device shared by every later cell (tensors and models are moved onto it).
device = default_device()

In [16]:
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [17]:
# Seed every RNG the notebook relies on so that reruns give the same curves:
# torch drives the weight initialisation and the DataLoader shuffling, NumPy
# drives the synthetic data. The torch call comes first so that the cell does
# not echo the Generator object that torch.manual_seed returns.
torch.manual_seed(32)
np.random.seed(32)

In [None]:
# Synthetic regression data: a parabola y = x^2 + 1 with unit-variance
# Gaussian noise, sampled at points drawn uniformly from [-5, 5).
num_samples = 100
x = np.random.uniform(low=-5, high=5, size=(num_samples, 1))
y = np.square(x) + 1 + np.random.normal(loc=0, scale=1, size=(num_samples, 1))

# Turn both arrays into float32 tensors living on the selected device.
x, y = (torch.from_numpy(array).float().to(device) for array in (x, y))

# Copy the points back to the CPU before handing them to matplotlib.
plt.scatter(x.cpu().numpy(), y.cpu().numpy())
plt.show()

In [20]:
# Hold out 30% of the samples for evaluation (fixed split via random_state).
train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.3, random_state=0
)

# Mini-batches of 32; only the training loader reshuffles every epoch.
train_dataloader, test_dataloader = (
    DataLoader(TensorDataset(features, targets), batch_size=32, shuffle=reshuffle)
    for features, targets, reshuffle in (
        (train_x, train_y, True),
        (test_x, test_y, False),
    )
)


In [21]:
# Underfitting: a straight line cannot follow the quadratic target.
class LinearRegression(nn.Module):
    """Single affine map from one input feature to one output value."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the affine layer to ``x`` and return the prediction."""
        prediction = self.linear(x)
        return prediction

# Good fit: one small hidden layer is enough for the quadratic target.
class MLP(nn.Module):
    """Network with one ReLU hidden layer of 8 units (1 -> 8 -> 1)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(in_features=1, out_features=8)
        self.output = nn.Linear(in_features=8, out_features=1)

    def forward(self, x):
        """Return the output layer applied to the ReLU-activated hidden layer."""
        activations = nn.functional.relu(self.hidden(x))
        return self.output(activations)
    
# Overfitting: far more capacity than 100 noisy samples can justify.
class MLP2(nn.Module):
    """Network with two ReLU hidden layers of 256 units (1 -> 256 -> 256 -> 1)."""

    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(in_features=1, out_features=256)
        self.hidden2 = nn.Linear(in_features=256, out_features=256)
        self.output = nn.Linear(in_features=256, out_features=1)

    def forward(self, x):
        """Run ``x`` through both ReLU hidden layers, then the output layer."""
        for layer in (self.hidden1, self.hidden2):
            x = nn.functional.relu(layer(x))
        return self.output(x)

In [23]:
def plot_error(models, n_epochs, train_dataloader, test_dataloader):
    """Train each model with SGD and record its per-epoch train/test MSE.

    Despite the name, nothing is drawn here: the function only returns the
    loss histories so that the caller can plot them.

    Args:
        models: Iterable of ``nn.Module`` instances. Each one is moved to the
            module-level ``device`` and trained in place.
        n_epochs: Number of passes over ``train_dataloader`` per model.
        train_dataloader: Yields ``(inputs, targets)`` batches used for the
            optimisation steps.
        test_dataloader: Yields ``(inputs, targets)`` batches used only for
            evaluation (no gradient tracking, no parameter updates).

    Returns:
        A tuple ``(train_losses, test_losses)``. Each is a list with one
        entry per model, and each entry is a list of ``n_epochs`` floats.
        Every value is the mean squared error over all samples seen in that
        epoch. The training value is a running loss, measured on each batch
        just before that batch's update. An empty loader yields ``nan``.
    """
    criterion = nn.MSELoss()
    train_losses = []
    test_losses = []

    for model in models:
        model.to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.005)
        train_losses_per_model = []
        test_losses_per_model = []

        for _ in range(n_epochs):
            # ---- optimisation pass ----
            model.train()
            loss_sum, n_seen = 0.0, 0
            for x_batch, y_batch in train_dataloader:
                # No-op when the data already lives on `device`.
                x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                loss = criterion(model(x_batch), y_batch)
                loss.backward()
                optimizer.step()
                # MSELoss returns a per-batch mean; weight it by the batch
                # size so a short final batch is not over-represented.
                batch_size = x_batch.size(0)
                loss_sum += loss.item() * batch_size
                n_seen += batch_size
            train_losses_per_model.append(
                loss_sum / n_seen if n_seen else float('nan')
            )

            # ---- evaluation pass ----
            model.eval()
            loss_sum, n_seen = 0.0, 0
            with torch.no_grad():
                for x_batch, y_batch in test_dataloader:
                    x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                    batch_size = x_batch.size(0)
                    loss_sum += criterion(model(x_batch), y_batch).item() * batch_size
                    n_seen += batch_size
            test_losses_per_model.append(
                loss_sum / n_seen if n_seen else float('nan')
            )

        train_losses.append(train_losses_per_model)
        test_losses.append(test_losses_per_model)
    return train_losses, test_losses


In [None]:
# Train the three architectures with the same budget and data loaders.
n_epochs = 200
models = [LinearRegression(), MLP(), MLP2()]

# Stack the loss histories into arrays with one row per model.
train_losses, test_losses = (
    np.array(history)
    for history in plot_error(models, n_epochs, train_dataloader, test_dataloader)
)
train_losses, test_losses

In [None]:
# One figure per model: training curve against held-out curve.
for model, train_curve, test_curve in zip(models, train_losses, test_losses):
    plt.figure(figsize=(8, 4))
    for curve, label in (
        (train_curve, f'Train {model.__class__.__name__}'),
        (test_curve, f'Test {model.__class__.__name__}'),
    ):
        plt.plot(range(n_epochs), curve, label=label)
    plt.legend()
    # Same y-range on every figure so the three models can be compared.
    plt.ylim(0, 200)
    plt.show()