### Regularization

In [None]:
import torch

def default_device():
    """Pick the best available torch device.

    Preference order is CUDA, then Apple MPS, then CPU.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` is true,
        otherwise ``mps`` when the MPS backend exists and reports itself
        available, otherwise ``cpu``.
    """
    if torch.cuda.is_available():
        return torch.device('cuda')
    # torch.backends.mps is absent from older PyTorch builds; treat a missing
    # backend as "not available" instead of raising AttributeError.
    mps_backend = getattr(torch.backends, 'mps', None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device('mps')
    return torch.device('cpu')


# Resolved once; the following cells move tensors and models onto this device.
device = default_device()

In [None]:
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np


In [None]:
# Experiment size: a small dataset paired with wide hidden layers, trained
# for many epochs.
n_epochs = 500
hidden_size = 200
n_samples = 20

# Seed PyTorch's global RNG so the random draws made below repeat from run
# to run.
torch.manual_seed(42)

In [None]:
# Train and test share the same evenly spaced inputs in [-1, 1], stored as a
# column of shape (n_samples, 1). Targets are y = x plus Gaussian noise scaled
# by 0.3; the two splits differ only in their noise draw.
x_train = torch.linspace(-1, 1, n_samples).unsqueeze(1).to(device)
y_train = x_train + torch.randn(n_samples, 1).to(device) * 0.3

x_test = torch.linspace(-1, 1, n_samples).unsqueeze(1).to(device)
y_test = x_test + torch.randn(n_samples, 1).to(device) * 0.3

# Tensors are copied back to the CPU before being handed to matplotlib.
plt.scatter(
    x_train.cpu(), y_train.cpu(),
    c='r', alpha=0.5, label='train',
)
plt.scatter(
    x_test.cpu(), y_test.cpu(),
    c='b', alpha=0.5, label='test',
)
plt.legend(loc='upper left')
plt.ylim(-2, 2)
plt.show()

In [None]:
# Baseline regressor: 1 -> hidden -> hidden -> 1 with ReLU activations and no
# regularisation.
overfit_layers = [
    nn.Linear(1, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, 1),
]
net_overfit = nn.Sequential(*overfit_layers)
net_overfit = net_overfit.to(device)

# Same layer sizes, with Dropout(p=0.5) inserted after each hidden Linear
# layer (ahead of its ReLU).
dropout_layers = [
    nn.Linear(1, hidden_size),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.Dropout(0.5),
    nn.ReLU(),
    nn.Linear(hidden_size, 1),
]
net_dropout = nn.Sequential(*dropout_layers)
net_dropout = net_dropout.to(device)


In [None]:
# One Adam optimizer per network, same learning rate, and a shared MSE loss so
# the two runs differ only in the presence of dropout.
optimizer_overfit = torch.optim.Adam(net_overfit.parameters(), lr=0.01)
optimizer_dropout = torch.optim.Adam(net_dropout.parameters(), lr=0.01)
criterion = nn.MSELoss()

# Dropout layers only drop activations in training mode. Set it explicitly so
# that re-running this cell after the evaluation cell (which calls .eval())
# does not silently train the "dropout" network with dropout disabled.
net_overfit.train()
net_dropout.train()

# Full-batch training: every epoch is a single gradient step on all of
# (x_train, y_train) for each network.
for i in range(n_epochs):
    # Unregularised network.
    pred_overfit = net_overfit(x_train)
    loss_overfit = criterion(pred_overfit, y_train)
    optimizer_overfit.zero_grad()
    loss_overfit.backward()
    optimizer_overfit.step()

    # Dropout network, trained on the same batch.
    pred_dropout = net_dropout(x_train)
    loss_dropout = criterion(pred_dropout, y_train)
    optimizer_dropout.zero_grad()
    loss_dropout.backward()
    optimizer_dropout.step()

In [None]:
# Switch both networks to evaluation mode so the Dropout layers act as
# identity during prediction.
net_overfit.eval()
net_dropout.eval()

# Inference only: skip autograd bookkeeping so no graph is built for these
# forward passes and the outputs can be plotted without detaching.
with torch.no_grad():
    test_pred_overfit = net_overfit(x_test)
    test_dropout_overfit = net_dropout(x_test)

# Overlay each network's fitted curve on the noisy train/test points.
plt.scatter(x_train.cpu(), y_train.cpu(), c='r', alpha=0.5, label='train')
plt.scatter(x_test.cpu(), y_test.cpu(), c='b', alpha=0.5, label='test')
plt.plot(x_test.cpu(), test_pred_overfit.cpu(), 'r', label='overfit')
plt.plot(x_test.cpu(), test_dropout_overfit.cpu(), 'g--', label='dropout')
plt.legend(loc='upper left')
plt.ylim(-2, 2)
plt.show()
