In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Pick the compute device: CUDA when PyTorch reports one as available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Define a simple CNN model
class SimpleCNN(nn.Module):
    """Small two-convolution CNN that outputs per-class log-probabilities.

    Two 5x5 convolutions, each followed by 2x2 max-pooling and ReLU, feed a
    two-layer classifier. ``fc1`` expects 320 features per sample, which is
    what a single-channel 28x28 input produces (20 maps of 4x4).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Map a batch of images to log-probabilities over 10 classes.

        Args:
            x: tensor of shape (batch, 1, H, W), e.g. H = W = 28.

        Returns:
            Tensor of shape (batch, 10) holding ``log_softmax`` over dim 1.

        Raises:
            RuntimeError: if the per-sample flattened feature count is not 320.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        # Flatten per sample instead of view(-1, 320): with an unexpected input
        # size, view(-1, 320) can silently re-partition the batch dimension,
        # whereas this keeps one row per sample and lets fc1 reject bad shapes.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Set up training parameters
batch_size = 64
learning_rate = 0.01
weight_decay = 1e-4  # L2 regularization parameter

# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable between runs (GPU kernels may still be non-deterministic).
torch.manual_seed(42)

# Load the dataset
train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor())
# Note that we purposefully limit the number of training data to overfit the model:
# test_size=0.99 keeps only 1% of the samples for training, the rest is used for validation.
train_data, val_data = train_test_split(train_dataset, test_size=0.99, random_state=42)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)


# Initialize the model, loss function, and optimizers (with and without L2 regularization)
model_with_l2 = SimpleCNN().to(device)
model_without_l2 = SimpleCNN().to(device)
# Give both models the same starting weights so the comparison is not
# confounded by two different random initialisations.
model_without_l2.load_state_dict(model_with_l2.state_dict())
criterion = nn.NLLLoss()  # pairs with the log_softmax output of SimpleCNN
optimizer_with_l2 = optim.SGD(model_with_l2.parameters(), lr=learning_rate, weight_decay=weight_decay)
optimizer_without_l2 = optim.SGD(model_without_l2.parameters(), lr=learning_rate, weight_decay=0)

# Per-epoch loss histories for both runs (filled by the training loops, read by the plot)
num_epochs = 500
train_losses_with_l2 = []
val_losses_with_l2 = []
train_losses_without_l2 = []
val_losses_without_l2 = []

# Training with L2 Regularization
for epoch in tqdm(range(num_epochs), desc="L2 Reg Model Epoch Progress", position=0):
    # Switch back to training mode at the start of every epoch: the validation
    # pass below puts the model in eval mode, so a single train() call before
    # the loop would only cover the first epoch.
    model_with_l2.train()
    total_train_loss = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer_with_l2.zero_grad()  # Zero the gradients
        output = model_with_l2(data)  # Forward pass
        loss = criterion(output, target)  # Compute the loss
        loss.backward()  # Backpropagate the gradients
        optimizer_with_l2.step()  # Update the weights
        total_train_loss += loss.item()
    # Epoch loss is the mean of the per-batch losses
    avg_train_loss = total_train_loss / len(train_loader)
    train_losses_with_l2.append(avg_train_loss)

    # Validation loss (eval mode, no gradient tracking)
    model_with_l2.eval()
    total_val_loss = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model_with_l2(data)
            loss = criterion(output, target)
            total_val_loss += loss.item()
    avg_val_loss = total_val_loss / len(val_loader)
    val_losses_with_l2.append(avg_val_loss)

# Training without L2 Regularization
for epoch in tqdm(range(num_epochs), desc="Unreg Model Epoch Progress", position=0):
    # Switch back to training mode at the start of every epoch: the validation
    # pass below puts the model in eval mode, so a single train() call before
    # the loop would only cover the first epoch.
    model_without_l2.train()
    total_train_loss = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer_without_l2.zero_grad()  # Zero the gradients
        output = model_without_l2(data)  # Forward pass
        loss = criterion(output, target)  # Compute the loss
        loss.backward()  # Backpropagate the gradients
        optimizer_without_l2.step()  # Update the weights
        total_train_loss += loss.item()
    # Epoch loss is the mean of the per-batch losses
    avg_train_loss = total_train_loss / len(train_loader)
    train_losses_without_l2.append(avg_train_loss)

    # Validation loss (eval mode, no gradient tracking)
    model_without_l2.eval()
    total_val_loss = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model_without_l2(data)
            loss = criterion(output, target)
            total_val_loss += loss.item()
    avg_val_loss = total_val_loss / len(val_loader)
    val_losses_without_l2.append(avg_val_loss)

        


In [None]:
# Plotting Train and Validation Loss vs Epochs for both cases
# The x-axis of each curve comes from that curve's own length rather than from
# num_epochs, which other cells rebind; this keeps x and y the same length.
loss_curves = [
    (train_losses_with_l2, 'Train Loss With L2 Regularization'),
    (val_losses_with_l2, 'Validation Loss With L2 Regularization'),
    (train_losses_without_l2, 'Train Loss Without L2 Regularization'),
    (val_losses_without_l2, 'Validation Loss Without L2 Regularization'),
]
for losses, label in loss_curves:
    plt.plot(range(1, len(losses) + 1), losses, label=label)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Train and Validation Loss vs Epochs With and Without L2 Regularization')
plt.legend()
plt.grid(True)
plt.show()



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Define a simple CNN model
class SimpleCNN(nn.Module):
    """Two-convolution CNN with dropout that outputs per-class log-probabilities.

    Two 5x5 convolutions, each followed by 2x2 max-pooling, ReLU and dropout
    (p=0.5), feed a two-layer classifier. ``fc1`` expects 320 features per
    sample, which is what a single-channel 28x28 input produces (20 maps of 4x4).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        # One stateless dropout module, applied after each convolutional stage
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a batch of images to log-probabilities over 10 classes.

        Args:
            x: tensor of shape (batch, 1, H, W), e.g. H = W = 28.

        Returns:
            Tensor of shape (batch, 10) holding ``log_softmax`` over dim 1.
            Dropout is only active while the module is in training mode.

        Raises:
            RuntimeError: if the per-sample flattened feature count is not 320.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = self.dropout(x)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = self.dropout(x)
        # Flatten per sample instead of view(-1, 320): with an unexpected input
        # size, view(-1, 320) can silently re-partition the batch dimension,
        # whereas this keeps one row per sample and lets fc1 reject bad shapes.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Set up training parameters
batch_size = 64
learning_rate = 0.01
weight_decay = 1e-4  # L2 regularization parameter

# Seed PyTorch's global RNG so weight initialisation, dropout masks and
# DataLoader shuffling are repeatable between runs (GPU kernels may still be
# non-deterministic).
torch.manual_seed(42)

# Load the dataset; test_size=0.99 keeps only 1% of the samples for training
train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor())
train_data, val_data = train_test_split(train_dataset, test_size=0.99, random_state=42)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Check if GPU is available and use it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss function, and optimizer
model = SimpleCNN().to(device)
criterion = nn.NLLLoss()  # pairs with the log_softmax output of SimpleCNN
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Training loop: one optimisation pass and one validation pass per epoch
num_epochs = 500
train_losses, val_losses = [], []

for epoch in tqdm(range(num_epochs), desc="Epoch Progress", position=0):
    # Optimisation pass over the training subset (training mode)
    model.train()
    running_loss = 0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    # Record the mean of the per-batch losses for this epoch
    train_losses.append(running_loss / len(train_loader))

    # Validation pass (eval mode, gradients not tracked)
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            running_loss += criterion(model(inputs), labels).item()
    val_losses.append(running_loss / len(val_loader))

# Draw both loss histories against the 1-based epoch index
epoch_axis = range(1, num_epochs + 1)
for series, legend_label in ((train_losses, 'Train Loss'), (val_losses, 'Validation Loss')):
    plt.plot(epoch_axis, series, label=legend_label)
plt.title('Train and Validation Loss vs Epochs with Dropout')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np

# Define a simple CNN model
class SimpleCNN(nn.Module):
    """Two-convolution CNN with dropout that outputs per-class log-probabilities.

    Two 5x5 convolutions, each followed by 2x2 max-pooling, ReLU and dropout
    (p=0.5), feed a two-layer classifier. ``fc1`` expects 320 features per
    sample, which is what a single-channel 28x28 input produces (20 maps of 4x4).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        # One stateless dropout module, applied after each convolutional stage
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a batch of images to log-probabilities over 10 classes.

        Args:
            x: tensor of shape (batch, 1, H, W), e.g. H = W = 28.

        Returns:
            Tensor of shape (batch, 10) holding ``log_softmax`` over dim 1.
            Dropout is only active while the module is in training mode.

        Raises:
            RuntimeError: if the per-sample flattened feature count is not 320.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = self.dropout(x)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = self.dropout(x)
        # Flatten per sample instead of view(-1, 320): with an unexpected input
        # size, view(-1, 320) can silently re-partition the batch dimension,
        # whereas this keeps one row per sample and lets fc1 reject bad shapes.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Set up training parameters
batch_size = 64
learning_rate = 0.01
weight_decay = 1e-4  # L2 regularization parameter
patience = 3  # Early stopping patience: epochs without validation improvement before stopping

# Seed PyTorch's global RNG so weight initialisation, dropout masks and
# DataLoader shuffling are repeatable between runs (GPU kernels may still be
# non-deterministic).
torch.manual_seed(42)

# Load the dataset; test_size=0.95 keeps 5% of the samples for training
train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor())
train_data, val_data = train_test_split(train_dataset, test_size=0.95, random_state=42)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)

# Check if GPU is available and use it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model, loss function, and optimizer
model = SimpleCNN().to(device)
criterion = nn.NLLLoss()  # pairs with the log_softmax output of SimpleCNN
optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Training loop with Early Stopping
num_epochs = 500
train_losses = []
val_losses = []
min_val_loss = np.inf
patience_counter = 0
# Snapshot of the weights at the best validation loss; stays None if the
# validation loss never improves on infinity (e.g. it is NaN from the start).
best_model_state = None

for epoch in tqdm(range(num_epochs), desc="Epoch Progress", position=0):
    model.train()
    total_train_loss = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # Zero the gradients
        output = model(data)  # Forward pass
        loss = criterion(output, target)  # Compute the loss
        loss.backward()  # Backpropagate the gradients
        optimizer.step()  # Update the weights
        total_train_loss += loss.item()
    # Epoch loss is the mean of the per-batch losses
    avg_train_loss = total_train_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Validation loss (eval mode, no gradient tracking)
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            total_val_loss += loss.item()
    avg_val_loss = total_val_loss / len(val_loader)
    val_losses.append(avg_val_loss)

    # Early stopping check
    if avg_val_loss < min_val_loss:
        min_val_loss = avg_val_loss
        patience_counter = 0
        # state_dict() hands back references to the live parameter tensors, so
        # each tensor is cloned; otherwise later optimizer steps would silently
        # overwrite the "best" snapshot.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping triggered after {epoch + 1} epochs.')
            break

# Restore the weights from the epoch with the lowest validation loss
# (skipped when no epoch ever produced an improvement).
if best_model_state is not None:
    model.load_state_dict(best_model_state)

# Draw both loss histories; each x-axis follows the number of epochs actually recorded
for series, legend_label in ((train_losses, 'Train Loss'), (val_losses, 'Validation Loss')):
    epoch_axis = range(1, len(series) + 1)
    plt.plot(epoch_axis, series, label=legend_label)
plt.title('Train and Validation Loss vs Epochs with Dropout and Early Stopping')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.show()