In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import os

### 1. CIFAR10

In [1]:

# Define the CNN architecture
class Net(nn.Module):
    """Two-convolution CNN that maps 3-channel images to 10 log-probabilities.

    The first fully connected layer expects 64 * 6 * 6 features, which is what
    the two unpadded conv + 2x2 max-pool stages produce for 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # Unpadded 3x3 convolutions with stride 1.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=64 * 6 * 6, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities (log_softmax over dim 1)."""
        # 32x32 -> conv 30x30 -> pool 15x15
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        # 15x15 -> conv 13x13 -> pool 6x6
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)
        x = torch.flatten(self.dropout1(x), start_dim=1)
        x = self.dropout2(F.relu(self.fc1(x)))
        return F.log_softmax(self.fc2(x), dim=1)


# Load and transform data: convert images to tensors, then normalise every
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Net.forward already returns log-probabilities (F.log_softmax), so the matching
# criterion is NLLLoss; CrossEntropyLoss would apply log_softmax a second time.
criterion = nn.NLLLoss()

# Training function
def train(model, device, train_loader, optimizer, criterion, num_epochs=10):
    """Train ``model`` in place, printing the mean loss of every 500 batches.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable yielding ``(data, targets)`` batches.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    model.train()
    report_every = 500
    for epoch in range(num_epochs):
        window_loss = 0.0
        for step, (inputs, labels) in enumerate(train_loader, start=1):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            window_loss += loss.item()
            if step % report_every != 0:
                continue
            # A full window of batches has been accumulated: report and reset.
            print(f'Epoch: {epoch+1}, Batch: {step}, Loss: {window_loss / report_every:.6f}')
            window_loss = 0.0

# Testing function
def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy of the model on the test images: {accuracy:.2f}%')

# Execute training and testing
# Train for ten epochs, then report accuracy on the test loader.
train(
    model=model,
    device=device,
    train_loader=train_loader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=10,
)
test(model=model, device=device, test_loader=test_loader)


Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, Batch: 500, Loss: 2.247912
Epoch: 1, Batch: 1000, Loss: 2.079398
Epoch: 1, Batch: 1500, Loss: 1.990046
Epoch: 1, Batch: 2000, Loss: 1.870743
Epoch: 1, Batch: 2500, Loss: 1.787618
Epoch: 1, Batch: 3000, Loss: 1.735243
Epoch: 1, Batch: 3500, Loss: 1.696775
Epoch: 1, Batch: 4000, Loss: 1.669765
Epoch: 1, Batch: 4500, Loss: 1.636031
Epoch: 1, Batch: 5000, Loss: 1.630080
Epoch: 1, Batch: 5500, Loss: 1.595795
Epoch: 1, Batch: 6000, Loss: 1.512604
Epoch: 1, Batch: 6500, Loss: 1.518107
Epoch: 1, Batch: 7000, Loss: 1.532138
Epoch: 1, Batch: 7500, Loss: 1.479841
Epoch: 1, Batch: 8000, Loss: 1.533455
Epoch: 1, Batch: 8500, Loss: 1.453073
Epoch: 1, Batch: 9000, Loss: 1.477682
Epoch: 1, Batch: 9500, Loss: 1.442765
Epoch: 1, Batch: 10000, Loss: 1.453797
Epoch: 1, Batch: 10500, Loss: 1.379644
Epoch: 1, Batch: 11000, Loss: 1.399510
Epoch: 1, Batch: 11500, Loss: 1.388594
Epoch: 1, Batch: 12000, Loss: 1.391164
Epoch: 1

### Task 2.1

In [None]:
# NOTE(review): this allocator option is presumably only honoured if it is set
# before the CUDA caching allocator is first used -- confirm it takes effect here.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

torch.cuda.empty_cache()
# Data loading
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# Model setup
# Device type strings are lowercase; "CPU" is rejected by torch.device, which
# broke this cell on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # memory_summary() only returns a string, so print it; skipped on CPU-only
    # machines where there is no CUDA allocator to summarise.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
# Randomly initialised VGG-11 (no pretrained weights) with a 10-class head.
model = torchvision.models.vgg11(num_classes=10).to(device)
# kaiming_normal_ re-initialises the first conv layer's weight in place.
nn.init.kaiming_normal_(model.features[0].weight, mode='fan_out', nonlinearity='relu')

# Optimizer and loss
optimizer = optim.Adam(model.parameters(), lr=2e-4, betas=(0.5, 0.999))
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and return each epoch's mean loss.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable of ``(data, targets)`` batches supporting ``len()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the summed batch losses divided by
        ``len(train_loader)``.
    """
    model.train()
    epoch_means = []
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()

        mean_loss = loss_sum / len(train_loader)
        epoch_means.append(mean_loss)
        print(f'Epoch: {epoch+1}, Loss: {mean_loss:.6f}')
    return epoch_means

def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Run training and testing
num_epochs = 100
train_losses = train(model, device, train_loader, optimizer, criterion, num_epochs)
test_accuracy = test(model, device, test_loader)

# Plotting
# The x-axis is derived from the recorded losses and starts at 1 so it lines up
# with the 1-based epoch numbers printed during training.
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.show()


Files already downloaded and verified
Files already downloaded and verified




Epoch: 1, Loss: 1.624929
Epoch: 2, Loss: 1.057229
Epoch: 3, Loss: 0.780996
Epoch: 4, Loss: 0.590915
Epoch: 5, Loss: 0.433436
Epoch: 6, Loss: 0.319630
Epoch: 7, Loss: 0.229362
Epoch: 8, Loss: 0.178540
Epoch: 9, Loss: 0.139547
Epoch: 10, Loss: 0.125846
Epoch: 11, Loss: 0.105844
Epoch: 12, Loss: 0.090893
Epoch: 13, Loss: 0.084409
Epoch: 14, Loss: 0.082470
Epoch: 15, Loss: 0.077677
Epoch: 16, Loss: 0.068802
Epoch: 17, Loss: 0.069172
Epoch: 18, Loss: 0.065412
Epoch: 19, Loss: 0.065732
Epoch: 20, Loss: 0.058172
Epoch: 21, Loss: 0.056878
Epoch: 22, Loss: 0.062114
Epoch: 23, Loss: 0.056140


### Task 2.1b

In [None]:
# NOTE(review): this allocator option is presumably only honoured if it is set
# before the CUDA caching allocator is first used -- confirm it takes effect here.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

torch.cuda.empty_cache()

# Data loading
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# Model setup with Batch Normalization
# Device type strings are lowercase; "CPU" is rejected by torch.device, which
# broke this cell on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # memory_summary() only returns a string, so print it; skipped on CPU-only
    # machines where there is no CUDA allocator to summarise.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
# Randomly initialised VGG-11 with batch norm (no pretrained weights), 10-class head.
model_bn = torchvision.models.vgg11_bn(num_classes=10).to(device)
# kaiming_normal_ re-initialises the first conv layer's weight in place.
nn.init.kaiming_normal_(model_bn.features[0].weight, mode='fan_out', nonlinearity='relu')

# Optimizer and loss
optimizer = optim.Adam(model_bn.parameters(), lr=2e-4, betas=(0.5, 0.999))
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and return each epoch's mean loss.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable of ``(data, targets)`` batches supporting ``len()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the summed batch losses divided by
        ``len(train_loader)``.
    """
    model.train()
    epoch_means = []
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()

        mean_loss = loss_sum / len(train_loader)
        epoch_means.append(mean_loss)
        print(f'Epoch: {epoch+1}, Loss: {mean_loss:.6f}')
    return epoch_means

def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Run training and testing
num_epochs = 100
train_losses = train(model_bn, device, train_loader, optimizer, criterion, num_epochs)
test_accuracy = test(model_bn, device, test_loader)


# Plotting (the figure is only displayed, not written to disk)
# The x-axis is derived from the recorded losses and starts at 1 so it lines up
# with the 1-based epoch numbers printed during training.
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.show()


## Task 2.1c

In [None]:

# NOTE(review): this allocator option is presumably only honoured if it is set
# before the CUDA caching allocator is first used -- confirm it takes effect here.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()

# Data loading: convert images to tensors, then normalise every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)

# Only the training loader shuffles.
train_loader = DataLoader(dataset=trainset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=testset, batch_size=64, shuffle=False, num_workers=2)

# Replace ReLU with LeakyReLU in the model
def replace_relu_with_leaky(model):
    """Recursively swap every ``nn.ReLU`` under ``model`` for ``nn.LeakyReLU``.

    The module tree is modified in place. Every container type is descended
    into (not only ``nn.Sequential``), so activations nested in other
    containers are replaced as well.

    Args:
        model: ``nn.Module`` whose descendants are rewritten.

    Returns:
        None.
    """
    # Snapshot the children so replacing entries cannot disturb the iteration.
    for child_name, child in list(model.named_children()):
        if isinstance(child, nn.ReLU):
            setattr(model, child_name, nn.LeakyReLU(negative_slope=0.01))
        else:
            replace_relu_with_leaky(child)

# Model setup with LeakyReLU
# Device type strings are lowercase; "CPU" is rejected by torch.device, which
# broke this cell on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # memory_summary() only returns a string, so print it; skipped on CPU-only
    # machines where there is no CUDA allocator to summarise.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
# Randomly initialised VGG-11 (no pretrained weights) with a 10-class head.
model_leaky_relu = torchvision.models.vgg11(num_classes=10).to(device)
replace_relu_with_leaky(model_leaky_relu)
# In-place re-initialisation of the first conv layer; `a` matches the
# negative slope of the LeakyReLU that follows it.
nn.init.kaiming_normal_(model_leaky_relu.features[0].weight, a=0.01, mode='fan_out', nonlinearity='leaky_relu')

# Optimizer and loss
optimizer = optim.Adam(model_leaky_relu.parameters(), lr=2e-4, betas=(0.5, 0.999))
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and return each epoch's mean loss.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable of ``(data, targets)`` batches supporting ``len()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the summed batch losses divided by
        ``len(train_loader)``.
    """
    model.train()
    epoch_means = []
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()

        mean_loss = loss_sum / len(train_loader)
        epoch_means.append(mean_loss)
        print(f'Epoch: {epoch+1}, Loss: {mean_loss:.6f}')
    return epoch_means

def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Run training and testing
num_epochs = 100
train_losses = train(model_leaky_relu, device, train_loader, optimizer, criterion, num_epochs)
test_accuracy = test(model_leaky_relu, device, test_loader)

# Plotting
# The x-axis is derived from the recorded losses and starts at 1 so it lines up
# with the 1-based epoch numbers printed during training.
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.show()

## Task 2.2a

In [None]:

# NOTE(review): this allocator option is presumably only honoured if it is set
# before the CUDA caching allocator is first used -- confirm it takes effect here.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()

# Data loading: convert images to tensors, then normalise every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)

# Only the training loader shuffles.
train_loader = DataLoader(dataset=trainset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=testset, batch_size=64, shuffle=False, num_workers=2)

# Replace ReLU with LeakyReLU in the model
def replace_relu_with_leaky(model):
    """Recursively swap every ``nn.ReLU`` under ``model`` for ``nn.LeakyReLU``.

    The module tree is modified in place. Every container type is descended
    into (not only ``nn.Sequential``), so activations nested in other
    containers are replaced as well.

    Args:
        model: ``nn.Module`` whose descendants are rewritten.

    Returns:
        None.
    """
    # Snapshot the children so replacing entries cannot disturb the iteration.
    for child_name, child in list(model.named_children()):
        if isinstance(child, nn.ReLU):
            setattr(model, child_name, nn.LeakyReLU(negative_slope=0.01))
        else:
            replace_relu_with_leaky(child)

# Model setup with LeakyReLU
# Device type strings are lowercase; "CPU" is rejected by torch.device, which
# broke this cell on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # memory_summary() only returns a string, so print it; skipped on CPU-only
    # machines where there is no CUDA allocator to summarise.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
# Randomly initialised VGG-11 (no pretrained weights) with a 10-class head.
model_leaky_relu = torchvision.models.vgg11(num_classes=10).to(device)
replace_relu_with_leaky(model_leaky_relu)
# In-place re-initialisation of the first conv layer; `a` matches the
# negative slope of the LeakyReLU that follows it.
nn.init.kaiming_normal_(model_leaky_relu.features[0].weight, a=0.01, mode='fan_out', nonlinearity='leaky_relu')

# Optimizer and loss - Change from Adam to SGD
optimizer = optim.SGD(model_leaky_relu.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and return each epoch's mean loss.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable of ``(data, targets)`` batches supporting ``len()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the summed batch losses divided by
        ``len(train_loader)``.
    """
    model.train()
    epoch_means = []
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()

        mean_loss = loss_sum / len(train_loader)
        epoch_means.append(mean_loss)
        print(f'Epoch: {epoch+1}, Loss: {mean_loss:.6f}')
    return epoch_means

def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Run training and testing
num_epochs = 100
train_losses = train(model_leaky_relu, device, train_loader, optimizer, criterion, num_epochs)
test_accuracy = test(model_leaky_relu, device, test_loader)

# Plotting
# The x-axis is derived from the recorded losses and starts at 1 so it lines up
# with the 1-based epoch numbers printed during training.
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.show()

## Task 2.2b

In [None]:

# NOTE(review): this allocator option is presumably only honoured if it is set
# before the CUDA caching allocator is first used -- confirm it takes effect here.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()

# Data loading with adjusted batch size: convert images to tensors, then
# normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)

# Both loaders use batches of 256; only the training loader shuffles.
train_loader = DataLoader(dataset=trainset, batch_size=256, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=testset, batch_size=256, shuffle=False, num_workers=2)

# Replace ReLU with LeakyReLU in the model
def replace_relu_with_leaky(model):
    """Recursively swap every ``nn.ReLU`` under ``model`` for ``nn.LeakyReLU``.

    The module tree is modified in place. Every container type is descended
    into (not only ``nn.Sequential``), so activations nested in other
    containers are replaced as well.

    Args:
        model: ``nn.Module`` whose descendants are rewritten.

    Returns:
        None.
    """
    # Snapshot the children so replacing entries cannot disturb the iteration.
    for child_name, child in list(model.named_children()):
        if isinstance(child, nn.ReLU):
            setattr(model, child_name, nn.LeakyReLU(negative_slope=0.01))
        else:
            replace_relu_with_leaky(child)

# Model setup with LeakyReLU
# Device type strings are lowercase; "CPU" is rejected by torch.device, which
# broke this cell on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # memory_summary() only returns a string, so print it; skipped on CPU-only
    # machines where there is no CUDA allocator to summarise.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
# Randomly initialised VGG-11 (no pretrained weights) with a 10-class head.
model_leaky_relu = torchvision.models.vgg11(num_classes=10).to(device)
replace_relu_with_leaky(model_leaky_relu)
# In-place re-initialisation of the first conv layer; `a` matches the
# negative slope of the LeakyReLU that follows it.
nn.init.kaiming_normal_(model_leaky_relu.features[0].weight, a=0.01, mode='fan_out', nonlinearity='leaky_relu')

# Optimizer and loss - Using SGD as previously set
optimizer = optim.SGD(model_leaky_relu.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and return each epoch's mean loss.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable of ``(data, targets)`` batches supporting ``len()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the summed batch losses divided by
        ``len(train_loader)``.
    """
    model.train()
    epoch_means = []
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()

        mean_loss = loss_sum / len(train_loader)
        epoch_means.append(mean_loss)
        print(f'Epoch: {epoch+1}, Loss: {mean_loss:.6f}')
    return epoch_means

def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Run training and testing
num_epochs = 100
train_losses = train(model_leaky_relu, device, train_loader, optimizer, criterion, num_epochs)
test_accuracy = test(model_leaky_relu, device, test_loader)

# Plotting
# The x-axis is derived from the recorded losses and starts at 1 so it lines up
# with the 1-based epoch numbers printed during training.
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.show()

## Task 2.2c

In [None]:

# NOTE(review): this allocator option is presumably only honoured if it is set
# before the CUDA caching allocator is first used -- confirm it takes effect here.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()

# Data loading: convert images to tensors, then normalise every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)

# Both loaders use batches of 256; only the training loader shuffles.
train_loader = DataLoader(dataset=trainset, batch_size=256, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=testset, batch_size=256, shuffle=False, num_workers=2)

# Replace ReLU with LeakyReLU in the model and apply Xavier Uniform initialization
def replace_relu_with_leaky_and_init_xavier(model):
    """Swap ReLU for LeakyReLU and Xavier-initialise every Conv2d under ``model``.

    The module tree is walked recursively and modified in place. The Conv2d
    check is applied at every depth: checking only the model's direct children
    never matches convolutions that live inside a nested container, so their
    weights would silently keep the default initialisation.

    Args:
        model: ``nn.Module`` whose descendants are rewritten / re-initialised.

    Returns:
        None.
    """
    # Snapshot the children so replacing entries cannot disturb the iteration.
    for child_name, child in list(model.named_children()):
        if isinstance(child, nn.ReLU):
            setattr(model, child_name, nn.LeakyReLU(negative_slope=0.01))
            continue
        if isinstance(child, nn.Conv2d):
            nn.init.xavier_uniform_(child.weight)
        replace_relu_with_leaky_and_init_xavier(child)

# Model setup with LeakyReLU and Xavier Uniform initialization
# Device type strings are lowercase; "CPU" is rejected by torch.device, which
# broke this cell on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # memory_summary() only returns a string, so print it; skipped on CPU-only
    # machines where there is no CUDA allocator to summarise.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
# Randomly initialised VGG-11 (no pretrained weights) with a 10-class head.
model_xavier = torchvision.models.vgg11(num_classes=10).to(device)
replace_relu_with_leaky_and_init_xavier(model_xavier)

# Optimizer and loss - Using SGD as previously set
optimizer = optim.SGD(model_xavier.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and return each epoch's mean loss.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable of ``(data, targets)`` batches supporting ``len()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the summed batch losses divided by
        ``len(train_loader)``.
    """
    model.train()
    epoch_means = []
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()

        mean_loss = loss_sum / len(train_loader)
        epoch_means.append(mean_loss)
        print(f'Epoch: {epoch+1}, Loss: {mean_loss:.6f}')
    return epoch_means

def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Run training and testing
num_epochs = 100
train_losses = train(model_xavier, device, train_loader, optimizer, criterion, num_epochs)
test_accuracy = test(model_xavier, device, test_loader)

# Plotting
# The x-axis is derived from the recorded losses and starts at 1 so it lines up
# with the 1-based epoch numbers printed during training.
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.show()

## Task 2.2d

In [None]:

# NOTE(review): this allocator option is presumably only honoured if it is set
# before the CUDA caching allocator is first used -- confirm it takes effect here.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()

# Data loading: convert images to tensors, then normalise every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)

# Both loaders use batches of 256; only the training loader shuffles.
train_loader = DataLoader(dataset=trainset, batch_size=256, shuffle=True, num_workers=2)
test_loader = DataLoader(dataset=testset, batch_size=256, shuffle=False, num_workers=2)

# Replace ReLU with LeakyReLU, apply Xavier Uniform initialization, and disable dropout
def modify_model(model):
    """Apply the LeakyReLU / no-dropout / Xavier modifications to ``model`` in place.

    Walking the module tree recursively:
      * every ``nn.ReLU`` is replaced by ``nn.LeakyReLU(negative_slope=0.01)``;
      * every ``nn.Dropout`` is replaced by ``nn.Dropout(p=0)`` (disabled);
      * every ``nn.Conv2d`` weight is re-initialised with Xavier uniform.

    The Conv2d check is applied at every depth: checking only the model's
    direct children never matches convolutions that live inside a nested
    container, so their weights would silently keep the default initialisation.

    Args:
        model: ``nn.Module`` whose descendants are rewritten / re-initialised.

    Returns:
        None.
    """
    # Snapshot the children so replacing entries cannot disturb the iteration.
    for child_name, child in list(model.named_children()):
        if isinstance(child, nn.ReLU):
            setattr(model, child_name, nn.LeakyReLU(negative_slope=0.01))
        elif isinstance(child, nn.Dropout):
            setattr(model, child_name, nn.Dropout(p=0))  # Disable dropout
        else:
            if isinstance(child, nn.Conv2d):
                nn.init.xavier_uniform_(child.weight)
            modify_model(child)

# Model setup with modifications
# Device type strings are lowercase; "CPU" is rejected by torch.device, which
# broke this cell on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # memory_summary() only returns a string, so print it; skipped on CPU-only
    # machines where there is no CUDA allocator to summarise.
    print(torch.cuda.memory_summary(device=None, abbreviated=False))
# Randomly initialised VGG-11 (no pretrained weights) with a 10-class head.
model_no_dropout = torchvision.models.vgg11(num_classes=10).to(device)
modify_model(model_no_dropout)

# Optimizer and loss - Using SGD as previously set
optimizer = optim.SGD(model_no_dropout.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, device, train_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and return each epoch's mean loss.

    Args:
        model: module to optimise; switched to training mode first.
        device: target passed to ``Tensor.to`` for every batch.
        train_loader: iterable of ``(data, targets)`` batches supporting ``len()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        List with one float per epoch: the summed batch losses divided by
        ``len(train_loader)``.
    """
    model.train()
    epoch_means = []
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()
            loss_sum += batch_loss.item()

        mean_loss = loss_sum / len(train_loader)
        epoch_means.append(mean_loss)
        print(f'Epoch: {epoch+1}, Loss: {mean_loss:.6f}')
    return epoch_means

def test(model, device, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Run training and testing
num_epochs = 100
train_losses = train(model_no_dropout, device, train_loader, optimizer, criterion, num_epochs)
test_accuracy = test(model_no_dropout, device, test_loader)

# Plotting
# The x-axis is derived from the recorded losses and starts at 1 so it lines up
# with the 1-based epoch numbers printed during training.
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Over Time')
plt.legend()
plt.show()