### Libraries:

In [22]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import statistics

### Version

In [3]:
torch.__version__

'2.8.0+cpu'

In [15]:
criterion = nn.BCEWithLogitsLoss()

In [16]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")

Total parameters: 20073473


### Seeds for reproducibility

In [17]:
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cpu


### Data Preparation

In [5]:
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
!unzip data.zip

'wget' is not recognized as an internal or external command,
operable program or batch file.
'unzip' is not recognized as an internal or external command,
operable program or batch file.


In [6]:
train_dir = 'data/train'
test_dir = 'data/test'

### Model
For this homework we will use Convolutional Neural Network (CNN). We'll use PyTorch.

You need to develop the model with following structure:

The shape for input should be (3, 200, 200) (channels first format in PyTorch)\
Next, create a convolutional layer (nn.Conv2d):\
Use 32 filters (output channels)\
Kernel size should be (3, 3) (that's the size of the filter)\
Use 'relu' as activation\
Reduce the size of the feature map with max pooling (nn.MaxPool2d)\
Set the pooling size to (2, 2)\
Turn the multi-dimensional result into vectors using flatten or view\
Next, add a nn.Linear layer with 64 neurons and 'relu' activation\
Finally, create the nn.Linear layer with 1 neuron - this will be the output\
The output layer should have an activation - use the appropriate activation for the binary classification case\
As optimizer use torch.optim.SGD with the following parameters:\

torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

In [18]:
class CNNBinaryClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        # Use padding=1 to preserve spatial dims before pooling
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)  # 200x200 -> 200x200
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)                           # -> 100x100

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1) # 100x100 -> 100x100
        self.bn2 = nn.BatchNorm2d(64)
        # -> after pool -> 50x50

        # Reduce final spatial map with adaptive pooling to a small fixed size
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))  # yields 64 x 4 x 4 => 1024 features

        self.fc1 = nn.Linear(64 * 4 * 4, 64)
        self.dropout = nn.Dropout(p=0.4)   # help generalization
        self.fc2 = nn.Linear(64, 1)        # logits output (no sigmoid here)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.pool(x)

        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)

        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        logits = self.fc2(x)
        return logits  # return raw logits

In [19]:
# instantiate model once and move to device
model = CNNBinaryClassifier().to(device)
print(model)

# criterion and optimizer as required
criterion = nn.BCEWithLogitsLoss()  # stable: combines sigmoid+BCEloss internally
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

# Data transforms (base, no augmentation yet)
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])
val_transforms = train_transforms

# dataset paths (adjust if different)
train_dir = 'data/train'
test_dir  = 'data/test'

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
validation_dataset = datasets.ImageFolder(root=test_dir, transform=val_transforms)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False)

CNNBinaryClassifier(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (adaptive_pool): AdaptiveAvgPool2d(output_size=(4, 4))
  (fc1): Linear(in_features=1024, out_features=64, bias=True)
  (dropout): Dropout(p=0.4, inplace=False)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)


In [20]:
# training loop - record history
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.float().unsqueeze(1).to(device)  # shape (B,1)

        optimizer.zero_grad()
        outputs = model(images)           # logits shape (B,1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)

        # compute predicted labels from logits
        probs = torch.sigmoid(outputs)
        predicted = (probs > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    # validation
    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * images.size(0)

            probs = torch.sigmoid(outputs)
            predicted = (probs > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}  Loss: {epoch_loss:.4f}  Acc: {epoch_acc:.4f}  Val Loss: {val_epoch_loss:.4f}  Val Acc: {val_epoch_acc:.4f}")

# After training you can save:
torch.save(model.state_dict(), "cnn_binary_fixed.pth")

Epoch 1/10  Loss: 0.6700  Acc: 0.5575  Val Loss: 0.6561  Val Acc: 0.7015
Epoch 2/10  Loss: 0.6150  Acc: 0.7100  Val Loss: 0.6095  Val Acc: 0.7114
Epoch 3/10  Loss: 0.5634  Acc: 0.7475  Val Loss: 0.5574  Val Acc: 0.7313
Epoch 4/10  Loss: 0.5180  Acc: 0.7725  Val Loss: 0.5515  Val Acc: 0.7711
Epoch 5/10  Loss: 0.4983  Acc: 0.7837  Val Loss: 0.5291  Val Acc: 0.7711
Epoch 6/10  Loss: 0.4795  Acc: 0.7975  Val Loss: 0.5190  Val Acc: 0.7562
Epoch 7/10  Loss: 0.4607  Acc: 0.8063  Val Loss: 0.5104  Val Acc: 0.7711
Epoch 8/10  Loss: 0.4316  Acc: 0.8100  Val Loss: 0.5226  Val Acc: 0.7562
Epoch 9/10  Loss: 0.4407  Acc: 0.8125  Val Loss: 0.5573  Val Acc: 0.7214
Epoch 10/10  Loss: 0.4320  Acc: 0.8213  Val Loss: 0.4864  Val Acc: 0.7711


In [23]:
median_acc = statistics.median(history['acc'])
print("Median training accuracy:", median_acc)

Median training accuracy: 0.7906249999999999


In [24]:
train_losses = history['loss']
std_loss = statistics.stdev(train_losses)

print("Standard deviation of training loss:", std_loss)

Standard deviation of training loss: 0.0817053844190638


In [25]:
# Augmented train transforms (as requested)
aug_train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9,1.0), ratio=(0.9,1.1)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

# Replace the train dataset and loader (keep the same model and optimizer)
train_dataset.transform = aug_train_transforms
train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)

# Continue training for 10 more epochs
num_more_epochs = 10
# We'll append new history entries; keep previous history if needed
for epoch in range(num_more_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.float().unsqueeze(1).to(device)

        optimizer.zero_grad()
        outputs = model(images)  # logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        probs = torch.sigmoid(outputs)
        predicted = (probs > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    # validation (test) step
    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item() * images.size(0)
            probs = torch.sigmoid(outputs)
            predicted = (probs > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"[AUG] Epoch {epoch+1}/{num_more_epochs}  Loss: {epoch_loss:.4f}  Acc: {epoch_acc:.4f}  Val Loss: {val_epoch_loss:.4f}  Val Acc: {val_epoch_acc:.4f}")

# After training, metrics for Q5 and Q6:

# Q5: mean of test loss for all epochs (the new 'val_loss' array contains the 
#      losses for the validation set for ALL epochs including previous ones and the newly added ones).
import numpy as np
mean_test_loss_all_epochs = float(np.mean(history['val_loss']))
print("Mean test (val) loss over all recorded epochs:", mean_test_loss_all_epochs)

# Q6: average of test accuracy for the last 5 epochs (from 6 to 10 of the *augmentation training session*).
# If you continued for 10 more epochs, those are the last 10 appended items in history['val_acc'].
# To get last-5 of the augmentation block, do:
aug_block_val_acc = history['val_acc'][-10:]   # last 10 epochs (augmentation block)
avg_last5_aug = float(np.mean(aug_block_val_acc[-5:]))
print("Average val accuracy for last 5 epochs of augmentation block:", avg_last5_aug)

[AUG] Epoch 1/10  Loss: 0.5073  Acc: 0.7750  Val Loss: 0.4694  Val Acc: 0.7811
[AUG] Epoch 2/10  Loss: 0.4956  Acc: 0.7688  Val Loss: 0.4457  Val Acc: 0.8010
[AUG] Epoch 3/10  Loss: 0.4711  Acc: 0.7863  Val Loss: 0.4724  Val Acc: 0.7861
[AUG] Epoch 4/10  Loss: 0.4811  Acc: 0.7863  Val Loss: 0.4429  Val Acc: 0.8010
[AUG] Epoch 5/10  Loss: 0.4755  Acc: 0.7650  Val Loss: 0.4530  Val Acc: 0.7960
[AUG] Epoch 6/10  Loss: 0.4698  Acc: 0.7950  Val Loss: 0.4474  Val Acc: 0.7662
[AUG] Epoch 7/10  Loss: 0.4544  Acc: 0.7887  Val Loss: 0.5863  Val Acc: 0.7363
[AUG] Epoch 8/10  Loss: 0.4711  Acc: 0.7963  Val Loss: 0.5214  Val Acc: 0.7363
[AUG] Epoch 9/10  Loss: 0.4588  Acc: 0.8025  Val Loss: 0.4197  Val Acc: 0.7960
[AUG] Epoch 10/10  Loss: 0.4499  Acc: 0.7975  Val Loss: 0.4936  Val Acc: 0.7562
Mean test (val) loss over all recorded epochs: 0.512553000924599
Average val accuracy for last 5 epochs of augmentation block: 0.7582089552238805
