In [None]:
# Apply the suggested random number generation process set by the question for reproduceability sake:

import numpy as np
import torch

SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
# Install torchsummary if not available (for Google Colab)
try:
    from torchsummary import summary
except ImportError:
    print("Installing torchsummary...")
    %pip install torchsummary -q
    from torchsummary import summary


In [3]:
# Import necessary libraries
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
import numpy as np
from tqdm import tqdm
from PIL import Image

# Try to import torchsummary, but it's optional
try:
    from torchsummary import summary
    HAS_TORCHSUMMARY = True
except ImportError:
    HAS_TORCHSUMMARY = False
    print("torchsummary not available. Install with: pip install torchsummary")



## Model Architecture

For this homework we will use Convolutional Neural Network (CNN). We'll use PyTorch.

Model structure:
* The shape for input should be `(3, 200, 200)` (channels first format in PyTorch)
* Next, create a convolutional layer (`nn.Conv2d`):
    * Use 32 filters (output channels)
    * Kernel size should be `(3, 3)` (that's the size of the filter)
    * Use `'relu'` as activation
* Reduce the size of the feature map with max pooling (`nn.MaxPool2d`)
    * Set the pooling size to `(2, 2)`
* Turn the multi-dimensional result into vectors using `flatten` or `view`
* Next, add a `nn.Linear` layer with 64 neurons and `'relu'` activation
* Finally, create the `nn.Linear` layer with 1 neuron - this will be the output
    * The output layer should have an activation - use the appropriate activation for the binary classification case

As optimizer use `torch.optim.SGD` with the following parameters:
* `torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)`


In [4]:
# Define the CNN model
class HairClassifierMobileNet(nn.Module):
    def __init__(self):
        super(HairClassifierMobileNet, self).__init__()

        # Convolutional layer: (3, 200, 200) -> (32, 198, 198)
        # padding=0, stride=1 (default)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(3, 3), padding=0, stride=1)
        self.relu1 = nn.ReLU()

        # Max pooling: (32, 198, 198) -> (32, 99, 99)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))

        # Flatten: (32, 99, 99) -> (32 * 99 * 99,)
        # First linear layer: 32 * 99 * 99 -> 64
        self.fc1 = nn.Linear(32 * 99 * 99, 64)
        self.relu2 = nn.ReLU()

        # Output layer: 64 -> 1 (binary classification)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Convolution + ReLU
        x = self.conv1(x)
        x = self.relu1(x)

        # Max pooling
        x = self.pool(x)

        # Flatten
        x = x.view(x.size(0), -1)

        # First fully connected layer + ReLU
        x = self.fc1(x)
        x = self.relu2(x)

        # Output layer + Sigmoid
        x = self.fc2(x)
        x = self.sigmoid(x)

        return x


In [5]:
# Define HairDataset

class HairDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

## Question 1: Which loss function you will use?

For binary classification with sigmoid activation, we use **BCELoss** (Binary Cross Entropy Loss).

Answer: **nn.BCELoss()** (or **nn.BCEWithLogitsLoss()** if we didn't use sigmoid)


In [6]:
# Loss function for binary classification
criterion = nn.BCELoss()


## Question 2: Total number of parameters


In [7]:
# Question 2: Total number of parameters
# Create model first to count parameters
model_temp = HairClassifierMobileNet()

# Method 1: Using torchsummary (if available)
if HAS_TORCHSUMMARY:
    try:
        summary(model_temp, (3, 200, 200))
    except Exception as e:
        print(f"Error using torchsummary: {e}")
        print("Counting manually...")
else:
    print("torchsummary not available, counting manually...")

# Method 2: Count manually
total_params = sum(p.numel() for p in model_temp.parameters())
trainable_params = sum(p.numel() for p in model_temp.parameters() if p.requires_grad)

print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Error using torchsummary: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same
Counting manually...

Total parameters: 20,073,473
Trainable parameters: 20,073,473


## Data Preparation

We'll use the **straight/curly hair** dataset from
`http://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip`.

Once unzipped, the dataset has the following folder structure:
- `data/train/`
- `data/test/`

Each split contains two subfolders (e.g. `curly/` and `straight/`) with images.
We'll:
- Download and unzip the dataset (if not already present)
- Resize images to 200x200 as required
- Keep it as a **binary** problem (curly vs straight).


In [8]:
# Download and unzip straight/curly hair dataset if not already present
import urllib.request
import zipfile

DATA_URL = "http://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip"
DATA_ZIP_PATH = "./data.zip"
DATA_DIR = "./data"

if not os.path.exists(DATA_DIR):
    print("Downloading dataset...")
    urllib.request.urlretrieve(DATA_URL, DATA_ZIP_PATH)
    print("Unzipping dataset...")
    with zipfile.ZipFile(DATA_ZIP_PATH, "r") as zip_ref:
        zip_ref.extractall(".")
    print("Done!")
else:
    print("Dataset already present, skipping download.")

Downloading dataset...
Unzipping dataset...
Done!


## Generators and Training
- We don't need to do any additional pre-processing for the images.
- Use batch_size=20
- Use shuffle=True for both training, but False for test.

Now fit the model.

In [9]:
# @title
# Path to straight/curly hair dataset (after unzipping data.zip)
data_dir = "./data"

# Define transforms for training (without augmentation initially)
input_size = 200

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=mean,
        std=std
    ) # ImageNet normalization
])

test_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Load dataset splits using ImageFolder
# Expected structure:
# ./data/train/curly
# ./data/train/straight
# ./data/test/curly
# ./data/test/straight
train_dir = os.path.join(data_dir, "train")
test_dir = os.path.join(data_dir, "test")

#train_dataset = ImageFolder(train_dir, transform=transform_train)
#test_dataset = ImageFolder(test_dir, transform=transform_test)

#print(f"Classes: {train_dataset.classes}")

# Since this dataset is already binary (curly vs straight),
# we don't need to remap labels; ImageFolder will give 0/1 labels.

# Create data loaders
#train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
#test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

#print(f"Training samples: {len(train_dataset)}")
#print(f"Test samples: {len(test_dataset)}")


In [10]:
from torch.utils.data import DataLoader

train_dataset = HairDataset(
    data_dir='./data/train',
    transform=train_transforms
)

test_dataset = HairDataset(
    data_dir='./data/test',
    transform=test_transforms
)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False)

In [11]:
# Model is already defined in cell 4 above
# Create model instance for training

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create model instance
model = HairClassifierMobileNet()
model.to(device)

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.8)


In [None]:
# Fitting model (using code from homework requirements)
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification, apply sigmoid to outputs before thresholding for accuracy
        # (Note: model already has sigmoid, but outputs are already sigmoided)
        predicted = (outputs > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]"):
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (outputs > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(test_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")


Epoch 1/10, Loss: 0.6679, Acc: 0.6230, Val Loss: 0.6749, Val Acc: 0.6617
Epoch 2/10, Loss: 0.5356, Acc: 0.7228, Val Loss: 0.7888, Val Acc: 0.5672
Epoch 3/10, Loss: 0.5513, Acc: 0.6979, Val Loss: 0.6553, Val Acc: 0.6418
Epoch 4/10, Loss: 0.4598, Acc: 0.7878, Val Loss: 0.6579, Val Acc: 0.6318
Epoch 5/10, Loss: 0.4137, Acc: 0.8102, Val Loss: 0.7789, Val Acc: 0.6567
Epoch 6/10, Loss: 0.4719, Acc: 0.7940, Val Loss: 0.5974, Val Acc: 0.7114
Epoch 7/10, Loss: 0.3054, Acc: 0.8714, Val Loss: 1.2037, Val Acc: 0.5522
Epoch 8/10, Loss: 0.5292, Acc: 0.7678, Val Loss: 1.2173, Val Acc: 0.5373
Epoch 9/10, Loss: 0.5383, Acc: 0.7615, Val Loss: 0.8751, Val Acc: 0.6020
Epoch 10/10, Loss: 0.4018, Acc: 0.8227, Val Loss: 0.6093, Val Acc: 0.7264


## Question 3: Median of training accuracy for all epochs


In [14]:
# Question 3: Median of training accuracy
median_train_acc = np.median(history['acc'])
print(f"Median training accuracy: {median_train_acc:.4f}")
print(f"\nTraining accuracies: {[f'{acc:.4f}' for acc in history['acc']]}")
print(f"\nAnswer: {median_train_acc:.2f}")


Median training accuracy: 0.7778

Training accuracies: ['0.6230', '0.7228', '0.6979', '0.7878', '0.8102', '0.7940', '0.8714', '0.7678', '0.7615', '0.8227']

Answer: 0.78


## Question 4: Standard deviation of training loss for all epochs


In [15]:
# Question 4: Standard deviation of training loss
std_train_loss = np.std(history['loss'])
print(f"Standard deviation of training loss: {std_train_loss:.4f}")
print(f"\nTraining losses: {[f'{loss:.4f}' for loss in history['loss']]}")
print(f"\nAnswer: {std_train_loss:.3f}")


Standard deviation of training loss: 0.0950

Training losses: ['0.6679', '0.5356', '0.5513', '0.4598', '0.4137', '0.4719', '0.3054', '0.5292', '0.5383', '0.4018']

Answer: 0.095


## Data Augmentation

Add the augmentations to your training data generator

In [16]:
# Data augmentation for training (augmentations BEFORE ToTensor and Normalize)
transform_train_aug = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=mean,
        std=std
    )
])

# Test transforms should NOT have augmentations
transform_test_aug = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Reload training dataset with augmentation from straight/curly hair data
train_dataset_aug = HairDataset(
    data_dir='./data/train',
    transform=transform_train_aug
)

# Test dataset should use the same transforms as before (no augmentation)
test_dataset_aug = HairDataset(
    data_dir='./data/test',
    transform=transform_test_aug
)

# Create new data loaders with augmented training data
train_loader_aug = DataLoader(train_dataset_aug, batch_size=20, shuffle=True)
test_loader_aug = DataLoader(test_dataset_aug, batch_size=20, shuffle=False)

In [None]:
# Train for 10 more epochs with augmentation
# Note: We're continuing to train the same model, not creating a new one
num_epochs_aug = 10
history_aug = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs_aug):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in tqdm(train_loader_aug, desc=f"Epoch {epoch+1}/{num_epochs_aug} [Train+Aug]"):
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        predicted = (outputs > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset_aug)
    epoch_acc = correct_train / total_train
    history_aug['loss'].append(epoch_loss)
    history_aug['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in tqdm(test_loader_aug, desc=f"Epoch {epoch+1}/{num_epochs_aug} [Val]"):
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (outputs > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(test_dataset_aug)
    val_epoch_acc = correct_val / total_val
    history_aug['val_loss'].append(val_epoch_loss)
    history_aug['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs_aug} (with augmentation), "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")


Epoch 1/10 (with augmentation), Loss: 0.5987, Acc: 0.6854, Val Loss: 0.5535, Val Acc: 0.7313
Epoch 2/10 (with augmentation), Loss: 0.5414, Acc: 0.7129, Val Loss: 0.6085, Val Acc: 0.6766
Epoch 3/10 (with augmentation), Loss: 0.5053, Acc: 0.7541, Val Loss: 0.5838, Val Acc: 0.7214
Epoch 4/10 (with augmentation), Loss: 0.5000, Acc: 0.7703, Val Loss: 0.5043, Val Acc: 0.7463
Epoch 5/10 (with augmentation), Loss: 0.4951, Acc: 0.7690, Val Loss: 0.8445, Val Acc: 0.6020
Epoch 6/10 (with augmentation), Loss: 0.5657, Acc: 0.7029, Val Loss: 0.5101, Val Acc: 0.7662
Epoch 7/10 (with augmentation), Loss: 0.5681, Acc: 0.7041, Val Loss: 0.7918, Val Acc: 0.6020
Epoch 8/10 (with augmentation), Loss: 0.4878, Acc: 0.7603, Val Loss: 0.9479, Val Acc: 0.5970
Epoch 9/10 (with augmentation), Loss: 0.4967, Acc: 0.7566, Val Loss: 0.5562, Val Acc: 0.7214
Epoch 10/10 (with augmentation), Loss: 0.4617, Acc: 0.7715, Val Loss: 0.5664, Val Acc: 0.7264


## Question 5: Mean of test loss for all epochs (with augmentation)


In [18]:
# Question 5: Mean of test loss for all epochs with augmentation
mean_test_loss_aug = np.mean(history_aug['val_loss'])
print(f"Mean test loss (with augmentation): {mean_test_loss_aug:.4f}")
print(f"\nTest losses: {[f'{loss:.4f}' for loss in history_aug['val_loss']]}")
print(f"\nAnswer: {mean_test_loss_aug:.3f}")


Mean test loss (with augmentation): 0.6467

Test losses: ['0.5535', '0.6085', '0.5838', '0.5043', '0.8445', '0.5101', '0.7918', '0.9479', '0.5562', '0.5664']

Answer: 0.647


## Question 6: Average of test accuracy for the last 5 epochs (epochs 6-10) with augmentation


In [19]:
# Question 6: Average of test accuracy for last 5 epochs (epochs 6-10, indices 5-9)
last_5_test_acc = history_aug['val_acc'][5:10]  # Epochs 6-10 (0-indexed: 5-9)
avg_last_5_test_acc = np.mean(last_5_test_acc)

print(f"Test accuracies for epochs 6-10: {[f'{acc:.4f}' for acc in last_5_test_acc]}")
print(f"Average test accuracy (last 5 epochs): {avg_last_5_test_acc:.4f}")
print(f"\nAnswer: {avg_last_5_test_acc:.2f}")


Test accuracies for epochs 6-10: ['0.7662', '0.6020', '0.5970', '0.7214', '0.7264']
Average test accuracy (last 5 epochs): 0.6826

Answer: 0.68
