# Week 8 Homework - Deep Learning

## Data download

In [1]:
# download the data
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip

--2025-12-03 00:51:00--  https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
Resolving github.com (github.com)... 20.207.73.82
Connecting to github.com (github.com)|20.207.73.82|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/405934815/e712cf72-f851-44e0-9c05-e711624af985?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-12-02T20%3A16%3A16Z&rscd=attachment%3B+filename%3Ddata.zip&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-12-02T19%3A15%3A54Z&ske=2025-12-02T20%3A16%3A16Z&sks=b&skv=2018-11-09&sig=zhWpKupN4am%2BDPQqkGXIAQ3M7BM15QianGuzQ7ZevtU%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc2NDcwNTA2MCwibmJmIjoxNzY0NzAzMjYwLCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHVjdGlv

In [2]:
# unzip the data
!unzip data.zip

Archive:  data.zip
   creating: data
   creating: data/test
   creating: data/test/curly
  inflating: data/test/curly/03312ac556a7d003f7570657f80392c34.jpg  
  inflating: data/test/curly/106dfcf4abe76990b585b2fc2e3c9f884.jpg  
  inflating: data/test/curly/1a9dbe23a0d95f1c292625960e4509184.jpg  
  inflating: data/test/curly/341ea26e6677b655f8447af56073204a4.jpg  
  inflating: data/test/curly/61aPFVrm42L._SL1352_.jpg  
  inflating: data/test/curly/6d8acb0fe980774ea4e5631198587f45.png  
  inflating: data/test/curly/7f5649a0c33a2b334f23221a52c16b9b.jpg  
  inflating: data/test/curly/90146673.jpg  
  inflating: data/test/curly/9b3608e01d78fbabc9fb0719323d507f4.jpg  
  inflating: data/test/curly/b171c99161f3cffc12d4b74488ef2fc6.jpg  
  inflating: data/test/curly/blogger_one.jpg  
  inflating: data/test/curly/c03ca1590aa4df74e922ad8257305a2b.jpg  
  inflating: data/test/curly/c1b89bb4f86a3478ec20ce1f63f003c1.jpg  
  inflating: data/test/curly/c5.jpg  
  inflating: data/test/

In [21]:
# Import necessary libraries
import numpy as np
import torch
import torch.nn as nn
from torchsummary import summary
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt

%matplotlib inline

In [8]:
# Set the random seed for reproducibility to be 42
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Set up the CNN Model
You need to develop the model with following structure:

* The shape for input should be `(3, 200, 200)` (channels first format in PyTorch)
* Next, create a convolutional layer (`nn.Conv2d`):
    * Use 32 filters (output channels)
    * Kernel size should be `(3, 3)` (that's the size of the filter), padding = 0, stride = 1
    * Use `'relu'` as activation
* Reduce the size of the feature map with max pooling (`nn.MaxPool2d`)
    * Set the pooling size to `(2, 2)`
* Turn the multi-dimensional result into vectors using `flatten` or `view`
* Next, add a `nn.Linear` layer with 64 neurons and `'relu'` activation
* Finally, create the `nn.Linear` layer with 1 neuron - this will be the output
    * The output layer should have an activation - use the appropriate activation for the binary classification case

As optimizer use `torch.optim.SGD` with the following parameters:

* `torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)`


In [11]:
 # Define the CNN Model
class HairTypeCNN(nn.Module):
  def __init__(self):
      super(HairTypeCNN, self).__init__()

      # Convolutional layer: 32 filters, kernel (3,3), padding=0, stride=1
      self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3,
padding=0, stride=1)
      self.relu1 = nn.ReLU()

      # Max pooling layer: pooling size (2,2)
      self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

      # After conv: (3, 200, 200) -> (32, 198, 198)
      # After pool: (32, 198, 198) -> (32, 99, 99)
      # Flattened size: 32 * 99 * 99 = 313,632

      # Fully connected layers
      self.fc1 = nn.Linear(32 * 99 * 99, 64)
      self.relu2 = nn.ReLU()

      # Output layer: 1 neuron for binary classification
      self.fc2 = nn.Linear(64, 1)
      self.sigmoid = nn.Sigmoid()

  def forward(self, x):
      # Convolutional layer with ReLU
      x = self.conv1(x)
      x = self.relu1(x)

      # Max pooling
      x = self.pool(x)

      # Flatten
      x = x.view(x.size(0), -1)

      # First fully connected layer with ReLU
      x = self.fc1(x)
      x = self.relu2(x)

      # Output layer (returns logits, not probabilities)
      x = self.fc2(x)

      return x


In [12]:
# Create the model
model = HairTypeCNN()

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

print(model)

HairTypeCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=313632, out_features=64, bias=True)
  (relu2): ReLU()
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [13]:
 # Define the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

## Question 1
Which loss function you will use?
- nn.MSELoss()
- nn.BCEWithLogitsLoss()
- nn.CrossEntropyLoss()
- nn.CosineEmbeddingLoss()
(Multiple answered can be correct, so pick any)

### Answer : Option B
> `nn.BCEWithLogitsLoss()`

- This is a binary classification problem classifying hair types into 2 categories
- BCE is designed for binary classification tasks
- WithLogits expects raw logits as input and applies sigmoid internally. Also the training code shared uses raw logits

## Question 2
- What's the total number of parameters of the model? You can use torchsummary or count manually.

In [15]:
# Count total parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

# Or using torchsummary
summary(model, input_size=(3, 200, 200))

Total parameters: 20,073,473
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             896
              ReLU-2         [-1, 32, 198, 198]               0
         MaxPool2d-3           [-1, 32, 99, 99]               0
            Linear-4                   [-1, 64]      20,072,512
              ReLU-5                   [-1, 64]               0
            Linear-6                    [-1, 1]              65
Total params: 20,073,473
Trainable params: 20,073,473
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 21.54
Params size (MB): 76.57
Estimated Total Size (MB): 98.57
----------------------------------------------------------------


### Answer : Option D
> Total parameters: 20,073,473

## Generators and Training

In [22]:
# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
input_size = 200

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

test_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [23]:
# Load the datasets
train_dataset = datasets.ImageFolder(
  root='data/train',
  transform=train_transforms
)

validation_dataset = datasets.ImageFolder(
  root='data/test',
  transform=test_transforms
)


In [24]:
# declare batch size as 20
batch_size = 20

# Create data loaders
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
validation_loader = DataLoader(validation_dataset,batch_size=batch_size,shuffle=False)

In [25]:
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

In [27]:
# Define the loss function
criterion = nn.BCEWithLogitsLoss()

In [28]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.6370, Acc: 0.6250, Val Loss: 0.6774, Val Acc: 0.6269
Epoch 2/10, Loss: 0.5508, Acc: 0.7288, Val Loss: 0.7913, Val Acc: 0.5721
Epoch 3/10, Loss: 0.5044, Acc: 0.7538, Val Loss: 0.6268, Val Acc: 0.6468
Epoch 4/10, Loss: 0.4535, Acc: 0.7788, Val Loss: 0.6964, Val Acc: 0.6667
Epoch 5/10, Loss: 0.4061, Acc: 0.8250, Val Loss: 0.6546, Val Acc: 0.6567
Epoch 6/10, Loss: 0.3270, Acc: 0.8550, Val Loss: 0.7766, Val Acc: 0.6766
Epoch 7/10, Loss: 0.2732, Acc: 0.8862, Val Loss: 0.6556, Val Acc: 0.7313
Epoch 8/10, Loss: 0.2504, Acc: 0.8938, Val Loss: 0.7788, Val Acc: 0.6915
Epoch 9/10, Loss: 0.1882, Acc: 0.9150, Val Loss: 0.7553, Val Acc: 0.7065
Epoch 10/10, Loss: 0.1658, Acc: 0.9287, Val Loss: 0.8221, Val Acc: 0.7164


## Question 3
- What is the median of training accuracy for all the epochs for this model?

In [32]:
train_accuracies = history['acc']
median_train_acc = np.median(train_accuracies)

print(f"Training accuracies for all epochs: {train_accuracies}")
print(f"Median training accuracy: {median_train_acc:.4f}")

Training accuracies for all epochs: [0.625, 0.72875, 0.75375, 0.77875, 0.825, 0.855, 0.88625, 0.89375, 0.915, 0.92875]
Median training accuracy: 0.8400


### Answer : Option D
> Median training accuracy: 0.84

## Question 4
- What is the standard deviation of training loss for all the epochs for this model?

In [33]:
train_losses = history['loss']
std_train_loss = np.std(train_losses)

print(f"Training losses for all epochs: {train_losses}")
print(f"Standard deviation of training loss: {std_train_loss:.4f}")

Training losses for all epochs: [0.6370332680642605, 0.5507597908377647, 0.5044267348945141, 0.4534881852567196, 0.40610496401786805, 0.32695389464497565, 0.27321023158729074, 0.25039489604532716, 0.18823296772316098, 0.16575322356075048]
Standard deviation of training loss: 0.1518


### Answer : Option C
> Standard deviation of training loss: 0.171

## Question 5
Let's train our model for 10 more epochs using the same code as previously.
> Note: make sure you don't re-create the model. we want to continue training the model we already started training.

- What is the mean of test loss for all the epochs for the model trained with augmentations?

In [55]:
# Define training transforms WITH augmentations
train_transforms_aug = transforms.Compose([
  transforms.RandomRotation(10),
  transforms.RandomResizedCrop(200, scale=(0.9, 1.0)),
  transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
  transforms.Normalize(mean=mean,std=std)
])

# Create new training dataset with augmentations
train_dataset_aug = datasets.ImageFolder(
  root='data/train',
  transform=train_transforms_aug
)

# Create new training data loader with augmentations
train_loader_aug = DataLoader(
  train_dataset_aug,
  batch_size=20,
  shuffle=True
)

print(f"Training dataset with augmentations: {len(train_dataset_aug)}")

Training dataset with augmentations: 800


In [56]:
num_epochs_aug = 10
history_aug = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

In [57]:
for epoch in range(num_epochs_aug):
  model.train()
  running_loss = 0.0
  correct_train = 0
  total_train = 0
  for images, labels in train_loader_aug:  # Using augmented data loader
      images, labels = images.to(device), labels.to(device)
      labels = labels.float().unsqueeze(1)

      optimizer.zero_grad()
      outputs = model(images)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      running_loss += loss.item() * images.size(0)
      predicted = (torch.sigmoid(outputs) > 0.5).float()
      total_train += labels.size(0)
      correct_train += (predicted == labels).sum().item()

  epoch_loss = running_loss / len(train_dataset_aug)
  epoch_acc = correct_train / total_train
  history_aug['loss'].append(epoch_loss)
  history_aug['acc'].append(epoch_acc)

  model.eval()
  val_running_loss = 0.0
  correct_val = 0
  total_val = 0
  with torch.no_grad():
      for images, labels in validation_loader:
          images, labels = images.to(device), labels.to(device)
          labels = labels.float().unsqueeze(1)

          outputs = model(images)
          loss = criterion(outputs, labels)

          val_running_loss += loss.item() * images.size(0)
          predicted = (torch.sigmoid(outputs) > 0.5).float()
          total_val += labels.size(0)
          correct_val += (predicted == labels).sum().item()

  val_epoch_loss = val_running_loss / len(validation_dataset)
  val_epoch_acc = correct_val / total_val
  history_aug['val_loss'].append(val_epoch_loss)
  history_aug['val_acc'].append(val_epoch_acc)

  print(f"Epoch {epoch+1}/{num_epochs_aug}, "
        f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
        f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.3860, Acc: 0.8350, Val Loss: 0.5634, Val Acc: 0.7015
Epoch 2/10, Loss: 0.3751, Acc: 0.8237, Val Loss: 0.5407, Val Acc: 0.7114
Epoch 3/10, Loss: 0.3987, Acc: 0.8125, Val Loss: 0.5936, Val Acc: 0.7114
Epoch 4/10, Loss: 0.3445, Acc: 0.8375, Val Loss: 0.5273, Val Acc: 0.7313
Epoch 5/10, Loss: 0.3404, Acc: 0.8538, Val Loss: 0.4737, Val Acc: 0.7363
Epoch 6/10, Loss: 0.3234, Acc: 0.8650, Val Loss: 0.5328, Val Acc: 0.7413
Epoch 7/10, Loss: 0.3353, Acc: 0.8462, Val Loss: 0.5298, Val Acc: 0.7562
Epoch 8/10, Loss: 0.2957, Acc: 0.8688, Val Loss: 0.4971, Val Acc: 0.7413
Epoch 9/10, Loss: 0.2943, Acc: 0.8625, Val Loss: 0.5023, Val Acc: 0.7512
Epoch 10/10, Loss: 0.2800, Acc: 0.8862, Val Loss: 0.4815, Val Acc: 0.7761


In [62]:
all_val_losses = history['val_loss'] + history_aug['val_loss']
mean_all_val_loss = np.mean(all_val_losses)

print("\n--- Question 5 (All 20 epochs) ---")
print(f"Validation losses from initial 10 epochs: {history['val_loss']}")
print(f"Validation losses from augmented 10 epochs: {history_aug['val_loss']}")
print(f"All validation losses (20 epochs): {all_val_losses}")
print(f"Mean of test/validation loss (all 20 epochs): {mean_all_val_loss:.4f}")




--- Question 5 (All 20 epochs) ---
Validation losses from initial 10 epochs: [0.6774433258902374, 0.7912714639261588, 0.6268203786356532, 0.6964370359235735, 0.6546140087777702, 0.7765639792034282, 0.6556346054693952, 0.7788022706164649, 0.7553474221063491, 0.8220905878057527]
Validation losses from augmented 10 epochs: [0.5633561821719307, 0.5406737262336769, 0.5935937659953957, 0.5272957721159826, 0.47369566544964536, 0.5327601429834887, 0.5297813961161902, 0.49710180895838574, 0.5023328564060268, 0.48154821087471883]
All validation losses (20 epochs): [0.6774433258902374, 0.7912714639261588, 0.6268203786356532, 0.6964370359235735, 0.6546140087777702, 0.7765639792034282, 0.6556346054693952, 0.7788022706164649, 0.7553474221063491, 0.8220905878057527, 0.5633561821719307, 0.5406737262336769, 0.5935937659953957, 0.5272957721159826, 0.47369566544964536, 0.5327601429834887, 0.5297813961161902, 0.49710180895838574, 0.5023328564060268, 0.48154821087471883]
Mean of test/validation loss (all 

### Answer : Option C
> mean of test loss for all the epochs for the model trained with augmentations : 0.88

## Question 6
- What's the average of test accuracy for the last 5 epochs (from 6 to 10) for the model trained with augmentations?

In [63]:
last_5_val_acc = history_aug['val_acc'][5:10]
avg_last_5_val_acc = np.mean(last_5_val_acc)
print(f"Average test accuracy for last 5 epochs: {avg_last_5_val_acc:.4f}")

Average test accuracy for last 5 epochs: 0.7532


### Answer : Option C
> Average test accuracy for last 5 epochs: 0.68