# Homework 8

In [32]:
import os
import shutil
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

## Data Preparation

In [2]:
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
!unzip data.zip

--2025-12-03 15:57:56--  https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/405934815/e712cf72-f851-44e0-9c05-e711624af985?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-12-03T16%3A48%3A26Z&rscd=attachment%3B+filename%3Ddata.zip&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-12-03T15%3A48%3A08Z&ske=2025-12-03T16%3A48%3A26Z&sks=b&skv=2018-11-09&sig=8%2BXH3UmRZZwEid9cM7F2WWu4SQyKpXhcz9kzXQtxW1M%3D&jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc2NDc3OTI3NiwibmJmIjoxNzY0Nzc3NDc2LCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHVjdGlvbi5i

In [5]:
data_dir = "data"
train_dir = os.path.join(data_dir, "train")
test_dir  = os.path.join(data_dir, "test")

## Reproducibility

In [36]:
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Model

For this homework we will use Convolutional Neural Network (CNN). We'll use PyTorch.

You need to develop the model with following structure:

- The shape for input should be (3, 200, 200) (channels first format in PyTorch)
- Next, create a convolutional layer (nn.Conv2d):
- - Use 32 filters (output channels)
- - Kernel size should be (3, 3) (that's the size of the filter)
- - Use 'relu' as activation
- Reduce the size of the feature map with max pooling (nn.MaxPool2d)
- - Set the pooling size to (2, 2)
- Turn the multi-dimensional result into vectors using flatten or view
- Next, add a nn.Linear layer with 64 neurons and 'relu' activation
- Finally, create the nn.Linear layer with 1 neuron - this will be the output
- - The output layer should have an activation - use the appropriate activation for the binary classification case
As optimizer use torch.optim.SGD with the following parameters:

torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.applications.xception import decode_predictions

In [8]:
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_ds = train_gen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32
)

Found 800 images belonging to 2 classes.


In [9]:
train_ds.class_indices

{'curly': 0, 'straight': 1}

In [33]:
# Resize to 200x200, convert to tensor
train_transform = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
])

In [40]:
# Datasets
train_dataset = datasets.ImageFolder(root=train_dir,
                                     transform=train_transform)
validation_dataset  = datasets.ImageFolder(root=test_dir,
                                           transform=test_transform)


# DataLoaders
batch_size = 32

train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
validation_loader = DataLoader(validation_dataset,
                               batch_size=batch_size,
                               shuffle=False)

class_names = train_dataset.classes
print("Classes:", class_names)

Classes: ['curly', 'straight']


In [23]:
model = nn.Sequential(
    nn.Conv2d(
        in_channels=3,
        out_channels=32,
        kernel_size=3
    ),
    nn.ReLU(),

    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Flatten(),

    nn.Linear(32 * 99 * 99, 64),
    nn.ReLU(),

    nn.Linear(64, 1),
)

optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.002,
    momentum=0.8
)

In [25]:
# Print model summary
from torchsummary import summary
summary(model, input_size=(3, 200, 200))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             896
              ReLU-2         [-1, 32, 198, 198]               0
         MaxPool2d-3           [-1, 32, 99, 99]               0
           Flatten-4               [-1, 313632]               0
            Linear-5                   [-1, 64]      20,072,512
              ReLU-6                   [-1, 64]               0
            Linear-7                    [-1, 1]              65
Total params: 20,073,473
Trainable params: 20,073,473
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 23.93
Params size (MB): 76.57
Estimated Total Size (MB): 100.96
----------------------------------------------------------------


## Question 1

Which loss function you will use?

- nn.MSELoss()
- nn.BCEWithLogitsLoss()
- nn.CrossEntropyLoss()
- nn.CosineEmbeddingLoss()

In [38]:
criterion = nn.BCEWithLogitsLoss()

**Answer**: *BCEWithLogitsLoss*

## Question 2

What's the total number of parameters of the model?
- 896
- 11214912
- 15896912
- 20073473

**Answer**: *20073473*

## Question 3

In [13]:
from torchvision import datasets, transforms
from torchsummary import summary

In [14]:
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ) # ImageNet normalization
])

In [41]:
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.6291, Acc: 0.6500, Val Loss: 0.6826, Val Acc: 0.6119
Epoch 2/10, Loss: 0.5803, Acc: 0.6850, Val Loss: 0.6248, Val Acc: 0.6368
Epoch 3/10, Loss: 0.5910, Acc: 0.6837, Val Loss: 0.6346, Val Acc: 0.6169
Epoch 4/10, Loss: 0.5648, Acc: 0.7000, Val Loss: 0.7656, Val Acc: 0.5672
Epoch 5/10, Loss: 0.5642, Acc: 0.7175, Val Loss: 0.6340, Val Acc: 0.6318
Epoch 6/10, Loss: 0.5449, Acc: 0.7037, Val Loss: 0.6240, Val Acc: 0.6517
Epoch 7/10, Loss: 0.5203, Acc: 0.7425, Val Loss: 0.6460, Val Acc: 0.6318
Epoch 8/10, Loss: 0.5144, Acc: 0.7450, Val Loss: 0.6300, Val Acc: 0.6269
Epoch 9/10, Loss: 0.5135, Acc: 0.7375, Val Loss: 0.6367, Val Acc: 0.6318
Epoch 10/10, Loss: 0.5208, Acc: 0.7225, Val Loss: 0.6309, Val Acc: 0.6468


In [42]:
history['acc']

[0.65, 0.685, 0.68375, 0.7, 0.7175, 0.70375, 0.7425, 0.745, 0.7375, 0.7225]

In [43]:
median_acc = np.median(history['acc'])
print(median_acc)

0.7106250000000001


What is the median of training accuracy for all the epochs for this model?

- 0.05
- 0.12
- 0.40
- 0.84

**Answer**: 0.84

## Question 4

In [44]:
history['loss']

[0.6290782475471497,
 0.5803448975086212,
 0.590967845916748,
 0.5647561120986938,
 0.5641942739486694,
 0.5449065577983856,
 0.5202879226207733,
 0.5143930315971375,
 0.5135198199748993,
 0.520829393863678]

In [45]:
std_loss = np.std(history['loss'])
print(round(std_loss, 3))

0.037


In [46]:

arr = np.array(history["loss"])
std = np.std(arr, ddof=1).round(3)
print(std)

0.039


What is the standard deviation of training loss for all the epochs for this model?

- 0.007
- 0.078
- 0.171
- 1.710


**Answer**: *0.078*

## Data Augmentation

In [47]:
transforms.RandomRotation(50),
transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
transforms.RandomHorizontalFlip(),

(RandomHorizontalFlip(p=0.5),)

In [48]:
num_epochs = 20
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/20, Loss: 0.4681, Acc: 0.7762, Val Loss: 0.6316, Val Acc: 0.6368
Epoch 2/20, Loss: 0.5223, Acc: 0.7325, Val Loss: 0.6711, Val Acc: 0.6318
Epoch 3/20, Loss: 0.4648, Acc: 0.7750, Val Loss: 0.7278, Val Acc: 0.6169
Epoch 4/20, Loss: 0.4693, Acc: 0.7625, Val Loss: 0.7350, Val Acc: 0.6070
Epoch 5/20, Loss: 0.4596, Acc: 0.7837, Val Loss: 0.6203, Val Acc: 0.6468
Epoch 6/20, Loss: 0.4439, Acc: 0.7925, Val Loss: 0.6289, Val Acc: 0.6617
Epoch 7/20, Loss: 0.3975, Acc: 0.8113, Val Loss: 0.6940, Val Acc: 0.6418
Epoch 8/20, Loss: 0.4450, Acc: 0.7825, Val Loss: 0.6727, Val Acc: 0.6418
Epoch 9/20, Loss: 0.3718, Acc: 0.8462, Val Loss: 0.6130, Val Acc: 0.6716
Epoch 10/20, Loss: 0.3411, Acc: 0.8712, Val Loss: 0.6251, Val Acc: 0.6766
Epoch 11/20, Loss: 0.3585, Acc: 0.8500, Val Loss: 0.6478, Val Acc: 0.6667
Epoch 12/20, Loss: 0.3176, Acc: 0.8750, Val Loss: 0.6355, Val Acc: 0.6816
Epoch 13/20, Loss: 0.3337, Acc: 0.8488, Val Loss: 0.6547, Val Acc: 0.6866
Epoch 14/20, Loss: 0.2900, Acc: 0.8838, Val Los

In [49]:
mean_test_loss = np.mean(history['val_loss'])
print(mean_test_loss)

round(mean_test_loss, 2)

0.6894686951343694


np.float64(0.69)

## Question 5

What is the mean of test loss for all the epochs for the model trained with augmentations?

- 0.008
- 0.08
- 0.88
- 8.88

**Answer**: 0.88

## Question 6

In [50]:
last_5_acc = history['val_acc'][5:10]
avg_acc = np.mean(last_5_acc)

print(last_5_acc)
print(avg_acc)

[0.6616915422885572, 0.6417910447761194, 0.6417910447761194, 0.6716417910447762, 0.6766169154228856]
0.6587064676616915


What's the average of test accuracy for the last 5 epochs (from 6 to 10) for the model trained with augmentations?

- 0.08
- 0.28
- 0.68
- 0.98

**Answer**: 0.68