# homework - week 8 - deep learning

### setup

In [1]:
import torch
import torchvision.models as models
import torch.optim as optim
import numpy as np
import os
import glob

from torch import nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

from PIL import Image
from torchsummary import summary


In [None]:
!wget https://github.com/SVizor42/ML_Zoomcamp/releases/download/straight-curly-data/data.zip
!unzip data.zip

In [2]:
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

Using cuda device


### data processing

In [4]:
class HairDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [5]:
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ) # ImageNet normalization
])

In [6]:
train_dataset = HairDataset(
    data_dir='./data/train',
    transform=train_transforms
)

validation_dataset = HairDataset(
    data_dir='./data/test',
    transform=train_transforms
)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False)

### model setup

In [7]:
class HairClassifier(nn.Module):
    def __init__(self, num_classes=10):
        super(HairClassifier, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(3, 3))
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d((2, 2))
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=313632, out_features=64)
        self.relu2 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.relu2(x)
        x = self.fc2(x)

        return x

### Q1

In [8]:
model = HairClassifier().to(device)

# Define loss function
criterion = nn.BCEWithLogitsLoss()

# Define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

print(model)

HairClassifier(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=313632, out_features=64, bias=True)
  (relu2): ReLU()
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)


### Q2

In [9]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")

Total parameters: 20073473


In [10]:
model = model.to(device)
summary(model, input_size=(3, 200, 200))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             896
              ReLU-2         [-1, 32, 198, 198]               0
         MaxPool2d-3           [-1, 32, 99, 99]               0
           Flatten-4               [-1, 313632]               0
            Linear-5                   [-1, 64]      20,072,512
              ReLU-6                   [-1, 64]               0
            Linear-7                    [-1, 1]              65
Total params: 20,073,473
Trainable params: 20,073,473
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 23.93
Params size (MB): 76.57
Estimated Total Size (MB): 100.96
----------------------------------------------------------------


### model training

In [11]:
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.6204, Acc: 0.6542, Val Loss: 0.7231, Val Acc: 0.6070
Epoch 2/10, Loss: 0.5821, Acc: 0.6667, Val Loss: 0.6047, Val Acc: 0.6517
Epoch 3/10, Loss: 0.5090, Acc: 0.7428, Val Loss: 0.6072, Val Acc: 0.6368
Epoch 4/10, Loss: 0.4558, Acc: 0.7715, Val Loss: 0.9366, Val Acc: 0.6418
Epoch 5/10, Loss: 0.5393, Acc: 0.7441, Val Loss: 0.5883, Val Acc: 0.6716
Epoch 6/10, Loss: 0.4120, Acc: 0.8015, Val Loss: 0.8257, Val Acc: 0.6269
Epoch 7/10, Loss: 0.4234, Acc: 0.7978, Val Loss: 0.6895, Val Acc: 0.6517
Epoch 8/10, Loss: 0.2663, Acc: 0.8839, Val Loss: 0.6653, Val Acc: 0.7015
Epoch 9/10, Loss: 0.2252, Acc: 0.9151, Val Loss: 0.6730, Val Acc: 0.7114
Epoch 10/10, Loss: 0.2699, Acc: 0.8851, Val Loss: 0.7895, Val Acc: 0.6866


### Q3

In [12]:
median_acc = np.median(history['acc'])
print(f"Median training accuracy: {median_acc}")

Median training accuracy: 0.7846441947565543


### Q4

In [13]:
std_loss = np.std(history['loss'])
print(f"Std of training loss: {std_loss}")

Std of training loss: 0.13152912585390664


### Q5 & Q6

In [14]:
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

validation_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ) # ImageNet normalization
])

In [15]:
train_dataset = HairDataset(
    data_dir='./data/train',
    transform=train_transforms
)

validation_dataset = HairDataset(
    data_dir='./data/test',
    transform=validation_transforms
)

train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False)

In [16]:
num_epochs = 10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.6677, Acc: 0.6542, Val Loss: 0.6660, Val Acc: 0.6318
Epoch 2/10, Loss: 0.5901, Acc: 0.6729, Val Loss: 0.7362, Val Acc: 0.6567
Epoch 3/10, Loss: 0.5572, Acc: 0.7091, Val Loss: 0.6019, Val Acc: 0.6816
Epoch 4/10, Loss: 0.5275, Acc: 0.7278, Val Loss: 0.5984, Val Acc: 0.7015
Epoch 5/10, Loss: 0.5437, Acc: 0.7216, Val Loss: 0.5475, Val Acc: 0.7512
Epoch 6/10, Loss: 0.5534, Acc: 0.7191, Val Loss: 0.5556, Val Acc: 0.6915
Epoch 7/10, Loss: 0.5401, Acc: 0.7278, Val Loss: 0.5691, Val Acc: 0.7313
Epoch 8/10, Loss: 0.5022, Acc: 0.7428, Val Loss: 0.6425, Val Acc: 0.6816
Epoch 9/10, Loss: 0.5027, Acc: 0.7690, Val Loss: 0.7372, Val Acc: 0.6617
Epoch 10/10, Loss: 0.5137, Acc: 0.7566, Val Loss: 0.7677, Val Acc: 0.6766


In [17]:
mean_val_loss = np.mean(history['val_loss'][-10:])  # Last 10 epochs
print(f"Mean validation loss: {mean_val_loss}")

Mean validation loss: 0.642218875243741


In [18]:
avg_val_acc_last5 = np.mean(history['val_acc'][-5:])  # Last 5 epochs
print(f"Average validation accuracy (last 5): {avg_val_acc_last5}")

Average validation accuracy (last 5): 0.6885572139303483
