In [None]:
import numpy as np
import torch

# Single seed shared by every random number generator used in this notebook.
SEED = 42

# cuDNN: turn the autotuner off and ask for deterministic algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# CPU-side generators (PyTorch and NumPy).
torch.manual_seed(SEED)
np.random.seed(SEED)

# GPU-side generators, only touched when a CUDA device is present.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    torch.cuda.manual_seed(SEED)

In [None]:
from torch import nn
from torch import optim
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import os

In [None]:
# Minimal preprocessing used to inspect a single image below.
transform = transforms.Compose([
    transforms.Resize((200, 200)),  # resize to 200x200
    transforms.ToTensor(),          # converts to tensor AND changes HWC -> CHW
])

In [None]:
# Root folder holding the `train/` and `test/` image directories.
# Set the HAIR_DATA_DIR environment variable to point the notebook at another
# location; the original machine-specific path stays as the default.
path = os.environ.get(
    'HAIR_DATA_DIR',
    '/home/lautivuelos/machine-learning-zoomcamp-homework/08-deep-learning/data',
)

In [None]:
# Open one training image from the `curly` class for inspection.
img = Image.open(f'{path}/train/curly/00cbad1ffe22d900018e5a2e7376daed4.jpg')

In [None]:
# Apply the resize + ToTensor pipeline to the sample image and display the resulting tensor.
transform(img)

tensor([[[0.3725, 0.3804, 0.3804,  ..., 0.5725, 0.5725, 0.5804],
         [0.3804, 0.3882, 0.3843,  ..., 0.5804, 0.5765, 0.5804],
         [0.3882, 0.3922, 0.3922,  ..., 0.5882, 0.5765, 0.5765],
         ...,
         [0.4549, 0.4588, 0.4627,  ..., 0.8627, 0.8667, 0.8745],
         [0.4588, 0.4667, 0.4667,  ..., 0.8667, 0.8706, 0.8667],
         [0.4667, 0.4706, 0.4667,  ..., 0.8549, 0.8667, 0.8667]],

        [[0.3373, 0.3451, 0.3451,  ..., 0.5529, 0.5529, 0.5608],
         [0.3412, 0.3490, 0.3451,  ..., 0.5608, 0.5569, 0.5608],
         [0.3412, 0.3490, 0.3451,  ..., 0.5686, 0.5569, 0.5569],
         ...,
         [0.3216, 0.3255, 0.3294,  ..., 0.8588, 0.8627, 0.8706],
         [0.3255, 0.3333, 0.3333,  ..., 0.8627, 0.8667, 0.8627],
         [0.3333, 0.3373, 0.3333,  ..., 0.8510, 0.8627, 0.8627]],

        [[0.2784, 0.2863, 0.2863,  ..., 0.4667, 0.4667, 0.4745],
         [0.2745, 0.2824, 0.2824,  ..., 0.4745, 0.4706, 0.4745],
         [0.2627, 0.2706, 0.2784,  ..., 0.4824, 0.4706, 0.

In [None]:
# Read the shape straight from the tensor (no NumPy round trip); shown as a
# plain tuple in (channels, height, width) order.
tuple(transform(img).shape)

  np.array(transform(img)).shape


(3, 200, 200)

In [None]:
class ClothingDataset(Dataset):
    """Image-folder dataset: one sub-directory of `data_dir` per class.

    Class names are the sorted, non-hidden sub-directory names of `data_dir`;
    each class is assigned the integer index of its position in that list.
    Every regular, non-hidden file inside a class directory becomes one sample.

    Args:
        data_dir: Directory whose sub-directories are the classes.
        transform: Optional callable applied to the RGB PIL image in
            `__getitem__`.

    Attributes set by the constructor: `classes`, `class_to_idx`,
    `image_paths` and `labels` (the last two are parallel lists).
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only real directories are classes: stray files (README, .DS_Store)
        # and hidden folders such as .ipynb_checkpoints are ignored.
        self.classes = sorted(
            entry
            for entry in os.listdir(data_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(data_dir, entry))
        )
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            # os.listdir order is filesystem-dependent; sorting keeps the
            # sample order (and therefore seeded shuffling) reproducible.
            for img_name in sorted(os.listdir(label_dir)):
                img_path = os.path.join(label_dir, img_name)
                if img_name.startswith('.') or not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        """Return the number of samples found under `data_dir`."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return `(image, label)` for sample `idx`.

        The image is loaded as RGB and passed through `transform` when one
        was given; `label` is the integer class index.
        """
        img_path = self.image_paths[idx]
        # convert() returns an independent image, so the file handle opened by
        # Image.open can be released right away.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
# Baseline pipeline: resize every image to a fixed square and convert it to a tensor.
input_size = 200
target_hw = (input_size, input_size)

train_transforms = transforms.Compose([transforms.Resize(target_hw), transforms.ToTensor()])
test_transforms = transforms.Compose([transforms.Resize(target_hw), transforms.ToTensor()])

# One dataset per split, each reading from its own sub-folder of `path`.
train_dataset = ClothingDataset(data_dir=f'{path}/train', transform=train_transforms)
test_dataset = ClothingDataset(data_dir=f'{path}/test', transform=test_transforms)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

In [None]:
import torch
import torch.nn as nn

class HairClassifier(nn.Module):
    """Small CNN producing one logit per image (binary classification).

    Architecture: Conv2d(3 -> 32, 3x3, stride 1, no padding) -> ReLU ->
    MaxPool2d(2x2) -> flatten -> Linear(..., 64) -> ReLU -> Linear(64, 1).
    The output is a raw logit; no sigmoid is applied here.

    Args:
        input_size: Side length of the square input images. Defaults to 200,
            which yields the original 32 * 99 * 99 flattened features.

    Raises:
        ValueError: If `input_size` is too small to leave at least one pixel
            after the convolution and pooling.
    """

    def __init__(self, input_size=200):
        super().__init__()

        # Convolutional layer
        self.conv = nn.Conv2d(
            in_channels=3,
            out_channels=32,
            kernel_size=(3, 3),
            stride=1,
            padding=0
        )

        # Activation (stateless, so the same module is reused twice in forward)
        self.relu = nn.ReLU()

        # Max pooling
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))

        # Spatial size after the layers above: an unpadded 3x3 convolution
        # removes 2 pixels per side length, then 2x2 pooling halves it (floor).
        conv_side = input_size - 2
        pooled_side = conv_side // 2
        if pooled_side < 1:
            raise ValueError(
                f"input_size={input_size} is too small for a 3x3 conv followed by 2x2 pooling"
            )

        # Fully connected layers
        self.inner = nn.Linear(32 * pooled_side * pooled_side, 64)
        self.output = nn.Linear(64, 1)

    def forward(self, x):
        """Map a batch of shape (N, 3, input_size, input_size) to logits of shape (N, 1)."""
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)

        # Keep the batch dimension, flatten channels and spatial dims together.
        x = torch.flatten(x, 1)

        x = self.inner(x)
        x = self.relu(x)

        x = self.output(x)

        return x


In [None]:
# Why does the `inner` layer take 32 * 99 * 99 inputs?
# Push one random 200x200 image through conv -> relu -> pool and count the
# values that are left once everything is flattened.

x = torch.randn(1, 3, 200, 200)
model = HairClassifier()

with torch.no_grad():
    for layer in (model.conv, model.relu, model.pool):
        x = layer(x)
    x = torch.flatten(x)
    print(x.shape)

torch.Size([313632])


In [None]:
# Print per-layer output shapes and parameter counts with torchsummary
# (install with: pip install torchsummary).
from torchsummary import summary
summary(model, input_size=(3, 200, 200), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             896
              ReLU-2         [-1, 32, 198, 198]               0
         MaxPool2d-3           [-1, 32, 99, 99]               0
            Linear-4                   [-1, 64]      20,072,512
              ReLU-5                   [-1, 64]               0
            Linear-6                    [-1, 1]              65
Total params: 20,073,473
Trainable params: 20,073,473
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 21.54
Params size (MB): 76.57
Estimated Total Size (MB): 98.57
----------------------------------------------------------------


In [None]:
# ImageNet normalization statistics (per RGB channel).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Both splits get the same deterministic preprocessing:
# resize -> tensor -> ImageNet normalization.
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

test_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

train_dataset = ClothingDataset(data_dir=f'{path}/train', transform=train_transforms)
test_dataset = ClothingDataset(data_dir=f'{path}/test', transform=test_transforms)

# Batches of 20; only the training split is shuffled.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=20)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=20)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fresh model, moved to the chosen device.
model = HairClassifier().to(device)

# Plain SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

# The model outputs a raw logit, so the loss applies the sigmoid itself.
criterion = nn.BCEWithLogitsLoss()

In [None]:
def _run_epoch(loader, training):
    """Run one full pass over `loader` and return `(mean_loss, accuracy)`.

    Uses the notebook-level `model`, `optimizer`, `criterion` and `device`.
    With `training=True` the weights are updated after every batch; with
    `training=False` the model is switched to eval mode and no gradients
    are tracked.
    """
    model.train(training)  # train(False) is the same switch as eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            # BCEWithLogitsLoss needs float targets shaped like the logits: (batch, 1).
            labels = labels.float().unsqueeze(1)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # The loss is a batch mean; weight it by batch size so the epoch
            # figure is a true per-sample average even with a short last batch.
            loss_sum += loss.item() * images.size(0)
            # Logits -> probabilities -> hard 0/1 predictions at threshold 0.5.
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

    return loss_sum / n_seen, n_correct / n_seen


def train_model(num_epochs=10):
    """Train the notebook-level `model` and evaluate it after every epoch.

    Each epoch runs one training pass over `train_loader` followed by one
    evaluation pass over `test_loader`, and prints a one-line summary.

    Args:
        num_epochs: Number of epochs to run (default 10).

    Returns:
        dict with keys 'acc', 'loss', 'val_acc' and 'val_loss'; each value is
        a list holding one float per epoch.
    """
    history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}

    for epoch in range(num_epochs):
        epoch_loss, epoch_acc = _run_epoch(train_loader, training=True)
        history['loss'].append(epoch_loss)
        history['acc'].append(epoch_acc)

        val_epoch_loss, val_epoch_acc = _run_epoch(test_loader, training=False)
        history['val_loss'].append(val_epoch_loss)
        history['val_acc'].append(val_epoch_acc)

        print(f"Epoch {epoch+1}/{num_epochs}, "
            f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
            f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

    # Hand the metrics back so callers can analyse them after training.
    return history

Epoch 1/10, Loss: 0.6742, Acc: 0.5993, Val Loss: 0.6326, Val Acc: 0.6368
Epoch 2/10, Loss: 0.6282, Acc: 0.6242, Val Loss: 0.6016, Val Acc: 0.6517
Epoch 3/10, Loss: 0.6095, Acc: 0.6429, Val Loss: 0.5924, Val Acc: 0.6617
Epoch 4/10, Loss: 0.5980, Acc: 0.6592, Val Loss: 0.6013, Val Acc: 0.6667


In [None]:
# Bind the result to `history`, the name the next cell reads when it builds its DataFrame.
history = train_model()

In [None]:
import pandas as pd

# Tabulate the training metrics; expects `history` to be defined at notebook scope.
df_history = pd.DataFrame(history)

In [None]:
# Column-wise mean: one average per metric over all rows of df_history.
df_history.mean()

acc         0.096118
loss        0.149470
val_acc     0.057963
val_loss    0.201978
dtype: float64

In [None]:
# Training-only augmentation: random rotation, random resized crop and random
# horizontal flip on top of the resize + normalization pipeline.
train_transforms = transforms.Compose([
    transforms.Resize((200, 200)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ), # ImageNet normalization
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(),
])

# train_model() iterates the notebook-level `train_loader`, which still wraps a
# dataset built with the previous transforms. Rebuild both so the augmentation
# above is actually applied during the next training run.
train_dataset = ClothingDataset(
    data_dir=f'{path}/train',
    transform=train_transforms
)
train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)

In [None]:
# Second training run: `model` and `optimizer` are not re-created before this
# call, so training continues from the weights left by the previous run.
train_model()