In [1]:
import numpy as np
import matplotlib.pyplot as plt


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision.models as models
from torchvision.io import read_image
from torchvision import transforms
from torchvision import datasets
from torchsummary import summary


from PIL import Image

%matplotlib inline

In [2]:
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt -O imagenet_classes.txt

--2025-11-23 19:03:50--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8001::154, 2606:50c0:8002::154, 2606:50c0:8003::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8001::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt’


2025-11-23 19:03:50 (4.72 MB/s) - ‘imagenet_classes.txt’ saved [10472/10472]



In [3]:
!tree -d ./data

[1;36m./data[0m
├── [1;36mtest[0m
│   ├── [1;36mcurly[0m
│   └── [1;36mstraight[0m
└── [1;36mtrain[0m
    ├── [1;36mcurly[0m
    └── [1;36mstraight[0m

7 directories


Reproducibility

Use Set Randomm Seed Generator

In [4]:
#Setup Library
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
def top5_classes_from(indices):
    with open("imagenet_classes.txt", "r") as f:
        categories = [s.strip() for s in f.readlines()]
    
    # Get top 5 predictions
    top5_indices = indices[0, :5].tolist()
    top5_classes = [categories[i] for i in top5_indices]
    
    print("Top 5 predictions:")
    for i, class_name in enumerate(top5_classes):
        print(f"{i+1}: {class_name}")

In [7]:
#PIL Load Images Example (Not on Colab)
preprocess_example = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

curly_img = "data/train/curly/images78.jpg"
straight_img = "data/train/straight/s8.jpg"

#Use predefined model and predict without any further training.
# Load pre-trained model
model = models.mobilenet_v2(weights='IMAGENET1K_V1')
model.eval()

for img_path in [curly_img, straight_img]:
    img = Image.open(img_path)
    # Resize to target size
    img = preprocess_example(img)

    batch_t = torch.unsqueeze(img, 0)
    
    # Make prediction
    with torch.no_grad():
        output = model(batch_t)

    _, sorted_indices = torch.sort(output, descending=True)
    top5_classes_from(sorted_indices)


    
    

Top 5 predictions:
1: wig
2: abaya
3: chain mail
4: wool
5: stole
Top 5 predictions:
1: wig
2: neck brace
3: suit
4: brassiere
5: lab coat


# Prepare Training and Validation Datasets

- Resize
- Permute
- Batch Size
- Shuffle or Not
- Normalize
- Augment (optional regularization control to be applied only to training datasets

# Homework - Model 
You need to develop the model with following structure:

The shape for input should be (3, 200, 200) (channels first format in PyTorch)

Next, create a convolutional layer (nn.Conv2d):
Use 32 filters (output channels)
Kernel size should be (3, 3) (that's the size of the filter)
Use 'relu' as activation
Reduce the size of the feature map with max pooling (nn.MaxPool2d)
Set the pooling size to (2, 2)
Turn the multi-dimensional result into vectors using flatten or view
Next, add a nn.Linear layer with 64 neurons and 'relu' activation
Finally, create the nn.Linear layer with 1 neuron - this will be the output
The output layer should have an activation - use the appropriate activation for the binary classification case
As optimizer use torch.optim.SGD with the following parameters:

torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)

In [8]:
import torch
import torch.nn as nn


class BinaryCNN(nn.Module):
    def __init__(self, input_shape=(3, 200, 200)):
        super(BinaryCNN, self).__init__()

        C, H, W = input_shape

        #Layers
        self.conv1 = nn.Conv2d(
            in_channels=C,
            out_channels=32,
            kernel_size=3
        )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Activation modules
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        #Some ChatGPT magic to calculate this
        dummy = torch.zeros(1, C, H, W)
        dummy_out = self.pool(self.relu(self.conv1(dummy)))
        self.flattened_size = dummy_out.numel()
        print("flattened_size:", self.flattened_size)

        # Flatten and Dense
        self.fc1 = nn.Linear(self.flattened_size, 64)
        self.fc2 = nn.Linear(64, 1)  # output neuron

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)

        x = x.view(x.size(0), -1)  # flatten

        x = self.fc1(x)
        x = self.relu(x)

        x = self.fc2(x)
        x = self.sigmoid(x)  # binary output probability

        return x


In [10]:
# ---------- Example Usage ----------

model = BinaryCNN(input_shape=(3, 200, 200))
model.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)


from torchsummary import summary
summary(model, input_size=(3, 200, 200))

# Option 2: Manual counting
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")


flattened_size: 313632
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             896
              ReLU-2         [-1, 32, 198, 198]               0
         MaxPool2d-3           [-1, 32, 99, 99]               0
            Linear-4                   [-1, 64]      20,072,512
              ReLU-5                   [-1, 64]               0
            Linear-6                    [-1, 1]              65
           Sigmoid-7                    [-1, 1]               0
Total params: 20,073,473
Trainable params: 20,073,473
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 21.54
Params size (MB): 76.57
Estimated Total Size (MB): 98.57
----------------------------------------------------------------
Total parameters: 20073473


In [11]:
import os
from torch.utils.data import Dataset
from PIL import Image

class BinaryDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [12]:
#Hard-code everything ...
def make_model():
    model = BinaryCNN(input_shape=(3, 200, 200))
    model.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.002, momentum=0.8)
    return model, optimizer, criterion

In [13]:
input_size = 200

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=mean,
        std=std
    ) 
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=mean,
        std=std
    ) 
])


In [14]:
train_dataset = BinaryDataset(
    data_dir='./data/train',
    transform=train_transforms
)

validation_dataset = BinaryDataset(
    data_dir='./data/test',
    transform=val_transforms
)

#test
test_image, test_label = train_dataset.__getitem__(0)
print(test_image.shape)


train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False)

torch.Size([3, 200, 200])


In [15]:
# Option 1: Using torchsummary (install with: pip install torchsummary)
model, optimizer, criterion = make_model()
summary(model, input_size=(3, 200, 200))

# Option 2: Manual counting
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params}")

flattened_size: 313632
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 198, 198]             896
              ReLU-2         [-1, 32, 198, 198]               0
         MaxPool2d-3           [-1, 32, 99, 99]               0
            Linear-4                   [-1, 64]      20,072,512
              ReLU-5                   [-1, 64]               0
            Linear-6                    [-1, 1]              65
           Sigmoid-7                    [-1, 1]               0
Total params: 20,073,473
Trainable params: 20,073,473
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.46
Forward/backward pass size (MB): 21.54
Params size (MB): 76.57
Estimated Total Size (MB): 98.57
----------------------------------------------------------------
Total parameters: 20073473


In [16]:
num_epochs=10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.6963, Acc: 0.4869, Val Loss: 0.6926, Val Acc: 0.4876
Epoch 2/10, Loss: 0.6905, Acc: 0.4869, Val Loss: 0.6848, Val Acc: 0.4876
Epoch 3/10, Loss: 0.6923, Acc: 0.4869, Val Loss: 0.6920, Val Acc: 0.4876
Epoch 4/10, Loss: 0.6858, Acc: 0.4869, Val Loss: 0.6765, Val Acc: 0.4876
Epoch 5/10, Loss: 0.6559, Acc: 0.4869, Val Loss: 0.6611, Val Acc: 0.4876
Epoch 6/10, Loss: 0.6928, Acc: 0.5993, Val Loss: 0.6919, Val Acc: 0.5323
Epoch 7/10, Loss: 0.6808, Acc: 0.5693, Val Loss: 0.6849, Val Acc: 0.4925
Epoch 8/10, Loss: 0.6514, Acc: 0.5019, Val Loss: 0.6628, Val Acc: 0.4925
Epoch 9/10, Loss: 0.6434, Acc: 0.4981, Val Loss: 0.6607, Val Acc: 0.4925
Epoch 10/10, Loss: 0.6370, Acc: 0.4969, Val Loss: 0.6656, Val Acc: 0.4876


In [17]:
np.median(history['val_acc'])

np.float64(0.48756218905472637)

In [18]:
np.median(history['acc'])

np.float64(0.4918851435705368)

In [19]:
np.std(history['loss'])

np.float64(0.021832754604145015)

In [20]:
np.std(history['val_loss'])

np.float64(0.012907196471526222)

In [22]:

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.RandomRotation(50),
    transforms.RandomResizedCrop(200, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=mean,
        std=std
    ) 
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=mean,
        std=std
    ) 
])


train_dataset = BinaryDataset(
    data_dir='./data/train',
    transform=train_transforms
)

validation_dataset = BinaryDataset(
    data_dir='./data/test',
    transform=val_transforms
)

#test
test_image, test_label = train_dataset.__getitem__(0)
print(test_image.shape)


train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=20, shuffle=False)

torch.Size([3, 200, 200])


In [23]:
num_epochs=10
history = {'acc': [], 'loss': [], 'val_acc': [], 'val_loss': []}
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        labels = labels.float().unsqueeze(1) # Ensure labels are float and have shape (batch_size, 1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        # For binary classification with BCEWithLogitsLoss, apply sigmoid to outputs before thresholding for accuracy
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = correct_train / total_train
    history['loss'].append(epoch_loss)
    history['acc'].append(epoch_acc)

    model.eval()
    val_running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            labels = labels.float().unsqueeze(1)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_epoch_loss = val_running_loss / len(validation_dataset)
    val_epoch_acc = correct_val / total_val
    history['val_loss'].append(val_epoch_loss)
    history['val_acc'].append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, "
          f"Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}, "
          f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.4f}")

Epoch 1/10, Loss: 0.6593, Acc: 0.4881, Val Loss: 0.6506, Val Acc: 0.5075
Epoch 2/10, Loss: 0.6503, Acc: 0.4906, Val Loss: 0.6612, Val Acc: 0.4925
Epoch 3/10, Loss: 0.6571, Acc: 0.4906, Val Loss: 0.6481, Val Acc: 0.5075
Epoch 4/10, Loss: 0.6519, Acc: 0.4956, Val Loss: 0.6474, Val Acc: 0.5124
Epoch 5/10, Loss: 0.6467, Acc: 0.4969, Val Loss: 0.6535, Val Acc: 0.5025
Epoch 6/10, Loss: 0.6624, Acc: 0.4931, Val Loss: 0.6477, Val Acc: 0.5075
Epoch 7/10, Loss: 0.6575, Acc: 0.4881, Val Loss: 0.6643, Val Acc: 0.5373
Epoch 8/10, Loss: 0.6557, Acc: 0.4956, Val Loss: 0.6460, Val Acc: 0.5174
Epoch 9/10, Loss: 0.6448, Acc: 0.4931, Val Loss: 0.6456, Val Acc: 0.5174
Epoch 10/10, Loss: 0.6470, Acc: 0.4969, Val Loss: 0.6483, Val Acc: 0.5025


In [25]:
np.mean(history['val_loss'])

np.float64(0.6512589687879999)

In [26]:
history['val_acc'][5:]

[0.5074626865671642,
 0.5373134328358209,
 0.5174129353233831,
 0.5174129353233831,
 0.5024875621890548]

In [27]:
np.mean(history['val_acc'][5:])

np.float64(0.5164179104477611)