In [26]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
import torch
import numpy as np
import random
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt
import gc
import pandas as pd

In [28]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True  # cuda
    torch.backends.cudnn.benchmark = False  # ensure repeated result for restart session

In [29]:
class EarlyStopping:

    def __init__(self, patience=10, verbose=False, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.delta = delta
        self.path = path  # Save path for checkpoint

    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.save_checkpoint(val_loss, model)
        elif val_loss >= self.best_loss - self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            if self.verbose:
                print(f'Validation loss decreased ({self.best_loss:.4f} --> {val_loss:.4f}). Saving model ...')
            self.best_loss = val_loss
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves model when validation loss decreases."""
        torch.save(model.state_dict(), self.path)

In [30]:
seed = 42
set_seed(seed)

In [31]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), #as pre-train used
])

In [32]:
data_dir = '/content/drive/MyDrive/image_class_task_dataset'
train_dataset = datasets.ImageFolder(root=f'{data_dir}/train', transform=transform)
valid_dataset = datasets.ImageFolder(root=f'{data_dir}/valid', transform=transform)
test_dataset = datasets.ImageFolder(root=f'{data_dir}/test', transform=transform)

In [33]:
def train_with_early_stopping(model, criterion, optimizer, train_loader, valid_loader, scheduler=None,
                       num_epochs=50, grad_clip=None, device='cpu'):

    history = {'train_loss': [], 'valid_loss': [], 'train_acc': [], 'valid_acc': []}

    patience = 10
    save_path = 'best_model.pt'
    early_stopping = EarlyStopping(patience=patience, path=save_path)

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct_train, total_train = 0.0, 0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()

            if grad_clip:
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct_train += (preds == labels).sum().item()
            total_train += labels.size(0)

        # average loss and acc per whole dataset
        train_loss = running_loss / total_train
        train_acc = 100 * correct_train / total_train
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)

        # validation step
        model.eval()
        valid_loss, correct_valid, total_valid = 0.0, 0, 0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * images.size(0)
                _, preds = torch.max(outputs, 1)
                correct_valid += (preds == labels).sum().item()
                total_valid += labels.size(0)

        # as above
        valid_loss /= total_valid
        valid_acc = 100 * correct_valid / total_valid
        history['valid_loss'].append(valid_loss)
        history['valid_acc'].append(valid_acc)

        # change lr if have
        if scheduler:
            if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(valid_loss)  # Pass validation loss for ReduceLROnPlateau
            else:
                scheduler.step()  # Step for other scheduler types

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {valid_loss:.4f}")

        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    # Load the best model weights
    model.load_state_dict(torch.load(save_path))
    return model

# ResNet

In [52]:
batch_size = 32
learning_rate = 1e-3
model_type = 'resnet'
optimizer_type = 'SGD'
weight_decay = 5e-4
schedulers = None
grad_clip=None

In [53]:
set_seed(seed=seed)

In [54]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
num_classes = len(train_loader.dataset.classes)

In [55]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [56]:
experiment_results_resnet = []

In [57]:
# Release RAM
torch.cuda.empty_cache()
gc.collect()

# Set seed for reproducibility
set_seed(42)

# Initialize model
model = models.resnet18(weights='IMAGENET1K_V1')
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)  # Adjust for the number of classes all for training
model = model.to(device) # all layer in training

# Define loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)
scheduler = None

# Print current configuration
print(f"\nTraining {model_type} with Batch Size={batch_size}, LR={learning_rate}, "
      f"Optimizer={optimizer_type}, Weight Decay={weight_decay}, Scheduler={scheduler}\n")

# Train and validate
history = train_with_early_stopping(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    scheduler=scheduler,
    num_epochs=100,
    grad_clip=grad_clip,
    device=device
)

# Record results
experiment_results_resnet.append({
    'model_type': model_type,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
    'optimizer_type': optimizer_type,
    'weight_decay': weight_decay,
    'scheduler': scheduler,
    'history': history
})


Training resnet with Batch Size=32, LR=0.001, Optimizer=SGD, Weight Decay=0.0005, Scheduler=None

Epoch 1/100, Train Loss: 1.4738, Val Loss: 1.1899
Epoch 2/100, Train Loss: 1.1315, Val Loss: 1.2030
Epoch 3/100, Train Loss: 1.0342, Val Loss: 1.0407
Epoch 4/100, Train Loss: 0.8574, Val Loss: 0.8150
Epoch 5/100, Train Loss: 0.7204, Val Loss: 0.7001
Epoch 6/100, Train Loss: 0.5849, Val Loss: 0.4905
Epoch 7/100, Train Loss: 0.4730, Val Loss: 0.4748
Epoch 8/100, Train Loss: 0.4330, Val Loss: 0.4128
Epoch 9/100, Train Loss: 0.3785, Val Loss: 0.4248
Epoch 10/100, Train Loss: 0.2952, Val Loss: 0.3741
Epoch 11/100, Train Loss: 0.2736, Val Loss: 0.4190
Epoch 12/100, Train Loss: 0.2370, Val Loss: 0.3556
Epoch 13/100, Train Loss: 0.2126, Val Loss: 0.3020
Epoch 14/100, Train Loss: 0.1953, Val Loss: 0.3042
Epoch 15/100, Train Loss: 0.1882, Val Loss: 0.2769
Epoch 16/100, Train Loss: 0.1547, Val Loss: 0.2607
Epoch 17/100, Train Loss: 0.1451, Val Loss: 0.2302
Epoch 18/100, Train Loss: 0.1140, Val Loss:

  model.load_state_dict(torch.load(save_path))


In [67]:
def load_and_test_model(model, test_loader, criterion, device, path='best_model.pt'):
    set_seed(42)

    # Load the best model weights
    model.load_state_dict(torch.load(path))
    model.to(device)  # Move model to the specified device
    model.eval()      # Set model to evaluation mode

    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to device
            outputs = model(images)

            # Calculate loss
            loss = criterion(outputs, labels)
            test_loss += loss.item() * images.size(0)  # Sum up batch loss

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Calculate average loss and accuracy
    test_loss /= total
    accuracy = correct / total

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.3f}")
    return test_loss, accuracy

In [66]:
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
test_loss, test_accuracy = load_and_test_model(model, test_loader, criterion, device, path='best_model.pt')
experiment_results_resnet[-1].update({'test_loss': test_loss, 'test_accuracy': test_accuracy})

  model.load_state_dict(torch.load(path))


Test Loss: 0.4700, Test Accuracy: 0.900%


In [69]:
df_results = pd.DataFrame(experiment_results_resnet)
df_results.to_csv("experiment_results_resnet.csv", index=False)

# AlexNet

In [70]:
batch_size = 32
learning_rate = 1e-3
model_type = 'alexnet'
optimizer_type = 'Adam'
weight_decay = 0.0
schedulers = 'ReduceLROnPlateau'
grad_clip=None
droput_rate= 0.3

In [71]:
set_seed(seed=seed)

In [72]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
num_classes = len(train_loader.dataset.classes)

In [73]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [74]:
experiment_results_alexnet = []

In [75]:
torch.cuda.empty_cache()
gc.collect()

set_seed(42)

model = models.alexnet(weights='IMAGENET1K_V1')
num_ftrs = model.classifier[6].in_features

model.classifier[0] = nn.Dropout(0.3)
model.classifier[3] = nn.Dropout(0.3)

model.classifier[6] = nn.Linear(num_ftrs, num_classes)

for param in model.parameters():
    param.requires_grad = False
for param in model.classifier[6].parameters():
    param.requires_grad = True

model = model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

# Print current configuration
print(f"\nTraining {model_type} with Batch Size={batch_size}, LR={learning_rate}, "
      f"Optimizer={optimizer_type}, Scheduler={schedulers}\n")

# Train and validate
history = train_with_early_stopping(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    scheduler=scheduler,
    num_epochs=50,
    grad_clip=1.0,
    device=device
)

# Record results
experiment_results_alexnet.append({
    'model_type': model_type,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
    'optimizer_type': optimizer_type,
    'scheduler': schedulers,
    'dropout': 0.3,
    'grad_clip': 1.0,
    'history': history
})

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 229MB/s]



Training alexnet with Batch Size=32, LR=0.001, Optimizer=Adam, Scheduler=ReduceLROnPlateau

Epoch 1/50, Train Loss: 1.1793, Val Loss: 0.8837
Epoch 2/50, Train Loss: 0.7709, Val Loss: 0.5618
Epoch 3/50, Train Loss: 0.6058, Val Loss: 0.4812
Epoch 4/50, Train Loss: 0.3726, Val Loss: 0.3877
Epoch 5/50, Train Loss: 0.4187, Val Loss: 0.4480
Epoch 6/50, Train Loss: 0.3664, Val Loss: 0.3366
Epoch 7/50, Train Loss: 0.2861, Val Loss: 0.2796
Epoch 8/50, Train Loss: 0.2422, Val Loss: 0.3293
Epoch 9/50, Train Loss: 0.2073, Val Loss: 0.2940
Epoch 10/50, Train Loss: 0.2572, Val Loss: 0.2956
Epoch 11/50, Train Loss: 0.1885, Val Loss: 0.2462
Epoch 12/50, Train Loss: 0.2132, Val Loss: 0.3002
Epoch 13/50, Train Loss: 0.1468, Val Loss: 0.2690
Epoch 14/50, Train Loss: 0.1493, Val Loss: 0.2434
Epoch 15/50, Train Loss: 0.1512, Val Loss: 0.2185
Epoch 16/50, Train Loss: 0.1041, Val Loss: 0.2555
Epoch 17/50, Train Loss: 0.1196, Val Loss: 0.1917
Epoch 18/50, Train Loss: 0.1013, Val Loss: 0.2514
Epoch 19/50, Tra

  model.load_state_dict(torch.load(save_path))


In [76]:
def load_and_test_model(model, test_loader, criterion, device, path='best_model.pt'):
    set_seed(42)

    # Load the best model weights
    model.load_state_dict(torch.load(path))
    model.to(device)  # Move model to the specified device
    model.eval()      # Set model to evaluation mode

    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to device
            outputs = model(images)

            # Calculate loss
            loss = criterion(outputs, labels)
            test_loss += loss.item() * images.size(0)  # Sum up batch loss

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Calculate average loss and accuracy
    test_loss /= total
    accuracy = correct / total

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.3f}")
    return test_loss, accuracy

In [77]:
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
test_loss, test_accuracy = load_and_test_model(model, test_loader, criterion, device, path='best_model.pt')
experiment_results_alexnet[-1].update({'test_loss': test_loss, 'test_accuracy': test_accuracy})

  model.load_state_dict(torch.load(path))


Test Loss: 0.1820, Test Accuracy: 0.900


# MobileNet

In [85]:
batch_size = 32
learning_rate = 1e-3
model_type = 'mobilenet'
optimizer_type = 'SGD'
weight_decay = 1e-4
schedulers = 'ReduceLROnPlateau'

In [86]:
set_seed(seed=seed)

In [87]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
num_classes = len(train_loader.dataset.classes)

In [88]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [89]:
experiment_results_mobilenet = []

In [90]:
set_seed(42)

# Release RAM
torch.cuda.empty_cache()
gc.collect()

# Initialize MobileNet model
model = models.mobilenet_v2(pretrained=True)
num_ftrs = model.classifier[1].in_features

# Modify the final layer to match the number of classes
model.classifier[1] = nn.Linear(num_ftrs, num_classes)

for param in model.parameters():
    param.requires_grad = True

# Move the model to the specified device
model = model.to(device)

# Define loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

# Print current configuration
freeze_status = "All Layers Unfrozen"
print(f"\nTraining {model_type} with Batch Size={batch_size}, LR={learning_rate}, "
      f"Optimizer={optimizer_type}, Scheduler={schedulers}, Freeze Status: {freeze_status}\n")

# Train and validate
history = train_with_early_stopping(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    scheduler=scheduler,
    num_epochs=50,
    grad_clip=None,
    device=device
)

# Record results
experiment_results_mobilenet.append({
    'model_type': model_type,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
    'optimizer_type': optimizer_type,
    'scheduler': schedulers,
    'freeze_status': freeze_status,  # Record the freeze configuration
    'history': history
})


Training mobilenet with Batch Size=32, LR=0.001, Optimizer=SGD, Scheduler=ReduceLROnPlateau, Freeze Status: All Layers Unfrozen

Epoch 1/50, Train Loss: 1.4645, Val Loss: 1.1821
Epoch 2/50, Train Loss: 1.1574, Val Loss: 1.0750
Epoch 3/50, Train Loss: 1.0945, Val Loss: 1.0176
Epoch 4/50, Train Loss: 0.8454, Val Loss: 0.8383
Epoch 5/50, Train Loss: 0.7599, Val Loss: 0.6856
Epoch 6/50, Train Loss: 0.6025, Val Loss: 0.5574
Epoch 7/50, Train Loss: 0.4966, Val Loss: 0.4949
Epoch 8/50, Train Loss: 0.4214, Val Loss: 0.4183
Epoch 9/50, Train Loss: 0.3416, Val Loss: 0.4017
Epoch 10/50, Train Loss: 0.3166, Val Loss: 0.4123
Epoch 11/50, Train Loss: 0.3042, Val Loss: 0.3483
Epoch 12/50, Train Loss: 0.2375, Val Loss: 0.2831
Epoch 13/50, Train Loss: 0.1804, Val Loss: 0.3007
Epoch 14/50, Train Loss: 0.1679, Val Loss: 0.2428
Epoch 15/50, Train Loss: 0.1706, Val Loss: 0.2721
Epoch 16/50, Train Loss: 0.1597, Val Loss: 0.1763
Epoch 17/50, Train Loss: 0.1008, Val Loss: 0.2277
Epoch 18/50, Train Loss: 0.10

  model.load_state_dict(torch.load(save_path))


In [91]:
def load_and_test_model(model, test_loader, criterion, device, path='best_model.pt'):
    set_seed(42)

    # Load the best model weights
    model.load_state_dict(torch.load(path))
    model.to(device)  # Move model to the specified device
    model.eval()      # Set model to evaluation mode

    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to device
            outputs = model(images)

            # Calculate loss
            loss = criterion(outputs, labels)
            test_loss += loss.item() * images.size(0)  # Sum up batch loss

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Calculate average loss and accuracy
    test_loss /= total
    accuracy = correct / total

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.3f}")
    return test_loss, accuracy

In [92]:
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
test_loss, test_accuracy = load_and_test_model(model, test_loader, criterion, device, path='best_model.pt')
experiment_results_mobilenet[-1].update({'test_loss': test_loss, 'test_accuracy': test_accuracy})

  model.load_state_dict(torch.load(path))


Test Loss: 0.3204, Test Accuracy: 0.850


In [93]:
df_results = pd.DataFrame(experiment_results_mobilenet)
df_results.to_csv("experiment_results_mobilenet.csv", index=False)

In [78]:
# df_results = pd.DataFrame(experiment_results_alexnet)
# df_results.to_csv("experiment_results_alexnet.csv", index=False)