In [None]:
# Run the following cell if using Google Colab

from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/cs340/project/models

!pip install git+https://github.com/openai/CLIP.git

In [2]:
import clip_feature_extractor
import numpy as np

from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import random
import time

device = torch.device("cuda")
print(device)

import cupy as cp
import cudf

%load_ext autoreload
%autoreload 2

cuda


# CIFAR100

In [3]:
X_train_CIFAR100_np, y_train_CIFAR100_np, X_test_CIFAR100_np, y_test_CIFAR100_np = clip_feature_extractor.get_CIFAR100_features();

# CLIP embeddings are already scaled; L2 normalized
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

X_train = torch.tensor(X_train_CIFAR100_np, dtype=torch.float32)
y_train = torch.tensor(y_train_CIFAR100_np, dtype=torch.long)
X_test  = torch.tensor(X_test_CIFAR100_np, dtype=torch.float32)
y_test  = torch.tensor(y_test_CIFAR100_np, dtype=torch.long)

full_train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified
Extracting features from CIFAR100 dataset
Loaded previously extracted features from disk.


In [6]:
class fcnet_CIFAR100(nn.Module):
    def __init__(self, input_size, num_classes, dropout_p1=0.2, dropout_p2=0.2):
        super(fcnet_CIFAR100, self).__init__()
        self.fc1 = nn.Linear(input_size, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_p1)  

        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=dropout_p2) 

        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.dropout2(x)

        x = self.fc3(x)
        return x

In [7]:
def train_and_evaluate(input_size, 
                       num_classes, 
                       train_loader, 
                       val_loader, 
                       test_loader,
                       device='cuda', 
                       dropout_p1=0.2, 
                       dropout_p2=0.2, 
                       lr=0.001, 
                       weight_decay=0.001,
                       num_epochs=50,
                       patience=5,
                       verbose=False):
    """
    Trains the fcnet_CIFAR100 model and evaluates its performance on validation loss.

    Parameters:
    - input_size (int): Dimensionality of input features.
    - num_classes (int): Number of target classes.
    - train_loader, val_loader, test_loader: DataLoaders for training, validation, and testing.
    - device (str): 'cuda' or 'cpu'.
    - dropout_p1 (float): Dropout probability after first layer.
    - dropout_p2 (float): Dropout probability after second layer.
    - lr (float): Learning rate for the optimizer.
    - weight_decay (float): Weight decay (L2 regularization) factor.
    - num_epochs (int): Maximum number of training epochs.
    - patience (int): Number of epochs to wait for improvement before stopping.
    - verbose (bool): If True, prints progress at each epoch.

    Returns:
    - dict: Contains the trained model and performance metrics.
    """
    model = fcnet_CIFAR100(input_size=input_size, num_classes=num_classes,
                           dropout_p1=dropout_p1, dropout_p2=dropout_p2)
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    best_val_loss = float('inf')
    best_model_state = None
    epochs_no_improve = 0

    # training
    time_start = time.time()
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct / total

        # validation 
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for val_batch_X, val_batch_y in val_loader:
                val_batch_X, val_batch_y = val_batch_X.to(device), val_batch_y.to(device)

                val_outputs = model(val_batch_X)
                v_loss = criterion(val_outputs, val_batch_y)

                val_loss += v_loss.item()

                _, val_predicted = torch.max(val_outputs.data, 1)
                val_total += val_batch_y.size(0)
                val_correct += (val_predicted == val_batch_y).sum().item()

        val_loss = val_loss / len(val_loader)
        val_accuracy = val_correct / val_total

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict()
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # early stopping
        if epochs_no_improve >= patience:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break
        
        time_end = time.time()

        if verbose:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy*100:.2f}%, "
                f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy*100:.2f}%")
            if (epoch == 0):
                print(f"Estimated time per epoch: {time_end - time_start:.2f} seconds")

    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    # test
    model.eval()
    test_loss = 0.0
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for test_batch_X, test_batch_y in test_loader:
            test_batch_X, test_batch_y = test_batch_X.to(device), test_batch_y.to(device)

            test_outputs = model(test_batch_X)
            t_loss = criterion(test_outputs, test_batch_y)

            test_loss += t_loss.item()

            _, test_predicted = torch.max(test_outputs.data, 1)
            test_total += test_batch_y.size(0)
            test_correct += (test_predicted == test_batch_y).sum().item()

    test_loss = test_loss / len(test_loader)
    test_accuracy = test_correct / test_total

    return {
        'model': model,
        'train_accuracy': train_accuracy,
        'val_loss': best_val_loss,
        'test_accuracy': test_accuracy
    }

In [29]:
dropout_p1_min, dropout_p1_max = 0.2, 0.7   
dropout_p2_min, dropout_p2_max = 0.2, 0.7  
lr_min, lr_max = 0.00001, 0.001 # previous best result around 0.0001            
weight_decay_min, weight_decay_max = 0.0001, 0.01 # previous best result around 0.001

n_trials = 200

best_params = None
best_val_loss = float('inf')
best_test_acc = 0.0
best_model = None

input_size = X_train.shape[1]
num_classes = len(set(y_train))

for i in range(n_trials):
    chosen_params = {
        'dropout_p1': random.uniform(dropout_p1_min, dropout_p1_max),
        'dropout_p2': random.uniform(dropout_p2_min, dropout_p2_max),
        'lr': random.uniform(lr_min, lr_max),
        'weight_decay': random.uniform(weight_decay_min, weight_decay_max)
    }

    print(f"=== Trial {i+1}/{n_trials}: {chosen_params} ===")

    results = train_and_evaluate(
        input_size=input_size,
        num_classes=num_classes,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        dropout_p1=chosen_params['dropout_p1'],
        dropout_p2=chosen_params['dropout_p2'],
        lr=chosen_params['lr'],
        weight_decay=chosen_params['weight_decay'],
        num_epochs=100,
        patience=5,
        verbose=True
    )

    if results['val_loss'] < best_val_loss:
        best_val_loss = results['val_loss']
        best_test_acc = results['test_accuracy']
        best_params = chosen_params
        best_model = results['model']
        print(f"New best model found with Val Loss: {best_val_loss:.4f} and Test Accuracy: {best_test_acc*100:.2f}%\n")
    else:
        print(f"No improvement. Current Best Val Loss: {best_val_loss:.4f}, Test Accuracy: {best_test_acc*100:.2f}%\n")

print("=== Random Hyperparameter Search Completed ===")
print("Best Parameters Found:")
print(best_params)
print(f"Best Validation Loss: {best_val_loss:.4f}")
print(f"Test Accuracy with Best Params: {best_test_acc*100:.2f}%")


=== Trial 1/200: {'dropout_p1': 0.6371756295334261, 'dropout_p2': 0.29679287544778044, 'lr': 0.00015880587239474185, 'weight_decay': 0.005045140242202204} ===
Epoch 1/100, Train Loss: 4.0556, Train Acc: 47.60%, Val Loss: 1.3570, Val Acc: 72.36%
Estimate time per epoch: 8.34 seconds
Epoch 2/100, Train Loss: 1.2443, Train Acc: 69.92%, Val Loss: 0.9660, Val Acc: 75.89%
Epoch 3/100, Train Loss: 1.0581, Train Acc: 73.14%, Val Loss: 0.8862, Val Acc: 76.45%
Epoch 4/100, Train Loss: 0.9963, Train Acc: 74.48%, Val Loss: 0.8575, Val Acc: 77.48%
Epoch 5/100, Train Loss: 0.9598, Train Acc: 75.55%, Val Loss: 0.8398, Val Acc: 77.45%
Epoch 6/100, Train Loss: 0.9343, Train Acc: 76.00%, Val Loss: 0.8297, Val Acc: 77.25%
Epoch 7/100, Train Loss: 0.9275, Train Acc: 76.13%, Val Loss: 0.8192, Val Acc: 77.72%
Epoch 8/100, Train Loss: 0.9104, Train Acc: 76.42%, Val Loss: 0.8148, Val Acc: 78.19%
Epoch 9/100, Train Loss: 0.9026, Train Acc: 76.87%, Val Loss: 0.8170, Val Acc: 77.67%
Epoch 10/100, Train Loss: 0.8

# CIFAR10

In [8]:
X_train_CIFAR10_np, y_train_CIFAR10_np, X_test_CIFAR10_np, y_test_CIFAR10_np = clip_feature_extractor.get_CIFAR10_features();

# CLIP embeddings are already scaled; L2 normalized
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

X_train = torch.tensor(X_train_CIFAR10_np, dtype=torch.float32)
y_train = torch.tensor(y_train_CIFAR10_np, dtype=torch.long)
X_test  = torch.tensor(X_test_CIFAR10_np, dtype=torch.float32)
y_test  = torch.tensor(y_test_CIFAR10_np, dtype=torch.long)

full_train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified
Loaded features from disk.


In [9]:
# TODO turn this into a function I guess...
dropout_p1_min, dropout_p1_max = 0.2, 0.7   
dropout_p2_min, dropout_p2_max = 0.2, 0.7  
lr_min, lr_max = 0.00001, 0.001         
weight_decay_min, weight_decay_max = 0.0001, 0.01 

n_trials = 300

best_params = None
best_val_loss = float('inf')
best_test_acc = 0.0
best_model = None

input_size = X_train.shape[1]
num_classes = len(set(y_train))

for i in range(n_trials):
    chosen_params = {
        'dropout_p1': random.uniform(dropout_p1_min, dropout_p1_max),
        'dropout_p2': random.uniform(dropout_p2_min, dropout_p2_max),
        'lr': random.uniform(lr_min, lr_max),
        'weight_decay': random.uniform(weight_decay_min, weight_decay_max)
    }

    print(f"=== Trial {i+1}/{n_trials}: {chosen_params} ===")

    results = train_and_evaluate(
        input_size=input_size,
        num_classes=num_classes,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        dropout_p1=chosen_params['dropout_p1'],
        dropout_p2=chosen_params['dropout_p2'],
        lr=chosen_params['lr'],
        weight_decay=chosen_params['weight_decay'],
        num_epochs=100,
        patience=5,
        verbose=True
    )

    if results['val_loss'] < best_val_loss:
        best_val_loss = results['val_loss']
        best_test_acc = results['test_accuracy']
        best_params = chosen_params
        best_model = results['model']
        print(f"New best model found with Val Loss: {best_val_loss:.4f} and Test Accuracy: {best_test_acc*100:.2f}%\n")
    else:
        print(f"No improvement. Current Best Val Loss: {best_val_loss:.4f}, Test Accuracy: {best_test_acc*100:.2f}%\n")

print("=== Random Hyperparameter Search Completed ===")
print("Best Parameters Found:")
print(best_params)
print(f"Best Validation Loss: {best_val_loss:.4f}")
print(f"Test Accuracy with Best Params: {best_test_acc*100:.2f}%")

=== Trial 1/300: {'dropout_p1': 0.5999151139590021, 'dropout_p2': 0.326502761048833, 'lr': 0.00027951786049384747, 'weight_decay': 0.0014277176444735485} ===
Epoch 1/100, Train Loss: 0.7357, Train Acc: 91.31%, Val Loss: 0.1700, Val Acc: 94.83%
Estimated time per epoch: 9.95 seconds
Epoch 2/100, Train Loss: 0.2072, Train Acc: 93.55%, Val Loss: 0.1650, Val Acc: 94.68%
Epoch 3/100, Train Loss: 0.1938, Train Acc: 93.98%, Val Loss: 0.1603, Val Acc: 94.82%
Epoch 4/100, Train Loss: 0.1838, Train Acc: 94.34%, Val Loss: 0.1534, Val Acc: 95.01%
Epoch 5/100, Train Loss: 0.1796, Train Acc: 94.38%, Val Loss: 0.1606, Val Acc: 94.84%
Epoch 6/100, Train Loss: 0.1753, Train Acc: 94.41%, Val Loss: 0.1556, Val Acc: 95.12%
Epoch 7/100, Train Loss: 0.1775, Train Acc: 94.34%, Val Loss: 0.1522, Val Acc: 95.04%
Epoch 8/100, Train Loss: 0.1731, Train Acc: 94.45%, Val Loss: 0.1621, Val Acc: 94.93%
Epoch 9/100, Train Loss: 0.1699, Train Acc: 94.55%, Val Loss: 0.1584, Val Acc: 94.84%
Epoch 10/100, Train Loss: 0.1