In [18]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import json
from tqdm.auto import tqdm

# Pick the compute device: CUDA when PyTorch reports one available, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Reproducibility: seed Python, NumPy and PyTorch (CPU and all CUDA devices)
# with the same value, and request deterministic cuDNN kernels with autotuning off.
SEED = 58
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)
del _seed_fn  # keep the notebook namespace free of the loop helper

# Training hyperparameters.
# NOTE: a later cell reassigns BATCH_SIZE, NUM_EPOCHS and LEARNING_RATE before
# they are used, so the values below are only defaults.
NUM_EPOCHS, BATCH_SIZE = 100, 128
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 0.0001

# Root folder that load_dataset() scans; each sub-directory is treated as one class.
DATA_DIR = 'Simpsons/archive/characters_train'


In [19]:
class SimpsonsDataset(Dataset):
    """Map-style dataset that lazily loads images from disk.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, parallel to ``image_paths``.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths, self.labels, self.transform = image_paths, labels, transform

    def __len__(self):
        # The number of samples is defined by the path list.
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        If the image cannot be opened or converted, the error is printed and an
        all-zero ``3 x 128 x 128`` tensor is returned with the sample's label,
        so one unreadable file does not abort the whole run.
        """
        path = self.image_paths[idx]
        try:
            picture = Image.open(path).convert('RGB')
        except Exception as e:
            # Best-effort fallback for unreadable files; the transform is skipped.
            print(f"Error loading {path}: {e}")
            return torch.zeros(3, 128, 128), self.labels[idx]

        if not self.transform:
            return picture, self.labels[idx]
        return self.transform(picture), self.labels[idx]


In [20]:
def load_dataset(data_dir):
    """Scan ``data_dir`` for per-class image folders and index their images.

    Every non-hidden *sub-directory* of ``data_dir`` is one class. Plain files
    sitting next to the class folders are ignored so that they do not receive a
    class index (which would inflate the number of classes the model predicts).

    Args:
        data_dir: Path to the folder containing one sub-directory per class.

    Returns:
        Tuple ``(image_paths, labels, class_names, class_to_idx)`` where
        ``image_paths`` and ``labels`` are parallel lists, ``class_names`` is the
        sorted list of class folder names and ``class_to_idx`` maps each name to
        its position in ``class_names``.
    """
    valid_extensions = ('.jpg', '.jpeg', '.png')

    # Only directories can be classes; filter before assigning indices.
    class_names = sorted(
        entry for entry in os.listdir(data_dir)
        if not entry.startswith('.') and os.path.isdir(os.path.join(data_dir, entry))
    )
    class_to_idx = {name: idx for idx, name in enumerate(class_names)}

    image_paths = []
    labels = []
    for class_name in class_names:
        class_dir = os.path.join(data_dir, class_name)
        # os.listdir returns entries in arbitrary order; sort so the resulting
        # lists (and any seeded split made from them) are the same on every machine.
        for img_name in sorted(os.listdir(class_dir)):
            if img_name.lower().endswith(valid_extensions):
                image_paths.append(os.path.join(class_dir, img_name))
                labels.append(class_to_idx[class_name])

    return image_paths, labels, class_names, class_to_idx

In [21]:
class SimpsonsCNN(nn.Module):
    """Four-stage convolutional classifier.

    Each stage is Conv(3x3, padding 1) -> BatchNorm -> ReLU -> MaxPool(2x2), with
    channel widths 3 -> 32 -> 64 -> 128 -> 256. The classifier head flattens the
    result and expects ``256 * 8 * 8`` features, i.e. an 8x8 map after the four
    halvings, which corresponds to 128x128 inputs.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv -> BatchNorm -> ReLU -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )

    def __init__(self, num_classes):
        super().__init__()

        # Stages are created in order and keep their original attribute names,
        # so parameter initialisation order and state_dict keys are unchanged.
        self.block1 = self._conv_stage(3, 32)
        self.block2 = self._conv_stage(32, 64)
        self.block3 = self._conv_stage(64, 128)
        self.block4 = self._conv_stage(128, 256)

        head_layers = [
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(256 * 8 * 8, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run ``x`` through the four conv stages and the classifier head."""
        for stage in (self.block1, self.block2, self.block3, self.block4):
            x = stage(x)
        return self.classifier(x)


In [22]:
def _run_epoch(model, loader, criterion, run_device, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns:
        Tuple ``(average loss per sample, macro F1)`` for the pass.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    total_loss = 0.0
    preds = []
    targets = []

    # Gradients are only tracked during the training pass.
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(run_device), labels.to(run_device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Weight by batch size so the epoch figure is a per-sample average
            # (assumes the criterion returns a batch-mean loss).
            total_loss += loss.item() * images.size(0)
            preds.extend(outputs.argmax(dim=1).cpu().numpy())
            targets.extend(labels.cpu().numpy())

    avg_loss = total_loss / len(loader.dataset)
    macro_f1 = f1_score(targets, preds, average='macro', zero_division=0)
    return avg_loss, macro_f1


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, class_names, class_to_idx,
                patience_limit=7, checkpoint_path='best_model.pth'):
    """Train ``model`` with early stopping on validation macro F1.

    After every epoch the validation macro F1 is passed to ``scheduler.step``.
    Whenever it improves on the best value so far, a checkpoint (weights, class
    names, class-to-index mapping and the score) is written to ``checkpoint_path``.
    Training stops once ``patience_limit`` consecutive epochs bring no improvement.

    Args:
        model: Network to train; batches are moved to the device of its parameters.
        train_loader: DataLoader yielding ``(images, labels)`` training batches.
        val_loader: DataLoader yielding ``(images, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Scheduler whose ``step`` accepts the validation F1.
        num_epochs: Maximum number of epochs to run.
        class_names: Stored in the checkpoint alongside the weights.
        class_to_idx: Stored in the checkpoint alongside the weights.
        patience_limit: Epochs without improvement tolerated before stopping.
        checkpoint_path: Where the best checkpoint is written.

    Returns:
        The best validation macro F1 observed (``0.0`` if it never improved).
    """
    # Use the model's own device rather than a notebook-level global, so the
    # function does not depend on which cells were run beforehand.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    best_val_f1 = 0.0
    patience_counter = 0

    for epoch in range(num_epochs):
        avg_train_loss, train_f1 = _run_epoch(model, train_loader, criterion, run_device, optimizer)
        avg_val_loss, val_f1 = _run_epoch(model, val_loader, criterion, run_device)

        print(f'Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Train F1: {train_f1:.4f} | Val Loss: {avg_val_loss:.4f} | Val F1: {val_f1:.4f}')

        scheduler.step(val_f1)

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            patience_counter = 0
            torch.save({
                'model_state_dict': model.state_dict(),
                'class_names': class_names,
                'class_to_idx': class_to_idx,
                'best_f1': best_val_f1
            }, checkpoint_path)
            print(f'  --> Model saved! Best Val F1: {best_val_f1:.4f}')
        else:
            patience_counter += 1
            if patience_counter >= patience_limit:
                print(f'Early stopping triggered after {epoch+1} epochs')
                break

    print(f'Training complete. Best Val Macro F1: {best_val_f1:.4f}')
    return best_val_f1

In [23]:

# Effective values for this run: these reassign the BATCH_SIZE (128) and
# NUM_EPOCHS (100) defaults from the configuration cell at the top of the
# notebook. LEARNING_RATE is restated with the same value.
BATCH_SIZE, NUM_EPOCHS = 32, 30
LEARNING_RATE = 1e-3

In [24]:
# Shared preprocessing constants. The 128x128 size matches the 256 * 8 * 8
# feature count the classifier head expects; the per-channel mean/std are the
# values commonly quoted as ImageNet statistics.
_INPUT_SIZE = (128, 128)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, light augmentation (rotation, flip, colour jitter),
# then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomRotation(15),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])

# Validation pipeline: same resize and normalisation, no augmentation.
val_transform = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
])

In [None]:
# Index every image under DATA_DIR together with its class label.
image_paths, labels, class_names, class_to_idx = load_dataset(DATA_DIR)
print(
    f'Total images: {len(image_paths)}',
    f'Number of classes: {len(class_names)}',
    f'Classes: {class_names}',
    sep='\n',
)

# Hold out 20% for validation; stratifying on the labels keeps the class
# proportions the same in both parts, and SEED fixes the shuffle.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths,
    labels,
    stratify=labels,
    test_size=0.2,
    random_state=SEED,
)

Total images: 16764
Number of classes: 43
Classes: ['.DS_Store', 'abraham_grampa_simpson', 'agnes_skinner', 'apu_nahasapeemapetilon', 'barney_gumble', 'bart_simpson', 'carl_carlson', 'charles_montgomery_burns', 'chief_wiggum', 'cletus_spuckler', 'comic_book_guy', 'disco_stu', 'edna_krabappel', 'fat_tony', 'gil', 'groundskeeper_willie', 'homer_simpson', 'kent_brockman', 'krusty_the_clown', 'lenny_leonard', 'lionel_hutz', 'lisa_simpson', 'maggie_simpson', 'marge_simpson', 'martin_prince', 'mayor_quimby', 'milhouse_van_houten', 'miss_hoover', 'moe_szyslak', 'ned_flanders', 'nelson_muntz', 'otto_mann', 'patty_bouvier', 'principal_skinner', 'professor_john_frink', 'rainier_wolfcastle', 'ralph_wiggum', 'selma_bouvier', 'sideshow_bob', 'sideshow_mel', 'snake_jailbird', 'troy_mcclure', 'waylon_smithers']


In [27]:
# Wrap each split in a dataset; only the training split gets augmentation.
train_dataset = SimpsonsDataset(image_paths=train_paths, labels=train_labels, transform=train_transform)
val_dataset = SimpsonsDataset(image_paths=val_paths, labels=val_labels, transform=val_transform)

# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

In [28]:
# One output unit per discovered class; the model lives on the selected device.
model = SimpsonsCNN(num_classes=len(class_names)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)
# mode='max' because the scheduler is stepped with validation F1 (higher is
# better): the learning rate is halved after 3 epochs without improvement.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=3,
)

In [None]:
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, NUM_EPOCHS, class_names, class_to_idx)

# Save the weights as they stand after the last epoch that ran. This is separate
# from the best-scoring checkpoint that train_model writes to 'best_model.pth'.
FINAL_MODEL_PATH = 'model_old.pth'
torch.save({
    'model_state_dict': model.state_dict(),
    'class_names': class_names,
    'class_to_idx': class_to_idx
}, FINAL_MODEL_PATH)

# Report the path actually written (the old message named 'model.pth').
print(f'Training complete. Model saved as {FINAL_MODEL_PATH}')