In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
import pandas as pd
import os
from torch.utils.data import ConcatDataset #For Combining Dataset

path = '../cs-424-ass-1-wednesday-class'


In [2]:
# load the training-set
# batch_size = 64
batch_size = 128
# batch_size = 256

transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

#Data Augmentation

transform_augmentation_one = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),  # Increased flip probability
    transforms.RandomRotation(15),  # Increased rotation
    transforms.ColorJitter(brightness=0.2, contrast=0.3, saturation=0.3, hue=0.1),  # Enhanced color jitter
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), shear=10),  # Added shear transformation
    transforms.GaussianBlur(kernel_size=(5, 5), sigma=(0.1, 2.0)),  # Slight blurring
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_augmentation_two = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.RandomRotation(5),
    transforms.ColorJitter(brightness=0.05, contrast=0.1, hue=0.05),
    transforms.RandomSolarize(threshold=192, p=0.3),  # Solarization effect
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Crop and resize
    transforms.RandomGrayscale(p=0.2),  # Convert to grayscale sometimes
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_augmentation_three = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.4),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.15, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),  # Stronger crop variation
    transforms.RandomInvert(p=0.2),  # Randomly invert colors
    transforms.RandomPosterize(bits=4, p=0.3),  # Reduce color depth for effect
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


train_dataset = datasets.ImageFolder(root=f"{path}/train", transform=transform_train)
train_dataset_one = datasets.ImageFolder(root=f"{path}/train", transform=transform_augmentation_one)
train_dataset_two = datasets.ImageFolder(root=f"{path}/train", transform=transform_augmentation_two)
train_dataset_three = datasets.ImageFolder(root=f"{path}/train", transform=transform_augmentation_three)

combined_dataset = ConcatDataset([train_dataset, train_dataset_one, train_dataset_two, train_dataset_three])

train_loader = DataLoader(dataset=combined_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

In [3]:
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {dev}")

Using device: cuda


In [4]:
# Define ResNet-18
class CustomResNet18(nn.Module):
    def __init__(self, num_classes):
        super(CustomResNet18, self).__init__()
        self.model = models.resnet18(weights=None)  # Initialize from scratch

        # Modify the fully connected layers
        in_features = self.model.fc.in_features  # Get the original ResNet FC input size

        self.model.fc = nn.Sequential(
            nn.Linear(in_features, 512),  # New intermediate layer
            nn.BatchNorm1d(512),          # Batch normalization for stable training
            nn.ReLU(),                    # Activation function
            nn.Dropout(0.3),              # Dropout for regularization
            nn.Linear(512, num_classes)   # Final classification layer
        )

    def forward(self, x):
        return self.model(x)


num_classes = 10
model = CustomResNet18(num_classes=num_classes).to(dev)

# Define the loss function and optimizer
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate) #Using AdamW

scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6) #Cosine Decay

In [5]:
def train_model():
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(dev), labels.to(dev)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            scheduler.step()


        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")
        print(f"Epoch {epoch+1}, Learning Rate: {scheduler.get_last_lr()}")

    # save your model
    torch.save(model.state_dict(), 'resnet18_local.pth')

In [6]:
if __name__ == "__main__":
    num_epochs = 40
    train_model()

Epoch [1/40], Loss: 1.6901
Epoch 1, Learning Rate: [9.639601130971474e-05]
Epoch [2/40], Loss: 1.3602
Epoch 2, Learning Rate: [0.0006548539886902981]
Epoch [3/40], Loss: 1.2209
Epoch 3, Learning Rate: [0.0006548539886902789]
Epoch [4/40], Loss: 1.0900
Epoch 4, Learning Rate: [9.6396011309712e-05]
Epoch [5/40], Loss: 0.9763
Epoch 5, Learning Rate: [0.0009999999999999545]
Epoch [6/40], Loss: 0.8887
Epoch 6, Learning Rate: [9.639601130972905e-05]
Epoch [7/40], Loss: 0.8152
Epoch 7, Learning Rate: [0.0006548539886903578]
Epoch [8/40], Loss: 0.7205
Epoch 8, Learning Rate: [0.0006548539886903764]
Epoch [9/40], Loss: 0.6752
Epoch 9, Learning Rate: [9.6396011309722e-05]


KeyboardInterrupt: 

In [None]:
# Model size (should be less than 26)

total_params = sum(p.numel() for p in model.parameters())
total_params/(1024*1024)

In [None]:
# load the test-set
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


test_dataset = datasets.ImageFolder(root=f'{path}/test', transform=transform_test)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# test the model
def test_model():
    model.eval()
    predictions = []
    image_paths = [path for path, _ in test_dataset.imgs]
    with torch.no_grad():
        for images, _ in test_loader:
            images = images.to(dev)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.cpu().numpy())

    # prediction.csv
    df = pd.DataFrame({
        'id': image_paths,
        'label': predictions
    })
    df.to_csv('brendan.tan.2022.csv', index=False)
    print("Results saved to brendan.tan.2022.csv")

In [None]:
if __name__ == "__main__":
    test_model()