In [1]:
import copy
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split

In [2]:
def get_data_transforms():
    """Build the image preprocessing pipelines.

    Returns:
        dict: maps 'train' to a pipeline with a random resized crop and a
        random horizontal flip, and 'val' to a pipeline with a resize and a
        center crop. Both end with tensor conversion and normalization using
        the same three-value mean/std constants.
    """
    # Normalization constants handed to transforms.Normalize in both pipelines.
    # NOTE(review): presumably computed from this project's images — confirm.
    norm_mean = [0.46079967214138484, 0.46104556926356555, 0.4559120253283609]
    norm_std = [0.23117445026151823, 0.22748220382304327, 0.2637965208115187]

    # Training pipeline: random 512-pixel crop plus random flip.
    train_pipeline = transforms.Compose([
        transforms.RandomResizedCrop(512),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=list(norm_mean), std=list(norm_std)),
    ])

    # Validation pipeline: resize followed by a 512-pixel center crop.
    val_pipeline = transforms.Compose([
        transforms.Resize(512),
        transforms.CenterCrop(512),
        transforms.ToTensor(),
        transforms.Normalize(mean=list(norm_mean), std=list(norm_std)),
    ])

    return {'train': train_pipeline, 'val': val_pipeline}

In [3]:
def load_data(data_dir, batch_size=32, use_fixed_training_set=False, training_set_size=5, seed=None):
    """Create training and validation dataloaders from an image folder.

    The images under ``data_dir`` are read with ``datasets.ImageFolder`` and
    split once into disjoint training and validation subsets. The training
    subset is served through the 'train' transforms and the validation subset
    through the 'val' transforms returned by ``get_data_transforms``.

    Args:
        data_dir: Root directory passed to ``datasets.ImageFolder``.
        batch_size: Batch size used by both dataloaders.
        use_fixed_training_set: When True, the training subset holds exactly
            ``training_set_size`` images and every other image is used for
            validation. When False, 80% of the images are used for training.
        training_set_size: Number of training images when
            ``use_fixed_training_set`` is True.
        seed: Optional integer seed for the split. ``None`` (the default)
            draws the split from torch's global random generator.

    Returns:
        tuple: ``(dataloaders, dataset_sizes, class_names)`` where the first
        two are dicts keyed by 'train' and 'val' and the last is the class
        list reported by ``ImageFolder``.

    Raises:
        ValueError: If ``use_fixed_training_set`` is True and
            ``training_set_size`` is not between 1 and the number of images.
    """
    data_transforms = get_data_transforms()

    # Two views over the same folder, one per transform pipeline. Splitting a
    # single dataset built with the 'train' transforms would apply the random
    # augmentations to the validation images as well.
    train_source = datasets.ImageFolder(data_dir, data_transforms['train'])
    val_source = datasets.ImageFolder(data_dir, data_transforms['val'])
    total = len(train_source)

    if use_fixed_training_set:
        # Use a fixed-size training set (training_set_size data points)
        train_size = training_set_size
        if not 0 < train_size <= total:
            raise ValueError(
                f"training_set_size must be between 1 and {total}, got {training_set_size}"
            )
    else:
        # Split the data into 80% training and 20% validation
        train_size = int(0.8 * total)

    # One shuffled index list shared by both views keeps the subsets disjoint.
    if seed is None:
        permutation = torch.randperm(total)
    else:
        permutation = torch.randperm(total, generator=torch.Generator().manual_seed(seed))
    indices = permutation.tolist()

    train_dataset = torch.utils.data.Subset(train_source, indices[:train_size])
    val_dataset = torch.utils.data.Subset(val_source, indices[train_size:])

    dataloaders = {
        'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4),
        'val': DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    }

    dataset_sizes = {
        'train': len(train_dataset),
        'val': len(val_dataset)
    }

    class_names = train_source.classes

    return dataloaders, dataset_sizes, class_names


In [4]:
def create_model(num_classes):
    """Create a pre-trained ResNet18 whose final layer outputs ``num_classes`` scores.

    Args:
        num_classes: Number of output units for the replacement ``fc`` layer.

    Returns:
        The ResNet18 model with its fully connected layer replaced by a new
        ``nn.Linear`` of matching input width.
    """
    # Newer torchvision releases select checkpoints through the `weights`
    # argument and warn on `pretrained=True`; older releases only know the
    # boolean flag, so fall back to it when the weights enum is absent.
    weights_enum = getattr(models, 'ResNet18_Weights', None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)

    # Get the input dimensions of the last layer (fully connected layer)
    num_ftrs = model.fc.in_features

    # Replace the last layer with a new one that has `num_classes` outputs
    model.fc = nn.Linear(num_ftrs, num_classes)

    return model

In [5]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase (with gradient updates) followed by a
    'val' phase (no gradients). The weights from the epoch with the highest
    validation accuracy are snapshotted and restored before returning.

    Args:
        model: The ``nn.Module`` to train. Batches are moved to the device of
            its first parameter (CPU if it has no parameters).
        dataloaders: Mapping with 'train' and 'val' entries, each an iterable
            yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        num_epochs: Number of passes over both phases.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    # Take the device from the model itself instead of relying on a global
    # variable defined in another notebook cell.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # state_dict() returns references to the live tensors, so a deep copy is
    # required for the snapshot to survive later optimizer steps.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            model.train(is_training)  # train(False) is the same as eval()

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(run_device), labels.to(run_device)

                if is_training:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                batch_count = inputs.size(0)
                # Weighting by batch size assumes the criterion returns a
                # per-batch mean — TODO confirm for custom criteria.
                running_loss += loss.item() * batch_count
                running_corrects += int(torch.sum(preds == labels).item())
                num_samples += batch_count

            # An empty loader would otherwise cause a division by zero.
            if num_samples == 0:
                print(f'{phase} phase produced no samples; skipping metrics')
                continue

            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    print(f'Best val Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model


In [None]:

# Entry point of the notebook: load the data, fine-tune the model, evaluate it
# on the validation split and save the resulting weights.
if __name__ == "__main__":
    data_dir = "campusVision"
    
    # Use a fixed training set size of 5 data points; every remaining image
    # goes to the validation set
    dataloaders, dataset_sizes, class_names = load_data(data_dir, use_fixed_training_set=True, training_set_size=5)

    # Or, split the data into 80% training and 20% validation
    # dataloaders, dataset_sizes, class_names = load_data(data_dir, use_fixed_training_set=False)

    # Run on the first CUDA device when one is available, otherwise on the CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = create_model(num_classes=len(class_names))
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    # Train the model
    model = train_model(model, dataloaders, criterion, optimizer, num_epochs=10)

    # Evaluate the model on the validation set
    # NOTE(review): evaluate_model is not defined in the visible cells —
    # confirm it exists before this line runs.
    print("Validation Set Performance:")
    evaluate_model(model, dataloaders['val'], criterion, class_names)

    # Save the weights of the model returned by train_model
    # NOTE(review): absolute, machine-specific path — consider making it configurable.
    torch.save(model.state_dict(), "/home/lalit/Projects/campusVisionChallenge/resnet-pretrained.pth")



Epoch 0/9
----------
train Loss: 2.3770 Acc: 0.0000
