<a href="https://colab.research.google.com/github/soniamar2/Introduction-to-ML---Competition/blob/main/vit_WITH_spiralnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Libraries & config

In [None]:
import torch, time, os, copy, random, imageio, os, shutil, zipfile, tarfile, timm
from zipfile import ZipFile
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, ConcatDataset, Dataset
from torch import optim
from torch.optim import lr_scheduler
from torch.cuda.amp import GradScaler, autocast

import torchvision
from torchvision import datasets, models, transforms, utils
from torchvision.transforms import v2
from torchvision import datasets
from torchvision.transforms import AutoAugmentPolicy, InterpolationMode

from transformers import get_cosine_schedule_with_warmup

import json # to be removed
import requests


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import warnings
# Silences every warning category for the rest of the session, including
# deprecation notices emitted by torch / torchvision / timm.
warnings.filterwarnings("ignore")

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU + current CUDA device) RNGs.

    Also switches cuDNN to deterministic mode and turns off its benchmark
    mode.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Set the seed
seed = 47
set_seed(seed)

## Hyper-parameters

In [None]:
# Non-editable hyper-parameters

#Num_class = 196 #FOR AIRCRAFT
im_dimention = 224  # side length (pixels) used by RandomCrop / Resize in the transforms
layer_width = 512  # output width of each hidden SpinalNet layer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Editable hyper-parameters

num_epochs = 20 # 160 in original
batches = 32  # batch size used for the train / valid / test DataLoaders
val_split = 0.2  # fraction of the training folder held out for validation; DO NOT TOUCH UNLESS DRAMA HAPPENS

## Dataset & Transformations

In [None]:
class DatasetManager:
    """Unpack a local dataset archive and build train/valid/test dataloaders.

    After extraction ``dataset_dir`` is expected to contain a training
    directory with one sub-folder per class (read with ``ImageFolder``) and a
    flat test directory (read with ``CustomImageDataset``).
    """

    def __init__(self, dataset_dir, train_dir='train', test_dir='test'):
        self.dataset_dir = dataset_dir
        self.train_dir = os.path.join(dataset_dir, train_dir)
        self.test_dir = os.path.join(dataset_dir, test_dir)

    def handle_dataset(self, source):
        """Extract the archive at ``source`` into ``self.dataset_dir``.

        Raises:
            ValueError: if ``source`` is not an existing local file or has an
                unsupported extension.
        """
        if not os.path.isfile(source):
            raise ValueError("Only local files are supported in this setup")
        self._handle_local_file(source)

    def _handle_local_file(self, source):
        """Dispatch to the extractor that matches the archive extension."""
        if source.endswith('.zip'):
            self._extract_zip(source)
        elif source.endswith(('.tar', '.tar.gz', '.tgz')):
            self._extract_tar(source)
        else:
            raise ValueError("Unsupported file format")

    def _extract_zip(self, filepath):
        """Extract a zip archive and flatten a single wrapping folder."""
        with zipfile.ZipFile(filepath, 'r') as zip_ref:
            zip_ref.extractall(self.dataset_dir)
        self._correct_directory_structure()

    def _extract_tar(self, filepath):
        """Extract a (possibly compressed) tar archive and flatten it."""
        # NOTE(review): extractall() trusts member paths; only use this with
        # archives from a trusted source.
        with tarfile.open(filepath, 'r') as tar_ref:
            tar_ref.extractall(self.dataset_dir)
        self._correct_directory_structure()

    def _correct_directory_structure(self):
        """Hoist the contents of a lone top-level folder into ``dataset_dir``.

        Nothing happens unless ``dataset_dir`` contains exactly one directory.
        """
        extracted_folders = [
            name for name in os.listdir(self.dataset_dir)
            if os.path.isdir(os.path.join(self.dataset_dir, name))
        ]
        if len(extracted_folders) == 1:
            extracted_main_dir = os.path.join(self.dataset_dir, extracted_folders[0])
            for item in os.listdir(extracted_main_dir):
                shutil.move(os.path.join(extracted_main_dir, item), self.dataset_dir)
            os.rmdir(extracted_main_dir)

    def prepare_dataloaders(self, batch_size=batches, val_split=val_split, random_state=42,
                            num_workers=4):
        """Build the train / valid / test dataloaders.

        Args:
            batch_size: batch size used by all three loaders.
            val_split: fraction of the training folder held out for validation.
            random_state: seed of the generator that draws the train/valid
                split, so the split is reproducible.
            num_workers: worker processes per DataLoader.

        Returns:
            ``(dataloaders, dataset_sizes)``: two dicts keyed by ``'train'``,
            ``'valid'`` and ``'test'``. Sizes are the number of samples each
            loader actually iterates over.
        """
        normalize = v2.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])
        data_transforms = {
            'train': v2.Compose([
                v2.Resize((244, 244)),
                v2.RandomRotation(15,),
                v2.RandomCrop(im_dimention),
                #v2.ColorJitter(brightness=0.2,  #bad picture conditions (e.g. surveillance cameras)
                #               contrast=0.2,     # poor visibility (e.g. underwater images)
                #               saturation=0.2,
                #               hue=0.1),
                v2.RandomHorizontalFlip(),
                v2.ToTensor(),
                normalize,
                #v2.RandomErasing(p=0.1)
            ]),
            'valid': v2.Compose([
                v2.Resize((im_dimention, im_dimention)),
                v2.ToTensor(),
                normalize,
            ]),
            'test': v2.Compose([
                v2.Resize((im_dimention, im_dimention)),
                v2.ToTensor(),
                normalize,
            ]),
        }

        # Two views over the same folder: identical sample order, different
        # transforms. Splitting a single augmented ImageFolder would apply the
        # random training augmentations to the validation images as well.
        train_view = datasets.ImageFolder(self.train_dir, data_transforms['train'])
        valid_view = datasets.ImageFolder(self.train_dir, data_transforms['valid'])

        # Draw one seeded permutation and share it between both views.
        num_samples = len(train_view)
        train_size = int((1 - val_split) * num_samples)
        generator = torch.Generator().manual_seed(random_state)
        permutation = torch.randperm(num_samples, generator=generator).tolist()
        train_dataset = torch.utils.data.Subset(train_view, permutation[:train_size])
        val_dataset = torch.utils.data.Subset(valid_view, permutation[train_size:])

        # Load test dataset without class subfolders
        test_dataset = CustomImageDataset(self.test_dir, transform=data_transforms['test'])

        image_datasets = {'train': train_dataset, 'valid': val_dataset, 'test': test_dataset}

        # Create dataloaders; only the training loader shuffles.
        dataloaders = {
            split: DataLoader(dataset, batch_size=batch_size, shuffle=(split == 'train'),
                              num_workers=num_workers)
            for split, dataset in image_datasets.items()
        }

        # Sizes of the split subsets, not of the unsplit training folder.
        dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}

        return dataloaders, dataset_sizes

In [None]:
class CustomImageDataset(Dataset):
    """Dataset over the image files that sit directly inside ``root_dir``.

    Each item is ``(image, class_id)`` where ``class_id`` is the part of the
    file name before the first underscore (the whole file name when it
    contains no underscore).
    """

    # Extensions are matched case-insensitively.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sort so that the
        # sample order is reproducible across runs and machines.
        self.image_paths = [
            os.path.join(root_dir, fname)
            for fname in sorted(os.listdir(root_dir))
            if fname.lower().endswith(self.IMAGE_EXTENSIONS)
        ]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        class_id = os.path.basename(img_path).split('_')[0]
        # convert() returns a new in-memory image, so the file handle can be
        # released as soon as the block exits.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, class_id

## Dataloader

In [None]:
# Dataloader setup: the archive is extracted into the directory managed by
# DatasetManager, then the train / valid / test loaders are built with the
# default hyper-parameters.
dataset_manager = DatasetManager('/home/disi/COMPETITION_DATASET')

# Specify the path to the zipped dataset
dataset_manager.handle_dataset('/home/disi/CAR.zip')

dataloaders, dataset_sizes = dataset_manager.prepare_dataloaders()

In [None]:
def count_labels_in_train(train_dir):
    """Return the number of immediate sub-directories of ``train_dir``.

    Plain files are ignored; every sub-directory counts as one label.
    """
    return sum(
        1
        for entry in os.listdir(train_dir)
        if os.path.isdir(os.path.join(train_dir, entry))
    )

# The number of classes is derived from the class sub-folders of the
# extracted training directory.
train_dir_path = '/home/disi/COMPETITION_DATASET/train'
Num_class = count_labels_in_train(train_dir_path)



## Model

In [None]:
# Early stopping implementation
class EarlyStopping:
    """Signal when the validation loss has stopped improving.

    Call the instance once per epoch with the validation loss. ``early_stop``
    becomes ``True`` once ``patience`` consecutive calls failed to match or
    beat the best loss seen so far. A NaN loss counts as "no improvement".

    Args:
        patience: number of consecutive non-improving calls tolerated.
        verbose: when true, print a message at the moment stopping triggers.
    """

    def __init__(self, patience=7, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        """Record ``val_loss``; ``model`` is accepted for API symmetry and unused."""
        import math

        score = -val_loss  # higher is better
        # NaN compares false against everything: without this guard it would
        # be stored as the best score and no later epoch could ever count as
        # worse, silently disabling early stopping.
        is_valid = not math.isnan(score)
        if is_valid and (self.best_score is None or score >= self.best_score):
            self.best_score = score
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            if self.verbose:
                print('Early stopping')
            self.early_stop = True

early_stopping = EarlyStopping(patience=7, verbose=True)

In [None]:
# SpinalNet head with LeakyReLU

# Pretrained ViT-L/16 (224 px) from timm; its head is replaced further below.
model_ft = timm.create_model('vit_large_patch16_224', pretrained=True, num_classes=Num_class)  # Num_class+1
# Width of the feature vector fed to the head.
num_ftrs = model_ft.head.in_features
# SpinalNet feeds each hidden layer one half of the feature vector.
half_in_size = round(num_ftrs / 2)

class SpinalNet(nn.Module):
    """Classification head made of four chained "spinal" layers.

    Each hidden layer sees one half of the input features; layers 2-4 also
    receive the previous hidden output. The four hidden outputs are
    concatenated and mapped to ``Num_class`` logits.
    """

    def __init__(self):
        super().__init__()

        def hidden_block(in_features):
            # Dropout -> Linear -> BatchNorm -> LeakyReLU; the same recipe is
            # used by every hidden layer.
            return nn.Sequential(
                nn.Dropout(p=0.5),
                nn.Linear(in_features, layer_width),
                nn.BatchNorm1d(layer_width),
                nn.LeakyReLU(inplace=True),
            )

        chained_width = half_in_size + layer_width
        self.fc_spinal_layer1 = hidden_block(half_in_size)
        self.fc_spinal_layer2 = hidden_block(chained_width)
        self.fc_spinal_layer3 = hidden_block(chained_width)
        self.fc_spinal_layer4 = hidden_block(chained_width)
        self.fc_out = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(layer_width * 4, Num_class),  # Num_class+1
        )

    def forward(self, x):
        first_half = x[:, 0:half_in_size]
        second_half = x[:, half_in_size:2 * half_in_size]

        # The halves alternate while the previous hidden output is chained in.
        x1 = self.fc_spinal_layer1(first_half)
        x2 = self.fc_spinal_layer2(torch.cat([second_half, x1], dim=1))
        x3 = self.fc_spinal_layer3(torch.cat([first_half, x2], dim=1))
        x4 = self.fc_spinal_layer4(torch.cat([second_half, x3], dim=1))

        return self.fc_out(torch.cat([x1, x2, x3, x4], dim=1))

# Swap the classifier created by timm for the SpinalNet head defined above.
model_ft.head = SpinalNet()

In [None]:
def _restore_checkpoint(checkpoint_path, model, optimizer, scheduler):
    """Try to resume the training state stored at ``checkpoint_path``.

    Returns ``(start_epoch, best_acc, best_model_wts)`` on a full resume.
    Returns ``None`` when the checkpoint has no ``'model_state_dict'`` or does
    not match the current architecture; in the latter case the tensors that
    do match (same key and shape) are still loaded into ``model`` as a warm
    start, while optimizer, scheduler and epoch counter are left untouched.
    """
    # NOTE(review): torch.load unpickles the file; only load checkpoints this
    # notebook wrote itself. map_location lets a GPU checkpoint load on CPU.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    print("Checkpoint keys:", checkpoint.keys())
    if 'model_state_dict' not in checkpoint:
        print(f"Checkpoint '{checkpoint_path}' does not contain 'model_state_dict'. Starting from scratch.")
        return None

    saved_state = checkpoint['model_state_dict']
    current_state = model.state_dict()
    # Keep only tensors that exist in the current model with the same shape.
    # Rewriting key names (e.g. prefixing every "head." key) would silently
    # drop the head weights under strict=False.
    compatible = {
        key: value for key, value in saved_state.items()
        if key in current_state and value.shape == current_state[key].shape
    }
    model.load_state_dict(compatible, strict=False)

    if len(compatible) < len(current_state) or len(compatible) < len(saved_state):
        print(f"Checkpoint '{checkpoint_path}' does not match the current architecture "
              f"({len(compatible)}/{len(current_state)} tensors loaded). "
              "Optimizer, scheduler and epoch counter were not restored.")
        return None

    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    print(f"Loaded checkpoint '{checkpoint_path}' (epoch {checkpoint['epoch']})")
    return checkpoint['epoch'] + 1, float(checkpoint['best_acc']), checkpoint['best_model_wts']


def train_model(model, criterion, optimizer, scheduler, num_epochs=num_epochs, checkpoint_path='checkpoint.pth',
                step_scheduler_per_batch=True):
    """Train ``model`` with mixed precision, checkpointing and early stopping.

    Args:
        model: network to train; must already live on ``device``.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer over the model parameters.
        scheduler: learning-rate scheduler. ``ReduceLROnPlateau`` is stepped
            once per epoch with the validation loss; any other scheduler is
            stepped without arguments.
        num_epochs: index of the last epoch plus one (resumed runs continue
            from the checkpointed epoch).
        checkpoint_path: file read at start-up (if present) and rewritten
            after every completed epoch.
        step_scheduler_per_batch: for non-plateau schedulers, step after each
            optimizer update (needed for warm-up / cosine schedules sized in
            batches) instead of once per epoch.

    Returns:
        ``model`` loaded with the weights of the best validation accuracy.
    """
    since = time.time()
    use_amp = device.type == 'cuda'  # AMP / GradScaler are CUDA-only here
    scaler = GradScaler(enabled=use_amp)
    early_stopping = EarlyStopping(patience=7, verbose=True)
    plateau_scheduler = isinstance(scheduler, lr_scheduler.ReduceLROnPlateau)

    start_epoch, best_acc, best_model_wts = 0, 0.0, None

    # Load checkpoint if exists
    if os.path.exists(checkpoint_path):
        resumed = _restore_checkpoint(checkpoint_path, model, optimizer, scheduler)
        if resumed is not None:
            start_epoch, best_acc, best_model_wts = resumed
    if best_model_wts is None:
        # Snapshot after any warm start so the fallback matches the model.
        best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(start_epoch, num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    with autocast(enabled=use_amp):
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                    preds = outputs.argmax(dim=1)

                    if is_train:
                        scaler.scale(loss).backward()
                        scale_before = scaler.get_scale()
                        scaler.step(optimizer)
                        scaler.update()
                        # GradScaler skips optimizer.step() on inf/NaN grads
                        # and lowers its scale; do not advance the schedule
                        # for a skipped update.
                        optimizer_stepped = scaler.get_scale() >= scale_before
                        if step_scheduler_per_batch and not plateau_scheduler and optimizer_stepped:
                            scheduler.step()

                batch_size = inputs.size(0)
                samples_seen += batch_size
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()

            # Normalise by the samples actually iterated, so the metrics do
            # not depend on an externally computed dataset size.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'valid':
                if plateau_scheduler:
                    scheduler.step(epoch_loss)
                elif not step_scheduler_per_batch:
                    scheduler.step()
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                early_stopping(epoch_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

        # Save checkpoint
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'best_acc': best_acc,
            'best_model_wts': best_model_wts,
        }, checkpoint_path)

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60}m {time_elapsed % 60}s')
    print(f'Best val Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model

In [None]:
class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy against label-smoothed targets.

    The true class gets probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is shared equally among the other classes.
    """

    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, input, target):
        num_classes = input.size(-1)
        off_target = self.smoothing / (num_classes - 1.)

        # Start from the off-target probability everywhere, then overwrite the
        # true-class column of each row.
        soft_targets = torch.full_like(input, off_target)
        soft_targets.scatter_(-1, target.unsqueeze(-1), 1. - self.smoothing)

        log_prob = F.log_softmax(input, dim=-1)
        per_sample = (-soft_targets * log_prob).sum(dim=-1)
        return per_sample.mean()

criterion = LabelSmoothingCrossEntropy()

## Training

In [None]:
# Release cached, currently unused GPU memory held by PyTorch's allocator before training starts.
torch.cuda.empty_cache()

In [None]:
model_ft = model_ft.to(device)
#criterion = torch.nn.CrossEntropyLoss()

#optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9) #, weight_decay=1e-4)
#scheduler_ft = lr_scheduler.ReduceLROnPlateau(optimizer_ft, mode='min', factor=0.1, patience=4)

# Configure the optimizer
optimizer_ft = torch.optim.AdamW(model_ft.parameters(), lr=0.001, weight_decay=0.01)

# Parameters for warm-up and cosine decay
num_warmup_steps = 500  # Number of warm-up steps
# The schedule must span the whole run. With a shorter horizon (previously a
# hard-coded 15 epochs against num_epochs = 20) the half-cosine passes its
# minimum and the learning rate starts rising again in the final epochs.
num_training_steps = num_epochs * len(dataloaders['train'])  # Total number of training steps

# Create the combined scheduler
scheduler_ft = get_cosine_schedule_with_warmup(
    optimizer_ft,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps
)


model_ft = train_model(model_ft, criterion, optimizer_ft, scheduler_ft,
                       num_epochs=num_epochs,
                       checkpoint_path='checkpoint.pth'
                       )


Epoch 0/19
----------


KeyboardInterrupt: 

In [None]:
# Uncomment if running out of memory

#torch.cuda.empty_cache()

In [None]:
#model_ft = train_model(model_ft, criterion, optimizer_ft, scheduler_ft,
#                       num_epochs=num_epochs,
#                       checkpoint_path='checkpoints_comp_'
#                       )

Checkpoint keys: dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'scheduler_state_dict', 'best_acc', 'best_model_wts'])


RuntimeError: Error(s) in loading state_dict for VisionTransformer:
	Missing key(s) in state_dict: "head.fc_spinal_layer1.1.weight", "head.fc_spinal_layer1.1.bias", "head.fc_spinal_layer1.2.weight", "head.fc_spinal_layer1.2.bias", "head.fc_spinal_layer1.2.running_mean", "head.fc_spinal_layer1.2.running_var", "head.fc_spinal_layer2.1.weight", "head.fc_spinal_layer2.1.bias", "head.fc_spinal_layer2.2.weight", "head.fc_spinal_layer2.2.bias", "head.fc_spinal_layer2.2.running_mean", "head.fc_spinal_layer2.2.running_var", "head.fc_spinal_layer3.1.weight", "head.fc_spinal_layer3.1.bias", "head.fc_spinal_layer3.2.weight", "head.fc_spinal_layer3.2.bias", "head.fc_spinal_layer3.2.running_mean", "head.fc_spinal_layer3.2.running_var", "head.fc_spinal_layer4.1.weight", "head.fc_spinal_layer4.1.bias", "head.fc_spinal_layer4.2.weight", "head.fc_spinal_layer4.2.bias", "head.fc_spinal_layer4.2.running_mean", "head.fc_spinal_layer4.2.running_var", "head.fc_out.1.weight", "head.fc_out.1.bias". 
	Unexpected key(s) in state_dict: "head.weight", "head.bias". 

## Final testing

In [None]:
def submit(results, url="https://competition-production.up.railway.app/results/"):
    """POST ``results`` as JSON to the competition server and print the score.

    Args:
        results: JSON-serialisable submission payload.
        url: endpoint that receives the submission.

    Prints the reported accuracy, or the raw server response when the reply
    is not JSON or carries no ``accuracy`` field.
    """
    res = json.dumps(results)
    response = requests.post(url, res)
    try:
        result = json.loads(response.text)
        print(f"accuracy is {result['accuracy']}")
    except (json.JSONDecodeError, KeyError, TypeError):
        # KeyError / TypeError: valid JSON without an "accuracy" entry, e.g. a
        # structured error reply. Show what the server actually said.
        print(f"ERROR: {response.text}")



def test_model_collect_predictions(dataloaders, model, class_names):
    """Run ``model`` over the test loader and map each image id to a class name.

    Args:
        dataloaders: dict whose ``'test'`` entry yields ``(inputs, image_ids)``
            batches.
        model: trained classifier; it is switched to eval mode.
        class_names: sequence indexed by the predicted class index.

    Returns:
        dict ``{image_id: predicted class name}``.
    """
    model.eval()
    predictions = {}

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for inputs, image_ids in dataloaders['test']:
            inputs = inputs.to(device)
            outputs = model(inputs)
            # A separate name for the per-batch indices keeps the result dict
            # from being overwritten by the prediction tensor.
            batch_preds = outputs.argmax(dim=1).tolist()
            for image_id, pred in zip(image_ids, batch_preds):
                predictions[image_id] = class_names[pred]

    return predictions

In [None]:
# to be removed the day of the competition

def submit__(results, filename="results.json"):
    """Offline stand-in for ``submit``: dump ``results`` to a JSON file.

    Write failures are reported on stdout instead of being raised.
    """
    payload = json.dumps(results, indent=4)  # indented for readability
    try:
        with open(filename, "w") as out_file:
            out_file.write(payload)
    except IOError as err:
        print(f"ERROR: Unable to write to file {filename}. Exception: {err}")
    else:
        print(f"Results saved to {filename}")

In [None]:
# The train loader wraps a split subset; the class names live on the
# underlying ImageFolder, hence the double `.dataset`.
class_names = dataloaders['train'].dataset.dataset.classes

# Get predictions from the test set
preds = test_model_collect_predictions(dataloaders, model_ft, class_names)

# Prepare the submission dictionary
res = {
    "images": preds,
    "groupname": "Tanos Matadores"  # Replace with your actual group name
}


#submit__(res) #Test version
# Sends the predictions to the competition server (network call).
submit(res)

Results saved to results.json
