# dAiv AI_Competition[2024]_Pro

## Import Libraries

In [1]:
# %pip install pygwalker wandb

In [2]:
from os import path, rename, mkdir, listdir

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

from torchvision import datasets, utils, transforms, models

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pygwalker as pyg
import wandb

datasets.utils.tqdm = tqdm
%matplotlib inline

In [3]:
# WandB Initialization
wandb.init(project="dAiv-ai-competition-2024-pro")

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mrnoro5122[0m ([33mrnoro5122-chungnam-national-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


### Check GPU Availability

In [4]:
!nvidia-smi

Sun Oct 27 08:00:14 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   47C    P0   201W / 250W |  15918MiB / 16280MiB |     19%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   41C    P0    31W / 250W |  15369MiB / 16280MiB |      0%      Defaul

In [5]:
# Set CUDA Device Number 0~7
DEVICE_NUM = 7

device = torch.device("cpu")
if torch.cuda.is_available():
    torch.cuda.set_device(DEVICE_NUM)
    device = torch.device("cuda")
print("INFO: Using device -", device)

INFO: Using device - cuda


## Load DataSets

In [6]:
from typing import Callable, Optional
from sklearn.model_selection import train_test_split


class ImageDataset(datasets.ImageFolder):
    download_url = "https://daiv-cnu.duckdns.org/contest/ai_competition[2024]_pro/dataset/archive.zip"
    random_state = 20241028

    def __init__(
            self, root: str, force_download: bool = True,
            train: bool = False, valid: bool = False, split_ratio: float = 0.8,
            test: bool = False, unlabeled: bool = False,
            transform: Optional[Callable] = None, target_transform: Optional[Callable] = None
    ):
        self.download(root, force=force_download)  # Download Dataset from server

        if train or valid:  # Set-up directory
            root = path.join(root, "train")
        else:
            root = path.join(root, "test" if test else "unlabeled" if unlabeled else None)

        # Initialize ImageFolder
        super().__init__(root=root, transform=transform, target_transform=target_transform)

        if train or valid:  # Split Train and Validation Set
            seperated = train_test_split(
                self.samples, self.targets, test_size=1-split_ratio, stratify=self.targets, random_state=self.random_state
            )
            self.samples, self.targets = (seperated[0], seperated[2]) if train else (seperated[1], seperated[3])
            self.imgs = self.samples

    @property
    def df(self) -> pd.DataFrame:
        return pd.DataFrame(dict(path=[d[0] for d in self.samples], label=[self.classes[lb] for lb in self.targets]))

    @classmethod
    def download(cls, root: str, force: bool = False):
        if force or not path.isfile(path.join(root, "archive.zip")):
            # Download and Extract Dataset
            datasets.utils.download_and_extract_archive(cls.download_url, download_root=root, extract_root=root, filename="archive.zip")

            # Arrange Dataset Directory
            for target_dir in [path.join(root, "test"), path.join(root, "unlabeled")]:
                for file in listdir(target_dir):
                    mkdir(path.join(target_dir, file.replace(".jpg", "")))
                    rename(path.join(target_dir, file), path.join(target_dir, file.replace(".jpg", ""), file))

            print("INFO: Dataset archive downloaded and extracted.")
        else:
            print("INFO: Dataset archive found in the root directory. Skipping download.")

### Dataset Initialization

In [7]:
# Image Resizing and Tensor Conversion
IMG_SIZE = (256, 256)
IMG_NORM = dict(  # ImageNet Normalization
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)

resizer = transforms.Compose([
    transforms.Resize(IMG_SIZE),  # Resize Image
    transforms.ToTensor(),  # Convert Image to Tensor
    transforms.Normalize(**IMG_NORM)  # Normalization
])

In [8]:
DATA_ROOT = path.join(".", "data")

train_dataset = ImageDataset(root=DATA_ROOT, force_download=False, train=True, transform=resizer)
valid_dataset = ImageDataset(root=DATA_ROOT, force_download=False, valid=True, transform=resizer)

test_dataset = ImageDataset(root=DATA_ROOT, force_download=False, test=True, transform=resizer)
unlabeled_dataset = ImageDataset(root=DATA_ROOT, force_download=False, unlabeled=True, transform=resizer)

print(f"INFO: Dataset loaded successfully. Number of samples - Train({len(train_dataset)}), Valid({len(valid_dataset)}), Test({len(test_dataset)}), Unlabeled({len(unlabeled_dataset)})")

INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset loaded successfully. Number of samples - Train(7478), Valid(1870), Test(1110), Unlabeled(380)


### Visualize Dataset Distribution
    - for checking...

In [9]:
# Label Check
for i, label in zip(range(5), train_dataset.targets):
    print(i, "-", train_dataset.classes[label])

0 - 050.Pelagic_Cormorant
1 - 020.Leonberger
2 - 104.Black_capped_Vireo
3 - 020.Leonberger
4 - 009.Siamese


In [10]:
train_dataset.df

Unnamed: 0,path,label
0,./data/train/050.Pelagic_Cormorant/016.jpg,050.Pelagic_Cormorant
1,./data/train/020.Leonberger/162.jpg,020.Leonberger
2,./data/train/104.Black_capped_Vireo/030.jpg,104.Black_capped_Vireo
3,./data/train/020.Leonberger/059.jpg,020.Leonberger
4,./data/train/009.Siamese/120.jpg,009.Siamese
...,...,...
7473,./data/train/023.Pomeranian/061.jpg,023.Pomeranian
7474,./data/train/019.Keeshond/013.jpg,019.Keeshond
7475,./data/train/012.Boxer/156.jpg,012.Boxer
7476,./data/train/039.Brewer_Blackbird/009.jpg,039.Brewer_Blackbird


In [11]:
# Train Dataset Distribution
pyg.walk(train_dataset.df)

Box(children=(HTML(value='\n<div id="ifr-pyg-00062570be2559f5HGQPp7Mjz936omLY" style="height: auto">\n    <hea…

<pygwalker.api.pygwalker.PygWalker at 0x7f584b5051f0>

In [12]:
valid_dataset.df

Unnamed: 0,path,label
0,./data/train/071.Evening_Grosbeak/034.jpg,071.Evening_Grosbeak
1,./data/train/066.Eared_Grebe/004.jpg,066.Eared_Grebe
2,./data/train/118.Rock_Wren/033.jpg,118.Rock_Wren
3,./data/train/035.Groove_billed_Ani/022.jpg,035.Groove_billed_Ani
4,./data/train/008.Russian_Blue/098.jpg,008.Russian_Blue
...,...,...
1865,./data/train/012.Boxer/000.jpg,012.Boxer
1866,./data/train/016.Great_Pyrenees/003.jpg,016.Great_Pyrenees
1867,./data/train/024.Pug/027.jpg,024.Pug
1868,./data/train/005.Egyptian_Mau/134.jpg,005.Egyptian_Mau


In [13]:
# Valid Dataset Distribution
walker = pyg.walk(valid_dataset.df, theme_key="streamlit")

Box(children=(HTML(value='\n<div id="ifr-pyg-00062570be300ff88v5jFsAkVLDEJYBz" style="height: auto">\n    <hea…

## Data Augmentation if needed

In [14]:
ROTATE_ANGLE = 20
COLOR_TRANSFORM = 0.1

In [15]:
augmenter = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(ROTATE_ANGLE),
    transforms.ColorJitter(
        brightness=COLOR_TRANSFORM, contrast=COLOR_TRANSFORM,
        saturation=COLOR_TRANSFORM, hue=COLOR_TRANSFORM
    ),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0), ratio=(0.75, 1.333)),
    resizer
])

In [16]:
train_dataset = ImageDataset(root=DATA_ROOT, force_download=False, train=True, transform=augmenter)

print(f"INFO: Train dataset has been overridden with augmented state. Number of samples - Train({len(train_dataset)})")

INFO: Dataset archive found in the root directory. Skipping download.
INFO: Train dataset has been overridden with augmented state. Number of samples - Train(7478)


### Label Transform

In [17]:
CLASS_LABELS = len(train_dataset.classes) + 1
COMBINATION_AXIS = 2

In [18]:
import itertools

class LabelTransformer:
    def __init__(self, num_classes: int, comb_axis: int):
        self.num_classes = num_classes
        self.comb_axis = comb_axis
        self.combinations = [(-1, n) for n in (*range(num_classes), -2)] + list(itertools.combinations((-2, *range(num_classes)), comb_axis))
        self.num_combinations = len(self.combinations)
        
    def find(self, comb_id):
        return self.combinations[comb_id]

In [19]:
label_transformer = LabelTransformer(CLASS_LABELS, COMBINATION_AXIS)

### Label * 2

In [20]:
train_dataset.targets = [[i, i] for i in train_dataset.targets]
valid_dataset.targets = [[i, i] for i in valid_dataset.targets]
train_dataset.samples = [(p, torch.tensor([i, i])) for p, i in train_dataset.samples]
valid_dataset.samples = [(p, torch.tensor([i, i])) for p, i in valid_dataset.samples]

## DataLoader

In [21]:
# Set Batch Size
BATCH_SIZE = 200

In [22]:
MULTI_PROCESSING = True  # Set False if DataLoader is causing issues

from platform import system
if MULTI_PROCESSING and system() != "Windows":  # Multiprocess data loading is not supported on Windows
    import multiprocessing
    cpu_cores = multiprocessing.cpu_count()
    print(f"INFO: Number of CPU cores - {cpu_cores}")
else:
    cpu_cores = 0
    print("INFO: Using DataLoader without multi-processing.")

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=cpu_cores)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=cpu_cores)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=cpu_cores)

INFO: Number of CPU cores - 48


In [23]:
# Image Visualizer
def imshow(image_list, mean=IMG_NORM['mean'], std=IMG_NORM['std']):
    np_image = np.array(image_list).transpose((1, 2, 0))
    de_norm_image = np_image * std + mean
    plt.figure(figsize=(10, 10))
    plt.imshow(de_norm_image)

In [24]:
#images, targets = next(iter(train_loader))
#grid_images = utils.make_grid(images, nrow=8, padding=10)
#imshow(grid_images)

## Define Model

In [25]:
class VisualEmbedding(nn.Module):
    """ Visual Embedding Model """
    
    def __init__(self, embedding_dim: int):
        super().__init__()

        self.resnet = models.resnet34(pretrained=True)
        self.resnet.avgpool = nn.AdaptiveMaxPool2d((1, 1))
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, embedding_dim)

    def forward(self, x):
        return self.resnet(x)

In [26]:
class ImageClassifier(nn.Module):
    def __init__(self, embedding_dim: int, comb_axis: int, num_combinations: int, num_classes: int):
        super().__init__()

        # Visual Embedding
        self.visual_embedding = VisualEmbedding(embedding_dim)
        self.semantic_embedding = nn.Linear(embedding_dim, comb_axis)
        self.converter = nn.Linear(comb_axis, num_combinations)

    def forward(self, x) -> torch.Tensor:
        embedding = self.visual_embedding(x)
        embedding = self.semantic_embedding(embedding)
        return self.converter(embedding)

In [27]:
# class ImageClassifier(nn.Module):
#     def __init__(self, embedding_dim: int, comb_axis: int, num_combinations: int, num_classes: int):
#         super().__init__()
#         self.num_classes = num_classes
# 
#         # Visual Embedding
#         self.visual_embedding = VisualEmbedding(embedding_dim)
#         self.hidden_size = self.visual_embedding.resnet.fc.in_features
#         self.semantic_embedding = nn.Sequential(
#             nn.LayerNorm(embedding_dim),
#             nn.Linear(self.hidden_size, self.hidden_size//2),
#             nn.ReLU(),
#             nn.Dropout(0.3),
#             nn.Linear(self.hidden_size//2, comb_axis),
#             nn.Sigmoid()
#         )
# 
#     def forward(self, x) -> torch.Tensor:
#         embedding = self.visual_embedding(x)
#         return self.semantic_embedding(embedding) * self.num_classes

In [28]:
EMBEDDING_DIM = 16  # 8~16: log(labels)

MODEL_PARAMS = dict(
    embedding_dim=EMBEDDING_DIM, comb_axis=COMBINATION_AXIS,
    num_combinations=label_transformer.num_combinations, num_classes=CLASS_LABELS
)

In [29]:
# Initialize Model
model = ImageClassifier(**MODEL_PARAMS)
model.to(device)



ImageClassifier(
  (visual_embedding): VisualEmbedding(
    (resnet): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  

In [30]:
LEARNING_RATE = 0.001
EPOCH = 10

#criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
lr_scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=LEARNING_RATE, steps_per_epoch=len(train_loader), epochs=EPOCH)

## Training Loop

In [31]:
#wandb.watch(model, criterion, log="all", log_freq=10)

In [32]:
from IPython.display import display
import ipywidgets as widgets

# Interactive Loss Plot Update
def create_plot():
    losses = []

    # Enable Interactive Mode
    plt.ion()

    # Loss Plot Setting
    fig, ax = plt.subplots(figsize=(6, 2))
    line, = ax.plot(losses)
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    ax.set_title("Cross Entropy Loss")

    # Display Plot
    plot = widgets.Output()
    display(plot)

    def update_plot(new_loss):
        losses.append(new_loss.item())
        line.set_ydata(losses)
        line.set_xdata(range(len(losses)))
        ax.relim()
        ax.autoscale_view()
        with plot:
            plot.clear_output(wait=True)
            display(fig)

    return update_plot

In [33]:
train_length, valid_length = map(len, (train_loader, valid_loader))

epochs = tqdm(range(EPOCH), desc="Running Epochs")
with (tqdm(total=train_length, desc="Training") as train_progress,
      tqdm(total=valid_length, desc="Validation") as valid_progress):  # Set up Progress Bars
    update = create_plot()  # Create Loss Plot

    for epoch in epochs:
        train_progress.reset(total=train_length)
        valid_progress.reset(total=valid_length)

        # Training
        model.train()
        for i, (inputs, targets) in enumerate(train_loader):
            optimizer.zero_grad()

            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            update(loss)
            train_progress.update(1)
            #if i != train_length-1: wandb.log({'Loss': loss.item()})
            print(f"\rEpoch [{epoch+1:2}/{EPOCH}], Step [{i+1:2}/{train_length}], Loss: {loss.item():.6f}", end="")

        val_acc, val_loss = 0, 0

        # Validation
        model.eval()
        with torch.no_grad():
            for inputs, targets in valid_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)

                val_loss += criterion(outputs, targets).item() / valid_length
                val_acc += (torch.max(outputs, 1)[1] == targets.data).sum() / len(valid_dataset)
                valid_progress.update(1)

        #wandb.log({'Loss': loss.item(), 'Val Acc': val_acc, 'Val Loss': val_loss})
        print(f"\rEpoch [{epoch+1:2}/{EPOCH}], Step [{train_length}/{train_length}], Loss: {loss.item():.6f}, Valid Acc: {val_acc:.6%}, Valid Loss: {val_loss:.6f}", end="\n" if (epoch+1) % 5 == 0 or (epoch+1) == EPOCH else "")

Running Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/38 [00:00<?, ?it/s]

Validation:   0%|          | 0/10 [00:00<?, ?it/s]

Output()

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (7503) must match the size of tensor b (2) at non-singleton dimension 1

In [None]:
if not path.isdir(path.join(".", "models")):
    mkdir(path.join(".", "models"))

# Model Save
save_path = path.join(".", "models", f"visual_embedding.pt")
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")

# Model Evaluation

In [None]:
# Load Model
model_id = "visual_embedding"

model = ImageClassifier(**MODEL_PARAMS)
model.load_state_dict(torch.load(path.join(".", "models", f"{model_id}.pt")))
model.to(device)

In [None]:
results = dict(id=[], label1=[], label2=[])
test_length = len(test_dataset)

model.eval()
with torch.no_grad():
    for inputs, ids in tqdm(test_loader):
        inputs = inputs.to(device)
        outputs = model(inputs)
        preds = torch.max(outputs, 1)[1]
        results['id'] += [test_dataset.classes[i] for i in ids]
        results['label1'] += preds.cpu().detach().numpy().tolist()
        results['label2'] += preds.cpu().detach().numpy().tolist()

In [None]:
# Re-arrange Results
for i, label in enumerate(results['label1']):
    results['label1'][i], results['label2'][i] = label_transformer.find(label)

results_df = pd.DataFrame(results)
results_df

In [None]:
_ids, _preds = [], []
test_length = len(test_dataset)

model.eval()
with torch.no_grad():
    for inputs, ids in tqdm(test_loader):
        inputs = inputs.to(device)
        _ids.extend([test_dataset.classes[i] for i in ids])
        _preds.extend(model.predict_top_k(inputs, k=2, min_similarity=0.3))

In [None]:
results = dict(id=[], label1=[], label2=[])
for i, labels in zip(_ids, _preds):
    results['id'].append(i)
    labels = [-2 if v == CLASS_LABELS-1 else v for v in (labels[0].item(), labels[1].item())]
    results['label1'].append(min(labels))
    results['label2'].append(max(labels))

results_df = pd.DataFrame(results)
results_df

In [None]:
# Save Results
submission_dir = "submissions"
if not path.isdir(submission_dir):
    mkdir(submission_dir)

submit_file_path = path.join(submission_dir, f"{model_id}.csv")
results_df.to_csv(submit_file_path, index=False)
print("File saved to", submit_file_path)

In [None]:
def calculate_score(submitted: pd.DataFrame, answer: pd.DataFrame) -> float:
    total_cases = len(answer)
    correct_cases = 0
    for _id in answer['id']:
        try:
            if submitted[submitted['id'] == _id].reset_index(drop=True).equals(answer[answer['id'] == _id].reset_index(drop=True)):
                correct_cases += 1
        except KeyError as ignored:
            pass  # The case when the id is not found in the submitted data
    return correct_cases / total_cases * 100

In [None]:
calculate_score(pd.read_csv("submissions/avisual_embedding.csv"), pd.read_csv("submissions/cc.csv"))