# dAiv AI_Competition[2024]_Pro Baseline for PyTorch

## Import Libraries

In [2]:
from os import path, rename, mkdir, listdir

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

from torchvision import datasets, utils, models
from torchvision import transforms

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# datasets.utils.tqdm = tqdm
# %matplotlib inline

### Check GPU Availability

In [4]:
!nvidia-smi

Tue Oct 15 14:57:09 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   41C    P0    34W / 250W |   7724MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   39C    P0    30W / 250W |  15369MiB / 16280MiB |      0%      Defaul

In [5]:
# Set CUDA Device Number 0~7
DEVICE_NUM = 7

device = torch.device("cpu")
if torch.cuda.is_available():
    torch.cuda.set_device(DEVICE_NUM)
    device = torch.device("cuda")
print("INFO: Using device -", device)

from typing import Callable, Optional
from sklearn.model_selection import train_test_split


class ImageDataset(datasets.ImageFolder):
    download_url = "https://daiv-cnu.duckdns.org/contest/ai_competition[2024]_pro/dataset/archive.zip"
    random_state = 20241028

    def __init__(
            self, root: str, force_download: bool = True,
            train: bool = False, valid: bool = False, split_ratio: float = 0.8,
            test: bool = False, unlabeled: bool = False,
            transform: Optional[Callable] = None, target_transform: Optional[Callable] = None
    ):
        self.download(root, force=force_download)  # Download Dataset from server

        if train or valid:  # Set-up directory
            root = path.join(root, "train")
        else:
            root = path.join(root, "test" if test else "unlabeled" if unlabeled else None)

        # Initialize ImageFolder
        super().__init__(root=root, transform=transform, target_transform=target_transform)

        if train or valid:  # Split Train and Validation Set
            seperated = train_test_split(
                self.samples, self.targets, test_size=1-split_ratio, stratify=self.targets, random_state=self.random_state
            )
            self.samples, self.targets = (seperated[0], seperated[2]) if train else (seperated[1], seperated[3])
            self.imgs = self.samples

    @classmethod
    def download(cls, root: str, force: bool = False):
        if force or not path.isfile(path.join(root, "archive.zip")):
            # Download and Extract Dataset
            datasets.utils.download_and_extract_archive(cls.download_url, download_root=root, extract_root=root, filename="archive.zip")

            # Arrange Dataset Directory
            for target_dir in [path.join(root, "test"), path.join(root, "unlabeled")]:
                for file in listdir(target_dir):
                    mkdir(path.join(target_dir, file.replace(".jpg", "")))
                    rename(path.join(target_dir, file), path.join(target_dir, file.replace(".jpg", ""), file))

            print("INFO: Dataset archive downloaded and extracted.")
        else:
            print("INFO: Dataset archive found in the root directory. Skipping download.")

# Image Resizing and Tensor Conversion
IMG_SIZE = (256, 256)
IMG_NORM = dict(  # ImageNet Normalization
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)

resizer = transforms.Compose([
    transforms.Resize(IMG_SIZE),  # Resize Image
    transforms.ToTensor(),  # Convert Image to Tensor
    transforms.Normalize(**IMG_NORM)  # Normalization
])

DATA_ROOT = path.join(".", "data")

train_dataset = ImageDataset(root=DATA_ROOT, force_download=False, train=True, transform=resizer)
valid_dataset = ImageDataset(root=DATA_ROOT, force_download=False, valid=True, transform=resizer)

test_dataset = ImageDataset(root=DATA_ROOT, force_download=False, test=True, transform=resizer)
unlabeled_dataset = ImageDataset(root=DATA_ROOT, force_download=False, unlabeled=True, transform=resizer)

print(f"INFO: Dataset loaded successfully. Number of samples - Train({len(train_dataset)}), Valid({len(valid_dataset)}), Test({len(test_dataset)}), Unlabeled({len(unlabeled_dataset)})")

ROTATE_ANGLE = 20
COLOR_TRANSFORM = 0.1

augmenter = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(ROTATE_ANGLE),
    transforms.ColorJitter(
        brightness=COLOR_TRANSFORM, contrast=COLOR_TRANSFORM,
        saturation=COLOR_TRANSFORM, hue=COLOR_TRANSFORM
    ),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0), ratio=(0.75, 1.333)),
    resizer
])

train_dataset = ImageDataset(root=DATA_ROOT, force_download=False, train=True, transform=augmenter)

print(f"INFO: Train dataset has been overridden with augmented state. Number of samples - Train({len(train_dataset)})")

# Set Batch Size
BATCH_SIZE = 128

MULTI_PROCESSING = True  # Set False if DataLoader is causing issues

from platform import system
if MULTI_PROCESSING and system() != "Windows":  # Multiprocess data loading is not supported on Windows
    import multiprocessing
    cpu_cores = multiprocessing.cpu_count()
    print(f"INFO: Number of CPU cores - {cpu_cores}")
else:
    cpu_cores = 0
    print("INFO: Using DataLoader without multi-processing.")

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=cpu_cores)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=cpu_cores)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=cpu_cores)
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=cpu_cores)

# Image Visualizer
def imshow(image_list, mean=IMG_NORM['mean'], std=IMG_NORM['std']):
    np_image = np.array(image_list).transpose((1, 2, 0))
    de_norm_image = np_image * std + mean
    plt.figure(figsize=(10, 10))
    plt.imshow(de_norm_image)

images, targets = next(iter(train_loader))
grid_images = utils.make_grid(images, nrow=8, padding=10)
# imshow(grid_images)

from IPython.display import display
import ipywidgets as widgets

# Interactive Loss Plot Update
def create_plot():
    losses = []

    # Enable Interactive Mode
    plt.ion()

    # Loss Plot Setting
    fig, ax = plt.subplots(figsize=(6, 2))
    line, = ax.plot(losses)
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    ax.set_title("Cross Entropy Loss")

    # Display Plot
    plot = widgets.Output()
    display(plot)

    def update_plot(new_loss):
        losses.append(new_loss.item())
        line.set_ydata(losses)
        line.set_xdata(range(len(losses)))
        ax.relim()
        ax.autoscale_view()
        with plot:
            plot.clear_output(wait=True)
            display(fig)

    return update_plot

class GradientReversalFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return -ctx.alpha * grad_output, None

class SecondMaxLayer(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x = x.clone()
        max_val, max_idx = torch.max(x, dim=1, keepdim=True)
        x.scatter_(1, max_idx, 1e-12)
        return x

class ImageClassifier(nn.Module):
    def __init__(self, input_channel: int, output_channel: int, adaptive_pool_size: int, img_size: int, num_classes: int):
        super().__init__()
        self.multiple_output = False

        # Feature Extractor
        self.resnet = models.resnet34(pretrained=True)
        self.fc_size = self.resnet.fc.in_features
        self.resnet.fc = nn.Identity()  # Fully connected layer 제거

        # Adaptive Layer
        self.feature_extractor = nn.Sequential(
            nn.Linear(self.fc_size, adaptive_pool_size),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        # Label Predictor
        self.classifier = nn.Linear(adaptive_pool_size, num_classes)
        self.secondary = SecondMaxLayer()  # For multi-label classification

        # Domain Classifier
        self.domain_classifier = nn.Sequential(
            nn.Linear(adaptive_pool_size, 100),
            nn.ReLU(),
            nn.Linear(100, 1)
        )

    def toggle_multilabel(self, multi_label: bool = None):
        if isinstance(multi_label, bool):
            self.multiple_output = multi_label
        else:
            self.multiple_output = not self.multiple_output

    def forward(self, x, alpha=0.0):
        # Feature Extraction
        features = self.resnet(x)
        features = self.feature_extractor(features)

        # Label Prediction
        class_output = self.classifier(features)
        if self.multiple_output:
            secondary_output = self.secondary(class_output)
        else:
            secondary_output = None

        # Domain Classification with Gradient Reversal
        reverse_feature = GradientReversalFunction.apply(features, alpha)
        domain_output = self.domain_classifier(reverse_feature)

        return domain_output, class_output, secondary_output


CLASS_LABELS = len(train_dataset.classes)

MODEL_PARAMS = dict(
    input_channel=3, output_channel=64, adaptive_pool_size=512,
    img_size=IMG_SIZE[0], num_classes=CLASS_LABELS
)

# Initialize Model
model = ImageClassifier(**MODEL_PARAMS)
model.toggle_multilabel(True)
model.to(device)

LEARNING_RATE = 0.0001

label_criterion = nn.BCEWithLogitsLoss()  # 멀티 레이블 분류를 위한 손실 함수
domain_criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5)

# Set Epoch Count
num_epochs = 10

INFO: Using device - cuda
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset loaded successfully. Number of samples - Train(7478), Valid(1870), Test(1110), Unlabeled(380)
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Train dataset has been overridden with augmented state. Number of samples - Train(7478)
INFO: Number of CPU cores - 48




In [7]:
train_length, valid_length = map(len, (train_loader, valid_loader))

epochs = tqdm(range(num_epochs), desc="Running Epochs")
with (tqdm(total=train_length, desc="Training") as train_progress,
      tqdm(total=valid_length, desc="Validation") as valid_progress):  # Set up Progress Bars
    # update = create_plot()  # Create Loss Plot (이 함수가 정의되어 있어야 합니다.)

    from itertools import zip_longest

    # Set up for training
    alpha = 0.0  # Gradient Reversal 강도 조절
    for epoch in epochs:
        train_progress.reset(total=train_length)
        valid_progress.reset(total=valid_length)

        model.train()
        len_dataloader = min(len(train_loader), len(unlabeled_loader))
        train_iter = iter(train_loader)
        unlabeled_iter = iter(unlabeled_loader)

        for i in range(len_dataloader):
            # 데이터 로드
            data_source = next(train_iter)
            data_target = next(unlabeled_iter)

            # 레이블이 있는 데이터 (소스 도메인)
            inputs_source, labels_source = data_source
            inputs_source, labels_source = inputs_source.to(device), labels_source.to(device)
            domain_labels_source = torch.zeros(inputs_source.size(0), 1).to(device)  # 소스 도메인 레이블: 0

            # 레이블이 없는 데이터 (타겟 도메인)
            inputs_target = data_target

            # inputs_target이 리스트인 경우 텐서로 변환
            if isinstance(inputs_target, list):
                inputs_target = torch.stack(inputs_target, dim=0)
            elif isinstance(inputs_target, tuple):
                inputs_target = inputs_target[0]

            inputs_target = inputs_target.to(device)
            domain_labels_target = torch.ones(inputs_target.size(0), 1).to(device)  # 타겟 도메인 레이블: 1

            # 모델 초기화
            optimizer.zero_grad()

            # 소스 도메인 데이터로 예측
            p = float(i + epoch * len_dataloader) / (num_epochs * len_dataloader)
            alpha = 2. / (1. + np.exp(-10 * p)) - 1  # GRL의 스케줄링 파라미터
            domain_output_s, class_output_s, _ = model(inputs_source, alpha=alpha)
            # 타겟 도메인 데이터로 도메인 분류기 예측
            domain_output_t, _, _ = model(inputs_target, alpha=alpha)

            # 손실 계산
            # 레이블 예측 손실 (소스 도메인)
            label_loss = label_criterion(class_output_s, labels_source.float())
            # 도메인 분류 손실 (소스 + 타겟 도메인)
            domain_output = torch.cat((domain_output_s, domain_output_t), 0)
            domain_labels = torch.cat((domain_labels_source, domain_labels_target), 0)
            domain_loss = domain_criterion(domain_output, domain_labels)

            # 총 손실
            loss = label_loss + domain_loss

            # update(loss)  # 이 부분은 주석 처리 또는 함수 정의 필요
            train_progress.update(1)
            print(f"\rEpoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len_dataloader}], Loss: {loss.item():.4f}", end="")

            # 역전파 및 최적화
            loss.backward()
            optimizer.step()

        # 검증 루프
        model.eval()
        val_loss = 0.0
        val_acc = 0.0
        total_samples = 0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                batch_size = inputs.size(0)
                total_samples += batch_size

                # 예측
                _, outputs, _ = model(inputs)

                # 손실 계산
                loss = label_criterion(outputs, labels.float())
                val_loss += loss.item() * batch_size  # 배치 손실의 합

                # 정확도 계산
                y_pred = torch.sigmoid(outputs)
                y_pred = (y_pred > 0.5).float()
                correct = (y_pred == labels).float().sum()  # 배치에서 맞은 예측의 수
                val_acc += correct.item()

        # 평균 손실 및 정확도 계산
        val_loss = val_loss / total_samples
        val_acc = val_acc / (total_samples * labels.size(1))  # 멀티 레이블이므로 클래스 수로 나눔

        print(f"\nEpoch [{epoch+1:2}/{num_epochs}], Validation Loss: {val_loss:.6f}, Validation Accuracy: {val_acc:.6%}")

Running Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/59 [00:00<?, ?it/s]

Validation:   0%|          | 0/15 [00:00<?, ?it/s]

RuntimeError: stack expects each tensor to be equal size, but got [128, 3, 256, 256] at entry 0 and [128] at entry 1

In [None]:
if not path.isdir(path.join(".", "models")):
    mkdir(path.join(".", "models"))

# Model Save
save_path = path.join(".", "models", f"test.pt")
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")

# Model Evaluation

In [None]:
# Load Model
model_id = "test"

model = ImageClassifier(**MODEL_PARAMS)
model.load_state_dict(torch.load(path.join(".", "models", f"{model_id}.pt")))
model.to(device)

In [None]:
results = dict(id=[], label1=[], label2=[])
test_length = len(test_dataset)

model.eval()
model.toggle_multilabel(True)
with torch.no_grad():
    for inputs, ids in tqdm(test_loader):
        inputs = inputs.to(device)
        _, outputs1, outputs2 = model(inputs)
        preds1, preds2 = torch.max(outputs1, 1)[1], torch.max(outputs2, 1)[1]
        results['id'] += [test_dataset.classes[i] for i in ids]
        results['label1'] += preds1.cpu().detach().numpy().tolist()
        results['label2'] += preds2.cpu().detach().numpy().tolist()

In [None]:
def evaluate_model_with_single_and_multi_labels(model, dataloader, device, test_dataset):
    """
    모델의 추론 결과를 id, label1, label2로 저장합니다.
    - 단일 레이블일 경우 label1에는 -1, label2에는 해당 레이블이 들어가야 합니다.
    - 멀티 레이블일 경우 두 레이블이 오름차순으로 정렬되어 label1과 label2에 들어갑니다.
    """
    results = dict(id=[], label1=[], label2=[])

    model.eval()  # 평가 모드로 설정
    model.toggle_multilabel(True)  # 멀티 레이블 분류 활성화

    with torch.no_grad():  # 그라디언트 계산 비활성화
        for inputs, ids in tqdm(dataloader):  # test_loader에서 입력과 id를 가져옴
            inputs = inputs.to(device)

            # 모델 출력
            _, outputs1, outputs2 = model(inputs)

            # 각 출력의 예측 클래스 계산
            preds1 = torch.max(outputs1, 1)[1]  # outputs1에서 가장 높은 값을 가지는 클래스
            preds2 = torch.max(outputs2, 1)[1]  # outputs2에서 가장 높은 값을 가지는 클래스

            # 각 배치의 예측 결과 처리
            for i in range(len(preds1)):
                id_ = test_dataset.classes[ids[i]]  # 해당 샘플의 id

                if preds1[i] == preds2[i]:  # 단일 레이블인 경우
                    label1, label2 = -1, preds1[i].item()
                else:  # 멀티 레이블인 경우
                    label1, label2 = sorted([preds1[i].item(), preds2[i].item()])

                # 결과 저장
                results['id'].append(id_)
                results['label1'].append(label1)
                results['label2'].append(label2)

    return results


# 모델을 평가하는 함수 호출 예시
results = evaluate_model_with_single_and_multi_labels(model, test_loader, device, test_dataset)

results_df = pd.DataFrame(results)
results_df


In [None]:
# Re-arrange Results
for i, labels in enumerate(zip(results['label1'], results['label2'])):
    #results['label1'][i], results['label2'][i] = min(labels), max(labels)
    results['label1'][i], results['label2'][i] = -1, results['label1'][i]  # 멀티 라벨 분류 안하고 그냥 '-1, 라벨'로 처리

results_df = pd.DataFrame(results)
results_df

In [None]:
# Save Results
submission_dir = "submissions"
if not path.isdir(submission_dir):
    mkdir(submission_dir)

submit_file_path = path.join(submission_dir, f"test.csv")
results_df.to_csv(submit_file_path, index=False)
print("File saved to", submit_file_path)