# dAiv AI_Competition[2024]_Pro Baseline for PyTorch

## Import Libraries

In [1]:
from os import path, rename, mkdir, listdir

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

from torchvision import datasets, utils, models
from torchvision import transforms

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

datasets.utils.tqdm = tqdm
%matplotlib inline

### Check GPU Availability

In [2]:
!nvidia-smi

Wed Oct 16 00:07:54 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   40C    P0    34W / 250W |    679MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   39C    P0    31W / 250W |  15369MiB / 16280MiB |      0%      Defaul

In [3]:
# Set CUDA Device Number 0~7
DEVICE_NUM = 7

device = torch.device("cpu")
if torch.cuda.is_available():
    torch.cuda.set_device(DEVICE_NUM)
    device = torch.device("cuda")
print("INFO: Using device -", device)

from typing import Callable, Optional
from sklearn.model_selection import train_test_split


class ImageDataset(datasets.ImageFolder):
    download_url = "https://daiv-cnu.duckdns.org/contest/ai_competition[2024]_pro/dataset/archive.zip"
    random_state = 20241028

    def __init__(
            self, root: str, force_download: bool = True,
            train: bool = False, valid: bool = False, split_ratio: float = 0.8,
            test: bool = False, unlabeled: bool = False,
            transform: Optional[Callable] = None, target_transform: Optional[Callable] = None
    ):
        self.download(root, force=force_download)  # Download Dataset from server

        if train or valid:  # Set-up directory
            root = path.join(root, "train")
        else:
            root = path.join(root, "test" if test else "unlabeled" if unlabeled else None)

        # Initialize ImageFolder
        super().__init__(root=root, transform=transform, target_transform=target_transform)

        if train or valid:  # Split Train and Validation Set
            seperated = train_test_split(
                self.samples, self.targets, test_size=1-split_ratio, stratify=self.targets, random_state=self.random_state
            )
            self.samples, self.targets = (seperated[0], seperated[2]) if train else (seperated[1], seperated[3])
            self.imgs = self.samples

    @classmethod
    def download(cls, root: str, force: bool = False):
        if force or not path.isfile(path.join(root, "archive.zip")):
            # Download and Extract Dataset
            datasets.utils.download_and_extract_archive(cls.download_url, download_root=root, extract_root=root, filename="archive.zip")

            # Arrange Dataset Directory
            for target_dir in [path.join(root, "test"), path.join(root, "unlabeled")]:
                for file in listdir(target_dir):
                    mkdir(path.join(target_dir, file.replace(".jpg", "")))
                    rename(path.join(target_dir, file), path.join(target_dir, file.replace(".jpg", ""), file))

            print("INFO: Dataset archive downloaded and extracted.")
        else:
            print("INFO: Dataset archive found in the root directory. Skipping download.")

# Image Resizing and Tensor Conversion
IMG_SIZE = (256, 256)
IMG_NORM = dict(  # ImageNet Normalization
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)

resizer = transforms.Compose([
    transforms.Resize(IMG_SIZE),  # Resize Image
    transforms.ToTensor(),  # Convert Image to Tensor
    transforms.Normalize(**IMG_NORM)  # Normalization
])

DATA_ROOT = path.join(".", "data")

train_dataset = ImageDataset(root=DATA_ROOT, force_download=False, train=True, transform=resizer)
valid_dataset = ImageDataset(root=DATA_ROOT, force_download=False, valid=True, transform=resizer)

test_dataset = ImageDataset(root=DATA_ROOT, force_download=False, test=True, transform=resizer)
unlabeled_dataset = ImageDataset(root=DATA_ROOT, force_download=False, unlabeled=True, transform=resizer)

print(f"INFO: Dataset loaded successfully. Number of samples - Train({len(train_dataset)}), Valid({len(valid_dataset)}), Test({len(test_dataset)}), Unlabeled({len(unlabeled_dataset)})")

ROTATE_ANGLE = 20
COLOR_TRANSFORM = 0.1

augmenter = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(ROTATE_ANGLE),
    transforms.ColorJitter(
        brightness=COLOR_TRANSFORM, contrast=COLOR_TRANSFORM,
        saturation=COLOR_TRANSFORM, hue=COLOR_TRANSFORM
    ),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0), ratio=(0.75, 1.333)),
    resizer
])

train_dataset = ImageDataset(root=DATA_ROOT, force_download=False, train=True, transform=augmenter)

print(f"INFO: Train dataset has been overridden with augmented state. Number of samples - Train({len(train_dataset)})")

# Set Batch Size
BATCH_SIZE = 128

MULTI_PROCESSING = True  # Set False if DataLoader is causing issues

from platform import system
if MULTI_PROCESSING and system() != "Windows":  # Multiprocess data loading is not supported on Windows
    import multiprocessing
    cpu_cores = multiprocessing.cpu_count()
    print(f"INFO: Number of CPU cores - {cpu_cores}")
else:
    cpu_cores = 0
    print("INFO: Using DataLoader without multi-processing.")

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=cpu_cores)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=cpu_cores)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=cpu_cores)
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=cpu_cores)

# Image Visualizer
def imshow(image_list, mean=IMG_NORM['mean'], std=IMG_NORM['std']):
    np_image = np.array(image_list).transpose((1, 2, 0))
    de_norm_image = np_image * std + mean
    plt.figure(figsize=(10, 10))
    plt.imshow(de_norm_image)

images, targets = next(iter(train_loader))
grid_images = utils.make_grid(images, nrow=8, padding=10)
# imshow(grid_images)

from IPython.display import display
import ipywidgets as widgets

# Interactive Loss Plot Update
def create_plot():
    losses = []

    # Enable Interactive Mode
    plt.ion()

    # Loss Plot Setting
    fig, ax = plt.subplots(figsize=(6, 2))
    line, = ax.plot(losses)
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Loss")
    ax.set_title("Cross Entropy Loss")

    # Display Plot
    plot = widgets.Output()
    display(plot)

    def update_plot(new_loss):
        losses.append(new_loss.item())
        line.set_ydata(losses)
        line.set_xdata(range(len(losses)))
        ax.relim()
        ax.autoscale_view()
        with plot:
            plot.clear_output(wait=True)
            display(fig)

    return update_plot

class GradientReversalFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return -ctx.alpha * grad_output, None

class SecondMaxLayer(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        x_copy = x.clone()
        max_val, max_idx = torch.max(x_copy, dim=1, keepdim=True)
        x_copy = x_copy.scatter(1, max_idx, -float('inf'))
        return x_copy

class ImageClassifier(nn.Module):
    def __init__(self, input_channel: int, output_channel: int, adaptive_pool_size: int, img_size: int, num_classes: int):
        super().__init__()
        self.multiple_output = False

        # Feature Extractor
        self.resnet = models.resnet34(pretrained=True)
        self.fc_size = self.resnet.fc.in_features
        self.resnet.fc = nn.Identity()  # Fully connected layer 제거

        # Adaptive Layer
        self.feature_extractor = nn.Sequential(
            nn.Linear(self.fc_size, adaptive_pool_size),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        # Label Predictor
        self.classifier = nn.Linear(adaptive_pool_size, num_classes)
        self.secondary = SecondMaxLayer()  # For multi-label classification

        # Domain Classifier
        self.domain_classifier = nn.Sequential(
            nn.Linear(adaptive_pool_size, 100),
            nn.ReLU(),
            nn.Linear(100, 1)
        )

    def toggle_multilabel(self, multi_label: bool = None):
        if isinstance(multi_label, bool):
            self.multiple_output = multi_label
        else:
            self.multiple_output = not self.multiple_output

    def forward(self, x, alpha=0.0):
        # Feature Extraction
        features = self.resnet(x)
        features = self.feature_extractor(features)
    
        # Label Prediction
        class_output = self.classifier(features)
        if self.multiple_output:
            secondary_output = self.secondary(class_output)
        else:
            secondary_output = None
    
        # Domain Classification with Gradient Reversal
        reverse_feature = GradientReversalFunction.apply(features, alpha)
        domain_output = self.domain_classifier(reverse_feature)
    
        return domain_output, class_output, secondary_output



CLASS_LABELS = len(train_dataset.classes)

MODEL_PARAMS = dict(
    input_channel=3, output_channel=64, adaptive_pool_size=512,
    img_size=IMG_SIZE[0], num_classes=CLASS_LABELS
)

# Initialize Model
model = ImageClassifier(**MODEL_PARAMS)
model.toggle_multilabel(True)
model.to(device)

LEARNING_RATE = 0.0001

INFO: Using device - cuda
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Dataset loaded successfully. Number of samples - Train(7478), Valid(1870), Test(1110), Unlabeled(380)
INFO: Dataset archive found in the root directory. Skipping download.
INFO: Train dataset has been overridden with augmented state. Number of samples - Train(7478)
INFO: Number of CPU cores - 48




In [4]:
# MultiLabelWrapper 클래스 정의 (오타 수정: Dataset 상속)
from torch.utils.data import Dataset

class MultiLabelWrapper(Dataset):
    def __init__(self, dataset, num_classes):
        self.dataset = dataset
        self.num_classes = num_classes

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, label = self.dataset[idx]
        multilabel = torch.zeros(self.num_classes)
        multilabel[label] = 1
        return image, multilabel

# 모든 데이터셋을 멀티 레이블 형식으로 변환
train_dataset_ml = MultiLabelWrapper(train_dataset, CLASS_LABELS)
valid_dataset_ml = MultiLabelWrapper(valid_dataset, CLASS_LABELS)

# 데이터로더 생성
train_loader = DataLoader(train_dataset_ml, batch_size=BATCH_SIZE, shuffle=True, num_workers=cpu_cores)
valid_loader = DataLoader(valid_dataset_ml, batch_size=BATCH_SIZE, shuffle=False, num_workers=cpu_cores)

# 손실 함수 정의
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Set Epoch Count
num_epochs = 30

from torch.utils.data import DataLoader, ConcatDataset

train_length = len(train_loader)
valid_length = len(valid_loader)

epochs = tqdm(range(num_epochs), desc="Running Epochs")
with (tqdm(total=train_length, desc="Training") as train_progress,
      tqdm(total=valid_length, desc="Validation") as valid_progress):  # Set up Progress Bars
    # update = create_plot()  # Create Loss Plot (필요한 경우 활성화)

    # Set up for training
    for epoch in epochs:
        train_progress.reset(total=train_length)
        valid_progress.reset(total=valid_length)

        model.train()
        total_loss = 0.0

        # 단일 레이블 데이터로 학습
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            domain_output, class_output, _ = model(inputs)
            loss = criterion(class_output, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            train_progress.update(1)

            print(f"\rEpoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}", end="")

        # 비지도 데이터에 대한 가상 레이블 생성
        model.eval()
        pseudo_labels_list = []
        inputs_list = []

        with torch.no_grad():
            for data in unlabeled_loader:
                inputs, _ = data  # 레이블은 사용하지 않으므로 '_'로 무시
                inputs = inputs.to(device)
                domain_output, class_output, _ = model(inputs)
                probs = torch.sigmoid(class_output)

                # 임계값을 사용하여 가상 레이블 생성 (예: 0.7 이상인 클래스만 선택)
                threshold = 0.7
                pseudo_labels = (probs >= threshold).float()

                # 가상 레이블이 없는 샘플은 제외
                if pseudo_labels.sum() == 0:
                    continue

                pseudo_labels_list.append(pseudo_labels.cpu())
                inputs_list.append(inputs.cpu())

            # 가상 레이블 데이터셋 생성
            if pseudo_labels_list:
                pseudo_dataset = torch.utils.data.TensorDataset(
                    torch.cat(inputs_list), torch.cat(pseudo_labels_list)
                )
            else:
                pseudo_dataset = None

        if pseudo_dataset is not None:
            # train_dataset_ml과 pseudo_dataset을 결합
            combined_dataset = ConcatDataset([train_dataset_ml, pseudo_dataset])
            combined_loader = DataLoader(combined_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=cpu_cores)
        else:
            combined_loader = train_loader

        # 모델 재학습
        model.train()
        total_loss = 0.0

        train_progress.reset(total=len(combined_loader))

        for i, (inputs, labels) in enumerate(combined_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            domain_output, class_output, _ = model(inputs)

            # 손실 함수 적용 (모든 데이터에 대해 동일하게)
            loss = criterion(class_output, labels)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            # update(loss)  # 필요한 경우 활성화
            train_progress.update(1)

            # 스텝별 손실 출력
            print(f"\rEpoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(combined_loader)}], Loss: {loss.item():.4f}", end="")

        from sklearn.metrics import f1_score
        
        # 검증 루프
        model.eval()
        val_loss = 0.0
        all_labels = []
        all_preds = []
        
        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs, labels = inputs.to(device), labels.to(device)
        
                domain_output, class_output, _ = model(inputs)
                loss = criterion(class_output, labels)
                val_loss += loss.item() * inputs.size(0)
        
                # 예측 및 레이블 저장
                preds = torch.sigmoid(class_output)
                preds = (preds >= 0.5).float()
                all_labels.append(labels.cpu())
                all_preds.append(preds.cpu())

                valid_progress.update(1)
        
            val_loss /= len(valid_loader.dataset)
        
            # 전체 예측 및 레이블 텐서로 변환
            all_labels = torch.cat(all_labels)
            all_preds = torch.cat(all_preds)
        
            # F1-스코어 계산 (매크로 평균)
            val_f1 = f1_score(all_labels.numpy(), all_preds.numpy(), average='macro') * 100
        
            print(f"\nEpoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.6f}, Validation F1 Score: {val_f1:.6f}")

Running Epochs:   0%|          | 0/30 [00:00<?, ?it/s]

Training:   0%|          | 0/59 [00:00<?, ?it/s]

Validation:   0%|          | 0/15 [00:00<?, ?it/s]

Epoch [1/30], Step [59/59], Loss: 0.0454
Epoch [1/30], Validation Loss: 0.038312, Validation F1 Score: 0.000000
Epoch [2/30], Step [60/60], Loss: 0.0285
Epoch [2/30], Validation Loss: 0.025029, Validation F1 Score: 12.217657
Epoch [3/30], Step [62/62], Loss: 0.0237
Epoch [3/30], Validation Loss: 0.020067, Validation F1 Score: 22.590933
Epoch [4/30], Step [62/62], Loss: 0.0164
Epoch [4/30], Validation Loss: 0.016616, Validation F1 Score: 24.412598
Epoch [5/30], Step [62/62], Loss: 0.0122
Epoch [5/30], Validation Loss: 0.014958, Validation F1 Score: 25.391870
Epoch [6/30], Step [62/62], Loss: 0.0119
Epoch [6/30], Validation Loss: 0.012258, Validation F1 Score: 27.382527
Epoch [7/30], Step [62/62], Loss: 0.0091
Epoch [7/30], Validation Loss: 0.010046, Validation F1 Score: 32.994706
Epoch [8/30], Step [62/62], Loss: 0.0079
Epoch [8/30], Validation Loss: 0.009513, Validation F1 Score: 43.836942
Epoch [9/30], Step [62/62], Loss: 0.0063
Epoch [9/30], Validation Loss: 0.008082, Validation F1 S

KeyboardInterrupt: 

In [5]:
if not path.isdir(path.join(".", "models")):
    mkdir(path.join(".", "models"))

# Model Save
save_path = path.join(".", "models", f"baseline.pt")
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")

Model saved to ./models/baseline.pt


# Model Evaluate

In [6]:
# Load Model
model_id = "baseline"

model = ImageClassifier(**MODEL_PARAMS)
model.load_state_dict(torch.load(path.join(".", "models", f"{model_id}.pt")))
model.to(device)

  model.load_state_dict(torch.load(path.join(".", "models", f"{model_id}.pt")))


ImageClassifier(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tr

In [7]:
results = dict(id=[], label1=[], label2=[])
test_length = len(test_dataset)

model.eval()
model.toggle_multilabel(True)

with torch.no_grad():
    for inputs, ids in tqdm(test_loader):
        inputs = inputs.to(device)
        _, outputs1, outputs2 = model(inputs)

        # outputs2에서 최대값의 인덱스를 얻습니다.
        preds1 = torch.argmax(outputs1, dim=1)
        preds2 = torch.argmax(outputs2, dim=1)

        results['id'] += [test_dataset.classes[i] for i in ids]
        results['label1'] += preds1.cpu().numpy().tolist()
        results['label2'] += preds2.cpu().numpy().tolist()

  0%|          | 0/9 [00:00<?, ?it/s]

In [None]:
results = dict(id=[], label1=[], label2=[])
test_length = len(test_dataset)

model.eval()
model.toggle_multilabel(True)

with torch.no_grad():
    for inputs, ids in tqdm(test_loader):
        inputs = inputs.to(device)
        _, class_output, _ = model(inputs)

        # 가장 큰 두 개의 값의 인덱스를 얻습니다.
        topk_vals, topk_indices = torch.topk(class_output, k=2, dim=1)
        print(topk_vals, topk_indices)

        # 첫 번째와 두 번째로 큰 값의 인덱스
        preds1 = topk_indices[:, 0]
        preds2 = topk_indices[:, 1]

        results['id'] += [test_dataset.classes[i] for i in ids]
        results['label1'] += preds1.cpu().numpy().tolist()
        results['label2'] += preds2.cpu().numpy().tolist()

In [8]:
# Re-arrange Results and handle cases where label1 == label2
for i, labels in enumerate(zip(results['label1'], results['label2'])):
    label1, label2 = min(labels), max(labels)
    if label1 == label2:
        # label1과 label2가 같은 경우 label1을 -1로 변경
        results['label1'][i] = -1
        results['label2'][i] = label2
    else:
        results['label1'][i] = label1
        results['label2'][i] = label2

results_df = pd.DataFrame(results)
results_df

Unnamed: 0,id,label1,label2
0,TEST_00000,116,118
1,TEST_00001,58,62
2,TEST_00002,84,94
3,TEST_00003,16,20
4,TEST_00004,59,81
...,...,...,...
1105,TEST_01105,2,18
1106,TEST_01106,66,67
1107,TEST_01107,20,24
1108,TEST_01108,28,29


In [9]:
# Save Results
submission_dir = "submissions"
if not path.isdir(submission_dir):
    mkdir(submission_dir)

submit_file_path = path.join(submission_dir, f"{model_id}.csv")
results_df.to_csv(submit_file_path, index=False)
print("File saved to", submit_file_path)

File saved to submissions/baseline.csv
