# run

In [None]:
# Mount Google Drive at /content/drive; the data and checkpoint paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install torch evaluate accelerate==0.27.2 dataset transformers scikit-learn pandas tqdm

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting accelerate==0.27.2
  Downloading accelerate-0.27.2-py3-none-any.whl.metadata (18 kB)
Collecting dataset
  Downloading dataset-1.6.2-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x8

In [None]:
import os
import random
from tqdm import tqdm
import shutil
import pandas as pd
import numpy as np

import cv2
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torchvision.transforms as T
import torchvision.models as models

from datasets import load_dataset
from evaluate import load
from transformers import AutoModelForImageClassification, AutoImageProcessor, TrainingArguments, Trainer, pipeline
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

import warnings
warnings.filterwarnings('ignore')

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # undermines the deterministic flag above, so it has to stay off.
    torch.backends.cudnn.benchmark = False

# Fix the global seed once, before any splitting or model construction.
SEED = 42
seed_everything(SEED)

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
import pandas as pd
import os

# Root folder of the competition data on the mounted drive
folder_path = '/content/drive/MyDrive/KUBIG/25_wint_contest/y9'

# Load train.csv and test.csv
train = pd.read_csv(f'{folder_path}/train.csv')
test = pd.read_csv(f'{folder_path}/test.csv')


def _five_digit_id(raw_path):
    """Return the digits found in ``raw_path`` as a zero-padded 5-character id."""
    digits = ''.join(ch for ch in str(raw_path) if ch.isdigit())
    return str(int(digits)).zfill(5)


def _absolute_path(relative_template, raw_path):
    """Fill the id into ``relative_template`` and re-root the './...' path under ``folder_path``."""
    relative = relative_template.format(_five_digit_id(raw_path))
    # Drop the leading '.' so that the remaining '/...' is appended to folder_path.
    return folder_path + relative[1:]


# Rewrite every path column: keep only the numeric id and point at the preprocessed images.
train['img_path'] = train['img_path'].map(
    lambda p: _absolute_path("./train_resize/masked_TRAIN_{}.jpg", p)
)
test['img_path'] = test['img_path'].map(
    lambda p: _absolute_path("./test_resize/masked_TEST_{}.jpg", p)
)
train['upscale_img_path'] = train['upscale_img_path'].map(
    lambda p: _absolute_path("./upscale_cropped/cropped_TRAIN_{}.png", p)
)

In [None]:
# Hold out 10% of the training rows for validation, stratified by class label.
train_df, val_df = train_test_split(
    train,
    test_size=0.1,
    stratify=train['label'],
    random_state=SEED,
)

# Fit the label encoder on the training split only, then encode both splits with it.
le = preprocessing.LabelEncoder()
le.fit(train_df['label'])
train_df['label'] = le.transform(train_df['label'])
val_df['label'] = le.transform(val_df['label'])

In [None]:
# Extend the training split with a second copy of every row that points at the upscaled image.
upscaled_rows = train_df.copy()
upscaled_rows['img_path'] = upscaled_rows['upscale_img_path']

# Original rows first, upscaled rows second, with a fresh 0..n-1 index.
train_expanded_df = pd.concat([train_df, upscaled_rows], ignore_index=True)

print("train_expanded_df:", len(train_expanded_df))

train_expanded_df: 28500


In [None]:
class CustomDataset(Dataset):
    """Image-classification dataset backed by a DataFrame of file paths and labels.

    Each item is read from disk with OpenCV, converted from BGR to RGB, passed
    through the ``transforms`` callable and then through the image ``processor``.

    Args:
        df: DataFrame with an ``img_path`` column and a ``label`` column whose
            values can be converted with ``int()``.
        transforms: Callable invoked as ``transforms(image=array)`` that returns
            a mapping with an ``'image'`` entry.
        processor: Callable invoked as ``processor(image, return_tensors="pt")``
            that returns a mapping with a ``"pixel_values"`` entry carrying a
            leading batch dimension of size one.
    """

    def __init__(self, df, transforms, processor):
        self.df = df
        self.transforms = transforms
        self.processor = processor

    def __getitem__(self, index):
        """Load, augment and preprocess the sample at positional ``index``.

        Returns:
            dict with ``"pixel_values"`` (processor output without the batch
            dimension) and ``"labels"`` (0-dim ``torch.long`` tensor).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        row = self.df.iloc[index]
        img_path = row['img_path']
        label = row['label']

        # cv2.imread does not raise on a missing or unreadable file; it returns None,
        # which would otherwise surface as an opaque error inside cvtColor.
        bgr_image = cv2.imread(img_path)
        if bgr_image is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")

        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        image = self.transforms(image=image)['image']  # augmentation

        inputs = self.processor(image, return_tensors="pt")
        pixel_values = inputs["pixel_values"].squeeze(0)  # (1, C, H, W) -> (C, H, W)

        return {
            "pixel_values": pixel_values,
            "labels": torch.tensor(int(label), dtype=torch.long)
        }

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

In [None]:
# Training-time pipeline: fixed 256x256 resize, two random augmentations
# (each applied with probability 0.5), then conversion to a torch tensor.
_train_steps = [
    A.Resize(256, 256),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

SwinV2 model setup

In [None]:
# Hub id of the pretrained SwinV2 checkpoint that is fine-tuned below.
model_name = "microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft"

# Matching image processor, loaded with its normalization step switched off.
processor = AutoImageProcessor.from_pretrained(
    model_name,
    do_normalize=False,
)

In [None]:
# Build the datasets
train_dataset = CustomDataset(train_expanded_df, train_transform, processor)

# Validation has to be deterministic: only the resize and tensor conversion are applied.
# Reusing train_transform here would randomly flip and colour-jitter validation images,
# making the per-epoch validation loss noisy and not comparable between epochs.
val_transform = A.Compose([
    A.Resize(256, 256),
    ToTensorV2(),
])
val_dataset = CustomDataset(val_df, val_transform, processor)

## run

In [None]:
# Training configuration handed to the Trainer below.
# With a per-device batch of 32 and 2 accumulation steps, each optimizer step
# covers 32 * 2 = 64 samples per device.
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/KUBIG/25_wint_contest/y9/results/swinv2_model",
    per_device_train_batch_size=32,  # per-device micro-batch size
    per_device_eval_batch_size=32,
    evaluation_strategy="epoch",  # evaluate once per epoch
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=500,  # avoid logging too often
    num_train_epochs=10,
    gradient_accumulation_steps=2,  # NOTE(review): the previous comment said "set to 1", but the value in use is 2
    fp16=False,  # FP16 stays off; BF16 is enabled on the next line instead
    bf16=True,  # BF16 mixed precision (original note: much faster on an A100)
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_steps=0,
    seed=SEED,
    report_to="none",
    ddp_find_unused_parameters=None,  # original note: not needed when training on a single GPU
)


In [None]:
# Show which GPU this runtime has and how much of its memory is currently in use.
!nvidia-smi


Sat Feb 22 01:46:35 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   30C    P0             47W /  400W |       5MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
from transformers import DefaultDataCollator, AutoModelForImageClassification
import torch.nn as nn

# Data collator that batches the dicts returned by the dataset
data_collator = DefaultDataCollator()

# Load the pretrained SwinV2 model. Passing num_labels together with
# ignore_mismatched_sizes=True already replaces the 1000-class head of the
# checkpoint with a freshly initialised head sized for this dataset.
model = AutoModelForImageClassification.from_pretrained(
    model_name,
    num_labels=len(le.classes_),  # number of classes in this dataset
    ignore_mismatched_sizes=True  # allow the classifier shape to differ from the checkpoint
).to(device)

# The head is intentionally not re-created by hand: assigning a new nn.Linear
# after .to(device) would be redundant and would leave that layer on the CPU
# while the rest of the model sits on `device`. Verify the size instead.
assert model.classifier.out_features == len(le.classes_), (
    f"classifier has {model.classifier.out_features} outputs, "
    f"expected {len(le.classes_)}"
)

# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=processor
)

print("✅ 모델의 classifier 레이어가 정상적으로 변경되었습니다!")


Some weights of Swinv2ForImageClassification were not initialized from the model checkpoint at microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 1536]) in the checkpoint and torch.Size([25, 1536]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([25]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ 모델의 classifier 레이어가 정상적으로 변경되었습니다!


train

In [None]:
from accelerate import Accelerator

# Create an Accelerator instance.
# NOTE(review): this object is not passed to the Trainer anywhere in this notebook;
# presumably the cell is a workaround for an accelerate/transformers version
# mismatch — confirm whether it is still required.
accelerator = Accelerator()

# Make sure the accelerator state exposes a `distributed_type` attribute.
if not hasattr(accelerator.state, "distributed_type"):
    accelerator.state.distributed_type = None  # fall back to a default value

In [None]:
# Run fine-tuning and keep the returned training summary.
train_results = trainer.train()
print("✅ Training 완료!")

# Persist the final model first, then log and save the training metrics
# and finally the trainer state.
trainer.save_model()
final_metrics = train_results.metrics
trainer.log_metrics("train", final_metrics)
trainer.save_metrics("train", final_metrics)
trainer.save_state()

Epoch,Training Loss,Validation Loss
1,No log,0.338672
2,0.299800,0.311479
3,0.104300,0.338361
4,0.055100,0.38916
5,0.023600,0.404171
6,0.012000,0.318548
7,0.006000,0.365603
8,0.001900,0.34576
9,0.001300,0.34377


✅ Training 완료!
***** train metrics *****
  epoch                    =        9.9787
  total_flos               = 61004248787GF
  train_loss               =        0.0567
  train_runtime            =    3:44:34.81
  train_samples_per_second =        21.151
  train_steps_per_second   =          0.33


# Inference

inference

In [None]:
# Load the fine-tuned weights from a checkpoint directory under the training output_dir.
epoch_checkpoint = "/content/drive/MyDrive/KUBIG/25_wint_contest/y9/results/swinv2_model/checkpoint-2676"
model = AutoModelForImageClassification.from_pretrained(epoch_checkpoint)
model = model.to(device)

In [None]:
# `model_name` is the hub id defined earlier:
# "microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft"
# Inference must preprocess images the same way training did. The training
# processor was created with do_normalize=False, so the model only ever saw
# un-normalized pixel values; loading the processor with its defaults here
# would feed it inputs on a different scale.
processor = AutoImageProcessor.from_pretrained(model_name, do_normalize=False, return_tensors="pt")

In [None]:
model.eval()

# Build the image-classification pipeline on the same device the model was moved to,
# instead of hard-coding GPU index 0 (which fails on a CPU-only runtime).
classifier = pipeline("image-classification", model=model, image_processor=processor, device=device)

# Paths of the test images
test_file_names = test["img_path"].tolist()


# predict
predict_labels = []

for name in tqdm(test_file_names, total=len(test_file_names), desc = "Predicting"):
    # Close each file handle as soon as the pixels are decoded; convert() returns
    # a new, fully loaded image that stays valid after the file is closed.
    with Image.open(name) as image_file:
        image = image_file.convert("RGB")
    prediction = classifier(image)[0]['label']  # label of the highest-scoring class
    predict_labels.append(prediction)

Device set to use cuda:0
Predicting:  47%|████▋     | 3179/6786 [02:23<02:42, 22.19it/s]


KeyboardInterrupt: 

In [None]:
from google.colab import files

# `le` is deliberately NOT refit here. By this point train_df["label"] holds the
# encoded integers, so refitting would make the encoder map integers to integers
# and inverse_transform could no longer return the original class names.

# Strip the "LABEL_" prefix to get the integer class id. str() makes this cell
# safe to re-run after predict_labels has already been converted to ints.
predict_labels = [int(str(label).replace("LABEL_", "")) for label in predict_labels]

submission_df = pd.read_csv("/content/drive/MyDrive/KUBIG/25_wint_contest/y9/sample_submission.csv")

# An interrupted prediction loop leaves fewer predictions than submission rows.
if len(predict_labels) != len(submission_df):
    raise ValueError(
        f"Got {len(predict_labels)} predictions for {len(submission_df)} submission rows; "
        "re-run the prediction cell to completion first."
    )

submission_df["label"] = le.inverse_transform(predict_labels)  # integer id -> class name
submission_df.to_csv("swin_resize_upscale.csv", index=False)
files.download("swin_resize_upscale.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd

# Reload the raw train.csv.
# NOTE: this rebinds `train_df` to the raw CSV contents (labels are class names again).
file_path = "/content/drive/MyDrive/KUBIG/25_wint_contest/y9/train.csv"
train_df = pd.read_csv(file_path)

# Distinct class names in ascending order
unique_labels = sorted(train_df["label"].unique())

# Integer id -> original class name, numbered from 0 in that order
label_mapping = dict(enumerate(unique_labels))

# Print the mapping, one "id: name" pair per line
for idx in label_mapping:
    print(f"{idx}: {label_mapping[idx]}")


0: Asian Green Bee-Eater
1: Brown-Headed Barbet
2: Cattle Egret
3: Common Kingfisher
4: Common Myna
5: Common Rosefinch
6: Common Tailorbird
7: Coppersmith Barbet
8: Forest Wagtail
9: Gray Wagtail
10: Hoopoe
11: House Crow
12: Indian Grey Hornbill
13: Indian Peacock
14: Indian Pitta
15: Indian Roller
16: Jungle Babbler
17: Northern Lapwing
18: Red-Wattled Lapwing
19: Ruddy Shelduck
20: Rufous Treepie
21: Sarus Crane
22: White Wagtail
23: White-Breasted Kingfisher
24: White-Breasted Waterhen


In [None]:
import pandas as pd

# Load the submission file
submission_file_path = "/content/drive/MyDrive/KUBIG/25_wint_contest/y9/swin_resize_upscale.csv"
submission_df = pd.read_csv(submission_file_path)

# Load train.csv to rebuild the label encoding
train_file_path = "/content/drive/MyDrive/KUBIG/25_wint_contest/y9/train.csv"
train_df = pd.read_csv(train_file_path)

# Integer id -> class name (class names sorted ascending, numbered from 0)
unique_labels = sorted(train_df["label"].unique())
label_mapping = dict(enumerate(unique_labels))

if pd.api.types.is_numeric_dtype(submission_df["label"]):
    # Integer ids: translate them to class names. Series.map yields NaN for any
    # id missing from the mapping, so check before overwriting the column.
    mapped_labels = submission_df["label"].map(label_mapping)
    if mapped_labels.isna().any():
        unknown_ids = sorted(submission_df.loc[mapped_labels.isna(), "label"].unique())
        raise ValueError(f"Submission contains label ids with no class name: {unknown_ids}")
    submission_df["label"] = mapped_labels
else:
    # The file already holds class names; mapping again would turn every row into NaN.
    unknown_names = sorted(set(submission_df["label"]) - set(unique_labels))
    if unknown_names:
        raise ValueError(f"Submission contains unknown class names: {unknown_names}")

# Save the converted submission
output_file_path = "/content/drive/MyDrive/KUBIG/25_wint_contest/y9/swin_detect_resize_upscale.csv"
submission_df.to_csv(output_file_path, index=False)

# Download the converted file
from google.colab import files
files.download(output_file_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>