<a href="https://colab.research.google.com/github/seongjiko/low_scale_bird_classification/blob/master/base_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset.py

In [1]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from autoaugment import ImageNetPolicy
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from torch.utils.data import Subset
class CustomDataset(Dataset):
    """Image dataset backed by a DataFrame of image paths and optional labels.

    Each item is loaded with PIL, converted to RGB and passed through
    ``transforms`` (when one is given).

    Args:
        csv: DataFrame with an ``img_path`` column. A ``label`` column is
            required unless ``is_test`` is True, and an ``upscale_img_path``
            column is required when ``use_distillation`` is True.
        transforms: Callable applied to every loaded image, or a falsy value
            to return the raw PIL image.
        is_low: Accepted for interface compatibility; not used by this class.
        is_test: When True no labels are read or returned.
        use_distillation: When True each item also contains the image found
            at ``upscale_img_path``.
        debug: When True only every 500th row of ``csv`` is kept.

    Items returned by ``__getitem__``:
        * distillation, train: ``(img_low, img_high, label)``
        * distillation, test:  ``(img_low, img_high)``
        * plain, train:        ``(img, label)``
        * plain, test:         ``img``
    """

    def __init__(self, csv, transforms, is_low=False, is_test=False, use_distillation=False, debug=False):
        if debug:
            # Debug mode: keep every 500th row so a run finishes quickly.
            csv = csv[::500]

        self.is_test = is_test
        self.use_distillation = use_distillation

        if use_distillation:
            print('distillation mode입니다.')
        self.transforms = transforms
        self.path = csv['img_path'].values

        # The upscaled paths are only needed for distillation. Reading the
        # column unconditionally made every CSV without it (for example a
        # test CSV) fail with a KeyError even in plain mode. In distillation
        # mode a missing column still raises KeyError, as before.
        if use_distillation or 'upscale_img_path' in csv.columns:
            self.path_high = csv['upscale_img_path'].values
        else:
            self.path_high = None

        if not is_test:
            self.labels = csv['label'].values

    def _load(self, path):
        """Open ``path`` as an RGB image and apply the transform, if any."""
        img = Image.open(path).convert('RGB')
        if self.transforms:
            img = self.transforms(img)
        return img

    def __getitem__(self, idx):
        if self.use_distillation:
            # Low-resolution input first, then its upscaled counterpart.
            item = (self._load(self.path[idx]), self._load(self.path_high[idx]))
            if not self.is_test:
                item = item + (self.labels[idx],)
            return item

        img = self._load(self.path[idx])
        if self.is_test:
            return img
        return img, self.labels[idx]

    def __len__(self):
        return len(self.path)

def get_transforms_AutoAug(is_low):
    """Build the training and validation image pipelines.

    Args:
        is_low: When truthy images are resized with size 64, otherwise 256.

    Returns:
        ``(train_transforms, valid_transforms)``. The training pipeline adds
        a random horizontal flip and ``ImageNetPolicy`` before tensor
        conversion; both pipelines end with the same ``ToTensor`` and
        ``Normalize`` steps.
    """
    target_size = 64 if is_low else 256

    def _tensor_tail():
        # Fresh instances for each pipeline: tensor conversion followed by
        # per-channel normalisation.
        return [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]

    train_steps = [
        transforms.Resize(target_size),
        transforms.RandomHorizontalFlip(),
        ImageNetPolicy(),
    ]
    valid_steps = [transforms.Resize(target_size)]

    train_transforms = transforms.Compose(train_steps + _tensor_tail())
    valid_transforms = transforms.Compose(valid_steps + _tensor_tail())
    return train_transforms, valid_transforms

def _build_train_valid_loaders(cfg, train_df, valid_df, train_transforms, valid_transforms):
    """Wrap one train/validation DataFrame pair into a pair of DataLoaders."""
    train_dataset = CustomDataset(
        train_df, train_transforms, cfg['is_low'],
        is_test=False, use_distillation=cfg['use_distillation'], debug=cfg['debug'],
    )
    valid_dataset = CustomDataset(
        valid_df, valid_transforms, cfg['is_low'],
        is_test=False, use_distillation=cfg['use_distillation'], debug=cfg['debug'],
    )

    train_loader = DataLoader(train_dataset, batch_size=cfg['batch_size'], shuffle=True)
    # Validation uses a batch size of 1 and a fixed order.
    valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False)
    return train_loader, valid_loader


def get_loader(cfg, is_test=False):
    """Create the DataLoaders described by ``cfg``.

    Reads ``train.csv`` or ``test.csv`` from the current working directory.

    Args:
        cfg: Mapping with the keys ``is_low``, ``batch_size``,
            ``use_kfold``, ``use_distillation`` and ``debug``.
        is_test: When True only the test loader is built.

    Returns:
        * ``is_test=True``: a single test ``DataLoader``.
        * ``cfg['use_kfold']`` truthy: ``(train_loaders, valid_loaders)``,
          two lists with one loader per fold (3 folds).
        * otherwise: ``(train_loader, valid_loader)`` from a stratified
          80/20 split.
    """
    # Built up front so that every branch, including the test branch, has
    # the transforms available.
    train_transforms, valid_transforms = get_transforms_AutoAug(cfg['is_low'])

    if is_test:
        # Handled first so a test loader is returned for k-fold configs too.
        test_df = pd.read_csv('test.csv')
        test_dataset = CustomDataset(test_df, valid_transforms, cfg['is_low'], is_test=True)
        return DataLoader(test_dataset, batch_size=cfg['batch_size'], shuffle=False)

    df = pd.read_csv('train.csv')

    # Encode labels once on the full table. LabelEncoder sorts the classes,
    # so the mapping equals a per-split fit whenever the training split
    # contains every class. Encoding here keeps the mapping identical across
    # folds, cannot fail on a class missing from one fold, and avoids
    # assigning into DataFrame slices.
    encoder = LabelEncoder()
    df['label'] = encoder.fit_transform(df['label'])

    if cfg['use_kfold']:
        kfold = KFold(n_splits=3, shuffle=True, random_state=1020)

        # One DataLoader per fold.
        train_loaders = []
        valid_loaders = []
        for train_idx, valid_idx in kfold.split(df):
            train_loader, valid_loader = _build_train_valid_loaders(
                cfg, df.iloc[train_idx], df.iloc[valid_idx], train_transforms, valid_transforms,
            )
            train_loaders.append(train_loader)
            valid_loaders.append(valid_loader)

        return train_loaders, valid_loaders

    train_df, valid_df = train_test_split(df, test_size=0.2, random_state=1020, stratify=df['label'])
    return _build_train_valid_loaders(cfg, train_df, valid_df, train_transforms, valid_transforms)

# models.py

In [2]:
import torch
import torch.nn as nn
import timm

def get_convnext_base(pretrained=True):
    """Create a timm ``convnext_base`` with a 25-class head on the GPU.

    Args:
        pretrained: Forwarded to ``timm.create_model``.

    Returns:
        The model after ``.to('cuda')``.
    """
    return timm.create_model('convnext_base', pretrained=pretrained, num_classes=25).to('cuda')

def get_convnext_large(pretrained=True):
    """Create a timm ``convnext_large`` with a 25-class head on the GPU.

    Args:
        pretrained: Forwarded to ``timm.create_model``.

    Returns:
        The model after ``.to('cuda')``.
    """
    return timm.create_model('convnext_large', pretrained=pretrained, num_classes=25).to('cuda')

def get_mobileNetV3_large(pretrained=True):
    """Create a timm ``mobilenetv3_large_100`` with a 25-class head on the GPU.

    Args:
        pretrained: Forwarded to ``timm.create_model``. Defaults to True, so
            calls without arguments behave as before; the parameter mirrors
            the sibling model factories and lets callers skip the pretrained
            weights when they load their own checkpoint.

    Returns:
        The model after ``.to('cuda')``.
    """
    model = timm.create_model('mobilenetv3_large_100', pretrained=pretrained, num_classes=25)
    model = model.to('cuda')

    return model

def get_resnet50_32x4d(pretrained=True):
    """Create a timm ``resnext50_32x4d`` with a 25-class head on the GPU.

    Despite the function name, the architecture requested from timm is
    ResNeXt-50 (32x4d).

    Args:
        pretrained: Forwarded to ``timm.create_model``. Defaults to True, so
            calls without arguments behave as before; the parameter mirrors
            the sibling model factories.

    Returns:
        The model after ``.to('cuda')``.
    """
    model = timm.create_model('resnext50_32x4d', pretrained=pretrained, num_classes=25)
    model = model.to('cuda')

    return model

def get_swin_large(pretrained=True):
    """Create a timm ``swin_large_patch4_window7_224`` with a 25-class head on the GPU.

    Args:
        pretrained: Forwarded to ``timm.create_model``.

    Returns:
        The model after ``.to('cuda')``.
    """
    return timm.create_model(
        'swin_large_patch4_window7_224', pretrained=pretrained, num_classes=25
    ).to('cuda')

def get_deit3_large(pretrained=True):
    """Create a timm ``deit3_large_patch16_224`` with a 25-class head on the GPU.

    Args:
        pretrained: Forwarded to ``timm.create_model``. Defaults to True, so
            calls without arguments behave as before; the parameter mirrors
            the sibling model factories.

    Returns:
        The model after ``.to('cuda')``.
    """
    model = timm.create_model('deit3_large_patch16_224', pretrained=pretrained, num_classes=25)
    model = model.to('cuda')

    return model

def get_swinv2_large(pretrained=True):
    """Create a timm ``swinv2_large_window12to16_192to256`` with a 25-class head on the GPU.

    Args:
        pretrained: Forwarded to ``timm.create_model``.

    Returns:
        The model after ``.to('cuda')``.
    """
    return timm.create_model(
        'swinv2_large_window12to16_192to256', pretrained=pretrained, num_classes=25
    ).to('cuda')

# Trainer.py

In [3]:

import torch.nn.functional as F

def distllation(student_outputs, targets, teacher_outputs, T=20, alpha=0.75):
    """Combine hard-label cross-entropy with a temperature-scaled KL term.

    Args:
        student_outputs: Student logits; softmax is taken over ``dim=1``.
        targets: Ground-truth targets for ``F.cross_entropy``.
        teacher_outputs: Teacher logits; softmax is taken over ``dim=1``.
        T: Temperature dividing both sets of logits before the softmax.
        alpha: Weight of the cross-entropy term; the KL term is weighted by
            ``T * T * (1 - alpha)``.

    Returns:
        Scalar tensor ``alpha * CE + T^2 * (1 - alpha) * KL``.
    """
    hard_loss = F.cross_entropy(student_outputs, targets)

    # KL divergence expects log-probabilities for the input (student) and
    # probabilities for the target (teacher).
    soft_student = F.log_softmax(student_outputs / T, dim=1)
    soft_teacher = F.softmax(teacher_outputs / T, dim=1)
    soft_loss = F.kl_div(soft_student, soft_teacher, reduction='batchmean')

    return alpha * hard_loss + (T * T) * (1 - alpha) * soft_loss

In [4]:
import torch.optim as optim
import torch.nn as nn
from collections import deque
from sklearn.metrics import f1_score, accuracy_score
import numpy as np
import csv
import torch
from tqdm.cli import tqdm

from tqdm import tqdm
import torch
from sklearn.metrics import f1_score, accuracy_score
import numpy as np

def run_model(model, loader, criterion, optimizer=None, is_training=False, is_test=False, teacher_model=None):
    """Run ``model`` over ``loader`` for one pass (train, validation or test).

    Batches may be ``(data, target)``, ``(data, high_data, target)``,
    ``(data, high_data)`` (test with high-resolution images) or a bare
    tensor ``data`` (plain test loader).

    Args:
        model: Network to run. Inputs are moved to the device of its first
            parameter (``'cuda'`` if it has no parameters).
        loader: Iterable of batches; ``len(loader)`` is used to average the loss.
        criterion: Loss used when no teacher is given.
        optimizer: Required when ``is_training`` is True.
        is_training: Enables train mode, gradients and optimizer steps.
        is_test: When True no targets are expected and no loss is computed.
        teacher_model: Optional teacher. When given (and not in test mode)
            the loss is ``distllation(outputs, target, teacher(high_data))``.

    Returns:
        ``is_test=False``: ``(mean_loss, accuracy, macro_f1, targets, preds)``
        with the last two as NumPy arrays.
        ``is_test=True``: list of predicted class indices.

    Raises:
        ValueError: If ``is_training`` and ``is_test`` are both True, or a
            teacher is given but the batch carries no high-resolution input.
    """
    if is_training and is_test:
        raise ValueError('is_training and is_test cannot both be True: test batches carry no targets.')

    if is_training:
        model.train()
    else:
        model.eval()

    use_teacher = teacher_model is not None
    if use_teacher:
        # The teacher only provides soft targets; set eval mode once.
        teacher_model.eval()

    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    targets, preds = [], []
    running_loss = 0.0
    smooth_loss_queue = deque(maxlen=100)  # window for the smoothed loss display

    bar = tqdm(loader, ascii=True)
    for batch in bar:
        # A plain test loader yields a bare tensor; star-unpacking it would
        # split the batch along its first dimension.
        if torch.is_tensor(batch):
            data, extras = batch, []
        else:
            data, *extras = batch

        high_data, target = None, None
        if len(extras) == 2:  # high-resolution image and target
            high_data, target = extras
        elif len(extras) == 1:
            if is_test:
                high_data = extras[0]
            else:
                target = extras[0]

        data = data.to(device)
        loss = None

        # Gradients are only tracked while training; evaluation passes do
        # not need to build the autograd graph.
        with torch.set_grad_enabled(is_training):
            outputs = model(data)

            if not is_test:
                target = target.to(device)
                if use_teacher:
                    if high_data is None:
                        raise ValueError('teacher_model was given but the batch has no high-resolution input.')
                    with torch.no_grad():
                        teacher_outputs = teacher_model(high_data.to(device)).detach()
                    # Teacher outputs act as soft targets for the student.
                    loss = distllation(outputs, target, teacher_outputs)
                else:
                    loss = criterion(outputs, target)

        if loss is not None:
            loss_value = loss.item()
            running_loss += loss_value
            smooth_loss_queue.append(loss_value)
            smooth_loss = sum(smooth_loss_queue) / len(smooth_loss_queue)

            bar.set_description(f'Loss: {loss_value:.6f} | Smooth Loss: {smooth_loss:.6f}')

        if is_training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        predicted = torch.argmax(outputs, dim=-1).detach().cpu().tolist()
        preds.extend(predicted)
        if not is_test:
            targets.extend(target.detach().cpu().tolist())

    if is_test:
        return preds

    targets_arr, preds_arr = np.array(targets), np.array(preds)
    f1_score_ = f1_score(targets_arr, preds_arr, average='macro')
    acc_score = accuracy_score(targets_arr, preds_arr)
    # max(..., 1) keeps an empty loader from dividing by zero.
    return running_loss / max(len(loader), 1), acc_score, f1_score_, targets_arr, preds_arr



In [5]:
def train_kfold(model, cfg, train_loaders, valid_loaders):
    """Train one model per fold, logging metrics and saving each fold's best checkpoint.

    For every ``(train_loader, valid_loader)`` pair a fresh model is built,
    trained for ``cfg['epochs']`` epochs with AdamW and a 0.95**epoch
    learning-rate decay, and logged to
    ``logs/{attempt_name}_{fold}fold.csv``. Whenever the validation macro-F1
    improves, the weights are saved to ``models/{attempt_name}_{fold}fold.pt``.

    Args:
        model: Not used; every fold builds its own model with
            ``get_swin_large()``.
        cfg: Mapping with the keys ``lr``, ``epochs`` and ``attempt_name``.
        train_loaders: One training loader per fold.
        valid_loaders: One validation loader per fold.
    """
    import os  # local import: keeps this block independent of cell order

    # Create the output directories up front so a missing folder cannot
    # abort the run after an epoch has already been trained.
    os.makedirs('logs', exist_ok=True)
    os.makedirs('models', exist_ok=True)

    fieldnames = ['Epoch', 'Train Loss', 'Train Acc', 'Train F1', 'Valid Loss', 'Valid Acc', 'Valid F1', 'Learning Rate']

    for idx, (train_loader, valid_loader) in enumerate(zip(train_loaders, valid_loaders)):
        # NOTE(review): the architecture is fixed here and the ``model``
        # argument is ignored — confirm whether callers expect their own
        # model to be trained instead.
        model = get_swin_large()
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.AdamW(model.parameters(), lr=cfg['lr'])
        scheduler = optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda epoch: 0.95 ** epoch)

        best_f1 = 0

        with open(f"logs/{cfg['attempt_name']}_{idx+1}fold.csv", "w", newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

            for e in range(cfg['epochs']):
                train_loss, train_acc, train_f1, train_targets, train_preds = run_model(model, train_loader, criterion, optimizer, is_training=True)
                valid_loss, valid_acc, valid_f1, valid_targets, valid_preds = run_model(model, valid_loader, criterion, optimizer, is_training=False)

                # Learning rate in effect during this epoch (before the decay step).
                current_lr = optimizer.param_groups[0]['lr']

                # Print epoch results in table format
                print(f'{"-"*75}')
                print_output = f'Epoch: {e} | Train Loss: {train_loss:.6f} | Train Acc: {train_acc:.6f} | Train F1: {train_f1:.6f} | Valid Loss: {valid_loss:.6f} | Valid Acc: {valid_acc:.6f} | Valid F1: {valid_f1:.6f} | LR: {current_lr:.2e}'
                print(print_output)
                print(f'{"-"*75}')
                writer.writerow({
                    'Epoch': e,
                    'Train Loss': train_loss,
                    'Train Acc': train_acc,
                    'Train F1': train_f1,
                    'Valid Loss': valid_loss,
                    'Valid Acc': valid_acc,
                    'Valid F1': valid_f1,
                    'Learning Rate': current_lr
                })

                scheduler.step()

                # Save on improvement of the validation F1. The comparison
                # used to be '<' against an initial 0 (never true), and the
                # tracker was overwritten with the validation loss.
                if valid_f1 > best_f1:
                    print(f'{"*"*75}\nModel saved! Improved from {best_f1:.6f} to {valid_f1:.6f}\n{"*"*75}')
                    best_f1 = valid_f1
                    torch.save(model.state_dict(), f'models/{cfg["attempt_name"]}_{idx+1}fold.pt')

def train(model, cfg, train_loader, valid_loader):
    """Train ``model``, log per-epoch metrics and keep the best checkpoint.

    Uses AdamW with a 0.95**epoch learning-rate decay for ``cfg['epochs']``
    epochs. Metrics are written to a CSV under the Google Drive ``logs``
    folder, and the weights are saved under the ``models`` folder whenever
    the validation macro-F1 improves.

    Args:
        model: Network to train (updated in place).
        cfg: Mapping with the keys ``lr``, ``epochs``, ``attempt_name`` and
            ``use_distillation``. When ``use_distillation`` is truthy a
            ConvNeXt-Base teacher is loaded from a fixed checkpoint path and
            passed to ``run_model`` for both training and validation.
        train_loader: Training batches.
        valid_loader: Validation batches.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=cfg['lr'])
    scheduler = optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda epoch: 0.95 ** epoch)

    # The teacher does not change between epochs, so build and load it once
    # instead of re-reading the checkpoint every epoch. Loading it before
    # the log file is opened also means a missing checkpoint cannot
    # truncate an existing log.
    if cfg['use_distillation']:
        # NOTE(review): the checkpoint file name mentions swin_large but is
        # loaded into a convnext_base — confirm the file matches the model.
        teacher_model = get_convnext_base(False)
        teacher_model.load_state_dict(torch.load('/content/drive/MyDrive/dacon/bird_classification/models/0416_swin_large_patch16_224_KD_teacher.pt'))
        teacher_model.eval()
    else:
        teacher_model = None

    best_f1 = 0

    with open(f"/content/drive/MyDrive/dacon/bird_classification/logs/{cfg['attempt_name']}.csv", "w", newline='') as csvfile:
        fieldnames = ['Epoch', 'Train Loss', 'Train Acc', 'Train F1', 'Valid Loss', 'Valid Acc', 'Valid F1', 'Learning Rate']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Write the header
        writer.writeheader()
        for e in range(cfg['epochs']):
            train_loss, train_acc, train_f1, train_targets, train_preds = run_model(model, train_loader, criterion, optimizer, is_training=True, teacher_model=teacher_model)
            valid_loss, valid_acc, valid_f1, valid_targets, valid_preds = run_model(model, valid_loader, criterion, optimizer, is_training=False, teacher_model=teacher_model)

            # Learning rate in effect during this epoch (before the decay step).
            current_lr = optimizer.param_groups[0]['lr']

            # Print epoch results in table format
            print(f'{"-"*75}')
            print_output = f'Epoch: {e} | Train Loss: {train_loss:.6f} | Train Acc: {train_acc:.6f} | Train F1: {train_f1:.6f} | Valid Loss: {valid_loss:.6f} | Valid Acc: {valid_acc:.6f} | Valid F1: {valid_f1:.6f} | LR: {current_lr:.2e}'
            print(print_output)
            print(f'{"-"*75}')
            writer.writerow({
                'Epoch': e,
                'Train Loss': train_loss,
                'Train Acc': train_acc,
                'Train F1': train_f1,
                'Valid Loss': valid_loss,
                'Valid Acc': valid_acc,
                'Valid F1': valid_f1,
                'Learning Rate': current_lr
            })

            scheduler.step()

            # Keep only the checkpoint with the best validation F1 so far.
            if valid_f1 > best_f1:
                print(f'{"*"*75}\nModel saved! Improved from {best_f1:.6f} to {valid_f1:.6f}\n{"*"*75}')
                best_f1 = valid_f1
                torch.save(model.state_dict(), f'/content/drive/MyDrive/dacon/bird_classification/models/{cfg["attempt_name"]}.pt')

# Train.py

In [6]:
import random, os
import json

def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch and switch cuDNN to deterministic mode.

    Args:
        seed: Value used for every generator and for ``PYTHONHASHSEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Python's random module, NumPy, PyTorch on CPU, the current GPU and
    # finally every GPU.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic cuDNN kernels, with the benchmark auto-tuner disabled.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def train_start(cfg, seed, pretrained=False, pretrained_model_pt=None):
    """Seed the run, record the experiment description, build the model and train it.

    Args:
        cfg: Experiment configuration. ``attempt_name`` and ``method`` are
            read here; the remaining keys are consumed by ``get_loader`` and
            the training functions.
        seed: Seed passed to ``seed_everything``.
        pretrained: When True, weights are loaded from
            ``models/{pretrained_model_pt}.pt`` before training.
        pretrained_model_pt: Checkpoint name (without ``.pt``); required
            when ``pretrained`` is True.

    Returns:
        The model built here. With ``cfg['use_kfold']`` the folds train
        their own models inside ``train_kfold``, so the returned model is
        not the trained one.

    Raises:
        ValueError: If ``pretrained`` is True but no checkpoint name is given.
    """
    # Fail before any side effect instead of trying to open 'models/None.pt'.
    if pretrained and pretrained_model_pt is None:
        raise ValueError('pretrained=True requires pretrained_model_pt (checkpoint name without ".pt").')

    seed_everything(seed=seed)

    filename = 'method_log.json'
    # Start from the existing log when there is one.
    data = {}
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as file:
            try:
                data = json.load(file)
            except json.JSONDecodeError:  # empty file or not valid JSON
                data = {}
    if not isinstance(data, dict):
        # Valid JSON that is not an object cannot be updated by key.
        data = {}

    # Add or overwrite the description stored under this attempt name.
    data[cfg['attempt_name']] = cfg['method']

    # Write the updated log back.
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)

    print(f"'{cfg['attempt_name']}' 설정이 '{filename}' 파일에 저장되었습니다.")

    model = get_swinv2_large()
    if pretrained:
        state_dict = torch.load(f'models/{pretrained_model_pt}.pt')
        model.load_state_dict(state_dict)
    print("model load 완료!")

    loaders = get_loader(cfg)
    print("data load 완료!")

    print('학습을 진행합니다.')
    if cfg['use_kfold']:
        # get_loader returns two lists of loaders here; they belong to the
        # per-fold trainer, not to the single-split one.
        train_kfold(model, cfg, *loaders)
    else:
        train(model, cfg, *loaders)

    return model

# main controller

In [7]:
timm

<module 'timm' from '/usr/local/lib/python3.10/dist-packages/timm/__init__.py'>

In [10]:
# Experiment configuration passed to train_start.
cfg = dict(
    lr=5e-5,            # learning rate handed to the optimizer
    epochs=6,
    batch_size=8,       # training batch size
    is_low=False,       # False selects the 256 resize, True the 64 resize
    attempt_name='0416_swinv2_large_KD_student',  # used in log and checkpoint file names
    method='고해상도 학습(Teacher, convNext_base) --> 저해상도 학습(student, swin_large_v2), KD',
    use_kfold=False,
    use_distillation=True,
    debug=False,
)

train_start(cfg, seed=1020, pretrained=False)

'0416_swinv2_large_KD_student' 설정이 'method_log.json' 파일에 저장되었습니다.


model.safetensors:   0%|          | 0.00/792M [00:00<?, ?B/s]

model load 완료!
distillation mode입니다.
distillation mode입니다.
data load 완료!
학습을 진행합니다.


Loss: 1.239888 | Smooth Loss: 0.558440: 100%|##########| 1584/1584 [30:30<00:00,  1.16s/it]
Loss: 0.095115 | Smooth Loss: 0.275232: 100%|##########| 3167/3167 [05:04<00:00, 10.41it/s]


---------------------------------------------------------------------------
Epoch: 0 | Train Loss: 0.662622 | Train Acc: 0.870135 | Train F1: 0.869992 | Valid Loss: 0.323114 | Valid Acc: 0.940954 | Valid F1: 0.941088 | LR: 5.00e-05
---------------------------------------------------------------------------
***************************************************************************
Model saved! Improved from 0.000000 to 0.941088
***************************************************************************


Loss: 0.581542 | Smooth Loss: 0.384210: 100%|##########| 1584/1584 [30:27<00:00,  1.15s/it]
Loss: 0.875381 | Smooth Loss: 0.296321: 100%|##########| 3167/3167 [05:15<00:00, 10.04it/s]


---------------------------------------------------------------------------
Epoch: 1 | Train Loss: 0.374875 | Train Acc: 0.929818 | Train F1: 0.929854 | Valid Loss: 0.317228 | Valid Acc: 0.937796 | Valid F1: 0.937790 | LR: 4.75e-05
---------------------------------------------------------------------------


Loss: 0.088764 | Smooth Loss: 0.261237: 100%|##########| 1584/1584 [30:40<00:00,  1.16s/it]
Loss: 0.080815 | Smooth Loss: 0.492801: 100%|##########| 3167/3167 [05:15<00:00, 10.04it/s]


---------------------------------------------------------------------------
Epoch: 2 | Train Loss: 0.279654 | Train Acc: 0.946949 | Train F1: 0.946862 | Valid Loss: 0.319728 | Valid Acc: 0.936217 | Valid F1: 0.935762 | LR: 4.51e-05
---------------------------------------------------------------------------


Loss: 1.456183 | Smooth Loss: 0.268793: 100%|##########| 1584/1584 [30:36<00:00,  1.16s/it]
Loss: 0.056289 | Smooth Loss: 0.226380: 100%|##########| 3167/3167 [05:10<00:00, 10.19it/s]


---------------------------------------------------------------------------
Epoch: 3 | Train Loss: 0.245727 | Train Acc: 0.954922 | Train F1: 0.954856 | Valid Loss: 0.344625 | Valid Acc: 0.935586 | Valid F1: 0.936621 | LR: 4.29e-05
---------------------------------------------------------------------------


Loss: 0.044560 | Smooth Loss: 0.217698: 100%|##########| 1584/1584 [30:38<00:00,  1.16s/it]
Loss: 0.037028 | Smooth Loss: 0.326828: 100%|##########| 3167/3167 [05:14<00:00, 10.08it/s]


---------------------------------------------------------------------------
Epoch: 4 | Train Loss: 0.213753 | Train Acc: 0.962027 | Train F1: 0.962015 | Valid Loss: 0.292580 | Valid Acc: 0.941901 | Valid F1: 0.941919 | LR: 4.07e-05
---------------------------------------------------------------------------
***************************************************************************
Model saved! Improved from 0.941088 to 0.941919
***************************************************************************


Loss: 0.116297 | Smooth Loss: 0.215963: 100%|##########| 1584/1584 [30:37<00:00,  1.16s/it]
Loss: 0.055546 | Smooth Loss: 0.174841: 100%|##########| 3167/3167 [05:11<00:00, 10.16it/s]


---------------------------------------------------------------------------
Epoch: 5 | Train Loss: 0.190716 | Train Acc: 0.967948 | Train F1: 0.967976 | Valid Loss: 0.251584 | Valid Acc: 0.951374 | Valid F1: 0.950875 | LR: 3.87e-05
---------------------------------------------------------------------------
***************************************************************************
Model saved! Improved from 0.941919 to 0.950875
***************************************************************************


SwinTransformerV2(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 192, kernel_size=(4, 4), stride=(4, 4))
    (norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
  )
  (layers): Sequential(
    (0): SwinTransformerV2Stage(
      (downsample): Identity()
      (blocks): ModuleList(
        (0): SwinTransformerV2Block(
          (attn): WindowAttention(
            (cpb_mlp): Sequential(
              (0): Linear(in_features=2, out_features=512, bias=True)
              (1): ReLU(inplace=True)
              (2): Linear(in_features=512, out_features=6, bias=False)
            )
            (qkv): Linear(in_features=192, out_features=576, bias=False)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=192, out_features=192, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
            (softmax): Softmax(dim=-1)
          )
          (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
          (drop_path1): 