## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

  from .autonotebook import tqdm as notebook_tqdm
INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.21 (you have 1.4.11). Upgrade using: pip install --upgrade albumentations


In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Hyperparameter Setting

In [None]:
# Global hyperparameters shared by every later cell.
CFG = dict(
    IMG_SIZE=299,        # input size of the Inception model
    EPOCHS=10,           # 10 or more
    LEARNING_RATE=3e-4,
    BATCH_SIZE=32,
    SEED=41,
)


## Fix Random Seed

In [7]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED``, and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick kernels by timing, which can differ
    # between runs; it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False

# Fix the seed before any data or model object is created.
seed_everything(seed=CFG['SEED'])

## Data Pre-processing

In [8]:
# Load the training table (image paths plus label columns).
df = pd.read_csv(filepath_or_buffer='/Users/nayeong-eun/desktop/open/train.csv')

In [9]:
# Sequential hold-out split: the first 80% of rows train, the rest validate.
train_len = int(len(df) * 0.8)
# Take explicit copies: the 'path' column of both frames is reassigned later,
# and writing into a plain slice of `df` is a chained assignment.
train_df = df.iloc[:train_len].copy()
val_df = df.iloc[train_len:].copy()

In [10]:
# Extract the label matrices for the training and validation splits:
# every column from index 2 onward, converted to float32.
train_label_vec = train_df.iloc[:, 2:].to_numpy().astype(np.float32)
val_label_vec = val_df.iloc[:, 2:].to_numpy().astype(np.float32)

In [11]:
# Record the number of label columns; it sizes the model's output layer.
CFG.update(label_size=train_label_vec.shape[1])

## CustomDataset

In [12]:
class CustomDataset(Dataset):
    """Image dataset that reads files with OpenCV and optionally pairs them with labels.

    Args:
        img_path_list: Indexable collection of image file paths.
        label_list: Indexable collection of labels aligned with
            ``img_path_list``, or ``None`` for unlabeled (inference) data.
        transforms: Optional callable invoked as ``transforms(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms  # includes any augmentation

    def __getitem__(self, index):
        """Return ``(image, label)`` when labels exist, otherwise just ``image``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file at this index.
        """
        img_path = str(self.img_path_list[index])

        image = cv2.imread(img_path)
        # cv2.imread signals a missing or undecodable file by returning None
        # instead of raising; fail here with the offending path.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR; the normalisation statistics and pretrained
        # backbone used in this notebook expect RGB channel order.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.label_list is None:
            return image
        return image, self.label_list[index]

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_path_list)

In [13]:
# Preprocessing: resize to the backbone's input size, normalise with the
# ImageNet channel statistics, and convert to a CHW torch tensor.
# `always_apply=True` was dropped: it is deprecated in Albumentations and
# redundant with p=1.0.
train_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),  # changed to 299x299
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                max_pixel_value=255.0, p=1.0),
    ToTensorV2()
])

# Validation/test use the same deterministic pipeline as training.
test_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                max_pixel_value=255.0, p=1.0),
    ToTensorV2()
])

In [14]:
# NOTE(review): these loaders use the paths exactly as stored in the CSV; the
# "Run!!" section rebuilds them after the paths are made absolute.
train_dataset = CustomDataset(train_df['path'].values, train_label_vec, train_transform)
# Shuffle the training data each epoch so mini-batches are not always drawn
# in CSV row order.
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_df['path'].values, val_label_vec, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Definition

In [15]:
# %pip installs into the environment of the running kernel, whereas `!pip`
# may resolve to a different interpreter on PATH.
%pip install timm



In [None]:
# 터미널에서 설치 필요
# pip install timm

import timm
import torch.nn as nn

class BaseModel(nn.Module):
    """Pretrained Inception-ResNet-v2 backbone followed by a linear regression head.

    Args:
        gene_size: Width of the regression output; defaults to the
            ``CFG['label_size']`` value at class-definition time.
    """

    def __init__(self, gene_size=CFG['label_size']):
        super().__init__()

        # Build the pretrained network, note the width of the features that
        # feed its classifier, then disable that classifier.
        backbone = timm.create_model('inception_resnet_v2', pretrained=True)
        feature_dim = backbone.classif.in_features
        backbone.classif = nn.Identity()

        self.backbone = backbone
        self.regressor = nn.Linear(feature_dim, gene_size)

    def forward(self, x):
        """Run the backbone and map its features to ``gene_size`` outputs."""
        return self.regressor(self.backbone(x))


## Train

In [17]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train ``model`` with MSE loss and return it with its best-epoch weights.

    Runs ``CFG['EPOCHS']`` epochs. After each epoch the validation loss is
    computed with ``validation``; whenever it improves, a copy of the weights
    is saved. At the end those weights are loaded back into ``model``.

    Args:
        model: Network to optimise; moved to ``device`` in place.
        optimizer: Optimiser already bound to ``model``'s parameters.
        train_loader: Iterable of ``(images, labels)`` batches.
        val_loader: Iterable passed through to ``validation``.
        scheduler: Optional scheduler stepped with the validation loss
            (ReduceLROnPlateau-style), or ``None``.
        device: Device on which batches are processed.

    Returns:
        The same ``model`` object holding the weights of the epoch with the
        lowest validation loss, or ``None`` if no epoch improved on the
        initial best loss (e.g. the loss was NaN or there were zero epochs).
    """
    model.to(device)
    criterion = nn.MSELoss().to(device)

    best_loss = float('inf')
    best_state = None

    for epoch in range(1, CFG['EPOCHS'] + 1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(train_loader):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(imgs)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}]')

        if scheduler is not None:
            scheduler.step(_val_loss)

        if best_loss > _val_loss:
            best_loss = _val_loss
            # Keeping a reference to `model` would track every later update,
            # so copy the tensors out (to CPU, to spare device memory).
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    if best_state is None:
        return None

    # Restore the best weights so the returned model is not just the last epoch.
    model.load_state_dict(best_state)
    return model

In [18]:
def validation(model, criterion, val_loader, device):
    """Compute the mean per-batch loss of ``model`` over ``val_loader``.

    Switches the model to eval mode and runs without gradient tracking.

    Args:
        model: Network to evaluate.
        criterion: Loss callable taking ``(predictions, labels)``.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device on which batches are processed.

    Returns:
        The NumPy mean of the per-batch loss values.
    """
    model.eval()

    with torch.no_grad():
        batch_losses = [
            criterion(model(batch_imgs.float().to(device)), batch_labels.to(device)).item()
            for batch_imgs, batch_labels in tqdm(iter(val_loader))
        ]

    return np.mean(batch_losses)

## Run!!

In [19]:
# 1. Base directory of the image files (the folder that contains train.csv).
base_path = "/Users/nayeong-eun/desktop/open/"

# 2. Make the image paths in both dataframes absolute.
#    Entries that already start with base_path are left alone, so re-running
#    this cell does not prepend the prefix twice. `assign` builds new frames
#    rather than writing into slices of `df`.
_train_paths = train_df['path'].astype(str)
_val_paths = val_df['path'].astype(str)
train_df = train_df.assign(
    path=_train_paths.where(_train_paths.str.startswith(base_path), base_path + _train_paths)
)
val_df = val_df.assign(
    path=_val_paths.where(_val_paths.str.startswith(base_path), base_path + _val_paths)
)

# 3. Rebuild the datasets and loaders on the absolute paths.
train_dataset = CustomDataset(train_df['path'].values, train_label_vec, train_transform)
# Shuffle the training data each epoch so mini-batches are not always drawn
# in CSV row order.
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_df['path'].values, val_label_vec, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


In [20]:
# Build the model, optimiser and LR scheduler, then train.
# No `model.eval()` here: train() switches the mode itself every epoch.
model = BaseModel()
optimizer = torch.optim.Adam(params=model.parameters(), lr=CFG["LEARNING_RATE"])
# Halve the learning rate after 2 epochs without validation-loss improvement.
# The `verbose` argument is deprecated in recent PyTorch releases and was
# dropped; inspect optimizer.param_groups[0]['lr'] to see the current rate.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8
)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

INFO:timm.models._builder:Loading pretrained weights from Hugging Face hub (timm/inception_resnet_v2.tf_in1k)
INFO:timm.models._hub:[timm/inception_resnet_v2.tf_in1k] Safe alternative available for 'pytorch_model.bin' (as 'model.safetensors'). Loading weights using safetensors.
  6%|▋         | 11/175 [11:24<2:50:11, 62.27s/it]


KeyboardInterrupt: 

## Inference

In [29]:
# Load the test table (image paths, no labels).
test = pd.read_csv(filepath_or_buffer='/Users/nayeong-eun/desktop/open/test.csv')

In [30]:
# 1. Base directory; change it to the folder where the image files are stored.
base_path = "/Users/nayeong-eun/desktop/open/"

# 2. Make the test image paths absolute.
#    Entries that already start with base_path are left alone, so re-running
#    this cell does not prepend the prefix twice.
_test_paths = test['path'].astype(str)
test['path'] = _test_paths.where(_test_paths.str.startswith(base_path), base_path + _test_paths)

# 3. Build the test dataset (no labels) and its loader.
test_dataset = CustomDataset(test['path'].values, None, test_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)


In [31]:
# NOTE(review): rebuilds the same test dataset and loader as the preceding cell.
test_dataset = CustomDataset(
    img_path_list=test['path'].values,
    label_list=None,
    transforms=test_transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [32]:
def inference(model, test_loader, device):
    """Run ``model`` over every batch of ``test_loader`` and stack the outputs.

    Args:
        model: Network to evaluate; switched to eval mode.
        test_loader: Iterable yielding image batches (no labels).
        device: Device on which batches are processed.

    Returns:
        A NumPy array with the per-batch predictions concatenated along the
        first axis.
    """
    model.eval()

    with torch.no_grad():
        batch_outputs = [
            model(batch.to(device).float()).detach().cpu()
            for batch in tqdm(test_loader)
        ]

    return torch.cat(batch_outputs).numpy()

In [33]:
# Predict on the test set with the model returned by train().
preds = inference(model=infer_model, test_loader=test_loader, device=device)

100%|██████████| 72/72 [09:38<00:00,  8.04s/it]


## Submission

In [34]:
# Fill every column after the first of the sample submission with the
# float32 predictions and write the result next to the input files.
submit = pd.read_csv(filepath_or_buffer='/Users/nayeong-eun/desktop/open/sample_submission.csv')
prediction_matrix = np.array(preds).astype(np.float32)
submit.iloc[:, 1:] = prediction_matrix
submit.to_csv(path_or_buf='/Users/nayeong-eun/desktop/open/baseline_submit.csv', index=False)