In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings(action='ignore')

In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
CFG = {
    "IMG_SIZE" : 128,
    "EPOCHS" : 5,
    "LEARNING_RATE" : 3e-4,
    "BATCH_SIZE" : 64,
    "SEED" : 41
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [5]:
all_df = pd.read_csv('./data/train.csv')

In [6]:
train_df, val_df, _, _ = train_test_split(all_df, all_df['cat3'], test_size=0.2, random_state=CFG['SEED'])

In [7]:
le = preprocessing.LabelEncoder()
le.fit(train_df['cat3'].values)

In [8]:
train_df['cat3'] = le.transform(train_df['cat3'].values)
val_df['cat3'] = le.transform(val_df['cat3'].values)

In [9]:
train_df

Unnamed: 0,id,img_path,overview,cat1,cat2,cat3
1786,TRAIN_01786,./image/train/TRAIN_01786.jpg,함평양서파충류생태공원은 전남 함평군 신광면에 자리 잡고 있다. 공원 내에는 양서류를...,인문(문화/예술/역사),문화시설,44
16670,TRAIN_16670,./image/train/TRAIN_16670.jpg,국제수변레포츠 단지 내 위치한 충주 탄금호 캠핑 리조트는 다양한 문화체험이 가능한 ...,레포츠,육상 레포츠,73
3377,TRAIN_03377,./image/train/TRAIN_03377.jpg,경남 함양군 서하면에 위치한 함양 라온캠핑장은 마운틴뷰의 조용하고 깨끗한 신설 캠핑...,레포츠,육상 레포츠,73
12814,TRAIN_12814,./image/train/TRAIN_12814.jpg,캠프바베큐는 충남 천안시 동남구에 자리 잡고 있다. 천안시청을 기점으로 약 8㎞가량...,레포츠,육상 레포츠,73
2607,TRAIN_02607,./image/train/TRAIN_02607.jpg,"원수산습지생태원은 세종시 연기면 세종리에 있다. 생태원 내에는 보존습지, 수생식물습...",자연,자연관광지,93
...,...,...,...,...,...,...
6819,TRAIN_06819,./image/train/TRAIN_06819.jpg,"밤바다의 멋진 야경을 보며 걷는 물 위의 데크 산책로, 바다 위에 조성된 742m의...",인문(문화/예술/역사),건축/조형물,21
15829,TRAIN_15829,./image/train/TRAIN_15829.jpg,남해비치텔은 한려해상 국립공원의 중심지역인 남해군 설천면 노량리에 위치해 있다. 이...,숙박,숙박시설,31
8513,TRAIN_08513,./image/train/TRAIN_08513.jpg,장경리해변은 자갈모래로 형성된 해변을 가지고 있으며 백사장의 길이는 1.5km 정도...,자연,자연관광지,121
931,TRAIN_00931,./image/train/TRAIN_00931.jpg,서울 종로에 자리한 낙원떡집은 ‘서울미래유산’으로도 선정될 만큼 오랜 역사와 전통을...,음식,음식점,118


# Vectorizer

In [10]:
vectorizer = CountVectorizer(max_features=4096)

In [11]:
train_vectors = vectorizer.fit_transform(train_df['overview'])
train_vectors = train_vectors.todense()

val_vectors = vectorizer.transform(val_df['overview'])
val_vectors = val_vectors.todense()

In [12]:
display(train_vectors.shape)
display(val_vectors.shape)

(13588, 4096)

(3398, 4096)

# CustomDataset

In [63]:
class CustomDataset(Dataset) :
    def __init__(self, img_path_list, text_vectors, label_list, transforms, infer=False) :
        self.img_path_list = img_path_list
        self.text_vectors = text_vectors
        self.label_list = label_list
        self.transforms = transforms
        self.infer = infer
        
    def __getitem__(self, index) :
        # NLP
        text_vector = self.text_vectors[index]
        
        # image
        img_path = self.img_path_list[index]
        image = cv2.imread(os.path.join('./data/',img_path))
        
        if self.transforms is not None :
            image = self.transforms(image=image)['image']
            
        # Label
        if self.infer : 
            return image, torch.Tensor(text_vector).view(-1)
        else :
            label = self.label_list[index]
            return image, torch.Tensor(text_vector).view(-1), torch.tensor(label, dtype=torch.long)
        
    def __len__(self) :
        return len(self.img_path_list)

In [64]:
train_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

test_transform = A.Compose([
                            A.Resize(CFG['IMG_SIZE'],CFG['IMG_SIZE']),
                            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, always_apply=False, p=1.0),
                            ToTensorV2()
                            ])

In [65]:
train_dataset = CustomDataset(train_df['img_path'].values, train_vectors, train_df['cat3'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_df['img_path'].values, val_vectors, val_df['cat3'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

# Model Define

In [66]:
class CustomModel(nn.Module) :
    def __init__(self, num_classes=len(le.classes_)) :
        super(CustomModel, self).__init__()
        
        # image
        self.cnn_extract = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=4, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        # text
        self.nlp_extract = nn.Sequential(
            nn.Linear(4096, 2048),
            nn.ReLU(),
            nn.Linear(2048, 1024)
        )
        
        # classifier
        self.classifier = nn.Sequential(
            nn.Linear(4160, num_classes)
        )
        
    def forward(self, img, txt) :
        img_feature = self.cnn_extract(img)
        img_feature = torch.flatten(img_feature, start_dim=1)
        text_feature = self.nlp_extract(txt)
        feature = torch.cat([img_feature, text_feature], axis=1)
        output = self.classifier(feature)
        
        return output

# train

In [67]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)

    criterion = nn.CrossEntropyLoss().to(device)
    best_score = 0
    best_model = None
    
    for epoch in range(1,CFG["EPOCHS"]+1):
        model.train()
        train_loss = []
        print("epoch : ",epoch)
        for img, text, label in tqdm(iter(train_loader)):
            img = img.float().to(device)
            text = text.to(device)
            label = label.to(device)
            
            optimizer.zero_grad()

            model_pred = model(img, text)
            
            loss = criterion(model_pred, label)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        tr_loss = np.mean(train_loss)
            
        val_loss, val_score = validation(model, criterion, val_loader, device)
            
        print(f'Epoch [{epoch}], Train Loss : [{tr_loss:.5f}] Val Loss : [{val_loss:.5f}] Val Score : [{val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step()
            
        if best_score < val_score:
            best_score = val_score
            best_model = model
    
    return best_model

In [68]:
def score_function(real, pred):
    return f1_score(real, pred, average="weighted")

def validation(model, criterion, val_loader, device):
    model.eval()
    
    model_preds = []
    true_labels = []
    
    val_loss = []
    
    with torch.no_grad():
        for img, text, label in tqdm(iter(val_loader)):
            img = img.float().to(device)
            text = text.to(device)
            label = label.to(device)
            
            model_pred = model(img, text)
            
            loss = criterion(model_pred, label)
            
            val_loss.append(loss.item())
            
            model_preds += model_pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += label.detach().cpu().numpy().tolist()
        
    test_weighted_f1 = score_function(true_labels, model_preds)
    return np.mean(val_loss), test_weighted_f1

# run

In [69]:
model = CustomModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = None

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

epoch :  1


  0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/54 [00:00<?, ?it/s]

Epoch [1], Train Loss : [2.61232] Val Loss : [1.82024] Val Score : [0.50167]
epoch :  2


  0%|          | 0/213 [00:00<?, ?it/s]

KeyboardInterrupt: 