In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import torchvision.models as models
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [3]:
CFG = {
    'FPS' : 30,
    'IMG_SIZE' : 128,
    'LEARNING_RATE' : 3e-4,
    'BATCH_SIZE' : 4,
    'SEED' : 41,
    'EPOCHS' : 30
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # seed 고정

In [5]:
df = pd.read_csv('./train.csv')

# 8:2 로 Train / Val 분할
train_data, val_data, _, _ = train_test_split(df, df['label'], test_size = 0.2, random_state=CFG['SEED'])

In [6]:
train_data['path'].values[0]

'./train/TRAIN_045.mp4'

In [7]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list, transform = None):
        self.video_path_list = video_path_list
        self.label_list = label_list
        self.tf = transform
        self.totensor = transforms.ToTensor()

    def __getitem__(self, idx):
        frames = self.get_video(self.video_path_list[idx])

        if self.label_list is not None:
            label = self.label_list[idx]
            return frames, label
        else:
            return frames

    def __len__(self):
        return len(self.video_path_list)

    def get_video(self, path): # 30 frame 비디오 >> 30장 이미지 얻는 코드
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['FPS']):
            _, img = cap.read()
            img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
            img = img / 255.  

            frames.append(img)
        return torch.FloatTensor(np.array(frames)).permute(3,0,1,2)

In [8]:
train_dataset = CustomDataset(train_data['path'].values, train_data['label'].values)
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_data['path'].values, val_data['label'].values)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Define

In [9]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=5):
        super(BaseModel, self).__init__()
        self.feature_extract = nn.Sequential(
            nn.Conv3d(3, 8, (3, 3, 3)),
            nn.ReLU(),
            nn.BatchNorm3d(8),
            nn.MaxPool3d(2),
            nn.Conv3d(8, 32, (2, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(32),
            nn.MaxPool3d(2),
            nn.Conv3d(32, 64, (2, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.MaxPool3d(2),
            nn.Conv3d(64, 128, (2, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(128),
            nn.MaxPool3d((1, 7, 7)),
        )
        self.classifier = nn.Linear(512, num_classes)
        
    def forward(self, x):
        batch_size = x.size(0)
        x = self.feature_extract(x)
        x = x.view(batch_size, -1)
        x = self.classifier(x)
        return x

In [10]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    best_val_score = 0
    best_model = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, label in tqdm(train_loader):
            videos = videos.to(device)
            label = label.to(device)

            optimizer.zero_grad()

            output = model(videos)
            loss = criterion(output, label)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'EPOCH [{epoch}] | TRAIN LOSS [{_train_loss:.5f}] | VAL LOSS [{_val_loss:.5f}] | VAL F1 [{_val_score}]')

        if scheduler is not None:
            scheduler.step(_val_score)

        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    return best_model


In [11]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []

    with torch.no_grad():
        for videos, label in tqdm(iter(val_loader)):
            videos = videos.to(device)
            label = label.to(device)

            logit = model(videos)
            loss = criterion(logit, label)

            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist() # argmax(1) : axis 1 로 최대 인덱스 반환
            trues += label.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)

    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

In [12]:
model = BaseModel()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

100%|██████████| 122/122 [00:09<00:00, 12.78it/s]
100%|██████████| 31/31 [00:01<00:00, 17.76it/s]


EPOCH [1] | TRAIN LOSS [1.49790] | VAL LOSS [1.12766] | VAL F1 [0.5344198168295532]


100%|██████████| 122/122 [00:09<00:00, 13.36it/s]
100%|██████████| 31/31 [00:01<00:00, 17.69it/s]


EPOCH [2] | TRAIN LOSS [0.75764] | VAL LOSS [1.04968] | VAL F1 [0.5471941720094016]


100%|██████████| 122/122 [00:09<00:00, 13.20it/s]
100%|██████████| 31/31 [00:01<00:00, 18.47it/s]


EPOCH [3] | TRAIN LOSS [0.49339] | VAL LOSS [1.10295] | VAL F1 [0.5917502169282314]


100%|██████████| 122/122 [00:09<00:00, 13.48it/s]
100%|██████████| 31/31 [00:01<00:00, 18.76it/s]


EPOCH [4] | TRAIN LOSS [0.31543] | VAL LOSS [0.96945] | VAL F1 [0.6473474666917289]


100%|██████████| 122/122 [00:09<00:00, 13.41it/s]
100%|██████████| 31/31 [00:01<00:00, 18.72it/s]


EPOCH [5] | TRAIN LOSS [0.17956] | VAL LOSS [0.78451] | VAL F1 [0.716030767725683]


100%|██████████| 122/122 [00:09<00:00, 13.24it/s]
100%|██████████| 31/31 [00:01<00:00, 17.61it/s]


EPOCH [6] | TRAIN LOSS [0.10894] | VAL LOSS [0.88071] | VAL F1 [0.7010657038963153]


100%|██████████| 122/122 [00:09<00:00, 13.01it/s]
100%|██████████| 31/31 [00:01<00:00, 17.37it/s]


EPOCH [7] | TRAIN LOSS [0.09766] | VAL LOSS [0.93963] | VAL F1 [0.7059755714006893]


100%|██████████| 122/122 [00:09<00:00, 13.03it/s]
100%|██████████| 31/31 [00:01<00:00, 17.19it/s]


EPOCH [8] | TRAIN LOSS [0.07261] | VAL LOSS [0.72318] | VAL F1 [0.7496170030639755]


100%|██████████| 122/122 [00:09<00:00, 12.77it/s]
100%|██████████| 31/31 [00:01<00:00, 17.81it/s]


EPOCH [9] | TRAIN LOSS [0.09372] | VAL LOSS [1.07188] | VAL F1 [0.6555316599742329]


100%|██████████| 122/122 [00:09<00:00, 12.89it/s]
100%|██████████| 31/31 [00:01<00:00, 17.23it/s]


EPOCH [10] | TRAIN LOSS [0.10215] | VAL LOSS [1.00302] | VAL F1 [0.6826112901300871]


100%|██████████| 122/122 [00:09<00:00, 13.07it/s]
100%|██████████| 31/31 [00:01<00:00, 17.35it/s]


EPOCH [11] | TRAIN LOSS [0.15080] | VAL LOSS [2.20180] | VAL F1 [0.4404647596451311]
Epoch    11: reducing learning rate of group 0 to 1.5000e-04.


100%|██████████| 122/122 [00:09<00:00, 13.06it/s]
100%|██████████| 31/31 [00:01<00:00, 17.62it/s]


EPOCH [12] | TRAIN LOSS [0.09347] | VAL LOSS [0.74339] | VAL F1 [0.7236286960841486]


100%|██████████| 122/122 [00:09<00:00, 12.97it/s]
100%|██████████| 31/31 [00:01<00:00, 17.56it/s]


EPOCH [13] | TRAIN LOSS [0.02032] | VAL LOSS [0.74159] | VAL F1 [0.743599770826601]


100%|██████████| 122/122 [00:09<00:00, 12.97it/s]
100%|██████████| 31/31 [00:01<00:00, 16.74it/s]


EPOCH [14] | TRAIN LOSS [0.01240] | VAL LOSS [0.82115] | VAL F1 [0.754181608186692]


100%|██████████| 122/122 [00:09<00:00, 12.88it/s]
100%|██████████| 31/31 [00:01<00:00, 17.38it/s]


EPOCH [15] | TRAIN LOSS [0.00710] | VAL LOSS [0.75682] | VAL F1 [0.7411457536537853]


100%|██████████| 122/122 [00:09<00:00, 13.34it/s]
100%|██████████| 31/31 [00:01<00:00, 17.32it/s]


EPOCH [16] | TRAIN LOSS [0.01320] | VAL LOSS [0.74372] | VAL F1 [0.7601594202898551]


100%|██████████| 122/122 [00:09<00:00, 13.38it/s]
100%|██████████| 31/31 [00:01<00:00, 17.63it/s]


EPOCH [17] | TRAIN LOSS [0.00530] | VAL LOSS [0.72682] | VAL F1 [0.7616937993996273]


100%|██████████| 122/122 [00:09<00:00, 13.30it/s]
100%|██████████| 31/31 [00:01<00:00, 17.60it/s]


EPOCH [18] | TRAIN LOSS [0.00414] | VAL LOSS [0.76614] | VAL F1 [0.7450481905801054]


100%|██████████| 122/122 [00:09<00:00, 13.33it/s]
100%|██████████| 31/31 [00:01<00:00, 18.21it/s]


EPOCH [19] | TRAIN LOSS [0.00326] | VAL LOSS [0.73637] | VAL F1 [0.7523944040893193]


100%|██████████| 122/122 [00:09<00:00, 13.44it/s]
100%|██████████| 31/31 [00:01<00:00, 17.62it/s]


EPOCH [20] | TRAIN LOSS [0.00180] | VAL LOSS [0.76404] | VAL F1 [0.7696715399435725]


100%|██████████| 122/122 [00:09<00:00, 13.23it/s]
100%|██████████| 31/31 [00:01<00:00, 17.44it/s]


EPOCH [21] | TRAIN LOSS [0.00223] | VAL LOSS [0.80096] | VAL F1 [0.726797428483988]


100%|██████████| 122/122 [00:09<00:00, 13.35it/s]
100%|██████████| 31/31 [00:01<00:00, 18.68it/s]


EPOCH [22] | TRAIN LOSS [0.00184] | VAL LOSS [0.74778] | VAL F1 [0.7676417237617807]


100%|██████████| 122/122 [00:09<00:00, 13.34it/s]
100%|██████████| 31/31 [00:01<00:00, 17.71it/s]


EPOCH [23] | TRAIN LOSS [0.00173] | VAL LOSS [0.78995] | VAL F1 [0.7608610224242625]
Epoch    23: reducing learning rate of group 0 to 7.5000e-05.


100%|██████████| 122/122 [00:09<00:00, 13.48it/s]
100%|██████████| 31/31 [00:01<00:00, 18.58it/s]


EPOCH [24] | TRAIN LOSS [0.00186] | VAL LOSS [0.76315] | VAL F1 [0.7698173105720276]


100%|██████████| 122/122 [00:09<00:00, 13.46it/s]
100%|██████████| 31/31 [00:01<00:00, 18.45it/s]


EPOCH [25] | TRAIN LOSS [0.00172] | VAL LOSS [0.80067] | VAL F1 [0.7628117225993869]


100%|██████████| 122/122 [00:09<00:00, 13.33it/s]
100%|██████████| 31/31 [00:01<00:00, 18.66it/s]


EPOCH [26] | TRAIN LOSS [0.00153] | VAL LOSS [0.75904] | VAL F1 [0.7708358142781012]


100%|██████████| 122/122 [00:09<00:00, 13.29it/s]
100%|██████████| 31/31 [00:01<00:00, 18.30it/s]


EPOCH [27] | TRAIN LOSS [0.00130] | VAL LOSS [0.82586] | VAL F1 [0.7866184104825552]


100%|██████████| 122/122 [00:09<00:00, 13.29it/s]
100%|██████████| 31/31 [00:01<00:00, 18.56it/s]


EPOCH [28] | TRAIN LOSS [0.00152] | VAL LOSS [0.80677] | VAL F1 [0.759248597131572]


100%|██████████| 122/122 [00:09<00:00, 13.41it/s]
100%|██████████| 31/31 [00:01<00:00, 18.32it/s]


EPOCH [29] | TRAIN LOSS [0.00105] | VAL LOSS [0.82782] | VAL F1 [0.7595191849443463]


100%|██████████| 122/122 [00:09<00:00, 13.38it/s]
100%|██████████| 31/31 [00:01<00:00, 18.25it/s]

EPOCH [30] | TRAIN LOSS [0.00104] | VAL LOSS [0.82598] | VAL F1 [0.7785485329226622]
Epoch    30: reducing learning rate of group 0 to 3.7500e-05.





# Inference

In [13]:
test = pd.read_csv('./test.csv')

test_dataset = CustomDataset(test['path'].values, None)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle = False)

In [15]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

In [16]:
preds = inference(model, test_loader, device)

100%|██████████| 39/39 [00:02<00:00, 17.27it/s]


In [19]:
print(preds)

[1, 3, 0, 4, 4, 0, 4, 3, 4, 2, 2, 2, 0, 4, 1, 4, 3, 1, 0, 1, 2, 2, 2, 3, 2, 3, 1, 1, 4, 0, 3, 1, 0, 4, 3, 4, 2, 3, 2, 0, 2, 3, 4, 3, 3, 0, 4, 0, 4, 1, 3, 4, 1, 0, 1, 3, 4, 2, 2, 4, 0, 3, 4, 2, 1, 2, 1, 0, 2, 1, 3, 1, 4, 3, 3, 0, 4, 4, 3, 0, 0, 4, 3, 1, 3, 2, 2, 3, 0, 2, 1, 2, 2, 0, 4, 2, 4, 0, 2, 3, 2, 1, 1, 3, 0, 2, 0, 0, 3, 1, 0, 1, 4, 3, 1, 0, 0, 1, 1, 3, 4, 3, 1, 2, 3, 4, 0, 2, 1, 4, 0, 1, 4, 3, 4, 1, 0, 2, 2, 0, 0, 0, 4, 1, 3, 1, 2, 2, 2, 4, 0, 1, 2]


In [25]:
submit = pd.read_csv('./sample_submission.csv')
submit['label'] = preds
submit.to_csv('base_line_code_submit.csv', index=False)