## Import

In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.models.feature_extraction import create_feature_extractor

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix

import warnings
warnings.filterwarnings(action='ignore') 

In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [3]:
CFG = {
    'VIDEO_LENGTH':50, # 10프레임 * 5초
    'IMG_SIZE':(160, 90),
    'EPOCHS':20,
    'LEARNING_RATE':5e-5,
    'BATCH_SIZE':4,
    'SEED':41
}

## Fixed RandomSeed

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Load

In [6]:
df = pd.read_csv('./train.csv')

## Train / Validation Split

In [7]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CFG['SEED'])

## Encoder / Decoder

In [8]:
encoder = [[[0 for _ in range(2)] for _ in range(3)] for _ in range(3)]
encoder[2][0][0] = 1
encoder[2][0][1] = 2
encoder[2][1][0] = 3
encoder[2][1][1] = 4
encoder[2][2][0] = 5
encoder[2][2][1] = 6
encoder[1][0][0] = 7
encoder[1][0][1] = 8
encoder[1][1][0] = 9
encoder[1][1][1] = 10
encoder[1][2][0] = 11
encoder[1][2][1] = 12

In [24]:
encoder[0][:][:]

[[0, 0], [0, 0], [0, 0]]

## CustomDataset

In [9]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list, tfms):
        self.video_path_list = video_path_list
        self.label_list = label_list
        self.tfms = tfms
        
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            label = np.array(label, dtype=np.int64)
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            frames.append(img)
        frames = aug_video(frames, tfms=self.tfms)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)
    
def aug_video(vid, tfms):
    seed = random.randint(0,99999)
    aug_vid = []
    for x in vid:
        random.seed(seed)
        aug_vid.append((tfms(image = np.asarray(x)))['image'])
    return torch.from_numpy(np.stack(aug_vid))

tfms_train = A.Compose([
            A.Resize(width=CFG['IMG_SIZE'][0], height=CFG['IMG_SIZE'][1]),
            A.HorizontalFlip(p=0.5),
            A.Normalize()
            ], p=1)

tfms_test = A.Compose([
            A.Resize(width=CFG['IMG_SIZE'][0], height=CFG['IMG_SIZE'][1]),
            A.Normalize()
            ], p=1)

In [10]:
train_dataset = CustomDataset(train['video_path'].values, train[['crash_ego', 'weather', 'timing']].values, tfms=tfms_train)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['video_path'].values, val[['crash_ego', 'weather', 'timing']].values, tfms=tfms_test)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Define

In [11]:
class R2Plus1D_18(nn.Module):
    def __init__(self, num_classes=13):
        super(R2Plus1D_18, self).__init__()
        self.pretrained_model = models.video.r2plus1d_18(weights=models.video.R2Plus1D_18_Weights)
        self.pretrained_model.fc = nn.Linear(512, 7)
        
    def forward(self, x):
        batch_size = x.size(0)
        x = self.pretrained_model(x)
        return x

## Train

In [12]:
def calc_loss(output, labels, criterion):
    loss = 0

    if output[0][labels[0][:,0] == 1].size(0) != 0:
        loss += criterion(output[0][labels[0][:,0] == 1], labels[0][labels[0][:,0] == 1]) / 4
        loss += criterion(output[1][labels[0][:,0] == 1], labels[1][labels[0][:,0] == 1]) / 4

    if output[0][labels[0][:,0] == 0].size(0) != 0:
        loss +=  criterion(output[0][labels[0][:,0] == 0], labels[0][labels[0][:,0] == 0]) / 8
        loss +=  criterion(output[1][labels[0][:,0] == 0], labels[1][labels[0][:,0] == 0]) / 8
        loss +=  criterion(output[2][labels[0][:,0] == 0], labels[2][labels[0][:,0] == 0]) / 8
        loss +=  criterion(output[3][labels[0][:,0] == 0], labels[3][labels[0][:,0] == 0]) / 8
        
    return loss
    
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    CE = nn.CrossEntropyLoss().to(device)
    BCE = nn.BCELoss().to(device)
    best_val_score = -1
    best_model = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            
            output = model(videos)

            loss = 0
            for i, label in enumerate(labels):
                if label[0] == 0:
                    loss += CE(input = output[i][:3], target = F.one_hot(label[0], 3).to(torch.float64).to(device))
                else:
                    loss += CE(input = output[i][:3], target = F.one_hot(label[0], 3).to(torch.float64).to(device)) / 3
                    loss += CE(input = output[i][3:6], target = F.one_hot(label[1], 3).to(torch.float64).to(device)) / 3
                    loss += BCE(input = F.sigmoid(output[i][6]).to(torch.float64), target = label[2].to(torch.float64).to(device)) / 3
            loss /= len(labels)

            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score, _val_acc = validation(model, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}], Val ACC : [{_val_acc:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

In [13]:
def validation(model, val_loader, device):
    model.eval()
    CE = nn.CrossEntropyLoss().to(device)
    BCE = nn.BCELoss().to(device)
    val_loss = []
    preds, trues = [], []
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            output = model(videos)
            
            loss = 0
            for i, label in enumerate(labels):
                if label[0] == 0:
                    loss += CE(input = output[i][:3], target = F.one_hot(label[0], 3).to(torch.float64).to(device))
                else:
                    loss += CE(input = output[i][:3], target = F.one_hot(label[0], 3).to(torch.float64).to(device)) / 3
                    loss += CE(input = output[i][3:6], target = F.one_hot(label[1], 3).to(torch.float64).to(device)) / 3
                    loss += BCE(input = F.sigmoid(output[i][6]).to(torch.float64), target = label[2].to(torch.float64).to(device)) / 3
            loss /= len(labels)

            val_loss.append(loss.item())
            
            output = output.to('cpu').numpy()
            pred = np.stack([np.argmax(output[:, :3], axis=1),
                             np.argmax(output[:, 3:6], axis=1),
                             np.int16(output[:, 6] > 0.5)], axis=1)

            preds += list(pred)
            trues += list(labels.to('cpu').numpy())

        _val_loss = np.mean(val_loss)

    preds_encoded = list(map(lambda x: encoder[x[0]][x[1]][x[2]], preds))
    trues_encoded = list(map(lambda x: encoder[x[0]][x[1]][x[2]], trues))

    _val_acc = np.mean(np.array(preds_encoded) == np.array(trues_encoded))
    _val_score = f1_score(trues_encoded, preds_encoded, average='macro')
    return _val_loss, _val_score, _val_acc

## Run!!

In [14]:
model = R2Plus1D_18()
model.eval()
optimizer = torch.optim.AdamW(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.2, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)
torch.save(infer_model.state_dict(), 'model.pth')

  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.30435] Val Loss : [0.14561] Val F1 : [0.41717], Val ACC : [0.86667]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.16098] Val Loss : [0.09907] Val F1 : [0.49543], Val ACC : [0.91296]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.12005] Val Loss : [0.10106] Val F1 : [0.50673], Val ACC : [0.91667]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.09902] Val Loss : [0.10474] Val F1 : [0.47088], Val ACC : [0.91852]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.07567] Val Loss : [0.11327] Val F1 : [0.48568], Val ACC : [0.89815]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.05989] Val Loss : [0.10595] Val F1 : [0.45449], Val ACC : [0.90370]
Epoch 00006: reducing learning rate of group 0 to 1.0000e-05.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.03425] Val Loss : [0.10510] Val F1 : [0.48538], Val ACC : [0.91111]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.03616] Val Loss : [0.10195] Val F1 : [0.49274], Val ACC : [0.91296]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.02418] Val Loss : [0.09922] Val F1 : [0.49296], Val ACC : [0.92037]
Epoch 00009: reducing learning rate of group 0 to 2.0000e-06.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.01712] Val Loss : [0.09592] Val F1 : [0.55949], Val ACC : [0.92222]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.01692] Val Loss : [0.09223] Val F1 : [0.51765], Val ACC : [0.92037]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.02161] Val Loss : [0.10766] Val F1 : [0.50760], Val ACC : [0.92037]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.01606] Val Loss : [0.08963] Val F1 : [0.55550], Val ACC : [0.92407]
Epoch 00013: reducing learning rate of group 0 to 4.0000e-07.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.01668] Val Loss : [0.10281] Val F1 : [0.48344], Val ACC : [0.91481]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.01707] Val Loss : [0.09978] Val F1 : [0.50280], Val ACC : [0.91852]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.01359] Val Loss : [0.10653] Val F1 : [0.51050], Val ACC : [0.92037]
Epoch 00016: reducing learning rate of group 0 to 8.0000e-08.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.02168] Val Loss : [0.10219] Val F1 : [0.49702], Val ACC : [0.91667]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.01551] Val Loss : [0.10011] Val F1 : [0.48178], Val ACC : [0.91667]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.01660] Val Loss : [0.09706] Val F1 : [0.54073], Val ACC : [0.91667]
Epoch 00019: reducing learning rate of group 0 to 1.6000e-08.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.01550] Val Loss : [0.10021] Val F1 : [0.51173], Val ACC : [0.92037]


## Inference

In [15]:
test = pd.read_csv('./test.csv')

In [16]:
test_dataset = CustomDataset(test['video_path'].values, None, tfms=tfms_test)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [17]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            output = model(videos)
            output = output.to('cpu').numpy()
            pred = np.stack([np.argmax(output[:, :3], axis=1),
                             np.argmax(output[:, 3:6], axis=1),
                             np.int16(output[:, 6] > 0.5)], axis=1)
            preds += list(pred)
    return list(map(lambda x: encoder[x[0]][x[1]][x[2]], preds))

In [18]:
preds = inference(model, test_loader, device)

  0%|          | 0/450 [00:00<?, ?it/s]

## Submission

In [19]:
submit = pd.read_csv('./sample_submission.csv')

In [20]:
submit['label'] = preds
submit.head()

Unnamed: 0,sample_id,label
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,0
4,TEST_0004,0


In [26]:
from datetime import datetime

current_time = datetime.now()
time = str(current_time.strftime("%m_%d %H_%M_%S"))
if not os.path.isdir('history'):
    os.mkdir('history')

if not os.path.isdir(os.path.join('history', time)):
    os.mkdir(os.path.join('history', time))

'./history\\02_11 09_15_26/model.pth'