In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")

In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

## Hyperparameter Setting

In [3]:
CFG = {
    'FPS':30,
    'IMG_SIZE':128,
    'EPOCHS':50,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':4,
    'SEED':41
}

## Fixed Random Seed

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

## Data Load

In [5]:
data_root_path = r"D:\AI_Data\TV_HMCR"

os.listdir(data_root_path)

['sample_submission.csv', 'test', 'test.csv', 'train', 'train.csv']

In [6]:
df = pd.read_csv(os.path.join(data_root_path, "train.csv"))
df["path"] = df["path"].apply(lambda x: os.path.join(data_root_path, os.path.join(x.split("/")[1], x.split("/")[2])))
display(df.head(3))
df.shape

Unnamed: 0,id,path,label
0,TRAIN_000,D:\AI_Data\TV_HMCR\train\TRAIN_000.mp4,3
1,TRAIN_001,D:\AI_Data\TV_HMCR\train\TRAIN_001.mp4,0
2,TRAIN_002,D:\AI_Data\TV_HMCR\train\TRAIN_002.mp4,1


(610, 3)

## Train / Validation Split

In [7]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CFG['SEED'])

train.shape, val.shape

((488, 3), (122, 3))

## Custom Dataset

In [8]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list):
        self.video_path_list = video_path_list
        self.label_list = label_list
        
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['FPS']):
            _, img = cap.read()
            img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
            img = img / 255.
            frames.append(img)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

In [9]:
train_dataset = CustomDataset(train['path'].values, train['label'].values)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val['path'].values, val['label'].values)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Define

In [10]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=5):
        super(BaseModel, self).__init__()
        self.feature_extract = nn.Sequential(
            nn.Conv3d(3, 8, (3, 3, 3)),
            nn.ReLU(),
            nn.BatchNorm3d(8),
            nn.MaxPool3d(2),
            nn.Conv3d(8, 32, (2, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(32),
            nn.MaxPool3d(2),
            nn.Conv3d(32, 64, (2, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.MaxPool3d(2),
            nn.Conv3d(64, 128, (2, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(128),
            nn.MaxPool3d((1, 7, 7)),
        )
        self.classifier = nn.Linear(512, num_classes)
        
    def forward(self, x):
        batch_size = x.size(0)
        x = self.feature_extract(x)
        x = x.view(batch_size, -1)
        x = self.classifier(x)
        return x

## Train

In [11]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(videos)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

In [12]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            logit = model(videos)
            
            loss = criterion(logit, labels)
            
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

## Run

In [13]:
model = BaseModel()
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [1], Train Loss : [1.55585] Val Loss : [1.19087] Val F1 : [0.49931]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.88125] Val Loss : [1.01233] Val F1 : [0.62753]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.55915] Val Loss : [0.81653] Val F1 : [0.72972]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.32234] Val Loss : [0.73588] Val F1 : [0.73823]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.19857] Val Loss : [0.77049] Val F1 : [0.68922]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.11211] Val Loss : [0.76988] Val F1 : [0.70132]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.07309] Val Loss : [0.74528] Val F1 : [0.71388]
Epoch 00007: reducing learning rate of group 0 to 1.5000e-04.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.03414] Val Loss : [0.64329] Val F1 : [0.78214]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.01756] Val Loss : [0.66574] Val F1 : [0.77647]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.01435] Val Loss : [0.63841] Val F1 : [0.78313]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.01703] Val Loss : [0.64055] Val F1 : [0.76337]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.00899] Val Loss : [0.66165] Val F1 : [0.76662]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.00906] Val Loss : [0.64563] Val F1 : [0.76701]
Epoch 00013: reducing learning rate of group 0 to 7.5000e-05.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.00612] Val Loss : [0.65701] Val F1 : [0.75042]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.00605] Val Loss : [0.67238] Val F1 : [0.75742]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.00615] Val Loss : [0.62498] Val F1 : [0.76755]
Epoch 00016: reducing learning rate of group 0 to 3.7500e-05.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.00586] Val Loss : [0.63111] Val F1 : [0.80064]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.00467] Val Loss : [0.63290] Val F1 : [0.79882]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.00368] Val Loss : [0.64486] Val F1 : [0.79276]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.00423] Val Loss : [0.66144] Val F1 : [0.76631]
Epoch 00020: reducing learning rate of group 0 to 1.8750e-05.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.00344] Val Loss : [0.63438] Val F1 : [0.78191]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.00413] Val Loss : [0.66963] Val F1 : [0.78277]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.00405] Val Loss : [0.62934] Val F1 : [0.79978]
Epoch 00023: reducing learning rate of group 0 to 9.3750e-06.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.00282] Val Loss : [0.65503] Val F1 : [0.76728]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.00333] Val Loss : [0.63637] Val F1 : [0.78240]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.00278] Val Loss : [0.66939] Val F1 : [0.76481]
Epoch 00026: reducing learning rate of group 0 to 4.6875e-06.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.00351] Val Loss : [0.63730] Val F1 : [0.78237]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.00301] Val Loss : [0.65404] Val F1 : [0.76534]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.00363] Val Loss : [0.63328] Val F1 : [0.77323]
Epoch 00029: reducing learning rate of group 0 to 2.3437e-06.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [30], Train Loss : [0.00277] Val Loss : [0.64736] Val F1 : [0.79227]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [31], Train Loss : [0.00278] Val Loss : [0.66206] Val F1 : [0.73619]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [32], Train Loss : [0.00233] Val Loss : [0.64863] Val F1 : [0.76468]
Epoch 00032: reducing learning rate of group 0 to 1.1719e-06.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [33], Train Loss : [0.00297] Val Loss : [0.66088] Val F1 : [0.77222]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [34], Train Loss : [0.00271] Val Loss : [0.64218] Val F1 : [0.75180]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [35], Train Loss : [0.00309] Val Loss : [0.65306] Val F1 : [0.76614]
Epoch 00035: reducing learning rate of group 0 to 5.8594e-07.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [36], Train Loss : [0.00292] Val Loss : [0.66518] Val F1 : [0.76876]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [37], Train Loss : [0.00304] Val Loss : [0.65822] Val F1 : [0.77268]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [38], Train Loss : [0.00259] Val Loss : [0.66619] Val F1 : [0.77579]
Epoch 00038: reducing learning rate of group 0 to 2.9297e-07.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [39], Train Loss : [0.00262] Val Loss : [0.63790] Val F1 : [0.77579]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [40], Train Loss : [0.00301] Val Loss : [0.65221] Val F1 : [0.78277]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [41], Train Loss : [0.00323] Val Loss : [0.65540] Val F1 : [0.73993]
Epoch 00041: reducing learning rate of group 0 to 1.4648e-07.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [42], Train Loss : [0.00242] Val Loss : [0.66414] Val F1 : [0.76855]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [43], Train Loss : [0.00256] Val Loss : [0.64773] Val F1 : [0.74262]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [44], Train Loss : [0.00244] Val Loss : [0.66438] Val F1 : [0.77703]
Epoch 00044: reducing learning rate of group 0 to 7.3242e-08.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [45], Train Loss : [0.00285] Val Loss : [0.66050] Val F1 : [0.78439]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [46], Train Loss : [0.00310] Val Loss : [0.67054] Val F1 : [0.76535]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [47], Train Loss : [0.00250] Val Loss : [0.67700] Val F1 : [0.77327]
Epoch 00047: reducing learning rate of group 0 to 3.6621e-08.


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [48], Train Loss : [0.00255] Val Loss : [0.64386] Val F1 : [0.78276]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [49], Train Loss : [0.00306] Val Loss : [0.65954] Val F1 : [0.76828]


  0%|          | 0/122 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

Epoch [50], Train Loss : [0.00255] Val Loss : [0.67018] Val F1 : [0.77345]
Epoch 00050: reducing learning rate of group 0 to 1.8311e-08.


## Inference

In [14]:
test = pd.read_csv(os.path.join(data_root_path, "test.csv"))

test.shape

(153, 2)

In [16]:
test["path"] = test["path"].apply(lambda x: os.path.join(data_root_path, os.path.join(x.split("/")[1], x.split("/")[2])))

test.head(3)

Unnamed: 0,id,path
0,TEST_000,D:\AI_Data\TV_HMCR\test\TEST_000.mp4
1,TEST_001,D:\AI_Data\TV_HMCR\test\TEST_001.mp4
2,TEST_002,D:\AI_Data\TV_HMCR\test\TEST_002.mp4


In [17]:
test_dataset = CustomDataset(test["path"].values, None)
test_loader = DataLoader(test_dataset, batch_size=CFG["BATCH_SIZE"], shuffle=False, num_workers=0)

In [18]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            logit = model(videos)
            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

In [19]:
preds = inference(model, test_loader, device)

  0%|          | 0/39 [00:00<?, ?it/s]

## Submission

In [21]:
submit = pd.read_csv(os.path.join(data_root_path, "sample_submission.csv"))

submit.head(3)

Unnamed: 0,id,label
0,TEST_000,0
1,TEST_001,0
2,TEST_002,0


In [22]:
submit["label"] = preds
submit.head()

Unnamed: 0,id,label
0,TEST_000,1
1,TEST_001,3
2,TEST_002,0
3,TEST_003,2
4,TEST_004,4


In [23]:
submit.to_csv("baseline_e50.csv", index=False)