# DFL benchmark - training
This is a simple benchmark script for DFL.  
It classifies each frame image in the video into 4 classes（'background','challenge','play','throwin'） 
It does not use temporal information, so it may not be competitive on its own for this competition, but it could be used as a feature extractor for more advanced models.

In [1]:
!nvidia-smi

Thu Sep 15 11:52:47 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0  On |                  N/A |
| 55%   57C    P5    43W / 350W |    885MiB / 24576MiB |     22%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os
import numpy as np
import pandas as pd
import random
import gc
import cv2
import matplotlib.pyplot as plt
import time

import timm
from timm import utils

from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader, RandomSampler
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, OneCycleLR, CosineAnnealingLR, ReduceLROnPlateau, StepLR, LambdaLR

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import imageio
from PIL import Image
from tqdm.notebook import tqdm

# from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score


# setting

In [3]:
DEBUG = False

In [4]:
class CFG:
    EXP = "eff_b5_ap_bce_flowimage_1output"

    # model
    # model_path = "swint_large224" #effnetでやってみる？
    # model_path =  "tf_efficientnet_b7_ap"
    model_path =  "tf_efficientnet_b5_ap"
    MODEL_SAVE_DIR = f"/workdir/work/output/{EXP}"
    out_features = 1 # output class play:1 or background:0の分類
    inp_channels = 3 #RGB -> 3
    dropout = 0
    pretrained = True

    # train, valid
    TRAIN_IMG_DIR = "/workdir/work/output/train_images_flow"
    TRAIN_CSV = "/workdir/work/output/saved_train_flowimages.csv"
    random_seed = 42
    # batch_size = 8
    batch_size = 64
    num_workers = 0
    n_epoch = 150
    early_stopping_rounds = 10

    img_height = 224
    img_width = 224
        
    #optimizer
    gradient_accumulation_steps = 1
    max_grad_norm = 1000
    opt_eps = 1e-5
    lr = 5e-6
    opt_wd_non_norm_bias = 0.01
    opt_wd_norm_bias = 0

    #scheduler
    scheduler_name = "CosineAnnealingLR"
    T_0 = 5
    min_lr = 1e-7
    max_lr = 5e-5
    T_max = 5

    seed = 42

    if DEBUG:
        n_epoch = 1
        TRAIN_FOLD = [0, 1]

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
CFG.device = device
print(device)



cuda


# Logger

In [5]:
# save trained model in this dir
if not os.path.exists(CFG.MODEL_SAVE_DIR):
    os.makedirs(CFG.MODEL_SAVE_DIR)

In [6]:
def init_logger(log_file=f'{CFG.MODEL_SAVE_DIR}/train_{CFG.EXP}.log'):
    """Output Log."""
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger
LOGGER = init_logger()
LOGGER.info(f"EXP NAME = {CFG.EXP}")
LOGGER.info(f"Model = {CFG.model_path}, (height, width) = ({CFG.img_height}, {CFG.img_width})")

EXP NAME = eff_b5_ap_bce_flowimage_1output
Model = tf_efficientnet_b5_ap, (height, width) = (224, 224)


In [7]:
class AverageMeter(object):
    """Computes and stores the average and current value."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

# Seed

In [8]:
def seed_torch(seed=0):
    """Fixed seed value."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)

# Read Data

In [9]:
train_df = pd.read_csv(CFG.TRAIN_CSV)
display(train_df)

Unnamed: 0,video_id,frame,event,distance,time
0,1606b0e6_0,5006,background,16.120910,200.24
1,1606b0e6_0,6040,background,107.551291,241.60
2,1606b0e6_0,6114,play,95.963118,244.56
3,1606b0e6_0,6174,background,22.191445,246.96
4,1606b0e6_0,6202,play,29.062033,248.08
...,...,...,...,...,...
1817,cfbe2e94_1,88110,play,28.930048,3524.40
1818,cfbe2e94_1,89033,background,12.604130,3561.32
1819,cfbe2e94_1,89066,play,28.153789,3562.64
1820,cfbe2e94_1,89312,background,16.033176,3572.48


# Dataset

In [10]:
event_encoding = {
    "background" : 0.0,
    "challenge" : 1.0,
    "play" : 1.0,
    "throwin" : 1.0,
}


In [11]:
class DFLDataset(Dataset):
    def __init__(self, video_id, frame, targets, transform=None):
        self.video_id = video_id
        self.frame = frame
        self.targets = targets
        # self.transform = transform
    
    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        image_path = f"{CFG.TRAIN_IMG_DIR}/{self.video_id[idx]}_{self.frame[idx]:06}.jpg"
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, dsize=(CFG.img_height, CFG.img_width))
        image = image / 255 # convert to 0-1
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        target = np.float32(event_encoding[self.targets[idx]])
        return image, target

# Model

In [12]:
class DFLNet(nn.Module):
    def __init__(self, model_name=CFG.model_path, 
                 out_features=CFG.out_features, inp_channels=CFG.inp_channels,
                 pretrained=CFG.pretrained):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels, num_classes=out_features)
    
    def forward(self, image):
        fc = self.model(image)
        output = torch.sigmoid(fc)
        return output

# Loss

# optimizer

In [13]:
def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
            'lr': encoder_lr, 'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
            'lr': encoder_lr, 'weight_decay': 0.0},
        {'params': [p for n, p in model.named_parameters() if "model" not in n],
            'lr': decoder_lr, 'weight_decay': 0.0}
    ]
    return optimizer_parameters

In [14]:
def get_scheduler(optimizer):
    scheduler = None
    if CFG.scheduler_name == 'CosineAnnealingWarmRestarts':
        scheduler = CosineAnnealingWarmRestarts(
            optimizer,
            T_0 = CFG.T_0,
            eta_min = CFG.min_lr,
            last_epoch=-1
        )
    elif CFG.scheduler_name == 'OneCycleLR':
        scheduler = OneCycleLR(
            optimizer,
            max_lr = CFG.max_lr,
            steps_per_epoch = int( ( (CFG.n_fold-1) * train_df.shape[0]) / (CFG.n_fold * CFG.batch_size) ) + 1,
            epochs = CFG.n_epoch,
        )

    elif CFG.scheduler_name == 'CosineAnnealingLR':
        scheduler = CosineAnnealingLR(
            optimizer,
            T_max = CFG.T_max,
            eta_min = CFG.min_lr,
            last_epoch = -1
        )
    
    return scheduler

In [15]:
def divice_norm_bias(model): 
    norm_bias_params = []
    non_norm_bias_params = []
    except_wd_layers = ['norm', '.bias']
    for n, p in model.model.named_parameters():
        if any([nd in n for nd in except_wd_layers]):
            norm_bias_params.append(p)
        else:
            non_norm_bias_params.append(p)
    return norm_bias_params, non_norm_bias_params

# Train function

In [16]:
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler=None, scaler=None):
    model.train()
    # stream = tqdm(train_loader)
    losses = AverageMeter()
    global_step = 0

    # for step, (images, targets) in enumerate(stream, start=1):
    for step, (images, targets) in enumerate(train_loader):
        images = images.to(CFG.device, non_blocking=True)
        targets = targets.to(CFG.device, non_blocking=True)
        targets = targets.view(-1, 1)
        batch_size = targets.size(0) 

        preds = model(images)
        loss = criterion(preds, targets)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
            loss.backward()
        else:
            loss.backward()
        losses.update(loss.item(), batch_size) 

        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1

        if scheduler is not None:
            scheduler.step()
            # scheduler.step_update(num_updates=step, metric=losses.avg)

    return losses.avg

# Valid function

In [17]:
def valid_fn(val_loader, model, criterion, epoch):
    model.eval()
    # stream = tqdm(val_loader)
    
    losses = AverageMeter()
    
    final_targets = []
    final_preds = []
    
    with torch.no_grad():
        # for i, (images, targets) in enumerate(stream, start=1):
        for i, (images, targets) in enumerate(val_loader):
            images = images.to(CFG.device, non_blocking=True)
            targets = targets.to(CFG.device, non_blocking=True)
            targets = targets.view(-1, 1)
            batch_size = targets.size(0)
            
            preds = model(images)

            loss = criterion(preds, targets)
            losses.update(loss.item(), batch_size)

            targets_list = (targets.detach().cpu().numpy()).tolist()
            preds_list = torch.argmax(preds, dim=1).tolist()
            
            final_targets.extend(targets_list)
            final_preds.extend(preds_list)
    return losses.avg, final_preds, final_targets

# Train

In [18]:
train_valid_videos = train_df["video_id"].unique()
print(train_valid_videos)
train_videos = train_valid_videos[:2]
valid_videos = train_valid_videos[2:]
if DEBUG:
    train_videos = [train_videos[0]]
    valid_videos = [valid_videos[0]]
LOGGER.info(f"train_videos {train_videos}")
LOGGER.info(f"valid_videos {valid_videos}")

train_videos ['1606b0e6_0' '1606b0e6_1']
valid_videos ['cfbe2e94_0' 'cfbe2e94_1']


['1606b0e6_0' '1606b0e6_1' 'cfbe2e94_0' 'cfbe2e94_1']


## set dataset

In [19]:
# separate train/valid data 
X_train_videoid = train_df[train_df["video_id"].isin(train_videos)]["video_id"].values
X_train_frame = train_df[train_df["video_id"].isin(train_videos)]["frame"].values
y_train = train_df[train_df["video_id"].isin(train_videos)]["event"].values

X_valid_videoid = train_df[train_df["video_id"].isin(valid_videos)]["video_id"].values
X_valid_frame = train_df[train_df["video_id"].isin(valid_videos)]["frame"].values
y_valid = train_df[train_df["video_id"].isin(valid_videos)]["event"].values

In [20]:
# prepare dataset
train_dataset = DFLDataset(video_id= X_train_videoid, frame=X_train_frame, targets = y_train)
valid_dataset = DFLDataset(video_id= X_valid_videoid, frame=X_valid_frame, targets = y_valid)

# create dataloader
train_loader = DataLoader(train_dataset,
                        batch_size = CFG.batch_size,
                        shuffle = False,
                        num_workers = CFG.num_workers)
valid_loader = DataLoader(valid_dataset,
                        batch_size = CFG.batch_size,
                        shuffle = False,
                        num_workers = CFG.num_workers)

In [21]:
# instantiate model, cost function and optimizer
model = DFLNet()
model = model.to(device)

norm_bias_params, non_norm_bias_params = divice_norm_bias(model)
# criterion = nn.CrossEntropyLoss()
criterion = nn.BCELoss()

#print(f"norm bias params: {len(norm_bias_params)}, non norm bias params: {len(non_norm_bias_params)}")
optimizer = torch.optim.AdamW(
    [
        {'params': norm_bias_params, 'weight_decay': CFG.opt_wd_norm_bias},
        {'params': non_norm_bias_params, 'weight_decay': CFG.opt_wd_non_norm_bias},
    ],
    eps = CFG.opt_eps,
    lr = CFG.lr,
    amsgrad = False
)

# load scaler
scheduler = get_scheduler(optimizer)
scaler = GradScaler()

In [22]:
# train / valid loop
best_loss = 1e10
ealry_stopping_count = 0

start_time = time.time()
for epoch in range(1, CFG.n_epoch + 1):
    train_avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, scaler)
    valid_avg_loss, preds, targets = valid_fn(valid_loader, model, criterion, epoch)
    
    elapsed = time.time() - start_time
    elapsed_min = elapsed/60
    LOGGER.info(f"Epoch {epoch}: Train loss {train_avg_loss:.6f},  Valid loss {valid_avg_loss:.6f}. elapsed time:{elapsed_min:.1f} min.")
    if valid_avg_loss < best_loss:
        LOGGER.info(f"Model is improved.")
        ealry_stopping_count = 0
        best_loss = valid_avg_loss
        model_name = CFG.model_path
        LOGGER.info(f'{CFG.MODEL_SAVE_DIR}/{model_name}.pth is saved.')
        torch.save(model.state_dict(), f'{CFG.MODEL_SAVE_DIR}/{model_name}.pth')

    else:
        ealry_stopping_count += 1
        if ealry_stopping_count >= CFG.early_stopping_rounds:
            LOGGER.info(f"Early stopping. Model is not improved in {CFG.early_stopping_rounds} epochs")
            break
del model, train_loader, train_dataset
gc.collect()

LOGGER.info("Learning finished.")

torch.cuda.empty_cache()

Epoch 1: Train loss 2.063000,  Valid loss 2.370314. elapsed time:0.2 min.
Model is improved.
/workdir/work/output/eff_b5_ap_bce_flowimage_1output/tf_efficientnet_b5_ap.pth is saved.
Epoch 2: Train loss 1.548859,  Valid loss 2.511300. elapsed time:0.3 min.
Epoch 3: Train loss 1.216675,  Valid loss 2.289792. elapsed time:0.5 min.
Model is improved.
/workdir/work/output/eff_b5_ap_bce_flowimage_1output/tf_efficientnet_b5_ap.pth is saved.
Epoch 4: Train loss 0.952859,  Valid loss 2.143212. elapsed time:0.6 min.
Model is improved.
/workdir/work/output/eff_b5_ap_bce_flowimage_1output/tf_efficientnet_b5_ap.pth is saved.
Epoch 5: Train loss 0.734381,  Valid loss 2.086715. elapsed time:0.8 min.
Model is improved.
/workdir/work/output/eff_b5_ap_bce_flowimage_1output/tf_efficientnet_b5_ap.pth is saved.
Epoch 6: Train loss 0.554290,  Valid loss 2.053656. elapsed time:0.9 min.
Model is improved.
/workdir/work/output/eff_b5_ap_bce_flowimage_1output/tf_efficientnet_b5_ap.pth is saved.
Epoch 7: Train l