# DFL benchmark - training
This is a simple benchmark script for DFL.  
It classifies each frame image in the video into 4 classes（'background','challenge','play','throwin'） 
It does not use temporal information, so it may not be competitive on its own for this competition, but it could be used as a feature extractor for more advanced models.

In [1]:
!nvidia-smi

Sun Sep 11 16:04:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0  On |                  N/A |
| 45%   52C    P8    36W / 350W |    640MiB / 24576MiB |     26%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os
import numpy as np
import pandas as pd
import random
import gc
import cv2
import matplotlib.pyplot as plt
import time

import timm
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader, RandomSampler
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, OneCycleLR, CosineAnnealingLR, ReduceLROnPlateau, StepLR, LambdaLR
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import imageio
from PIL import Image
from tqdm.notebook import tqdm

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score


# setting

In [3]:
DEBUG = False

In [4]:
class CFG:
    EXP = "baseline_test"

    # model
    # model_path = "swint_large224" #effnetでやってみる？
    model_path =  "swin_large_patch4_window7_224"
    MODEL_SAVE_DIR = f"/workdir/work/output/{EXP}"
    out_features = 4 # output class
    inp_channels = 3 #RGB -> 3
    dropout = 0
    pretrained = True

    # train, valid
    # TRAIN_CSV = "/workdir/work/output/saved_train_images.csv"
    TRAIN_IMG_DIR = "/workdir/work/output/train_images"
    TRAIN_CSV = "/workdir/work/output/saved_train_event4images.csv"
    TRAIN_IMG_DIR = "/workdir/work/output/event4image"
    n_fold = 5
    random_seed = 42
    batch_size = 32
    num_workers = 8
    n_epoch = 20
    early_stopping_rounds = 3
    TRAIN_FOLD = [0, 1, 2, 3, 4]

    img_height = 224
    img_width = 224

    #optimizer
    opt_eps = 1e-5
    lr = 2e-5
    opt_wd_non_norm_bias = 0.01
    opt_wd_norm_bias = 0

    #scheduler
    scheduler_name = "CosineAnnealingWarmRestarts"
    T_0 = 5
    min_lr = 1e-7
    max_lr = 1e-4
    T_max = 5

    seed = 42

    if DEBUG:
        n_epoch = 1
        TRAIN_FOLD = [0, 1]

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
CFG.device = device
print(device)



cuda


# Logger

In [5]:
def init_logger(log_file=f'train_{CFG.EXP}.log'):
    """Output Log."""
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger
LOGGER = init_logger()

In [6]:
class AverageMeter(object):
    """Computes and stores the average and current value."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

# Seed

In [7]:
def seed_torch(seed=0):
    """Fixed seed value."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_torch(seed=CFG.seed)

# Read Data

In [8]:
# save trained model in this dir
if not os.path.exists(CFG.MODEL_SAVE_DIR):
    os.makedirs(CFG.MODEL_SAVE_DIR)

In [9]:
train_df = pd.read_csv(CFG.TRAIN_CSV)
display(train_df)


Unnamed: 0,video_id,frame,time,event
0,1606b0e6_0,5005,200.20,background
1,1606b0e6_0,5028,201.12,challenge
2,1606b0e6_0,5070,202.80,background
3,1606b0e6_0,5252,210.08,background
4,1606b0e6_0,5271,210.84,challenge
...,...,...,...,...
11177,ecf251d4_0,76414,3056.56,challenge
11178,ecf251d4_0,76452,3058.08,background
11179,ecf251d4_0,76706,3068.24,background
11180,ecf251d4_0,76738,3069.52,throwin


# Dataset

In [10]:
event_encoding = {
    "background" : 0,
    "challenge" : 1,
    "play" : 2,
    "throwin" : 3,
}


In [11]:
class DFLDataset(Dataset):
    def __init__(self, video_id, frame, targets, transform=None):
        self.video_id = video_id
        self.frame = frame
        self.targets = targets
        # self.transform = transform
    
    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        image_path = f"{CFG.TRAIN_IMG_DIR}/{self.video_id[idx]}_{self.frame[idx]:06}.jpg"
        # with open(image_path, 'rb') as f:
        #     image = Image.open(f)
        #     image_rgb = image.convert('RGB')
        # image = np.array(image_rgb)
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, dsize=(CFG.img_height, CFG.img_width))
        # if self.transform is not None:
        #     image = self.transform(image = image)["image"]
        
        image = image / 255 # convert to 0-1
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        target = event_encoding[self.targets[idx]]

        image = torch.tensor(image, dtype = torch.float)
        target = torch.tensor(target)
        # target = torch.tensor(target, dtype = torch.float)
        return image, target

# Model

In [12]:
# with meta
class DFLNet(nn.Module):
    def __init__(self, model_name=CFG.model_path, 
                 out_features=CFG.out_features, inp_channels=CFG.inp_channels,
                 pretrained=CFG.pretrained):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels)
        n_features = self.model.head.in_features
        self.model.head = nn.Linear(n_features, 128)
        self.fc = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, out_features)
        )
        self.dropout = nn.Dropout(CFG.dropout)
    
    def forward(self, image):
        embeddings = self.model(image)
        x = self.dropout(embeddings)
        output = self.fc(x)
        return output

# Loss

In [13]:
class FocalLoss(nn.Module):
    def __init__(self, alpha= 0.25, gamma=2.0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.eps = eps
        self.ce = torch.nn.CrossEntropyLoss(reduction="none")

    def forward(self, input, target):
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = self.alpha * ((1 - p) ** self.gamma) * logp
        return loss.mean()

# optimizer

In [14]:
def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
            'lr': encoder_lr, 'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
            'lr': encoder_lr, 'weight_decay': 0.0},
        {'params': [p for n, p in model.named_parameters() if "model" not in n],
            'lr': decoder_lr, 'weight_decay': 0.0}
    ]
    return optimizer_parameters

In [15]:
def get_scheduler(optimizer):
    scheduler = None
    if CFG.scheduler_name == 'CosineAnnealingWarmRestarts':
        scheduler = CosineAnnealingWarmRestarts(
            optimizer,
            T_0 = CFG.T_0,
            eta_min = CFG.min_lr,
            last_epoch=-1
        )
    elif CFG.scheduler_name == 'OneCycleLR':
        scheduler = OneCycleLR(
            optimizer,
            max_lr = CFG.max_lr,
            steps_per_epoch = int( ( (CFG.n_fold-1) * train_df.shape[0]) / (CFG.n_fold * CFG.batch_size) ) + 1,
            epochs = CFG.n_epoch,
        )

    elif CFG.scheduler_name == 'CosineAnnealingLR':
        scheduler = CosineAnnealingLR(
            optimizer,
            T_max = CFG.T_max,
            eta_min = CFG.min_lr,
            last_epoch = -1
        )
    
    return scheduler

In [16]:
def divice_norm_bias(model): 
    norm_bias_params = []
    non_norm_bias_params = []
    except_wd_layers = ['norm', '.bias']
    for n, p in model.model.named_parameters():
        if any([nd in n for nd in except_wd_layers]):
            norm_bias_params.append(p)
        else:
            non_norm_bias_params.append(p)
    return norm_bias_params, non_norm_bias_params

# Train function

In [17]:
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler=None):
    model.train()
    stream = tqdm(train_loader)
    losses = AverageMeter()
    
    for i, (images, targets) in enumerate(stream, start=1):
        images = images.to(CFG.device, non_blocking=True)
        targets = targets.to(CFG.device, non_blocking=True)
        batch_size = targets.size(0) 

        preds = model(images)
        loss = criterion(preds, targets)
        losses.update(loss.item(), batch_size) 
        loss.backward()
        optimizer.step()
            
        if scheduler is not None:
            scheduler.step()
        
        optimizer.zero_grad()
    return losses.avg

# Valid function

In [18]:
def valid_fn(val_loader, model, criterion, epoch):
    model.eval()
    stream = tqdm(val_loader)
    losses = AverageMeter()
    final_targets = []
    final_preds = []
    with torch.no_grad():
        for i, (images, targets) in enumerate(stream, start=1):
            images = images.to(CFG.device, non_blocking=True)
            targets = targets.to(CFG.device, non_blocking=True)
            batch_size = targets.size(0)
            preds = model(images)
            loss = criterion(preds, targets)
            losses.update(loss.item(), batch_size)

            targets_list = (targets.detach().cpu().numpy()).tolist()
            # preds_list = (torch.sigmoid(preds).detach().cpu().numpy()).tolist()
            # preds_list = [score_onehot_inv[pred_idx] for pred_idx in torch.argmax(preds, dim=1).tolist()]
            preds_list = torch.argmax(preds, dim=1).tolist()
            
            final_targets.extend(targets_list)
            final_preds.extend(preds_list)
    return losses.avg, final_preds, final_targets

# Train

In [19]:
train_valid_videos = train_df["video_id"].unique()
train_videos = train_valid_videos[:10]
valid_videos = train_valid_videos[10:]
if DEBUG:
    train_videos = [train_videos[0]]
    valid_videos = [valid_videos[0]]
LOGGER.info(f"train_videos {train_videos}")
LOGGER.info(f"valid_videos {valid_videos}")

train_videos ['1606b0e6_0' '1606b0e6_1' '35bd9041_0' '35bd9041_1' '3c993bd2_0'
 '3c993bd2_1' '407c5a9e_1' '4ffd5986_0' '9a97dae4_1' 'cfbe2e94_0']
valid_videos ['cfbe2e94_1' 'ecf251d4_0']


## set dataset

In [20]:
# separate train/valid data 
X_train_videoid = train_df[train_df["video_id"].isin(train_videos)]["video_id"].values
X_train_frame = train_df[train_df["video_id"].isin(train_videos)]["frame"].values
y_train = train_df[train_df["video_id"].isin(train_videos)]["event"].values

X_valid_videoid = train_df[train_df["video_id"].isin(valid_videos)]["video_id"].values
X_valid_frame = train_df[train_df["video_id"].isin(valid_videos)]["frame"].values
y_valid = train_df[train_df["video_id"].isin(valid_videos)]["event"].values

In [21]:
# prepare dataset
train_dataset = DFLDataset(video_id= X_train_videoid, frame=X_train_frame, targets = y_train)
valid_dataset = DFLDataset(video_id= X_valid_videoid, frame=X_valid_frame, targets = y_valid)

# create dataloader
train_loader = DataLoader(train_dataset,
                        batch_size = CFG.batch_size,
                        shuffle = False,
                        num_workers = CFG.num_workers)
valid_loader = DataLoader(valid_dataset,
                        batch_size = CFG.batch_size,
                        shuffle = False,
                        num_workers = CFG.num_workers)

In [22]:
# instantiate model, cost function and optimizer
model = DFLNet()
model = model.to(device)
norm_bias_params, non_norm_bias_params = divice_norm_bias(model)
# criterion = nn.BCEWithLogitsLoss()
criterion = FocalLoss()
#print(f"norm bias params: {len(norm_bias_params)}, non norm bias params: {len(non_norm_bias_params)}")
optimizer = torch.optim.AdamW(
    [
        {'params': norm_bias_params, 'weight_decay': CFG.opt_wd_norm_bias},
        {'params': non_norm_bias_params, 'weight_decay': CFG.opt_wd_non_norm_bias},
    ],
    eps = CFG.opt_eps,
    lr = CFG.lr,
    amsgrad = False
)
scheduler = get_scheduler(optimizer)
scaler = GradScaler()
    


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [23]:
# train / valid loop
best_score = -9999.
ealry_stopping_count = 0

start_time = time.time()
for epoch in range(1, CFG.n_epoch + 1):
    train_avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler)
    valid_avg_loss, preds, targets = valid_fn(valid_loader, model, criterion, epoch)
    accuracy = accuracy_score(targets, preds)
    
    elapsed = time.time() - start_time
    LOGGER.info(f"Epoch {epoch}: Train loss {train_avg_loss:.4f},  Valid loss {valid_avg_loss:.4f}.")
    LOGGER.info(f"Accuracy {accuracy:4f}. elapsed time:{elapsed:.1f}")
    if accuracy > best_score:
        LOGGER.info(f"Model is improved.")
        best_score = accuracy
        model_name = CFG.model_path
        torch.save(model.state_dict(), f'{CFG.MODEL_SAVE_DIR}/{model_name}.pth')

    else:
        ealry_stopping_count += 1
        if ealry_stopping_count >= CFG.early_stopping_rounds:
            LOGGER.info(f"Early stopping. Model is not improved in {CFG.early_stopping_rounds} epochs")
            break
del model, train_loader, train_dataset
gc.collect()

torch.cuda.empty_cache()

  0%|          | 0/296 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

Epoch 1: Train loss 0.1188,  Valid loss 0.0990.
Accuracy 0.614368. elapsed time:152.6
Model improved in epoch 1.


  0%|          | 0/296 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

Epoch 2: Train loss 0.1099,  Valid loss 0.0959.
Accuracy 0.614368. elapsed time:153.6


  0%|          | 0/296 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

Epoch 3: Train loss 0.1065,  Valid loss 0.0952.
Accuracy 0.614368. elapsed time:152.5


  0%|          | 0/296 [00:00<?, ?it/s]

  0%|          | 0/55 [00:00<?, ?it/s]

Epoch 4: Train loss 0.1039,  Valid loss 0.0946.
Accuracy 0.614368. elapsed time:153.1


  0%|          | 0/296 [00:00<?, ?it/s]

KeyboardInterrupt: 