# 판교 AI Challenge
> 참치김치찌개팀<br>
> 팀장 손찬영, 팀원 김민정 김하림 이두현 차현수
* 과제명 : [아동 및 교통약자 보호를 위한 어린이 도로보행 위험행동 분류 과제]
* 과제 링크 : https://www.aiconnect.kr/main/competition/privateDetail/200

----------------------------------------------------------------

## Package

In [1]:
import glob
import math
import os
import random
import sys
import time
import timeit
import warnings

import easydict
import pandas as pd
import torchvideo.datasets as datasets
import torchvideo.samplers as samplers
import torchvideo.transforms as VT
from torch.utils.data import DataLoader, random_split
from torchvision.transforms import Compose

warnings.filterwarnings("ignore")

import gc
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import timm.optim.nadam as nadam
import torch
import torchcontrib
import torchvision.transforms as transforms
import wandb
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score
from source.focalloss import FocalLoss
from source.label_smooth import LabelSmoothSoftmaxCEV2
from source.model import C3D_model, R2Plus1D_model, R3D_model
from source.model.utils.vit import TimeSformer
from torch import nn, optim
from torch.autograd import Variable
from torch.optim.swa_utils import SWALR
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
from torchcontrib.optim import SWA
from tqdm import tqdm

----------------------------------------------------------------

## Parameter Settings

In [2]:
args = easydict.EasyDict(
    {
        ############## Experiment ##############
        "experiment": "EXP1",  # 매번 바꿔준다.
        "project_dir": os.getcwd(),  # '/home/stephencha/Hub/ai-challenge'
        "train": True,
        "inference": True,
        "submit_path": "./submit",
        ############## Dataset ##############
        "dataset_path": "./dataset/train",
        "test_dataset_path": "./dataset/test",
        "label_path": "./dataset/train_data.csv",
        "clip_length": 5,  # slice
        "frame_step": 1,
        "num_workers": 8,
        "autoaugment": True,
        "num_classes": 9,
        ############## Model ##############
        "model": "TimeSformer",  # Options: C3D, R2Plus1D, R3D, TimeSformer, Efficientnet_LSTM
        "attention_type": "divided_space_time",
        "img_size": 224,
        "pretrained_model": "./pretrained/TimeSformer_divST_96x4_224_K600.pyth",  # ./pretrained/c3d-pretrained.pth, ./pretrained/TimeSformer_divST_96x4_224_K600.pyth, 'efficientnet_b4'
        ############## Fine-Tuning ##############
        "randomseed": 100,
        "epoches": 50,
        "learning_rate": 0.005,
        "optimizer": "adam",
        "loss_function": "cross_entropy",
        "schedular": "step",
        "batch_size": 40,  # Depends on VRAM
        ############## GPU ##############
        "multi_gpu": False, 
        "device": "cuda",  # "cpu" for debugging
    }
)
NAME_ELEMENTS = [args.model, time.strftime("%m%d_%H%M", time.localtime(time.time()))]
MODEL_NAME = "_".join(NAME_ELEMENTS)

----------------------------------------------------------------

## Randomseed

In [3]:
if args.randomseed:
    torch.manual_seed(args.randomseed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(args.randomseed)
    random.seed(args.randomseed)

----------------------------------------------------------------

## Dataset Preprocessing

In [4]:
cls_li = {
    0: "driveway_walk",
    1: "fall_down",
    2: "fighting",
    3: "jay_walk",
    4: "normal",
    5: "putup_umbrella",
    6: "ride_cycle",
    7: "ride_kick",
    8: "ride_moto",
}

In [5]:
# make string of class to numbers.
def string_to_num(row):
    if row['class'] == cls_li[0]:
        row['class'] = 0
    elif row['class'] == cls_li[1]:
        row['class'] = 1
    elif row['class'] == cls_li[2]:
        row['class'] = 2
    elif row['class'] == cls_li[3]:
        row['class'] = 3
    elif row['class'] == cls_li[4]:
        row['class'] = 4
    elif row['class'] == cls_li[5]:
        row['class'] = 5
    elif row['class'] == cls_li[6]:
        row['class'] = 6
    elif row['class'] == cls_li[7]:
        row['class'] = 7
    elif row['class'] == cls_li[8]:
        row['class'] = 8
    return row

### Dataframe

In [6]:
df = pd.read_csv(args.label_path).set_index('video_filename')
df = df.apply(string_to_num, axis='columns')
df = df.drop(['id'], axis=1)

In [7]:
df.head(5)

Unnamed: 0_level_0,class
video_filename,Unnamed: 1_level_1
video_0000.mp4,5
video_0001.mp4,7
video_0002.mp4,1
video_0003.mp4,0
video_0004.mp4,0


### Load

In [8]:
## LABEL
label = datasets.CsvLabelSet(df, col="class")
## Transform (preprocess)
transform = Compose(
    [
        VT.ResizeVideo((224, 224)),
        VT.CollectFrames(),
        VT.PILVideoToTensor(rescale=True, ordering="CTHW"),
    ]
)
## Sampler (extract frames, make video to images)
sampler = samplers.ClipSampler(clip_length=args.clip_length, frame_step=args.frame_step)
## Make dataset to enter dataloader of pytorch
dataset = datasets.VideoFolderDataset(
    root_path=args.dataset_path, label_set=label, transform=transform, sampler=sampler
)

In [9]:
train_size = int(len(dataset)*0.8)
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
print("Train Size: {}, Validation Size: {}".format(train_size, val_size))

Train Size: 2666, Validation Size: 667


### AutoAugmentation

In [10]:
class custom_dataset(Dataset):
    def __init__(self, xy):
        length = len(xy)
        x_temp = [0] * length
        y_temp = [0] * length
        for i in range(length):
            x_temp[i] = xy[i][0]
            y_temp[i] = xy[i][1]

        self.x = x_temp
        self.y = y_temp

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

def autoaugment(train_ds, pol):
    
    ########################## AUTOAUGMENTATION ##########################
    if pol == 'cifar':
        policy = transforms.AutoAugmentPolicy.CIFAR10
    elif pol == 'imagenet':
        policy = transforms.AutoAugmentPolicy.IMAGENET
    elif pol == 'svhn':
        policy = transforms.AutoAugmentPolicy.SVHN
    else:
        raise ThereAreNoPolicy
    transform = transforms.Compose([
        transforms.ConvertImageDtype(torch.uint8),
        transforms.AutoAugment(),
    ])
    transform2 = transforms.Compose([
        transforms.ConvertImageDtype(torch.float32),
    ])
    ######################################################################
    
    num_data = len(train_ds)
    
    train_y = torch.stack([torch.from_numpy(np.array(train_ds[i][1])) for i in range(num_data)])
    train_y = train_y.view(num_data, -1)
    train_y = torch.tensor(train_y)
    
    train_x = torch.stack([torch.from_numpy(np.array(train_ds[i][0])) for i in range(num_data)])
    # train_x = train_x.type(torch.uint8)
    transformed_img = []
    for j in range(train_x.size(0)):
        img = train_x[j, :, :, :, :]
        temp = []
        for i in range(img.size(1)):
            t_img = transform(img[:, i, :, :])
            t_im = transform2(t_img)
            temp.append(t_img)
        transformed_img.append(torch.stack(temp, dim=1))
    train_x = torch.stack(transformed_img, dim=0)
    # train_x = train_x.type(torch.float32)
    train_x = train_x.view(
        -1, 3, args.clip_length, train_ds[0][0].shape[2], train_ds[0][0].shape[3]
    )
    
    xy = [0] * train_x.shape[0]
    for i in range(train_x.shape[0]):
        xy[i] = (train_x[i], train_y[i])
    train_ds = custom_dataset(xy)
    
    del xy
    del train_x
    del train_y
    gc.collect()
    
    return train_ds

In [11]:
if args.autoaugment:
    ds_imnet = autoaugment(train_dataset, pol='imagenet')
    ds_svhn = autoaugment(train_dataset, pol='svhn')
    ds_cifar = autoaugment(train_dataset, pol='cifar')
                                    
    Dset = [train_dataset, ds_imnet, ds_svhn, ds_cifar]
    train_dataset = torch.utils.data.ConcatDataset(Dset)
    print("Data AutoAugmentation is succeed")
    del Dset

print(
    "Total Train set samples: {}, Val set samples: {}".format(
        len(train_dataset), len(val_dataset)
    )
)

Data AutoAugmentation is succeed
Total Train set samples: 10664, Val set samples: 667


### Dataset Function

In [12]:
def build_dataset(train_ds, val_ds):
    train_dataloader = DataLoader(
        train_ds,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )
    val_dataloader = DataLoader(
        val_ds,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )

    trainval_loaders = {"train": train_dataloader, "val": val_dataloader}
    trainval_sizes = {x: len(trainval_loaders[x].dataset) for x in ["train", "val"]}

    return trainval_loaders, trainval_sizes

----------------------------------------------------------------

## Model

In [13]:
# Use GPU if available else revert to CPU
device = torch.device(args.device)
print("Device being used:", device)
saveName = args.model + "-" + args.experiment
print("Save Name: ", saveName)

Device being used: cuda
Save Name:  TimeSformer-EXP1


### Choose model

In [14]:
if args.model == "TimeSformer":
    model = TimeSformer(
        img_size=args.img_size,
        num_classes=args.num_classes,
        num_frames=args.clip_length,
        attention_type=args.attention_type,
        pretrained_model=args.pretrained_model,
    )
elif args.model == "Efficientnet_LSTM":
    model = Efficientnet_LSTM.net(pretrain_model=args.pretrained_model, embed_size=1280, LSTM_UNITS=64, DO=0.3)
elif args.model == "C3D":
    model = C3D_model.C3D(
        model_dir=args.pretrained_model, num_classes=args.num_classes, pretrained=True
    )
elif args.model == "R2Plus1D":
    model = R2Plus1D_model.R2Plus1DClassifier(
        num_classes=args.num_classes, layer_sizes=(2, 2, 2, 2)
    )
elif args.model == "R3D":
    model = R3D_model.R3DClassifier(num_classes=args.num_classes, layer_sizes=(2, 2, 2, 2))
else:
    raise NotImplementedError

### GPU

In [15]:
print("Total params: %.2fM" % (sum(p.numel() for p in model.parameters()) / 1000000.0))
if args.multi_gpu:
    model = nn.DataParallel(model)
print("Architecture of {}".format(args.model))
model.to(device)

Total params: 121.26M
Architecture of TimeSformer


TimeSformer(
  (model): VisionTransformer(
    (dropout): Dropout(p=0.0, inplace=False)
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (time_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (temporal_attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
   

### Freeze

In [16]:
for i, c in enumerate(model.children()):
    if i == 0:
        for k, param in enumerate(c.parameters()):
            if k <= 200:
                param.requires_grad = False
            else:
                param.requires_grad = True
        print(k)
    # print('#'*25)
    # print(i)
    # print(list(c.parameters()))
    # print('#'*25)

248


### Directory

In [17]:
# build run dir
runs = sorted(glob.glob(os.path.join(args.project_dir, "run", "run_*")))
runs.sort()

def get_dir_size(path='.'):
    total = 0
    with os.scandir(path) as it:
        for entry in it:
            if entry.is_file():
                total += entry.stat().st_size
            elif entry.is_dir():
                total += get_dir_size(entry.path)
    return total

size = get_dir_size(runs[-1])

if int(size) > 5000: # 디렉토리 용량이 있으면
    run_id = int(runs[-1].split("_")[-1]) + 1 if runs else 0 # 다음 번째로 저장
else: # 디렉토리 용량이 없으면 거기다 저장
    run_id = int(runs[-1].split("_")[-1])
print("run id: ", run_id)
SAVE_DIR = os.path.join(args.project_dir, "run", "run_" + str(run_id).zfill(3))
model_save_dir = os.path.join(SAVE_DIR, "models")

if int(size) > 5000: # 새로운 디렉토리를 만들어야 할때만 make model directory
    os.makedirs(model_save_dir, exist_ok=True)
    
print("save directory: ", model_save_dir)

run id:  7
save directory:  /home/stephencha/Hub/ai-challenge/run/run_007/models


----------------------------------------------------------------

## Fine-Tuning

### Optimizer

In [18]:
def build_optimizer(model, opt):
    if args.model == "C3D":
        param = [
            {"params": C3D_model.get_1x_lr_params(model), "lr": args.learning_rate},
            {"params": C3D_model.get_10x_lr_params(model), "lr": args.learning_rate * 10},
        ]
    elif args.model == "R2Plus1D":
        param = [
            {"params": R2Plus1D_model.get_1x_lr_params(model), "lr": args.learning_rate},
            {"params": R2Plus1D_model.get_10x_lr_params(model), "lr": args.learning_rate * 10},
        ]
    elif args.model == "R3D":
        param = model.parameters()
    elif args.model == "TimeSformer":
        param = model.parameters()
    elif args.model == "Efficientnet_LSTM":
        param = model.parameters()
    else:
        raise NotImplementedError

    if opt == "sgd":
        optimizer = optim.SGD(param, lr=args.learning_rate, momentum=0.9, weight_decay=5e-4)
    elif opt == "adam":
        optimizer = optim.Adam(param, lr=args.learning_rate, amsgrad=True)
    elif opt == "adamw":
        optimizer = optim.AdamW(param, lr=args.learning_rate)
    elif opt == "adadelta":
        optimizer = optim.Adadelta(param, lr=args.learning_rate)
    elif opt == "nadam":
        optimizer = nadam.Nadam(param, lr=args.learning_rate)
    else:
        raise NotImplementedError
    return optimizer

### Loss Function

In [19]:
def build_loss_function(lf):
    if lf == "focal":
        lf = FocalLoss()
    elif lf == "cross_entropy":
        lf = nn.CrossEntropyLoss()
    elif lf == "label_smooth":
        lf = LabelSmoothSoftmaxCEV2()
    return lf

### Schedular

In [20]:
def build_schedular(optimizer, sche, epochs, length):
    if sche == "step":
        schedular = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    elif sche == "onecycle":
        schedular = optim.lr_scheduler.OneCycleLR(
            optimizer,
            pct_start=0.1,
            div_factor=1e5,
            max_lr=0.0001,
            epochs=epochs,
            steps_per_epoch=length,
        )
    elif sche == "cosineannealingwarmrestarts":
        schedular = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=10, T_mult=2, eta_min=1e-5, last_epoch=-1
        )
    elif sche == "swa":
        schedular = SWALR(optimizer, swa_lr=0.01)
    return schedular

----------------------------------------------------------------

## Train

In [21]:
def train():
    trainval_loaders, trainval_sizes = build_dataset(train_dataset, val_dataset)
    
    # standard crossentropy loss for classification
    criterion = build_loss_function(args.loss_function)
    optimizer = build_optimizer(model, opt=args.optimizer)
    # the scheduler divides the lr by 10 every 10 epochs
    if args.schedular == "swa":
        optimizer = torchcontrib.optim.SWA(optimizer)
    scheduler = build_schedular(
        optimizer,
        sche=args.schedular,
        epochs=args.epoches,
        length=trainval_sizes["train"],
    )
    
    best_score = 0  # np.Inf
    for epoch in range(args.epoches):
        # each epoch has a training and validation step
        for phase in ["train", "val"]:
            start_time = timeit.default_timer()

            # reset the running loss and corrects
            running_loss = 0.0
            running_corrects = 0.0

            # set model to train() or eval() mode depending on whether it is trained
            # or being validated. Primarily affects layers such as BatchNorm or Dropout.
            if phase == "train":
                # scheduler.step() is to be called once every epoch during training
                scheduler.step()
                model.train()
            else:
                model.eval()

            epoch_labels, epoch_preds = [], []

            for inputs, labels in tqdm(trainval_loaders[phase]):
                # move inputs and labels to the device the training is taking place on
                inputs = Variable(inputs, requires_grad=True).to(device)
                labels = Variable(labels).to(device)
                optimizer.zero_grad()

                if phase == "train":
                    outputs = model(inputs)
                else:
                    with torch.no_grad():
                        outputs = model(inputs)

                probs = nn.Softmax(dim=1)(outputs)
                preds = torch.max(probs, 1)[1]
                loss = criterion(outputs, labels)

                if phase == "train":
                    loss.backward()
                    optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                epoch_labels.extend(labels.tolist())
                epoch_preds.extend(preds.tolist())

            epoch_loss = running_loss / trainval_sizes[phase]
            epoch_acc = running_corrects.double() / trainval_sizes[phase]

            epoch_score = f1_score(epoch_preds, epoch_labels, average="weighted")
            print(f"{phase} | EPOCH {epoch} Weighted F1 SCORE: {epoch_score}")

            print(
                "[{}] Epoch: {}/{} Loss: {} Acc: {}".format(
                    phase, epoch + 1, epochs, epoch_loss, epoch_acc
                )
            )
            stop_time = timeit.default_timer()
            print("Execution time: " + str(stop_time - start_time) + "\n")

            if epoch_score > best_score and phase == "val":
                print(
                    f"Validation Weighted F1 Score increased ({best_score:.6f} --> {epoch_score:.6f}).  Saving model ..."
                )
                model_path = os.path.join(
                    model_save_dir,
                    saveName
                    + "_epoch-"
                    + str(epoch).zfill(3)
                    + "_epoch_score-{:.6f}.pt".format(epoch_score)
                    + ".pth.tar",
                )
                torch.save(
                    {
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "opt_dict": optimizer.state_dict(),
                    },
                    model_path,
                )
                print("Save model at {}\n".format(model_path))
                best_score = epoch_score

In [22]:
if args.train:
    train()
    print("Finish the train")
else:
    print("Skip the train")

  0%|          | 0/267 [00:00<?, ?it/s]


RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/anaconda3/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/anaconda3/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 47, in fetch
    return self.collate_fn(data)
  File "/usr/anaconda3/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 83, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/usr/anaconda3/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 83, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/usr/anaconda3/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 55, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: result type Float can't be cast to the desired output type Byte


----------------------------------------------------------------

## Inference

### Load

In [None]:
sampler = samplers.ClipSampler(
    clip_length=args.clip_length, frame_step=args.frame_step, test=True
)
transform = Compose(
    [
        VT.ResizeVideo((224, 224)),
        VT.CollectFrames(),
        VT.PILVideoToTensor(rescale=True, ordering="CTHW"),
    ]
)
test_dataset = datasets.VideoFolderDataset(
    root_path=args.test_dataset_path, transform=transform, sampler=sampler
)
test_dataloader = DataLoader(
    test_dataset, batch_size=args.batch_size, num_workers=args.num_workers
)

In [None]:
runs_pt = os.listdir(model_save_dir)
runs_pt.sort()
model_path = model_save_dir + "/" + runs_pt[-1]  # Latest
checkpoint = torch.load(model_path)
print(f"Initializing weights from: {model_path.split('/')[-1]}...")
model.load_state_dict(checkpoint["state_dict"])
print("Total params: %.2fM" % (sum(p.numel() for p in model.parameters()) / 1000000.0))

### Inference Function

In [None]:
def inference():
    model.eval()
    start_time = timeit.default_timer()

    pred_li = []
    for inputs in tqdm(test_dataloader):
        inputs = inputs.to(device)

        with torch.no_grad():
            outputs = model(inputs)

        probs = nn.Softmax(dim=1)(outputs)
        preds = torch.max(probs, 1)[1]
        pred_li.extend(preds.tolist())

    stop_time = timeit.default_timer()
    print("Execution time: " + str(stop_time - start_time) + "\n")
    return pred_li

cls_li = {
    0: "driveway_walk",
    1: "fall_down",
    2: "fighting",
    3: "jay_walk",
    4: "normal",
    5: "putup_umbrella",
    6: "ride_cycle",
    7: "ride_kick",
    8: "ride_moto",
}

In [None]:
if args.inference:
    pred_li = inference()
    print("Finish the inference")
else:
    print("Skip the inference")

### Submission

In [None]:
submits = sorted(glob.glob(os.path.join(args.submit_path, "submit_*")))
submits.sort()

if len(submits)==0:
    os.makedirs(os.path.join(args.submit_path, "submit_000"), exist_ok=True)
    submits = sorted(glob.glob(os.path.join(args.submit_path, "submit_*")))
    submits.sort()

size = get_dir_size(submits[-1])

if int(size) > 5000: # 디렉토리 용량이 있으면
    submit_id = int(submits[-1].split("_")[-1]) + 1 if runs else 0 # 다음 번째로 저장
else: # 디렉토리 용량이 없으면 거기다 저장
    submit_id = int(submits[-1].split("_")[-1])

print("submit id: ", submit_id)
SAVE_DIR = os.path.join(args.submit_path, "submit_" + str(submit_id).zfill(3))
if int(size) > 5000: # 새로운 디렉토리를 만들어야 할때만 make model directory
    os.makedirs(SAVE_DIR, exist_ok=True)
print("save directory: ", SAVE_DIR)

In [None]:
sample_submission = pd.read_csv(args.submit_path + "/sample_submission.csv")

In [None]:
sample_submission["class"] = [cls_li[int(pred)] for pred in pred_li]
sample_submission.to_csv("submit_{}.csv".format(model_path.split('/')[-1]), index=False)

----------------------------------------------------------------