# pytorch

## library

In [None]:
import pickle
import gc
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import time

from tqdm import tqdm
from glob import glob
import random
import os
import json 
import math

import tensorflow as tf

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

!pip install onnx_tf
!pip install tflite-runtime
!pip install torchinfo

from torchinfo import summary
import onnx
import onnx_tf
from onnx_tf.backend import prepare

from sklearn.utils import shuffle
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import train_test_split

!pip install transformers
from transformers import BertModel, BertConfig, GPT2Model, GPT2Config, RobertaModel, RobertaConfig
from transformers.optimization import AdamW, get_cosine_schedule_with_warmup

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Could not find a version that satisfies the requirement tflite-runtime (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for tflite-runtime[0m[31m
[0mLooking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## utils

In [None]:
def load_relevant_data_subset_with_imputation(args, pq_path):
  """Read the x/y/z columns of a parquet file as a float32 array, NaNs set to 0.

  Args:
    args: config object; only ``args.rows_per_frame`` is read.
    pq_path: path handed to ``pd.read_parquet``.

  Returns:
    ``np.float32`` array of shape ``(n_frames, args.rows_per_frame, 3)`` where
    ``n_frames = int(len(table) / args.rows_per_frame)``.
  """
  coord_cols = ['x', 'y', 'z']
  # Non-inplace replace: same values as before, without mutating a named frame.
  table = pd.read_parquet(pq_path, columns=coord_cols).replace(np.nan, 0)
  frame_count = int(len(table) / args.rows_per_frame)
  stacked = table.values.reshape(frame_count, args.rows_per_frame, len(coord_cols))
  return stacked.astype(np.float32)

def load_relevant_data_subset(args, pq_path):
  """Read the x/y/z columns of a parquet file as a float32 array (NaNs kept).

  Args:
    args: config object; only ``args.rows_per_frame`` is read.
    pq_path: path handed to ``pd.read_parquet``.

  Returns:
    ``np.float32`` array of shape ``(n_frames, args.rows_per_frame, 3)`` where
    ``n_frames = int(len(table) / args.rows_per_frame)``.
  """
  coord_cols = ['x', 'y', 'z']
  table = pd.read_parquet(pq_path, columns=coord_cols)
  frame_count = int(len(table) / args.rows_per_frame)
  stacked = table.values.reshape(frame_count, args.rows_per_frame, len(coord_cols))
  return stacked.astype(np.float32)

def read_dict(args, file_path):
  """Load a JSON file and return the decoded object.

  Args:
    args: unused; kept so existing call sites keep working.
    file_path: path to the JSON file; a leading ``~`` is expanded.

  Returns:
    Whatever ``json.load`` decodes from the file (typically a dict).
  """
  path = os.path.expanduser(file_path)
  # JSON is UTF-8 by specification; do not rely on the platform default encoding.
  with open(path, "r", encoding="utf-8") as f:
    return json.load(f)

## config

In [None]:
class CustomConfig:
  """Notebook-wide settings, exposed as plain class attributes."""

  # --- reproducibility / runtime ---
  seed = 42            # read by seed_everything
  device = 'cuda'
  num_workers = 12
  folder = 'result'

  # --- data ---
  data_path = "/content/asl-signs/"
  rows_per_frame = 75  # read by the parquet loaders to split rows into frames
  max_frame = 100      # read by CustomDataset.pad
  fold_n = 5           # read by preprocess (number of KFold splits)

  # --- optimisation ---
  batch_size = 128
  lr = 1e-3
  epoch_n = 40
  warmup_ratio = 0.2
  smoothing = 0.2

  # --- modeling ---
  in_features = 3 * rows_per_frame
  out_features = 32
  hidden_size = 64
  dense_dim = 512
  num_classes = 250
  drop_rate = 0.4


if __name__ == "__main__":
  args = CustomConfig()

## seed

In [None]:
def seed_everything(args):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, ``PYTHONHASHSEED``, NumPy and PyTorch (CPU and
    all CUDA devices) from ``args.seed`` and puts cuDNN in deterministic mode.

    Args:
        args: config object; only ``args.seed`` (int) is read.
    """
    random.seed(args.seed)
    os.environ['PYTHONHASHSEED'] = str(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # Seed every visible GPU, not just the current one; this is a silent no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick different kernels from run to
    # run, which defeats the deterministic flag above, so it must stay off.
    torch.backends.cudnn.benchmark = False

if __name__ == "__main__":
  seed_everything(args)

## aggregate data

In [None]:
# Disabled cell: the whole pipeline below is wrapped in a string literal, so
# nothing in it runs. It reads every sample listed in `train` through
# PreprocessDataset / DataLoader and concatenates the per-sample tensors into
# four flat tensors (data, label, frame, batch_id).
# NOTE(review): presumably this produced the .npy files loaded further down -
# confirm before deleting it.
'''def collate_fn(batch):
  data = [item["data"] for item in batch]  
  label = [item["label"] for item in batch]  
  frame = [item["frame"] for item in batch]  
  batch_id = [item["batch_id"] for item in batch]  

  return {"data": torch.concat(data, dim = 0), 
          "label": torch.concat(label, dim = 0),  
          "frame": torch.concat(frame, dim = 0),
          "batch_id": torch.concat(batch_id, dim = 0)}

class PreprocessDataset(torch.utils.data.Dataset):
  def __init__(self, args, df):
    self.args = args
    self.df = df

  def __len__(self):
    return len(self.df)

  def __getitem__(self, idx):
    path = f"{self.args.data_path}{self.df.iloc[idx].path}"

    data = load_relevant_data_subset(args, path)#load_relevant_data_subset_with_imputation(args, path)
    label = self.df.iloc[idx].label
    frame = data.shape[0]
    batch_id = [idx] * frame
  
    return {'data' : torch.tensor(data, dtype = torch.float), 
            'label' : torch.tensor([label], dtype = torch.float), 
            'frame' : torch.tensor([frame], dtype = torch.float),
            'batch_id' : torch.tensor(batch_id, dtype = torch.float)}

if __name__ == "__main__":
  dataset = PreprocessDataset(args, train)
  dataloader = torch.utils.data.DataLoader(dataset, 
                                           batch_size = 128, 
                                           num_workers = 12, 
                                           shuffle = False, 
                                           drop_last = False, 
                                           collate_fn = collate_fn)

  data, label, frame, batch_id = [], [], [], []
  for k, sample in enumerate(tqdm(dataloader)):
    data.append(sample['data'])
    label.append(sample['label'])
    frame.append(sample['frame'])
    batch_id.append(sample['batch_id'])
    

  data = torch.concat(data, dim = 0)
  label = torch.concat(label, dim = 0)
  frame = torch.concat(frame, dim = 0)
  batch_id = torch.concat(batch_id, dim = 0)'''
pass

In [None]:
#data.shape, label.shape, frame.shape, batch_id.shape, train.shape

In [None]:
#np.save('/content/drive/MyDrive/Kaggle/aggregation/data_without_imputation.npy', data.numpy()) 
#np.save('/content/drive/MyDrive/Kaggle/aggregation/label_without_imputation.npy', label.numpy())
#np.save('/content/drive/MyDrive/Kaggle/aggregation/frame_without_imputation.npy', frame.numpy()) 
#np.save('/content/drive/MyDrive/Kaggle/aggregation/batch_id_without_imputation.npy', batch_id.numpy()) 

In [None]:
# Reference note kept as an inert string: eyebrow landmark index groups. The
# last two lines are the upper+lower lists merged per side; the same 28 indices
# appear as EYEBROW in the disabled lip+eyebrow export cell.
# NOTE(review): presumably face-mesh landmark indices - confirm the source.
'''
rightEyebrowUpper: [156, 70, 63, 105, 66, 107, 55, 193],
rightEyebrowLower: [35, 124, 46, 53, 52, 65],
leftEyebrowUpper: [383, 300, 293, 334, 296, 336, 285, 417],
leftEyebrowLower: [265, 353, 276, 283, 282, 295],

rightEyebrow : [156, 70, 63, 105, 66, 107, 55, 193, 35, 124, 46, 53, 52, 65]
leftEyebrow : [383, 300, 293, 334, 296, 336, 285, 417, 265, 353, 276, 283, 282, 295]
'''
pass

In [None]:
# Reference note kept as an inert string: lip landmark index groups (outer and
# inner contours, upper and lower). The LIP list in the disabled export cells
# is these four lists concatenated with the repeated indices 291 and 78
# dropped, giving 40 entries.
'''
lipsUpperOuter: [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291],
lipsLowerOuter: [146, 91, 181, 84, 17, 314, 405, 321, 375, 291],
lipsUpperInner: [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],
lipsLowerInner: [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],
'''
pass

In [None]:
# Disabled cell (string literal, never executed): keeps the 40 LIP landmarks
# plus every landmark from index 468 onward, concatenates them along axis 1
# (lip first) and saves the result as data_m_with_lip.npy - the file the
# loading cell reads.
'''LIP = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308
    ]

lip = data[:, LIP, :]
without_face = data[:, 468:, :]
data_with_lip = np.concatenate([lip, without_face], axis = 1)
np.save('/content/drive/MyDrive/Kaggle/aggregation/data_m_with_lip.npy', data_with_lip) '''
pass

In [None]:
# Disabled cell (string literal, never executed): same export as the lip-only
# variant, with the 28 EYEBROW landmarks appended after the non-face landmarks
# (order along axis 1: lip, landmarks 468+, eyebrow). Saved as
# data_m_with_lip_and_eyebrow.npy.
'''LIP = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308
    ]

EYEBROW = [
    156, 70, 63, 105, 66, 107, 55, 193, 35, 124, 46, 53, 52, 65, 
    383, 300, 293, 334, 296, 336, 285, 417, 265, 353, 276, 283, 282, 295
    ]

lip = data[:, LIP, :]
without_face = data[:, 468:, :]
eyebrow = data[:, EYEBROW, :]
data_with_lip_and_eyebrow = np.concatenate([lip, without_face, eyebrow], axis = 1)
np.save('/content/drive/MyDrive/Kaggle/aggregation/data_m_with_lip_and_eyebrow.npy', data_with_lip_and_eyebrow) '''
pass

In [None]:
# Load the pre-aggregated arrays from Drive.
#   data     - landmark coordinates, all frames of all samples stacked on axis 0
#   label    - written to the `label` column of the dataframe in preprocess()
#   frame    - written to the `frame` column; CustomDataset turns the same file
#              into (start, end) row ranges into `data`
#   batch_id - not read by any code visible in this notebook section
data = np.load('/content/drive/MyDrive/Kaggle/aggregation/data_m_with_lip.npy')#np.load('/content/drive/MyDrive/Kaggle/aggregation/data.npy')
label = np.load('/content/drive/MyDrive/Kaggle/aggregation/label.npy')
frame = np.load('/content/drive/MyDrive/Kaggle/aggregation/frame.npy')
batch_id = np.load('/content/drive/MyDrive/Kaggle/aggregation/batch_id.npy')

In [None]:
# Sanity check: the axis-1 size must cover the landmark slices FeatureGen uses
# (0:40, 40:61, 61:86, 94:).
data.shape

(3583987, 115, 3)

In [None]:
# Run a garbage-collection pass after loading the large arrays.
gc.collect()

33

## preprocess

In [None]:
def preprocess(args):
  """Build participant-grouped cross-validation folds.

  Reads train.csv, attaches the notebook-level ``frame`` and ``label`` arrays
  plus a running ``original_index``, then splits the hard-coded participant
  list with an unshuffled ``KFold`` so that a participant never appears in
  both halves of a fold.

  Args:
    args: config object; only ``args.fold_n`` is read.

  Returns:
    ``(folds, df)`` where ``folds`` is a list of ``[train_df, test_df]`` pairs
    and ``df`` is the full table; all frames are restricted to the columns
    ``frame``, ``label`` and ``original_index``.
  """
  keep_cols = ['frame', 'label', 'original_index']
  participant_ids = np.array([26734, 28656, 16069, 25571, 62590, 32319, 37055, 29302, 49445,
                              36257, 22343, 27610, 61333, 53618, 34503, 18796,  4718, 55372,
                              2044, 37779, 30680])

  df = pd.read_csv('/content/drive/MyDrive/Kaggle/train.csv')
  # `frame` and `label` are the arrays loaded at notebook level.
  df['frame'] = frame
  df['label'] = label
  df['original_index'] = np.arange(len(df))

  splitter = KFold(n_splits = args.fold_n, shuffle = False)

  folds = []
  for train_pos, test_pos in splitter.split(participant_ids):
    in_train = df['participant_id'].isin(participant_ids[train_pos])
    in_test = df['participant_id'].isin(participant_ids[test_pos])
    folds.append([
        df.loc[in_train, keep_cols].reset_index(drop = True),
        df.loc[in_test, keep_cols].reset_index(drop = True),
    ])
  return folds, df[keep_cols]

if __name__ == "__main__":
  folds, df = preprocess(args)

## dataset

In [None]:
# feature version2

class FeatureGen(nn.Module):
    """Per-frame feature extractor, feature version 2.

    Keeps every frame and appends two flag columns (token type, hand mask).

    The input is sliced along dim 1 as: lip ``0:40``, left hand ``40:61``,
    pose ``61:86`` and right hand ``94:``. The hand with more non-zero
    coordinates is used; when that is the left hand, the x coordinate of the
    hand, pose and lip landmarks is negated (mirrored).

    Output columns, in order (1198 in total):
      * 153 coordinates: hand xyz (63), pose xy (50), lip landmarks 0..19 xy (40)
      * 153 frame-to-frame differences (current minus next; last frame is 0)
      * 210 / 300 / 190 / 190 pairwise distances: hand (xyz), pose (xy),
        lip landmarks 0..19 (xy), lip landmarks 20..39 (xy)
      * token type id (2 where the hand is present, else 1)
      * hand mask (1 where the hand is present, else 0)
    """

    def __init__(self):
        super(FeatureGen, self).__init__()
        # Strictly-upper-triangular 0/1 masks, shape (1, n, n). Plain tensors,
        # not buffers, so the module's state_dict stays empty.
        self.triu = self._strict_upper(21)
        self.ptriu = self._strict_upper(25)
        self.ltriu = self._strict_upper(20)

    @staticmethod
    def _strict_upper(n):
        """Return a (1, n, n) float mask that is 1 strictly above the diagonal."""
        return torch.triu(torch.ones(n, n, dtype = torch.float), diagonal = 1).unsqueeze(0)

    @staticmethod
    def _pairwise_upper(points, upper):
        """Per-frame Euclidean distance for every landmark pair i < j, row-major."""
        diff = points.unsqueeze(2) - points.unsqueeze(1)
        dist = torch.sqrt((diff ** 2).sum(-1))
        # Selecting with the mask directly keeps exactly the pairs above the
        # diagonal, with no dependence on the distance values themselves.
        return dist[:, upper[0].to(device = points.device, dtype = torch.bool)]

    def forward(self, x, mode):
        """Convert raw landmarks into the flat per-frame feature matrix.

        Args:
            x: float tensor of shape (frames, landmarks, 3); NaN marks a
                missing coordinate. The tensor is not modified.
            mode: unused; kept for call-site compatibility.

        Returns:
            Float tensor of shape (frames, 1198).
        """
        # Zero NaNs first (on a copy) so that missing landmarks count as absent
        # in the hand comparison and the hand mask below.
        x = torch.where(torch.isnan(x), torch.zeros_like(x), x)

        left_hand = x[:, 40:61, :]
        right_hand = x[:, 94:, :]
        use_left = bool((left_hand != 0).sum() > (right_hand != 0).sum())
        hand = left_hand if use_left else right_hand

        # torch.cat allocates new storage, so the mirroring never touches `x`.
        xfeat = torch.cat([hand, x[:, 61:86, :], x[:, :40, :]], dim = 1)
        if use_left:
            xfeat[:, :, 0] = -xfeat[:, :, 0]

        hand_mask = xfeat[:, :hand.shape[1], :].flatten(1).sum(1) != 0
        token_type_ids = hand_mask + 1

        # Difference to the following frame; the last frame has no successor.
        dxyz = torch.zeros_like(xfeat)
        dxyz[:-1] = xfeat[:-1] - xfeat[1:]

        hand_xyz = xfeat[:, :21, :3]
        pose_xy = xfeat[:, 21:46, :2]
        lip_a_xy = xfeat[:, 46:66, :2]
        lip_b_xy = xfeat[:, 66:86, :2]

        return torch.cat([
            hand_xyz.flatten(1),
            pose_xy.flatten(1),
            lip_a_xy.flatten(1),
            dxyz[:, :21, :3].flatten(1),
            dxyz[:, 21:46, :2].flatten(1),
            dxyz[:, 46:66, :2].flatten(1),
            self._pairwise_upper(hand_xyz, self.triu),
            self._pairwise_upper(pose_xy, self.ptriu),
            self._pairwise_upper(lip_a_xy, self.ltriu),
            self._pairwise_upper(lip_b_xy, self.ltriu),
            token_type_ids.to(xfeat.dtype).unsqueeze(1),
            hand_mask.to(xfeat.dtype).unsqueeze(1),
        ], dim = -1)

feature_converter = FeatureGen()

In [None]:
# feature version1

class FeatureGen(nn.Module):
    """Per-frame feature extractor, feature version 1.

    Drops frames in which the selected hand is absent (unless that would drop
    every frame).

    The input is sliced along dim 1 as: lip ``0:40``, left hand ``40:61``,
    pose ``61:86`` and right hand ``94:``. The hand with more non-zero
    coordinates is used; when that is the left hand, the x coordinate of the
    hand, pose and lip landmarks is negated (mirrored).

    Output columns, in order (1196 in total):
      * 153 coordinates: hand xyz (63), pose xy (50), lip landmarks 0..19 xy (40)
      * 153 frame-to-frame differences (current minus next; last frame is 0)
      * 210 / 300 / 190 / 190 pairwise distances: hand (xyz), pose (xy),
        lip landmarks 0..19 (xy), lip landmarks 20..39 (xy)
    """

    def __init__(self):
        super(FeatureGen, self).__init__()
        # Strictly-upper-triangular 0/1 masks, shape (1, n, n). Plain tensors,
        # not buffers, so the module's state_dict stays empty.
        self.triu = self._strict_upper(21)
        self.ptriu = self._strict_upper(25)
        self.ltriu = self._strict_upper(20)

    @staticmethod
    def _strict_upper(n):
        """Return a (1, n, n) float mask that is 1 strictly above the diagonal."""
        return torch.triu(torch.ones(n, n, dtype = torch.float), diagonal = 1).unsqueeze(0)

    @staticmethod
    def _pairwise_upper(points, upper):
        """Per-frame Euclidean distance for every landmark pair i < j, row-major."""
        diff = points.unsqueeze(2) - points.unsqueeze(1)
        dist = torch.sqrt((diff ** 2).sum(-1))
        # Selecting with the mask directly keeps exactly the pairs above the
        # diagonal, with no dependence on the distance values themselves.
        return dist[:, upper[0].to(device = points.device, dtype = torch.bool)]

    def forward(self, x, mode):
        """Convert raw landmarks into the flat per-frame feature matrix.

        Args:
            x: float tensor of shape (frames, landmarks, 3); NaN marks a
                missing coordinate. The tensor is not modified.
            mode: unused; kept for call-site compatibility.

        Returns:
            Float tensor of shape (kept_frames, 1196), where kept_frames is the
            number of frames with the selected hand present (or all frames if
            the hand is absent everywhere).
        """
        # Zero NaNs first (on a copy) so that missing landmarks count as absent
        # in the hand comparison and the frame filter below.
        x = torch.where(torch.isnan(x), torch.zeros_like(x), x)

        left_hand = x[:, 40:61, :]
        right_hand = x[:, 94:, :]
        use_left = bool((left_hand != 0).sum() > (right_hand != 0).sum())
        hand = left_hand if use_left else right_hand

        # torch.cat allocates new storage, so the mirroring never touches `x`.
        xfeat = torch.cat([hand, x[:, 61:86, :], x[:, :40, :]], dim = 1)
        if use_left:
            xfeat[:, :, 0] = -xfeat[:, :, 0]

        # Keep only frames where the hand was detected; fall back to all frames
        # when it never was, so the output is not empty.
        hand_present = xfeat[:, :hand.shape[1], :].flatten(1).sum(1) != 0
        if hand_present.sum() != 0:
            xfeat = xfeat[hand_present]

        # Difference to the following (kept) frame; the last one has no successor.
        dxyz = torch.zeros_like(xfeat)
        dxyz[:-1] = xfeat[:-1] - xfeat[1:]

        hand_xyz = xfeat[:, :21, :3]
        pose_xy = xfeat[:, 21:46, :2]
        lip_a_xy = xfeat[:, 46:66, :2]
        lip_b_xy = xfeat[:, 66:86, :2]

        return torch.cat([
            hand_xyz.flatten(1),
            pose_xy.flatten(1),
            lip_a_xy.flatten(1),
            dxyz[:, :21, :3].flatten(1),
            dxyz[:, 21:46, :2].flatten(1),
            dxyz[:, 46:66, :2].flatten(1),
            self._pairwise_upper(hand_xyz, self.triu),
            self._pairwise_upper(pose_xy, self.ptriu),
            self._pairwise_upper(lip_a_xy, self.ltriu),
            self._pairwise_upper(lip_b_xy, self.ltriu),
        ], dim = -1)

feature_converter = FeatureGen()

In [None]:
class CustomDataset(torch.utils.data.Dataset):
  """Serve one padded feature matrix and label per dataframe row.

  All frames of all samples live in one stacked array (``data``); per-sample
  frame counts are converted into (start, end) row ranges so a sample can be
  sliced out by its ``original_index``.
  """

  # Default location of the per-sample frame counts.
  FRAME_COUNTS_PATH = '/content/drive/MyDrive/Kaggle/aggregation/frame.npy'

  def __init__(self, args, df, data, mode='aug', frames=None):
    """
    Args:
      args: config object; ``args.max_frame`` is read by ``pad``.
      df: dataframe whose rows unpack to (frame, label, original_index).
      data: array of stacked frames, indexed by the cumulative frame ranges.
      mode: passed through to ``feature_converter``.
      frames: optional per-sample frame counts; when omitted they are loaded
        from ``FRAME_COUNTS_PATH`` (the previous, always-on behavior).
    """
    self.args = args
    self.df = df
    self.data = data
    self.mode = mode

    if frames is None:
      frames = np.load(self.FRAME_COUNTS_PATH)
    self.indices = self.cumulative_sum_tuples(np.asarray(frames).astype(int))

  def __getitem__(self, idx):
    """Return ``(x, y)``: padded features (max_frame, n_features) and the label."""
    _, label, original_index = self.df.iloc[idx]

    start_idx, end_idx = self.indices[int(original_index)]

    x = torch.tensor(self.data[start_idx:end_idx], dtype = torch.float)
    x = feature_converter(x, self.mode)
    x = self.pad(x)
    y = torch.tensor(int(label), dtype = torch.long)

    return x, y

  def pad(self, x):
    """Truncate or zero-pad ``x`` along dim 0 to exactly ``args.max_frame`` rows."""
    max_frame = self.args.max_frame
    if x.shape[0] > max_frame:
      return x[:max_frame]

    padded_x = torch.zeros(max_frame, x.shape[-1])
    padded_x[:x.shape[0]] = x
    return padded_x

  def cumulative_sum_tuples(self, lst):
    """Turn per-sample counts into consecutive ``(start, end)`` ranges.

    ``[2, 3, 4]`` becomes ``[(0, 2), (2, 5), (5, 9)]``; an empty input gives
    an empty list instead of raising.
    """
    counts = np.asarray(lst, dtype = np.int64).reshape(-1)
    ends = np.cumsum(counts)
    starts = ends - counts
    return list(zip(starts.tolist(), ends.tolist()))

  def __len__(self):
    return len(self.df)

if __name__ == "__main__":
  train_df, val_df = folds[1]
  dataset = CustomDataset(args, val_df, data, 'aug')
  i = random.randint(0, len(val_df)-1)
  sample = dataset[i]
  print(sample[0].shape)
  print(sample[-1])

torch.Size([100, 1196])
tensor(193)


## model

In [None]:
# model version 2, 3

from transformers import RobertaPreLayerNormConfig, RobertaPreLayerNormModel

class CustomModel(nn.Module):
  """Sequence classifier, model versions 2 and 3.

  Six linear embeddings are applied to fixed column ranges of the per-frame
  features, fused by a linear layer, run through a one-layer
  RobertaPreLayerNorm encoder with token type ids, pooled over time
  (mean / max / std) and classified into 250 classes.

  Input: ``(batch, frames, n_features)`` where the first 1196 columns are the
  features, column ``-2`` is the token type id and column ``-1`` the hand mask.
  NOTE(review): this expects the 1198-column features of feature version 2;
  with 1196-column input the last two distance columns are read as the flag
  columns - confirm which FeatureGen is active.
  """

  def __init__(self, args):
    super(CustomModel, self).__init__()
    self.args = args

    self.hidden = 384

    self.xy_embeddings = nn.Linear(153, self.hidden)
    self.motion_embeddings = nn.Linear(153, self.hidden)
    self.dist_embeddings = nn.Linear(210, self.hidden)
    self.pdist_embeddings = nn.Linear(300, self.hidden)
    self.oldist_embeddings = nn.Linear(190, self.hidden)
    self.ildist_embeddings = nn.Linear(190, self.hidden)
    self.relu = nn.ReLU()
    self.content_embeddings = nn.Linear(self.hidden * 6, self.hidden)

    self.encoder = RobertaPreLayerNormModel(
        RobertaPreLayerNormConfig(
            hidden_size = self.hidden,
            num_hidden_layers = 1,
            num_attention_heads = 4,
            intermediate_size = 1024,
            hidden_act = 'relu',
            type_vocab_size = 3
            )
        )

    self.fc = nn.Linear(self.hidden * 3, 1024)
    self.bn = nn.BatchNorm1d(1024)
    self.drop = nn.Dropout(0.4)

    self.out = nn.Linear(1024, 250)

    # Same layers, same order as before, so seeded initialisation is unchanged.
    for layer in (self.xy_embeddings, self.motion_embeddings, self.dist_embeddings,
                  self.pdist_embeddings, self.oldist_embeddings, self.ildist_embeddings,
                  self.content_embeddings, self.fc, self.out):
      torch.nn.init.xavier_uniform_(layer.weight)

  def get_att_mask(self, x):
    """Return a float mask (batch, frames): 1 where a frame's features do not sum to 0."""
    return (x.sum(-1) != 0).float()

  def get_pool(self, x, x_mask):
    """Masked mean / max / std pooling over the frame axis.

    Args:
      x: (batch, frames, hidden) encoder output.
      x_mask: (batch, frames) float mask, 1 for frames to include.

    Returns:
      (batch, 3 * hidden): mean, max and std concatenated. A sequence with no
      valid frame pools to (0, 0, ~0) instead of NaN.
    """
    mask = x_mask.unsqueeze(-1)
    x = x * mask
    count = x_mask.sum(1).unsqueeze(-1)
    # Guard the divisions: a fully padded sequence has count == 0.
    safe_count = count.clamp(min = 1)

    apool = x.sum(1) / safe_count
    # Push masked frames far below any real activation before taking the max.
    mpool, _ = torch.max(x - (1 - mask) * 1e10, dim = 1)
    mpool = mpool.masked_fill(count == 0, 0.0)
    spool = torch.sqrt((torch.sum(((x - apool.unsqueeze(1)) ** 2) * mask, dim = 1) / safe_count) + 1e-9)
    return torch.cat([apool, mpool, spool], dim = -1)

  def _embed(self, x):
    """Embed each feature group separately and fuse them to ``hidden`` channels."""
    groups = torch.cat([
        self.xy_embeddings(x[:, :, :153]),
        self.motion_embeddings(x[:, :, 153:306]),
        self.dist_embeddings(x[:, :, 306:516]),
        self.pdist_embeddings(x[:, :, 516:816]),
        self.oldist_embeddings(x[:, :, 816:1006]),
        self.ildist_embeddings(x[:, :, 1006:1196]),
    ], dim = -1)
    return self.content_embeddings(self.relu(groups))

  def forward(self, x):
    """Return class logits of shape (batch, 250)."""
    # Column -1 (hand mask) is carried in the input but not used: pooling runs
    # over the attention mask (the hand-mask pooling variant is disabled).
    token_type_ids = x[:, :, -2].long()
    x = x[:, :, :1196]
    x_mask = self.get_att_mask(x)

    x = self._embed(x)
    x = self.encoder(inputs_embeds = x, attention_mask = x_mask, token_type_ids = token_type_ids).last_hidden_state

    x = self.get_pool(x, x_mask)

    x = self.fc(x)
    x = self.bn(x)
    x = self.relu(x)
    x = self.drop(x)

    return self.out(x)

if __name__ == "__main__":
  model = CustomModel(args)
  model.eval()
  output = model(sample[0].unsqueeze(0))
  print(output.shape)

torch.Size([1, 250])


In [None]:
# model version 1

from transformers import RobertaPreLayerNormConfig, RobertaPreLayerNormModel

class CustomModel(nn.Module):
  """Sequence classifier, model version 1.

  Six linear embeddings are applied to fixed column ranges of the 1196
  per-frame features, fused by a linear layer, run through a one-layer
  RobertaPreLayerNorm encoder, pooled over time (mean / max / std) and
  classified into 250 classes.

  Input: ``(batch, frames, n_features)``; all-zero frames are treated as padding.
  """

  def __init__(self, args):
    super(CustomModel, self).__init__()
    self.args = args

    self.hidden = 384

    self.xy_embeddings = nn.Linear(153, self.hidden)
    self.motion_embeddings = nn.Linear(153, self.hidden)
    self.dist_embeddings = nn.Linear(210, self.hidden)
    self.pdist_embeddings = nn.Linear(300, self.hidden)
    self.oldist_embeddings = nn.Linear(190, self.hidden)
    self.ildist_embeddings = nn.Linear(190, self.hidden)
    self.relu = nn.ReLU()
    self.content_embeddings = nn.Linear(self.hidden * 6, self.hidden)

    self.encoder = RobertaPreLayerNormModel(
        RobertaPreLayerNormConfig(
            hidden_size = self.hidden,
            num_hidden_layers = 1,
            num_attention_heads = 4,
            intermediate_size = 1024,
            hidden_act = 'relu',
            )
        )

    self.fc1 = nn.Linear(self.hidden * 3, 1024)
    self.bn1 = nn.BatchNorm1d(1024)
    self.drop = nn.Dropout(0.4)

    self.out = nn.Linear(1024, 250)

    # Same layers, same order as before, so seeded initialisation is unchanged.
    for layer in (self.xy_embeddings, self.motion_embeddings, self.dist_embeddings,
                  self.pdist_embeddings, self.oldist_embeddings, self.ildist_embeddings,
                  self.content_embeddings, self.fc1, self.out):
      torch.nn.init.xavier_uniform_(layer.weight)

  def get_att_mask(self, x):
    """Return a float mask (batch, frames): 1 where a frame's features do not sum to 0."""
    return (x.sum(-1) != 0).float()

  def get_pool(self, x, x_mask):
    """Masked mean / max / std pooling over the frame axis.

    Args:
      x: (batch, frames, hidden) encoder output.
      x_mask: (batch, frames) float mask, 1 for frames to include.

    Returns:
      (batch, 3 * hidden): mean, max and std concatenated. A sequence with no
      valid frame pools to (0, 0, ~0) instead of NaN.
    """
    mask = x_mask.unsqueeze(-1)
    x = x * mask
    count = x_mask.sum(1).unsqueeze(-1)
    # Guard the divisions: a fully padded sequence has count == 0.
    safe_count = count.clamp(min = 1)

    apool = x.sum(1) / safe_count
    # Push masked frames far below any real activation before taking the max.
    mpool, _ = torch.max(x - (1 - mask) * 1e10, dim = 1)
    mpool = mpool.masked_fill(count == 0, 0.0)
    spool = torch.sqrt((torch.sum(((x - apool.unsqueeze(1)) ** 2) * mask, dim = 1) / safe_count) + 1e-9)
    return torch.cat([apool, mpool, spool], dim = -1)

  def _embed(self, x):
    """Embed each feature group separately and fuse them to ``hidden`` channels."""
    groups = torch.cat([
        self.xy_embeddings(x[:, :, :153]),
        self.motion_embeddings(x[:, :, 153:306]),
        self.dist_embeddings(x[:, :, 306:516]),
        self.pdist_embeddings(x[:, :, 516:816]),
        self.oldist_embeddings(x[:, :, 816:1006]),
        self.ildist_embeddings(x[:, :, 1006:1196]),
    ], dim = -1)
    return self.content_embeddings(self.relu(groups))

  def forward(self, x):
    """Return class logits of shape (batch, 250)."""
    x_mask = self.get_att_mask(x)

    x = self._embed(x)
    x = self.encoder(inputs_embeds = x, attention_mask = x_mask).last_hidden_state

    x = self.get_pool(x, x_mask)

    x = self.fc1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.drop(x)

    return self.out(x)

if __name__ == "__main__":
  model = CustomModel(args)
  model.eval()
  output = model(sample[0].unsqueeze(0))
  print(output.shape)

torch.Size([1, 250])


In [None]:
# model version 4

class CustomModel(nn.Module):
  """Sequence classifier, model version 4 (no sequence mixing before pooling).

  Six linear embeddings are applied to fixed column ranges of the 1196
  per-frame features, fused by a linear layer, passed through one more linear
  layer applied to each frame independently, pooled over time
  (mean / max / std) and classified into 250 classes.

  Input: ``(batch, frames, n_features)``; all-zero frames are treated as padding.
  """

  def __init__(self, args):
    super(CustomModel, self).__init__()
    self.args = args

    self.hidden = 768

    self.xy_embeddings = nn.Linear(153, self.hidden)
    self.motion_embeddings = nn.Linear(153, self.hidden)
    self.dist_embeddings = nn.Linear(210, self.hidden)
    self.pdist_embeddings = nn.Linear(300, self.hidden)
    self.oldist_embeddings = nn.Linear(190, self.hidden)
    self.ildist_embeddings = nn.Linear(190, self.hidden)
    self.relu = nn.ReLU()
    self.content_embeddings = nn.Linear(self.hidden * 6, self.hidden)

    self.encoder = nn.Linear(self.hidden, self.hidden)

    self.fc1 = nn.Linear(self.hidden * 3, 1024)
    self.bn1 = nn.BatchNorm1d(1024)
    self.drop = nn.Dropout(0.4)

    self.out = nn.Linear(1024, 250)

    # Same layers, same order as before, so seeded initialisation is unchanged.
    for layer in (self.xy_embeddings, self.motion_embeddings, self.dist_embeddings,
                  self.pdist_embeddings, self.oldist_embeddings, self.ildist_embeddings,
                  self.content_embeddings, self.encoder, self.fc1, self.out):
      torch.nn.init.xavier_uniform_(layer.weight)

  def get_att_mask(self, x):
    """Return a float mask (batch, frames): 1 where a frame's features do not sum to 0."""
    return (x.sum(-1) != 0).float()

  def get_pool(self, x, x_mask):
    """Masked mean / max / std pooling over the frame axis.

    Args:
      x: (batch, frames, hidden) per-frame activations.
      x_mask: (batch, frames) float mask, 1 for frames to include.

    Returns:
      (batch, 3 * hidden): mean, max and std concatenated. A sequence with no
      valid frame pools to (0, 0, ~0) instead of NaN.
    """
    mask = x_mask.unsqueeze(-1)
    x = x * mask
    count = x_mask.sum(1).unsqueeze(-1)
    # Guard the divisions: a fully padded sequence has count == 0.
    safe_count = count.clamp(min = 1)

    apool = x.sum(1) / safe_count
    # Push masked frames far below any real activation before taking the max.
    mpool, _ = torch.max(x - (1 - mask) * 1e10, dim = 1)
    mpool = mpool.masked_fill(count == 0, 0.0)
    spool = torch.sqrt((torch.sum(((x - apool.unsqueeze(1)) ** 2) * mask, dim = 1) / safe_count) + 1e-9)
    return torch.cat([apool, mpool, spool], dim = -1)

  def _embed(self, x):
    """Embed each feature group separately and fuse them to ``hidden`` channels."""
    groups = torch.cat([
        self.xy_embeddings(x[:, :, :153]),
        self.motion_embeddings(x[:, :, 153:306]),
        self.dist_embeddings(x[:, :, 306:516]),
        self.pdist_embeddings(x[:, :, 516:816]),
        self.oldist_embeddings(x[:, :, 816:1006]),
        self.ildist_embeddings(x[:, :, 1006:1196]),
    ], dim = -1)
    return self.content_embeddings(self.relu(groups))

  def forward(self, x):
    """Return class logits of shape (batch, 250)."""
    x_mask = self.get_att_mask(x)

    x = self._embed(x)
    x = self.relu(x)
    x = self.encoder(x)

    x = self.get_pool(x, x_mask)

    x = self.fc1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.drop(x)

    return self.out(x)

if __name__ == "__main__":
  model = CustomModel(args)
  model.eval()
  output = model(sample[0].unsqueeze(0))
  print(output.shape)

torch.Size([1, 250])


In [None]:
# model version 5

from transformers import RobertaPreLayerNormConfig, RobertaPreLayerNormModel

class CustomModel(nn.Module):
  """Sequence classifier, model version 5 (GRU encoder).

  Six linear embeddings are applied to fixed column ranges of the 1196
  per-frame features, fused by a linear layer, run through a single
  batch-first GRU, pooled over time (mean / max / std) and classified into
  250 classes.

  Input: ``(batch, frames, n_features)``; all-zero frames are treated as padding.
  """

  def __init__(self, args):
    super(CustomModel, self).__init__()
    self.args = args

    self.hidden = 384

    self.xy_embeddings = nn.Linear(153, self.hidden)
    self.motion_embeddings = nn.Linear(153, self.hidden)
    self.dist_embeddings = nn.Linear(210, self.hidden)
    self.pdist_embeddings = nn.Linear(300, self.hidden)
    self.oldist_embeddings = nn.Linear(190, self.hidden)
    self.ildist_embeddings = nn.Linear(190, self.hidden)
    self.relu = nn.ReLU()
    self.content_embeddings = nn.Linear(self.hidden * 6, self.hidden)

    self.encoder = nn.GRU(self.hidden, self.hidden, batch_first = True)

    self.fc1 = nn.Linear(self.hidden * 3, 1024)
    self.bn1 = nn.BatchNorm1d(1024)
    self.drop = nn.Dropout(0.4)

    self.out = nn.Linear(1024, 250)

    # Same layers, same order as before, so seeded initialisation is unchanged.
    for layer in (self.xy_embeddings, self.motion_embeddings, self.dist_embeddings,
                  self.pdist_embeddings, self.oldist_embeddings, self.ildist_embeddings,
                  self.content_embeddings, self.fc1, self.out):
      torch.nn.init.xavier_uniform_(layer.weight)

  def get_att_mask(self, x):
    """Return a float mask (batch, frames): 1 where a frame's features do not sum to 0."""
    return (x.sum(-1) != 0).float()

  def get_pool(self, x, x_mask):
    """Masked mean / max / std pooling over the frame axis.

    Args:
      x: (batch, frames, hidden) encoder output.
      x_mask: (batch, frames) float mask, 1 for frames to include.

    Returns:
      (batch, 3 * hidden): mean, max and std concatenated. A sequence with no
      valid frame pools to (0, 0, ~0) instead of NaN.
    """
    mask = x_mask.unsqueeze(-1)
    x = x * mask
    count = x_mask.sum(1).unsqueeze(-1)
    # Guard the divisions: a fully padded sequence has count == 0.
    safe_count = count.clamp(min = 1)

    apool = x.sum(1) / safe_count
    # Push masked frames far below any real activation before taking the max.
    mpool, _ = torch.max(x - (1 - mask) * 1e10, dim = 1)
    mpool = mpool.masked_fill(count == 0, 0.0)
    spool = torch.sqrt((torch.sum(((x - apool.unsqueeze(1)) ** 2) * mask, dim = 1) / safe_count) + 1e-9)
    return torch.cat([apool, mpool, spool], dim = -1)

  def _embed(self, x):
    """Embed each feature group separately and fuse them to ``hidden`` channels."""
    groups = torch.cat([
        self.xy_embeddings(x[:, :, :153]),
        self.motion_embeddings(x[:, :, 153:306]),
        self.dist_embeddings(x[:, :, 306:516]),
        self.pdist_embeddings(x[:, :, 516:816]),
        self.oldist_embeddings(x[:, :, 816:1006]),
        self.ildist_embeddings(x[:, :, 1006:1196]),
    ], dim = -1)
    return self.content_embeddings(self.relu(groups))

  def forward(self, x):
    """Return class logits of shape (batch, 250)."""
    x_mask = self.get_att_mask(x)

    x = self._embed(x)
    # The GRU also steps over padded frames; they are excluded by the pooling mask.
    x, _ = self.encoder(x)

    x = self.get_pool(x, x_mask)

    x = self.fc1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.drop(x)

    return self.out(x)

if __name__ == "__main__":
  model = CustomModel(args)
  model.eval()
  output = model(sample[0].unsqueeze(0))
  print(output.shape)

torch.Size([1, 250])


## train

In [None]:
import logging
from copy import deepcopy
from collections import OrderedDict

_logger = logging.getLogger(__name__)

class ModelEma:
    """ Model Exponential Moving Average (DEPRECATED)
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This version is deprecated, it does not work with scripted models. Will be removed eventually.
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use
    RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA
    smoothing of weights to match results. Pay attention to the decay constant you are using
    relative to your update count per epoch.
    To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
    disable validation of the EMA weights. Validation will have to be done manually in a separate
    process, or after the training stops converging.
    This class is sensitive where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.

    Args:
        model: module whose state_dict is tracked; a deep copy is kept in ``self.ema``.
        decay: weight given to the previous average on each ``update`` call.
        device: if non-empty, the EMA copy is moved to this device.
        resume: if non-empty, path of a checkpoint whose ``state_dict_ema`` entry
            initialises the EMA copy.
    """
    def __init__(self, model, decay=0.9999, device='', resume=''):
        # make a copy of the model for accumulating moving average of weights
        self.ema = deepcopy(model)
        self.ema.eval()
        self.decay = decay
        self.device = device  # perform ema on different device from model if set
        if device:
            self.ema.to(device=device)
        self.ema_has_module = hasattr(self.ema, 'module')
        if resume:
            self._load_checkpoint(resume)
        # The EMA copy is only ever written via update(), never by an optimizer.
        for p in self.ema.parameters():
            p.requires_grad_(False)

    def _load_checkpoint(self, checkpoint_path):
        """Initialise the EMA weights from the ``state_dict_ema`` entry of a checkpoint.

        If the checkpoint has no such entry, a warning is logged and the weights
        copied from the model at construction time are kept.
        """
        # NOTE(review): torch.load unpickles the file; only point this at
        # checkpoints from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        assert isinstance(checkpoint, dict)
        if 'state_dict_ema' in checkpoint:
            new_state_dict = OrderedDict()
            for k, v in checkpoint['state_dict_ema'].items():
                # ema model may have been wrapped by DataParallel, and need module prefix
                if self.ema_has_module:
                    # Test for the full 'module.' prefix: a bare 'module' test would
                    # also match keys such as 'module_list.0.weight' and leave them
                    # without the prefix they need.
                    name = k if k.startswith('module.') else 'module.' + k
                else:
                    name = k
                new_state_dict[name] = v
            self.ema.load_state_dict(new_state_dict)
            _logger.info("Loaded state_dict_ema")
        else:
            _logger.warning("Failed to find state_dict_ema, starting from loaded model weights")

    def update(self, model):
        """Fold the current state of ``model`` into the moving average.

        Floating-point entries become ``decay * ema + (1 - decay) * model``.
        Non-floating entries (e.g. integer step counters kept as buffers) are
        copied verbatim: blending them in float and writing the result back into
        an integer tensor would truncate the value.
        """
        # correct a mismatch in state dict keys
        needs_module = hasattr(model, 'module') and not self.ema_has_module
        with torch.no_grad():
            msd = model.state_dict()
            for k, ema_v in self.ema.state_dict().items():
                if needs_module:
                    k = 'module.' + k
                model_v = msd[k].detach()
                if self.device:
                    model_v = model_v.to(device=self.device)
                if ema_v.is_floating_point() or ema_v.is_complex():
                    ema_v.copy_(ema_v * self.decay + (1. - self.decay) * model_v)
                else:
                    ema_v.copy_(model_v)

In [None]:
class CustomTrainer:
  """Runs the train/validate loop for one model and saves the best checkpoints.

  Each epoch trains on the training loader, evaluates on the validation loader
  and, whenever the validation accuracy is at least as good as the best seen so
  far, writes the model ``state_dict`` to ``<save_dir>/best-acc-epochNNN.bin``.
  Progress is printed and also appended to ``<save_dir>/log.txt``.
  """

  def __init__(self, args, model, train_data, save_dir):
    """Set up optimizer, LR schedule, losses and the output directory.

    Args:
      args: config object; ``lr``, ``epoch_n``, ``batch_size``, ``warmup_ratio``
        and ``smoothing`` are read here.
      model: the network to optimise.
      train_data: sized collection of training examples; only ``len()`` is used,
        to derive the scheduler's step budget.
      save_dir: directory for checkpoints and the log file (created if missing).
    """
    self.model = model

    # EMA of the weights is currently disabled. To enable it, create the tracker
    # here, call self.model_ema.update(self.model) after each optimizer step and
    # evaluate / save self.model_ema.ema instead of self.model.
    #self.model_ema = ModelEma(model, decay=0.9997)

    self.save_dir = save_dir
    os.makedirs(self.save_dir, exist_ok = True)

    self.log_path = f'{self.save_dir}/log.txt'

    self.optimizer = AdamW(model.parameters(), lr = args.lr)

    # Only needed by the mixed-precision path, which is currently not used.
    self.scaler = torch.cuda.amp.GradScaler()

    # Step budget for the schedule: one scheduler step is taken per batch.
    total_steps = int(len(train_data) * args.epoch_n/(args.batch_size))
    warmup_steps = int(total_steps * args.warmup_ratio)
    print('total_steps: ', total_steps)
    print('warmup_steps: ', warmup_steps)

    self.scheduler = get_cosine_schedule_with_warmup(self.optimizer,
                                                     num_warmup_steps = warmup_steps,
                                                     num_training_steps = total_steps)

    # Label smoothing is applied to the training loss only.
    self.loss_fn = nn.CrossEntropyLoss(label_smoothing = args.smoothing)
    self.val_loss_fn = nn.CrossEntropyLoss()

    self.best_score = 0.0

    self.log('trainer is ready')


  def run(self, args, train_loader, val_loader):
    """Train for ``args.epoch_n`` epochs, validating and checkpointing after each."""
    for epoch in range(args.epoch_n):
      gc.collect()
      learning_rate = self.optimizer.param_groups[0]['lr']
      print('learning_rate: ', learning_rate)
      print(f'----- train, epoch{epoch + 1} -----')
      train_loss, train_score = self.train_function(args, train_loader)
      print(' ')
      print(f'train_loss: {train_loss:.6f}, train_score: {train_score:.6f}')

      self.log(f'learning_rate: {learning_rate}')
      self.log(f'----- train, epoch{epoch + 1} -----')
      self.log(' ')
      self.log(f'train_loss: {train_loss:.6f}, train_score: {train_score:.6f}')

      print('----------------------------------')

      print(f'----- val, epoch{epoch + 1} -----')
      val_loss, val_score = self.val_function(args, val_loader)
      print(' ')
      print(f'val_loss: {val_loss:.6f}, val_score: {val_score:.6f}')

      self.log(f'----- val, epoch{epoch+1} -----')
      self.log(' ')
      self.log(f'val_loss: {val_loss:.6f}, val_score: {val_score:.6f}')

      # ">=" means a tie with the best score also produces a new checkpoint.
      if val_score >= self.best_score:
        torch.save(self.model.state_dict(), f'{self.save_dir}/best-acc-epoch{epoch + 1:03d}.bin')
        self.best_score = val_score
        print(f'model is saved when epoch is : {epoch + 1}')
        self.log(f'model is saved when epoch is : {epoch + 1}')

      print('----------------------------------')
      print(' ')
      self.log('----------------------------------')
      self.log(' ')


  def train_function(self, args, train_loader):
    """Run one training epoch.

    The epoch stops early if the loss becomes NaN; the event is printed and
    logged, and the averages cover only the batches that were processed.

    Returns:
      ``(mean_loss, mean_batch_accuracy)`` as Python floats.
    """
    self.model.train()

    total_loss = 0.0
    total_score = 0.0
    n_batches = 0
    for bi, data in enumerate(tqdm(train_loader)):
      video, label = [x.to(args.device) for x in data]
      label = label.reshape(-1)

      self.optimizer.zero_grad()

      out = self.model(video)
      loss = self.loss_fn(out, label)
      if loss.isnan():
        message = f'NaN loss at batch {bi}; stopping this epoch early'
        print(message)
        self.log(message)
        break

      loss.backward()
      self.optimizer.step()
      self.scheduler.step()

      pred = out.argmax(1).detach().cpu().numpy()
      batch_acc = accuracy_score(label.detach().cpu().numpy(), pred)

      # .item() keeps the running totals as plain floats.
      total_loss += loss.item()
      total_score += batch_acc
      n_batches += 1

    # Avoid dividing by zero when no batch was processed.
    n_batches = max(n_batches, 1)
    return total_loss/n_batches, total_score/n_batches

  def val_function(self, args, val_loader):
    """Evaluate the model on ``val_loader`` without tracking gradients.

    Returns:
      ``(mean_loss, accuracy)`` where the loss is averaged per batch and the
      accuracy is computed over all predictions at once.
    """
    self.model.eval()

    total_loss = 0.0
    n_batches = 0
    preds, labels = [], []
    # Disable autograd here so the method is safe to call on its own.
    with torch.no_grad():
      for data in tqdm(val_loader):
        video, label = [x.to(args.device) for x in data]
        label = label.reshape(-1)

        out = self.model(video)
        total_loss += self.val_loss_fn(out, label).item()
        n_batches += 1

        preds.extend(out.argmax(1).cpu().tolist())
        labels.extend(label.cpu().tolist())

    total_score = accuracy_score(labels, preds)
    return total_loss/max(n_batches, 1), total_score

  def log(self, message):
    """Append ``message`` as one line to the log file."""
    with open(self.log_path, 'a') as logger:
      logger.write(f'{message}\n')

## run

In [None]:
# k-fold training

if __name__ == "__main__":
  args = CustomConfig()
  folds, df = preprocess(args)

  # NOTE(review): the loop starts at index 1, so folds[0] is never trained by
  # this cell - confirm that is intentional.
  for i in range(1, args.fold_n):
    seed_everything(args)

    # Experiment-specific overrides on top of CustomConfig.
    args.smoothing = 0.75     # label smoothing for the training loss
    args.max_frame = 100
    args.epoch_n = 40
    args.num_workers = 12

    train_df, val_df = folds[i]

    # Optional filter, currently disabled:
    #train_df = train_df[train_df['ratio']>0.7].reset_index(drop = True)
    #val_df = val_df[val_df['ratio']>0.7].reset_index(drop = True)

    train_dataset = CustomDataset(args, train_df, data)
    val_dataset = CustomDataset(args, val_df, data, 'noaug')

    # Settings shared by both loaders.
    loader_kwargs = dict(batch_size = args.batch_size, num_workers = args.num_workers)
    train_dataloader = torch.utils.data.DataLoader(
      train_dataset, shuffle = True, drop_last = True, **loader_kwargs)
    # Despite its name this loader wraps the validation split of the fold.
    test_dataloader = torch.utils.data.DataLoader(
      val_dataset, shuffle = False, drop_last = False, **loader_kwargs)

    model = CustomModel(args).to(args.device)

    save_dir = f'/content/drive/MyDrive/Kaggle/model/robertaprelm-ls0.75-feature-ldist-divide-head-gru/fold{i + 1}'

    trainer = CustomTrainer(args, model, train_df, save_dir)
    result = trainer.run(args, train_dataloader, test_dataloader)

## inference

In [None]:
# inference test setting

# Rebuild the network and restore trained weights; the checkpoint is
# deserialized onto the CPU.
model = CustomModel(args)
checkpoint = torch.load(
  '/content/drive/MyDrive/Kaggle/model/main/robertaprelm-ls0.75-feature-ldist-divide-head/fold2/best-acc-epoch034.bin',
  map_location = 'cpu',
)
model.load_state_dict(checkpoint)
model.eval()

# label -> index mapping read from disk, plus its inverse for decoding predictions.
label_index = read_dict(args, "/content/drive/MyDrive/Kaggle/sign_to_prediction_index_map.json")
index_label = {label_index[key]: key for key in label_index}

# Samples are drawn from the validation split of folds[1], built in 'aug' mode.
train_df, val_df = folds[1]
dataset = CustomDataset(args, val_df, data, 'aug')

In [None]:
# for multiple run

# Draw one random validation sample and compare its label with the prediction.
train_df, val_df = folds[1]
dataset = CustomDataset(args, val_df, data, 'aug')
i = random.randint(0, len(val_df)-1)
sample = dataset[i]

print('true: ', index_label[sample[-1].tolist()])

# Inference only: no autograd graph is needed for a single prediction.
with torch.no_grad():
  pred_index = model(sample[0].unsqueeze(0)).argmax(1)[0].tolist()
print('pred: ', index_label[pred_index])

true:  pretty
pred:  pretty
