# keras

In [None]:
import tensorflow as tf

# Change how TensorFlow allocates GPU memory: enable memory growth so memory is
# allocated dynamically for every visible GPU.
gpus = tf.config.list_physical_devices('GPU')
try:
    # With no GPU present the list is empty and the loop body never runs.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Best-effort: report the problem and continue with the default allocation mode.
    print(e)

## library

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m84.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m108.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import Sequence
from transformers import TFRobertaPreLayerNormModel, RobertaPreLayerNormConfig

#import tflite_runtime.interpreter as tflite
import numpy as np
import pandas as pd

from tqdm import tqdm
from glob import glob
import random
import os
import json 
import math
import gc

import time

from sklearn.utils import shuffle
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import train_test_split

from keras import backend as K
from tensorflow.keras.optimizers import AdamW, Adam
from keras.losses import CategoricalCrossentropy
from keras.callbacks import ModelCheckpoint

## utils

In [None]:
def load_relevant_data_subset_with_imputation(args, pq_path):
  """Read the x/y/z columns of a parquet file as a float32 array with NaNs set to 0.

  Args:
    args: object exposing `rows_per_frame` (number of rows that make up one frame).
    pq_path: path of the parquet file to read.

  Returns:
    np.ndarray of shape (n_frames, args.rows_per_frame, 3), dtype float32, where
    n_frames = int(number_of_rows / args.rows_per_frame).
  """
  coord_cols = ['x', 'y', 'z']
  # Missing coordinates are imputed with 0 before reshaping.
  table = pd.read_parquet(pq_path, columns=coord_cols).replace(np.nan, 0)
  frame_count = int(len(table) / args.rows_per_frame)
  stacked = table.values.reshape(frame_count, args.rows_per_frame, len(coord_cols))
  return stacked.astype(np.float32)

def load_relevant_data_subset(args, pq_path):
  """Read the x/y/z columns of a parquet file as a float32 array (NaNs are kept).

  Args:
    args: object exposing `rows_per_frame` (number of rows that make up one frame).
    pq_path: path of the parquet file to read.

  Returns:
    np.ndarray of shape (n_frames, args.rows_per_frame, 3), dtype float32, where
    n_frames = int(number_of_rows / args.rows_per_frame).
  """
  coord_cols = ['x', 'y', 'z']
  table = pd.read_parquet(pq_path, columns=coord_cols)
  frame_count = int(len(table) / args.rows_per_frame)
  stacked = table.values.reshape(frame_count, args.rows_per_frame, len(coord_cols))
  return stacked.astype(np.float32)

def read_dict(args, file_path):
  """Load a JSON file and return the decoded object.

  Args:
    args: unused; kept so the signature matches the other loaders.
    file_path: path to the JSON file; a leading `~` is expanded to the home directory.

  Returns:
    Whatever `json.load` decodes from the file.
  """
  with open(os.path.expanduser(file_path), "r") as handle:
    return json.load(handle)

## config

In [None]:
class CustomConfig:
  """Hyper-parameters shared by the notebook, stored as plain class attributes."""

  # --- training ---
  seed = 42
  batch_size = 128
  num_workers = 12
  device = 'cuda'
  folder = 'result'
  lr = 1e-3
  epoch_n = 40
  rows_per_frame = 75       # rows per frame, used by the parquet loaders to reshape
  warmup_ratio = 0.2
  max_frame = 100
  data_path = "/content/asl-signs/"
  smoothing = 0.75
  fold_n = 5                # number of KFold splits used by preprocess()

  # --- modeling ---
  in_features = 3 * rows_per_frame   # three values per row
  out_features = 32
  hidden_size = 64
  dense_dim = 512
  num_classes = 250
  drop_rate = 0.4


# Instantiate the shared config only when this code runs as the main module.
if __name__ == "__main__":
  args = CustomConfig()

## seed

In [None]:
def seed_everything(seed: int = 1):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy, TensorFlow/Keras and PyTorch, and sets the
    environment flags / cuDNN switches that request deterministic kernels.

    Args:
        seed: integer seed applied to all libraries.
    """
    # Imported locally because the notebook only imports torch in a later cell;
    # this keeps the function callable regardless of cell order.
    import torch

    os.environ["PYTHONHASHSEED"] = str(seed)
    # NOTE(review): these flags may need to be set before TensorFlow initializes
    # its GPU kernels to take effect — confirm for the TF version in use.
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    keras.utils.set_random_seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN benchmark mode auto-tunes (and may pick different) algorithms per run,
    # which defeats the deterministic setting above, so it must be off.
    torch.backends.cudnn.benchmark = False

## load data

In [None]:
# Load the pre-aggregated arrays from the mounted Google Drive folder.
# NOTE(review): presumably `data` stacks the landmark rows of all samples along axis 0 and
# `frame` holds the per-sample frame counts (get_data slices `data` by cumulative sums of
# `frame`) — confirm against the script that produced these files.
data = np.load('/content/drive/MyDrive/Kaggle/aggregation/data_m_with_lip.npy')
label = np.load('/content/drive/MyDrive/Kaggle/aggregation/label.npy')
frame = np.load('/content/drive/MyDrive/Kaggle/aggregation/frame.npy')
# `batch_id` is not referenced by any of the code visible in this part of the notebook.
batch_id = np.load('/content/drive/MyDrive/Kaggle/aggregation/batch_id.npy')

## preprocess

In [None]:
def preprocess(args):
  """Build participant-grouped cross-validation folds over the training index.

  Reads train.csv, attaches the module-level `frame` and `label` arrays plus a
  running `original_index`, then splits the fixed participant list with an
  unshuffled KFold so that each participant lands entirely in either the train
  or the test side of a fold.

  Args:
    args: config object exposing `fold_n` (number of folds).

  Returns:
    (folds, full_df): `folds` is a list of [train_df, test_df] pairs and `full_df`
    is the whole table; all frames carry only the columns
    'frame', 'label' and 'original_index'.
  """
  participant_ids = np.array([26734, 28656, 16069, 25571, 62590, 32319, 37055, 29302, 49445,
                              36257, 22343, 27610, 61333, 53618, 34503, 18796,  4718, 55372,
                              2044, 37779, 30680])
  keep_cols = ['frame', 'label', 'original_index']

  df = pd.read_csv('/content/drive/MyDrive/Kaggle/train.csv')
  df['frame'] = frame
  df['label'] = label
  df['original_index'] = np.arange(len(df))

  splitter = KFold(n_splits = args.fold_n, shuffle = False)

  folds = []
  for train_pos, test_pos in splitter.split(participant_ids):
    # Row membership is decided by participant, not by row position.
    in_train = df['participant_id'].isin(participant_ids[train_pos])
    in_test = df['participant_id'].isin(participant_ids[test_pos])
    folds.append([
        df[in_train].reset_index(drop = True)[keep_cols],
        df[in_test].reset_index(drop = True)[keep_cols],
    ])
  return folds, df[keep_cols]

if __name__ == "__main__":
  folds, df = preprocess(args)

## feature gen

In [None]:
import torch
import torch.nn as nn

class FeatureGenPytorch(nn.Module):
    """Convert one sample of landmark frames into per-frame feature vectors.

    Input `x` is indexed as (frame, landmark, coordinate); the slices below expect
    115 landmarks laid out as lips 0-39, left hand 40-60, pose 61-85 and right hand
    94-114, with 3 coordinates each.

    Output has 1196 columns per kept frame:
        [0:63]      dominant-hand x/y/z (21 landmarks)
        [63:113]    pose x/y (25 landmarks)
        [113:153]   first 20 lip landmarks x/y
        [153:306]   frame differences (this frame minus the next; last row is zeros)
        [306:516]   pairwise distances within the hand (210 pairs, 3-D)
        [516:816]   pairwise distances within the pose (300 pairs, 2-D)
        [816:1006]  pairwise distances within lip landmarks 0-19 (190 pairs, 2-D)
        [1006:1196] pairwise distances within lip landmarks 20-39 (190 pairs, 2-D)
    """

    def __init__(self):
        super(FeatureGenPytorch, self).__init__()
        # Strict upper-triangular 0/1 masks (with a leading dim of 1) selecting each
        # unordered landmark pair once: 21 hand, 25 pose and 20 lip landmarks.
        self.htriu = torch.tensor([[0] * (bi + 1) + [1] * (20 - bi) for bi in range(21)], dtype = torch.float).unsqueeze(0)
        self.ptriu = torch.tensor([[0] * (bi + 1) + [1] * (24 - bi) for bi in range(25)], dtype = torch.float).unsqueeze(0)
        self.ltriu = torch.tensor([[0] * (bi + 1) + [1] * (19 - bi) for bi in range(20)], dtype = torch.float).unsqueeze(0)
        # Not read by forward(); kept because it is a public attribute of the module.
        self.lip_indices = [
            61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
            291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
            78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
            95, 88, 178, 87, 14, 317, 402, 318, 324, 308
            ]

    def _pairwise_upper_dists(self, points, triu):
        """Return the Euclidean distance of every landmark pair (i < j), row-major.

        Args:
            points: tensor of shape (frames, n_landmarks, n_coords).
            triu: one of the (1, n, n) upper-triangular masks built in __init__.

        Returns:
            Tensor of shape (frames, n * (n - 1) / 2).
        """
        diff = points.unsqueeze(2) - points.unsqueeze(1)
        # The "+ 1 ... - 1" round trip is retained from the original formulation
        # (which used it to tell kept zeros from masked zeros) so the values stay
        # numerically identical to it.
        dist = torch.sqrt((diff ** 2).sum(-1)) + 1
        # Index with the fixed mask rather than with `dist * mask != 0`: the row
        # width is then always n*(n-1)/2, whatever the data contains.
        return dist[:, triu[0].bool()] - 1

    def forward(self, x):
        """Build the feature matrix for one sample.

        Args:
            x: float tensor indexed (frame, landmark, coordinate); NaNs are treated as 0.

        Returns:
            Float tensor of shape (n_kept_frames, 1196), truncated to at most 100 frames.
        """
        x = torch.where(torch.isnan(x), torch.tensor(0.0, dtype=torch.float32), x)

        lefth_x = x[:, 40:61, :]
        righth_x = x[:, 94:, :]
        pose_x = x[:, 61:86, :]
        lip_x = x[:, :40, :]

        # The hand with more non-zero coordinates over the whole sample is used.
        cond = (lefth_x != 0).float().sum() > (righth_x != 0).float().sum()

        h_x = torch.where(cond, lefth_x, righth_x)
        xfeat = torch.where(
            cond,
            torch.cat([lefth_x, pose_x, lip_x], dim = 1),
            torch.cat([righth_x, pose_x, lip_x], dim = 1),
        )

        # When the left hand is used, negate the x coordinate of every landmark.
        xcoord = xfeat[:, :, :1]
        xfeat = torch.cat([torch.where(cond, -xcoord, xcoord), xfeat[:, :, 1:]], dim = -1)

        # Drop frames whose hand coordinates sum to zero, unless that would drop every frame.
        has_hand = h_x.reshape(h_x.shape[0], -1).sum(1) != 0
        if has_hand.sum() != 0:
            xfeat = xfeat[has_hand]

        # Frame-to-frame motion: current frame minus the next one, zero for the last frame.
        dxyz = torch.cat([xfeat[:-1] - xfeat[1:], xfeat.new_zeros(1, xfeat.shape[1], xfeat.shape[2])], dim = 0)

        hand = xfeat[:, :21, :3]
        pose = xfeat[:, 21:46, :2]
        olip = xfeat[:, 46:66, :2]
        ilip = xfeat[:, 66:86, :2]

        hdist = self._pairwise_upper_dists(hand, self.htriu)
        pdist = self._pairwise_upper_dists(pose, self.ptriu)
        oldist = self._pairwise_upper_dists(olip, self.ltriu)
        ildist = self._pairwise_upper_dists(ilip, self.ltriu)

        n_frames = xfeat.shape[0]
        xfeat = torch.cat([
            hand.reshape(n_frames, -1),
            pose.reshape(n_frames, -1),
            olip.reshape(n_frames, -1),
            dxyz[:, :21, :3].reshape(n_frames, -1),
            dxyz[:, 21:46, :2].reshape(n_frames, -1),
            dxyz[:, 46:66, :2].reshape(n_frames, -1),
            hdist.reshape(n_frames, -1),
            pdist.reshape(n_frames, -1),
            oldist.reshape(n_frames, -1),
            ildist.reshape(n_frames, -1),
        ], dim = -1)

        # Keep at most 100 frames; shorter samples are returned unpadded.
        return xfeat[:100]

feature_converter_pt = FeatureGenPytorch()

In [None]:
import torch
import torch.nn as nn

class FeatureGenPytorchV2(nn.Module):
    """Convert one sample of landmark frames into per-frame feature vectors (V2).

    Unlike FeatureGenPytorch, no frame is dropped: the sample is cut to its first
    200 frames and two extra columns flag whether each frame contains a hand.

    Input `x` is indexed as (frame, landmark, coordinate); the slices below expect
    115 landmarks laid out as lips 0-39, left hand 40-60, pose 61-85 and right hand
    94-114, with 3 coordinates each.

    Output has 1198 columns per frame:
        [0:63]      dominant-hand x/y/z (21 landmarks)
        [63:113]    pose x/y (25 landmarks)
        [113:153]   first 20 lip landmarks x/y
        [153:306]   frame differences (this frame minus the next; last row is zeros)
        [306:516]   pairwise distances within the hand (210 pairs, 3-D)
        [516:816]   pairwise distances within the pose (300 pairs, 2-D)
        [816:1006]  pairwise distances within lip landmarks 0-19 (190 pairs, 2-D)
        [1006:1196] pairwise distances within lip landmarks 20-39 (190 pairs, 2-D)
        [1196]      hand mask: 1.0 if the frame's hand coordinates sum to non-zero, else 0.0
        [1197]      token type id: hand mask + 1
    """

    def __init__(self):
        super(FeatureGenPytorchV2, self).__init__()
        # Strict upper-triangular 0/1 masks (with a leading dim of 1) selecting each
        # unordered landmark pair once: 21 hand, 25 pose and 20 lip landmarks.
        self.htriu = torch.tensor([[0] * (bi + 1) + [1] * (20 - bi) for bi in range(21)], dtype = torch.float).unsqueeze(0)
        self.ptriu = torch.tensor([[0] * (bi + 1) + [1] * (24 - bi) for bi in range(25)], dtype = torch.float).unsqueeze(0)
        self.ltriu = torch.tensor([[0] * (bi + 1) + [1] * (19 - bi) for bi in range(20)], dtype = torch.float).unsqueeze(0)
        # Not read by forward(); kept because it is a public attribute of the module.
        self.lip_indices = [
            61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
            291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
            78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
            95, 88, 178, 87, 14, 317, 402, 318, 324, 308
            ]

    def _pairwise_upper_dists(self, points, triu):
        """Return the Euclidean distance of every landmark pair (i < j), row-major.

        Args:
            points: tensor of shape (frames, n_landmarks, n_coords).
            triu: one of the (1, n, n) upper-triangular masks built in __init__.

        Returns:
            Tensor of shape (frames, n * (n - 1) / 2).
        """
        diff = points.unsqueeze(2) - points.unsqueeze(1)
        # The "+ 1 ... - 1" round trip is retained from the original formulation
        # (which used it to tell kept zeros from masked zeros) so the values stay
        # numerically identical to it.
        dist = torch.sqrt((diff ** 2).sum(-1)) + 1
        # Index with the fixed mask rather than with `dist * mask != 0`: the row
        # width is then always n*(n-1)/2, whatever the data contains.
        return dist[:, triu[0].bool()] - 1

    def forward(self, x):
        """Build the feature matrix for one sample.

        Args:
            x: float tensor indexed (frame, landmark, coordinate); NaNs are treated as 0.

        Returns:
            Float tensor of shape (min(n_frames, 200), 1198).
        """
        # Truncating up front bounds the output at 200 rows; no frame is added or
        # removed afterwards, so no second slice is needed at the end.
        x = x[:200]
        x = torch.where(torch.isnan(x), torch.tensor(0.0, dtype=torch.float32), x)

        lefth_x = x[:, 40:61, :]
        righth_x = x[:, 94:, :]
        pose_x = x[:, 61:86, :]
        lip_x = x[:, :40, :]

        # The hand with more non-zero coordinates over the whole sample is used.
        cond = (lefth_x != 0).float().sum() > (righth_x != 0).float().sum()

        h_x = torch.where(cond, lefth_x, righth_x)
        xfeat = torch.where(
            cond,
            torch.cat([lefth_x, pose_x, lip_x], dim = 1),
            torch.cat([righth_x, pose_x, lip_x], dim = 1),
        )

        # When the left hand is used, negate the x coordinate of every landmark.
        xcoord = xfeat[:, :, :1]
        xfeat = torch.cat([torch.where(cond, -xcoord, xcoord), xfeat[:, :, 1:]], dim = -1)

        # Per-frame hand presence, emitted as two explicit float columns
        # (the original relied on torch.cat promoting bool / int64 to float).
        has_hand = h_x.reshape(h_x.shape[0], -1).sum(1) != 0
        hand_mask = has_hand.float()
        token_type_ids = hand_mask + 1.0

        # Frame-to-frame motion: current frame minus the next one, zero for the last frame.
        dxyz = torch.cat([xfeat[:-1] - xfeat[1:], xfeat.new_zeros(1, xfeat.shape[1], xfeat.shape[2])], dim = 0)

        hand = xfeat[:, :21, :3]
        pose = xfeat[:, 21:46, :2]
        olip = xfeat[:, 46:66, :2]
        ilip = xfeat[:, 66:86, :2]

        hdist = self._pairwise_upper_dists(hand, self.htriu)
        pdist = self._pairwise_upper_dists(pose, self.ptriu)
        oldist = self._pairwise_upper_dists(olip, self.ltriu)
        ildist = self._pairwise_upper_dists(ilip, self.ltriu)

        n_frames = xfeat.shape[0]
        xfeat = torch.cat([
            hand.reshape(n_frames, -1),
            pose.reshape(n_frames, -1),
            olip.reshape(n_frames, -1),
            dxyz[:, :21, :3].reshape(n_frames, -1),
            dxyz[:, 21:46, :2].reshape(n_frames, -1),
            dxyz[:, 46:66, :2].reshape(n_frames, -1),
            hdist.reshape(n_frames, -1),
            pdist.reshape(n_frames, -1),
            oldist.reshape(n_frames, -1),
            ildist.reshape(n_frames, -1),
            hand_mask.reshape(n_frames, -1),
            token_type_ids.reshape(n_frames, -1),
        ], dim = -1)

        return xfeat

feature_converter_pt_v2 = FeatureGenPytorchV2()

In [None]:
import tensorflow as tf
from tensorflow import keras

class FeatureGenKeras(keras.Model):
    """Keras counterpart of FeatureGenPytorch: landmark frames -> per-frame features.

    Input `x` is indexed as (frame, landmark, coordinate); the slices below expect
    115 landmarks laid out as lips 0-39, left hand 40-60, pose 61-85 and right hand
    94-114, with 3 coordinates each. The result has shape (1, n_kept_frames, 1196),
    with at most 100 frames.
    """

    def __init__(self):
        super(FeatureGenKeras, self).__init__()
        # Strict upper-triangular 0/1 masks selecting each unordered landmark pair
        # once: 21 hand, 25 pose and 20 lip landmarks.
        self.htriu = tf.constant([[0] * (bi + 1) + [1] * (20 - bi) for bi in range(21)], dtype = tf.float32)
        self.ptriu = tf.constant([[0] * (bi + 1) + [1] * (24 - bi) for bi in range(25)], dtype = tf.float32)
        self.ltriu = tf.constant([[0] * (bi + 1) + [1] * (19 - bi) for bi in range(20)], dtype = tf.float32)
        # Not read by call(); kept because it is a public attribute of the model.
        self.lip_indices = tf.constant([
            61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
            291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
            78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
            95, 88, 178, 87, 14, 317, 402, 318, 324, 308
            ])

    def _pairwise_upper_dists(self, points, triu, n_points, n_dims):
        """Return the pairwise distances of all landmark pairs (i < j) as a flat vector.

        Args:
            points: tensor of shape (frames, n_points, n_dims).
            triu: (n_points, n_points) strict upper-triangular 0/1 mask.
            n_points: number of landmarks in `points`.
            n_dims: number of coordinates per landmark.

        Returns:
            1-D tensor holding, frame after frame, the n_points*(n_points-1)/2
            distances in row-major pair order; the caller reshapes it per frame.
        """
        diff = tf.reshape(points, (-1, n_points, 1, n_dims)) - tf.reshape(points, (-1, 1, n_points, n_dims))
        # Adding 1 makes every kept entry non-zero, so that after multiplying by the
        # mask "!= 0" selects exactly the upper triangle; the 1 is removed afterwards.
        dist = tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1)) + 1
        dist = dist * triu
        flat = tf.reshape(dist, (-1, n_points * n_points))
        keep = tf.reshape(flat != 0, (tf.shape(dist)[0], n_points * n_points))
        return tf.boolean_mask(flat, keep) - 1

    def call(self, x):
        """Build the feature tensor for one sample.

        Args:
            x: float32 tensor indexed (frame, landmark, coordinate); NaNs are treated as 0.

        Returns:
            float32 tensor of shape (1, n_kept_frames, 1196), n_kept_frames <= 100.
        """
        x = tf.where(tf.math.is_nan(x), tf.constant(0.0, dtype=tf.float32), x)

        lefth_x = x[:,40:61,:]
        righth_x = x[:,94:,:]
        pose_x = x[:, 61:86, :]
        lip_x = x[:, :40, :]

        # The hand with more non-zero coordinates over the whole sample is used.
        lefth_sum = tf.reduce_sum(tf.cast(tf.not_equal(lefth_x, 0), dtype=tf.float32))
        righth_sum = tf.reduce_sum(tf.cast(tf.not_equal(righth_x, 0), dtype=tf.float32))
        cond = lefth_sum > righth_sum

        h_x = tf.where(cond, lefth_x, righth_x)
        xfeat = tf.where(cond, tf.concat([lefth_x, pose_x, lip_x], axis = 1), tf.concat([righth_x, pose_x, lip_x], axis = 1))

        # When the left hand is used, negate the x coordinate of every landmark.
        xfeat_xcoordi = xfeat[:, :, 0]
        xfeat_else = xfeat[:, :, 1:]
        xfeat_xcoordi = tf.where(cond, -xfeat_xcoordi, xfeat_xcoordi)
        xfeat = tf.concat([xfeat_xcoordi[:, :, tf.newaxis], xfeat_else], axis = -1)

        # Frames whose hand coordinates sum to zero are dropped.
        h_x = tf.reshape(h_x, (-1, 21 * 3))
        indices = tf.squeeze(tf.math.reduce_sum(h_x, axis=1) != 0)

        dynamic_size = tf.shape(h_x)[0]
        indices = tf.reshape(indices, (dynamic_size,))
        # If no frame contains a hand, keep every frame (same rule as the
        # `if indices.sum() != 0` guard in FeatureGenPytorch). Without this the mask
        # removes all frames and the final concat fails on 0 vs 1 rows.
        indices = tf.logical_or(indices, tf.logical_not(tf.reduce_any(indices)))

        xfeat = tf.boolean_mask(xfeat, indices)

        # Frame-to-frame motion: current frame minus the next one, zero for the last frame.
        dxyz = tf.concat([xfeat[:-1] - xfeat[1:], tf.zeros((1, xfeat.shape[1], xfeat.shape[2]))], axis = 0)

        hdist = self._pairwise_upper_dists(xfeat[:, :21, :3], self.htriu, 21, 3)
        pdist = self._pairwise_upper_dists(xfeat[:, 21:46, :2], self.ptriu, 25, 2)
        oldist = self._pairwise_upper_dists(xfeat[:, 46:66, :2], self.ltriu, 20, 2)
        ildist = self._pairwise_upper_dists(xfeat[:, 66:86, :2], self.ltriu, 20, 2)

        xfeat = tf.concat([
            tf.reshape(xfeat[:, :21, :3], [-1, 21 * 3]),
            tf.reshape(xfeat[:, 21:46, :2], [-1, 25 * 2]),
            tf.reshape(xfeat[:, 46:66, :2], [-1, 20 * 2]),
            tf.reshape(dxyz[:, :21, :3], [-1, 21 * 3]),
            tf.reshape(dxyz[:, 21:46, :2], [-1, 25 * 2]),
            tf.reshape(dxyz[:, 46:66, :2], [-1, 20 * 2]),
            tf.reshape(hdist, [-1, 210]),
            tf.reshape(pdist, [-1, 300]),
            tf.reshape(oldist, [-1, 190]),
            tf.reshape(ildist, [-1, 190]),
        ], axis=-1)

        # Keep at most 100 frames and add a leading dimension of size 1.
        xfeat = xfeat[:100]
        xfeat = tf.reshape(xfeat, (1, -1, 1196))

        return xfeat

feature_converter_kr = FeatureGenKeras()

In [None]:
import tensorflow as tf
from tensorflow import keras

class FeatureGenKerasV2(keras.Model):
    """Keras counterpart of FeatureGenPytorchV2: landmark frames -> per-frame features.

    No frame is dropped: the sample is cut to its first 200 frames and two extra
    columns (hand mask, hand mask + 1) flag whether each frame contains a hand.

    Input `x` is indexed as (frame, landmark, coordinate); the slices below expect
    115 landmarks laid out as lips 0-39, left hand 40-60, pose 61-85 and right hand
    94-114, with 3 coordinates each. The result has shape (1, n_frames, 1198).
    """

    def __init__(self):
        super(FeatureGenKerasV2, self).__init__()
        # Strict upper-triangular 0/1 masks selecting each unordered landmark pair
        # once: 21 hand, 25 pose and 20 lip landmarks.
        self.htriu = tf.constant([[0] * (bi + 1) + [1] * (20 - bi) for bi in range(21)], dtype = tf.float32)
        self.ptriu = tf.constant([[0] * (bi + 1) + [1] * (24 - bi) for bi in range(25)], dtype = tf.float32)
        self.ltriu = tf.constant([[0] * (bi + 1) + [1] * (19 - bi) for bi in range(20)], dtype = tf.float32)
        # Not read by call(); kept because it is a public attribute of the model.
        self.lip_indices = tf.constant([
            61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
            291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
            78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
            95, 88, 178, 87, 14, 317, 402, 318, 324, 308
            ])

    def _pairwise_upper_dists(self, points, triu, n_points, n_dims):
        """Return the pairwise distances of all landmark pairs (i < j) as a flat vector.

        Args:
            points: tensor of shape (frames, n_points, n_dims).
            triu: (n_points, n_points) strict upper-triangular 0/1 mask.
            n_points: number of landmarks in `points`.
            n_dims: number of coordinates per landmark.

        Returns:
            1-D tensor holding, frame after frame, the n_points*(n_points-1)/2
            distances in row-major pair order; the caller reshapes it per frame.
        """
        diff = tf.reshape(points, (-1, n_points, 1, n_dims)) - tf.reshape(points, (-1, 1, n_points, n_dims))
        # Adding 1 makes every kept entry non-zero, so that after multiplying by the
        # mask "!= 0" selects exactly the upper triangle; the 1 is removed afterwards.
        dist = tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1)) + 1
        dist = dist * triu
        flat = tf.reshape(dist, (-1, n_points * n_points))
        keep = tf.reshape(flat != 0, (tf.shape(dist)[0], n_points * n_points))
        return tf.boolean_mask(flat, keep) - 1

    def call(self, x):
        """Build the feature tensor for one sample.

        Args:
            x: float32 tensor indexed (frame, landmark, coordinate); NaNs are treated as 0.

        Returns:
            float32 tensor of shape (1, min(n_frames, 200), 1198).
        """
        # Truncating up front bounds the output at 200 frames; no frame is added or
        # removed afterwards, so no second slice is needed at the end.
        x = x[:200]
        x = tf.where(tf.math.is_nan(x), tf.constant(0.0, dtype=tf.float32), x)

        lefth_x = x[:,40:61,:]
        righth_x = x[:,94:,:]
        pose_x = x[:, 61:86, :]
        lip_x = x[:, :40, :]

        # The hand with more non-zero coordinates over the whole sample is used.
        lefth_sum = tf.reduce_sum(tf.cast(tf.not_equal(lefth_x, 0), dtype=tf.float32))
        righth_sum = tf.reduce_sum(tf.cast(tf.not_equal(righth_x, 0), dtype=tf.float32))
        cond = lefth_sum > righth_sum

        h_x = tf.where(cond, lefth_x, righth_x)
        xfeat = tf.where(cond, tf.concat([lefth_x, pose_x, lip_x], axis = 1), tf.concat([righth_x, pose_x, lip_x], axis = 1))

        # When the left hand is used, negate the x coordinate of every landmark.
        xfeat_xcoordi = xfeat[:, :, 0]
        xfeat_else = xfeat[:, :, 1:]
        xfeat_xcoordi = tf.where(cond, -xfeat_xcoordi, xfeat_xcoordi)
        xfeat = tf.concat([xfeat_xcoordi[:, :, tf.newaxis], xfeat_else], axis = -1)

        # Per-frame hand presence: 1.0 where the hand coordinates sum to non-zero.
        h_x = tf.reshape(h_x, (-1, 21 * 3))
        indices = tf.squeeze(tf.math.reduce_sum(h_x, axis=1) != 0)

        dynamic_size = tf.shape(h_x)[0]
        indices = tf.reshape(indices, (dynamic_size,))
        indices = tf.cast(indices, dtype = tf.float32)
        hand_mask = indices + 0.0
        token_type_ids = indices + 1.0

        # Frame-to-frame motion: current frame minus the next one, zero for the last frame.
        dxyz = tf.concat([xfeat[:-1] - xfeat[1:], tf.zeros((1, xfeat.shape[1], xfeat.shape[2]))], axis = 0)

        hdist = self._pairwise_upper_dists(xfeat[:, :21, :3], self.htriu, 21, 3)
        pdist = self._pairwise_upper_dists(xfeat[:, 21:46, :2], self.ptriu, 25, 2)
        oldist = self._pairwise_upper_dists(xfeat[:, 46:66, :2], self.ltriu, 20, 2)
        ildist = self._pairwise_upper_dists(xfeat[:, 66:86, :2], self.ltriu, 20, 2)

        xfeat = tf.concat([
            tf.reshape(xfeat[:, :21, :3], [-1, 21 * 3]),
            tf.reshape(xfeat[:, 21:46, :2], [-1, 25 * 2]),
            tf.reshape(xfeat[:, 46:66, :2], [-1, 20 * 2]),
            tf.reshape(dxyz[:, :21, :3], [-1, 21 * 3]),
            tf.reshape(dxyz[:, 21:46, :2], [-1, 25 * 2]),
            tf.reshape(dxyz[:, 46:66, :2], [-1, 20 * 2]),
            tf.reshape(hdist, [-1, 210]),
            tf.reshape(pdist, [-1, 300]),
            tf.reshape(oldist, [-1, 190]),
            tf.reshape(ildist, [-1, 190]),
            tf.reshape(hand_mask, [-1, 1]),
            tf.reshape(token_type_ids, [-1, 1])
        ], axis=-1)

        # Add a leading dimension of size 1.
        xfeat = tf.reshape(xfeat, (1, -1, 1198))

        return xfeat

feature_converter_kr_v2 = FeatureGenKerasV2()

In [None]:
# Smoke test: feed the same random input to all four converters and print the sum of
# each output plus the wall-clock time of the call, so the Keras and PyTorch versions
# can be compared pair by pair.
# NOTE(review): presumably 12 frames x 115 landmarks x 3 coordinates — confirm.
x = torch.randn(12, 115, 3).numpy()

# V1, Keras.
# NOTE(review): the first Keras call presumably includes one-time TensorFlow setup cost.
time_kr = time.time()
print(feature_converter_kr(x).numpy().sum())
print(time.time() - time_kr)

# V1, PyTorch.
time_pt = time.time()
print(feature_converter_pt(torch.Tensor(x)).numpy().sum())
print(time.time() - time_pt)

# V2, Keras.
time_kr = time.time()
print(feature_converter_kr_v2(x).numpy().sum())
print(time.time() - time_kr)

# V2, PyTorch.
time_pt = time.time()
print(feature_converter_pt_v2(torch.Tensor(x)).numpy().sum())
print(time.time() - time_pt)

20748.96
0.7069005966186523
20748.96
0.06843876838684082
20784.959
0.03371691703796387
20784.959
0.003103971481323242


## dataset

In [None]:
def cumulative_sum_tuples(lst):
  """Turn a sequence of lengths into consecutive (start, end) ranges.

  Example: [3, 2, 4] -> [(0, 3), (3, 5), (5, 9)].

  Args:
    lst: sequence of non-negative lengths (list or 1-D array).

  Returns:
    List with one (start, end) tuple per length, where each range starts at the
    end of the previous one. An empty input yields an empty list (the original
    read lst[0] before checking the length and raised IndexError).
  """
  result = []
  start = 0
  for length in lst:
    end = start + length
    result.append((start, end))
    start = end
  return result

def pad(self, x, max_frame):
  """Truncate or zero-pad `x` along its first dimension to `max_frame` rows.

  Args:
    self: unused; this is a module-level function whose signature still carries
      a `self` slot, so callers must pass a placeholder (e.g. None).
    x: 2-D tensor of shape (n_frames, n_features).
    max_frame: number of rows of the result.

  Returns:
    `x[:max_frame]` when x has more than `max_frame` rows; otherwise a new
    zero-initialised tensor of shape (max_frame, n_features) whose leading rows
    are copied from x.
  """
  if x.shape[0] > max_frame:
    return x[:max_frame]

  padded_x = torch.zeros(max_frame, x.shape[-1])
  padded_x[:x.shape[0]] = x
  return padded_x

# Re-load the per-sample frame counts (the same file is already read in the load-data cell)
# and convert them into consecutive (start, end) ranges; get_data uses these ranges to
# slice one sample out of the stacked `data` array.
frame = np.load('/content/drive/MyDrive/Kaggle/aggregation/frame.npy')
frame_index = cumulative_sum_tuples(frame.astype(int))
def get_data(df, version):
  """Build the feature matrix and one-hot label for a single sample.

  Args:
    df: one row holding exactly three values, in order: frame, label, original_index.
    version: model version tag; 'v1'/'v4'/'v5' use feature_converter_pt,
      'v2'/'v3' use feature_converter_pt_v2, any other tag leaves the raw
      landmark slice unconverted.

  Returns:
    (x, y) as NumPy arrays: the per-frame features and a one-hot vector of length 250.
  """
  _, label, original_index = df

  # Rows of this sample inside the module-level stacked `data` array.
  start_idx, end_idx = frame_index[int(original_index)]
  x = torch.tensor(data[start_idx:end_idx], dtype = torch.float)

  if version in ('v1', 'v4', 'v5'):
    x = feature_converter_pt(x)
  elif version in ('v2', 'v3'):
    x = feature_converter_pt_v2(x)

  return x.numpy(), tf.one_hot(int(label), 250).numpy()

def get_inputs(df, version):
  """Materialise zero-padded feature and label arrays for every row of `df`.

  Args:
    df: DataFrame whose rows are accepted by get_data (frame, label, original_index).
    version: model version tag; 'v1'/'v4'/'v5' produce (100, 1196) features per
      sample, 'v2'/'v3' produce (200, 1198).

  Returns:
    (inputs_x, inputs_y): float32 arrays of shape (len(df), max_frames, n_features)
    and (len(df), 250). Samples shorter than max_frames are right-padded with zeros.

  Raises:
    ValueError: if `version` is not one of the known tags (previously this surfaced
      as an UnboundLocalError on `inputs_x`).
  """
  if version in ('v1', 'v4', 'v5'):
    max_frames, n_features = 100, 1196
  elif version in ('v2', 'v3'):
    max_frames, n_features = 200, 1198
  else:
    raise ValueError(
        "Unknown version %r; expected one of 'v1', 'v2', 'v3', 'v4', 'v5'" % (version,))

  inputs_x = np.zeros((len(df), max_frames, n_features), dtype=np.float32)
  inputs_y = np.zeros((len(df), 250), dtype=np.float32)

  for i in tqdm(range(len(df))):
    x, y = get_data(df.iloc[i], version)
    # Only the first x.shape[0] frames are filled; the remainder stays zero padding.
    inputs_x[i, :x.shape[0]] = x
    inputs_y[i, :] = y
  return inputs_x, inputs_y

class Dataloader(Sequence):
  """Keras Sequence serving pre-computed feature/label batches held in memory.

  All samples are converted once in __init__ via get_inputs; batches are then
  plain array lookups.

  Args:
    args: config object (stored, not read by the methods below).
    df: DataFrame with one row per sample, as expected by get_inputs.
    data: stored on the instance but not read by the methods below (get_inputs
      goes through get_data, which reads the module-level `data`).
    batch_size: number of samples per batch.
    version: model version tag forwarded to get_inputs.
    shuffle: when True, the sample order is reshuffled at construction time and
      at the end of every epoch.
  """

  def __init__(self, args, df, data, batch_size, version, shuffle=False):
    super().__init__()
    self.args = args
    self.df = df
    self.data = data
    self.batch_size = batch_size
    self.version = version
    self.shuffle = shuffle

    self.x, self.y = get_inputs(df, version)

    # Build the (optionally shuffled) index order last. Previously the order set by
    # on_epoch_end() was overwritten with np.arange, so the first epoch was never
    # shuffled even with shuffle=True.
    self.on_epoch_end()

  def __getitem__(self, idx):
    """Return batch `idx` as a (features, labels) pair of tensors."""
    indices = self.indices[idx*self.batch_size:(idx+1)*self.batch_size]
    return tf.convert_to_tensor(self.x[indices]), tf.convert_to_tensor(self.y[indices])

  def on_epoch_end(self):
    """Reset the sample order, shuffling it when `shuffle` is enabled."""
    self.indices = np.arange(len(self.df))
    if self.shuffle == True:
      np.random.shuffle(self.indices)

  def __len__(self):
      """Number of batches per epoch (the last batch may be smaller)."""
      return math.ceil(len(self.df) / self.batch_size)

## model

In [None]:
from transformers import TFRobertaPreLayerNormModel, TFDebertaV2Model, TFGPT2Model, RobertaPreLayerNormConfig, DebertaV2Config, GPT2Config

class CustomModel(keras.Model):
    """Sequence classifier over 1196-dimensional per-frame features.

    Each feature group is embedded with its own Dense layer, the embeddings are
    fused, passed through a one-layer transformer encoder, pooled over time
    (mean / max / std over non-padded frames) and classified into 250 classes.

    Args:
        args: encoder name, either 'tfrobertaprelayernorm' or 'tfdebertav2'
            (despite the parameter name this is a string, not a config object).

    Raises:
        ValueError: if `args` is not a supported encoder name (previously this left
            `self.encoder` undefined and failed later inside call()).
    """

    def __init__(self, args):
        super(CustomModel, self).__init__()

        encoder_types = {
            'tfrobertaprelayernorm': (TFRobertaPreLayerNormModel, RobertaPreLayerNormConfig),
            'tfdebertav2': (TFDebertaV2Model, DebertaV2Config),
        }
        if args not in encoder_types:
            raise ValueError(
                "Unknown encoder %r; expected one of %s" % (args, sorted(encoder_types)))

        self.args = args
        self.hidden = 300

        # One embedding per feature group. The initializer is passed at construction
        # instead of being assigned as a string attribute afterwards.
        init = 'glorot_uniform'
        self.xy_embeddings = keras.layers.Dense(units=self.hidden, kernel_initializer=init, name="xy_embeddings")
        self.motion_embeddings = keras.layers.Dense(units=self.hidden, kernel_initializer=init, name="motion_embeddings")
        self.hdist_embeddings = keras.layers.Dense(units=self.hidden, kernel_initializer=init, name="hdist_embeddings")
        self.pdist_embeddings = keras.layers.Dense(units=self.hidden, kernel_initializer=init, name="pdist_embeddings")
        self.oldist_embeddings = keras.layers.Dense(units=self.hidden, kernel_initializer=init, name="oldist_embeddings")
        self.ildist_embeddings = keras.layers.Dense(units=self.hidden, kernel_initializer=init, name="ildist_embeddings")
        # A single stateless ReLU is shared by both places that need it.
        self.relu = keras.layers.ReLU()
        self.content_embeddings = keras.layers.Dense(units=self.hidden, kernel_initializer=init, name="content_embeddings")

        model_cls, config_cls = encoder_types[args]
        self.encoder = model_cls(
            config_cls(
                hidden_size = self.hidden,
                num_hidden_layers = 1,
                num_attention_heads = 4,
                intermediate_size = 900,
                hidden_act = 'relu',
                vocab_size = 3,
                ),
            name="encoder"
            )

        self.fc = keras.layers.Dense(units=1024, kernel_initializer=init, name="fc")
        self.bn = keras.layers.BatchNormalization(name="bn")
        self.drop = keras.layers.Dropout(rate=0.4, name="drop")

        self.out = keras.layers.Dense(units=250, activation='softmax', kernel_initializer=init, name="out")

    def get_att_mask(self, x):
        """Return a (batch, frames) float mask: 1.0 where a frame's features sum to non-zero."""
        att_mask = tf.math.reduce_sum(x, axis=-1)
        att_mask = tf.cast(tf.math.not_equal(att_mask, 0), tf.float32)
        return att_mask

    def get_pool(self, x, x_mask):
        """Pool encoder outputs over time using only unmasked frames.

        Args:
            x: (batch, frames, hidden) encoder output.
            x_mask: (batch, frames) float mask from get_att_mask.

        Returns:
            (batch, 3 * hidden) tensor: concatenated mean, max and std pooling.
        """
        x = x * tf.expand_dims(x_mask, axis=-1)  # zero out masked frames
        nonzero_count = tf.reduce_sum(x_mask, axis=1, keepdims=True)  # unmasked frames per sample
        # A sample with no unmasked frame would otherwise divide by zero and turn
        # the whole batch loss into NaN.
        nonzero_count = tf.maximum(nonzero_count, 1.0)
        # Large penalty subtracted at masked frames so they never win the max.
        max_discount = (1-x_mask)*1e10

        apool = tf.reduce_sum(x, axis=1) / nonzero_count
        mpool = tf.reduce_max(x - tf.expand_dims(max_discount, axis=-1), axis=1)
        spool = tf.sqrt((tf.reduce_sum(((x - tf.expand_dims(apool, axis=1)) ** 2) * tf.expand_dims(x_mask, axis=-1), axis=1) / nonzero_count) + 1e-9)
        return tf.concat([apool, mpool, spool], axis=-1)

    def call(self, x):
        """Classify a batch of feature sequences.

        Args:
            x: (batch, frames, 1196) float tensor; all-zero frames are treated as padding.

        Returns:
            (batch, 250) softmax probabilities.
        """
        x_mask = self.get_att_mask(x)

        # Column ranges follow the feature layout: coordinates, motion, then the
        # four pairwise-distance groups.
        xy = self.xy_embeddings(x[:, :, :153])
        motion = self.motion_embeddings(x[:, :, 153:306])
        dist = self.hdist_embeddings(x[:, :, 306:516])
        pdist = self.pdist_embeddings(x[:, :, 516:816])
        oldist = self.oldist_embeddings(x[:, :, 816:1006])
        ildist = self.ildist_embeddings(x[:, :, 1006:1196])

        x = tf.concat([xy, motion, dist, pdist, oldist, ildist], axis=-1)
        x = self.relu(x)
        x = self.content_embeddings(x)
        x = self.encoder(input_ids = None, inputs_embeds=x, attention_mask=x_mask).last_hidden_state

        x = self.get_pool(x, x_mask)

        x = self.fc(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.drop(x)

        x = self.out(x)
        return x

model = CustomModel('tfrobertaprelayernorm')

input_shape = (None, 1196)  # dynamic input shape

# Call the model once on a symbolic Input so its layers are built with a
# variable number of frames.
inputs = keras.layers.Input(shape=input_shape, name='input')
model(inputs)

<KerasTensor: shape=(None, 250) dtype=float32 (created by layer 'custom_model')>

In [None]:
from transformers import TFRobertaPreLayerNormModel, TFDebertaV2Model, RobertaPreLayerNormConfig, DebertaV2Config

class CustomModelV2(keras.Model):
    """Sequence classifier: six Dense projections -> transformer encoder -> masked pooling -> MLP head.

    `call` reads the first 1196 entries of the input's last axis as features,
    entry -2 as the pooling mask and entry -1 as the encoder's
    `token_type_ids`. The output is a 250-way softmax.

    Args:
        args: encoder selector, either 'tfrobertaprelayernorm' or
            'tfdebertav2'.

    Raises:
        ValueError: if `args` is not one of the supported encoder names
            (previously the model was created without an `encoder` attribute
            and only failed later, on the first call).
    """

    def __init__(self, args):
        super(CustomModelV2, self).__init__()

        self.args = args
        self.hidden = 300

        # One projection per feature group, all to `hidden` units.
        self.xy_embeddings = keras.layers.Dense(units=self.hidden, name="xy_embeddings")
        self.motion_embeddings = keras.layers.Dense(units=self.hidden, name="motion_embeddings")
        self.hdist_embeddings = keras.layers.Dense(units=self.hidden, name="hdist_embeddings")
        self.pdist_embeddings = keras.layers.Dense(units=self.hidden, name="pdist_embeddings")
        self.oldist_embeddings = keras.layers.Dense(units=self.hidden, name="oldist_embeddings")
        self.ildist_embeddings = keras.layers.Dense(units=self.hidden, name="ildist_embeddings")
        # Maps the concatenation of the six projections back to `hidden` units.
        self.content_embeddings = keras.layers.Dense(units=self.hidden, name="content_embeddings")

        # Both encoder families are configured identically (single layer, 4 heads).
        encoder_kwargs = dict(
            hidden_size=self.hidden,
            num_hidden_layers=1,
            num_attention_heads=4,
            intermediate_size=900,
            hidden_act='relu',
            vocab_size=3,
            type_vocab_size=3,
        )
        if args == 'tfrobertaprelayernorm':
            self.encoder = TFRobertaPreLayerNormModel(
                RobertaPreLayerNormConfig(**encoder_kwargs), name="encoder")
        elif args == 'tfdebertav2':
            self.encoder = TFDebertaV2Model(
                DebertaV2Config(**encoder_kwargs), name="encoder")
        else:
            raise ValueError(
                f"unsupported encoder {args!r}; expected 'tfrobertaprelayernorm' or 'tfdebertav2'")

        # Classification head. `relu` is shared by the embedding stage and the head.
        self.fc = keras.layers.Dense(units=1024, name="fc")
        self.bn = keras.layers.BatchNormalization(name="bn")
        self.relu = keras.layers.ReLU()
        self.drop = keras.layers.Dropout(rate=0.4, name="drop")

        self.out = keras.layers.Dense(units=250, activation='softmax', name="out")

        # Kept from the original: set the kernel initializer identifier on every
        # Dense layer before any of them is built.
        for layer in (
            self.xy_embeddings,
            self.motion_embeddings,
            self.hdist_embeddings,
            self.pdist_embeddings,
            self.oldist_embeddings,
            self.ildist_embeddings,
            self.content_embeddings,
            self.fc,
            self.out,
        ):
            layer.kernel_initializer = 'glorot_uniform'

    def get_att_mask(self, x):
        """Return 1.0 where the last-axis sum of `x` is non-zero, else 0.0."""
        att_mask = tf.math.reduce_sum(x, axis=-1)
        att_mask = tf.cast(tf.math.not_equal(att_mask, 0), tf.float32)
        return att_mask

    def get_pool(self, x, x_mask):
        """Pool `x` over axis 1 into concatenated mean / max / std of the kept steps.

        `x_mask` is a float (batch, step) mask; 1.0 keeps a step. A row whose
        mask is entirely zero yields 0 for mean and max and sqrt(1e-9) for
        std; previously it produced NaN (0/0) for mean/std and -1e10 for max.
        """
        step_mask = tf.expand_dims(x_mask, axis=-1)
        x = x * step_mask  # apply mask
        nonzero_count = tf.reduce_sum(x_mask, axis=1, keepdims=True)  # kept steps per row
        max_discount = (1 - x_mask) * 1e10  # pushes dropped steps far below real values

        # divide_no_nan equals `/` for non-zero counts and returns 0 for a zero count.
        apool = tf.math.divide_no_nan(tf.reduce_sum(x, axis=1), nonzero_count)
        mpool = tf.reduce_max(x - tf.expand_dims(max_discount, axis=-1), axis=1)
        mpool = tf.where(nonzero_count > 0, mpool, tf.zeros_like(mpool))
        squared_dev = ((x - tf.expand_dims(apool, axis=1)) ** 2) * step_mask
        spool = tf.sqrt(tf.math.divide_no_nan(tf.reduce_sum(squared_dev, axis=1), nonzero_count) + 1e-9)
        return tf.concat([apool, mpool, spool], axis=-1)

    def call(self, x):
        """Forward pass; see the class docstring for the input layout."""
        # The two trailing channels carry side information, not features.
        token_type_ids = tf.cast(x[:, :, -1], dtype = tf.int64)
        hand_mask = x[:, :, -2]  # used only as the pooling mask below
        x = x[:, :, :1196]

        # Attention mask comes from the feature columns alone.
        x_mask = self.get_att_mask(x)

        xy = self.xy_embeddings(x[:, :, :153])
        motion = self.motion_embeddings(x[:, :, 153:306])
        dist = self.hdist_embeddings(x[:, :, 306:516])
        pdist = self.pdist_embeddings(x[:, :, 516:816])
        oldist = self.oldist_embeddings(x[:, :, 816:1006])
        ildist = self.ildist_embeddings(x[:, :, 1006:1196])

        x = tf.concat([xy, motion, dist, pdist, oldist, ildist], axis=-1)
        x = self.relu(x)
        x = self.content_embeddings(x)
        x = self.encoder(input_ids = None, inputs_embeds=x, attention_mask=x_mask, token_type_ids = token_type_ids).last_hidden_state

        # Pooling uses the channel -2 mask rather than the attention mask.
        x = self.get_pool(x, hand_mask)

        x = self.fc(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.drop(x)

        x = self.out(x)
        return x


# Instantiate the v2 model with the 'tfrobertaprelayernorm' encoder option.
# This rebinds the notebook-level names `model`, `input_shape` and `inputs`.
model = CustomModelV2('tfrobertaprelayernorm')

input_shape = (None, 1198)  # first axis left unspecified (None); 1196 feature columns + 2 extra channels

# Call the model once on a symbolic Keras Input so that its layers are built;
# the value of this last expression is what the cell displays.
inputs = keras.layers.Input(shape=input_shape, name='input')
model(inputs)

<KerasTensor: shape=(None, 250) dtype=float32 (created by layer 'custom_model_v2')>

In [None]:
from transformers import TFRobertaPreLayerNormModel, TFDebertaV2Model, RobertaPreLayerNormConfig, DebertaV2Config

class CustomModelV3(keras.Model):
    """Sequence classifier: six Dense projections -> transformer encoder -> masked pooling -> MLP head.

    `call` reads the first 1196 entries of the input's last axis as features
    and entry -1 as the encoder's `token_type_ids`. Unlike CustomModelV2 it
    pools with the attention mask derived from the features. The output is a
    250-way softmax.

    Args:
        args: encoder selector, either 'tfrobertaprelayernorm' or
            'tfdebertav2'.

    Raises:
        ValueError: if `args` is not one of the supported encoder names
            (previously the model was created without an `encoder` attribute
            and only failed later, on the first call).
    """

    def __init__(self, args):
        super(CustomModelV3, self).__init__()

        self.args = args
        self.hidden = 300

        # One projection per feature group, all to `hidden` units.
        self.xy_embeddings = keras.layers.Dense(units=self.hidden, name="xy_embeddings")
        self.motion_embeddings = keras.layers.Dense(units=self.hidden, name="motion_embeddings")
        self.hdist_embeddings = keras.layers.Dense(units=self.hidden, name="hdist_embeddings")
        self.pdist_embeddings = keras.layers.Dense(units=self.hidden, name="pdist_embeddings")
        self.oldist_embeddings = keras.layers.Dense(units=self.hidden, name="oldist_embeddings")
        self.ildist_embeddings = keras.layers.Dense(units=self.hidden, name="ildist_embeddings")
        # Maps the concatenation of the six projections back to `hidden` units.
        self.content_embeddings = keras.layers.Dense(units=self.hidden, name="content_embeddings")

        # Both encoder families are configured identically (single layer, 4 heads).
        encoder_kwargs = dict(
            hidden_size=self.hidden,
            num_hidden_layers=1,
            num_attention_heads=4,
            intermediate_size=900,
            hidden_act='relu',
            vocab_size=3,
            type_vocab_size=3,
        )
        if args == 'tfrobertaprelayernorm':
            self.encoder = TFRobertaPreLayerNormModel(
                RobertaPreLayerNormConfig(**encoder_kwargs), name="encoder")
        elif args == 'tfdebertav2':
            self.encoder = TFDebertaV2Model(
                DebertaV2Config(**encoder_kwargs), name="encoder")
        else:
            raise ValueError(
                f"unsupported encoder {args!r}; expected 'tfrobertaprelayernorm' or 'tfdebertav2'")

        # Classification head. `relu` is shared by the embedding stage and the head.
        self.fc = keras.layers.Dense(units=1024, name="fc")
        self.bn = keras.layers.BatchNormalization(name="bn")
        self.relu = keras.layers.ReLU()
        self.drop = keras.layers.Dropout(rate=0.4, name="drop")

        self.out = keras.layers.Dense(units=250, activation='softmax', name="out")

        # Kept from the original: set the kernel initializer identifier on every
        # Dense layer before any of them is built.
        for layer in (
            self.xy_embeddings,
            self.motion_embeddings,
            self.hdist_embeddings,
            self.pdist_embeddings,
            self.oldist_embeddings,
            self.ildist_embeddings,
            self.content_embeddings,
            self.fc,
            self.out,
        ):
            layer.kernel_initializer = 'glorot_uniform'

    def get_att_mask(self, x):
        """Return 1.0 where the last-axis sum of `x` is non-zero, else 0.0."""
        att_mask = tf.math.reduce_sum(x, axis=-1)
        att_mask = tf.cast(tf.math.not_equal(att_mask, 0), tf.float32)
        return att_mask

    def get_pool(self, x, x_mask):
        """Pool `x` over axis 1 into concatenated mean / max / std of the kept steps.

        `x_mask` is a float (batch, step) mask; 1.0 keeps a step. A row whose
        mask is entirely zero yields 0 for mean and max and sqrt(1e-9) for
        std; previously it produced NaN (0/0) for mean/std and -1e10 for max.
        """
        step_mask = tf.expand_dims(x_mask, axis=-1)
        x = x * step_mask  # apply mask
        nonzero_count = tf.reduce_sum(x_mask, axis=1, keepdims=True)  # kept steps per row
        max_discount = (1 - x_mask) * 1e10  # pushes dropped steps far below real values

        # divide_no_nan equals `/` for non-zero counts and returns 0 for a zero count.
        apool = tf.math.divide_no_nan(tf.reduce_sum(x, axis=1), nonzero_count)
        mpool = tf.reduce_max(x - tf.expand_dims(max_discount, axis=-1), axis=1)
        mpool = tf.where(nonzero_count > 0, mpool, tf.zeros_like(mpool))
        squared_dev = ((x - tf.expand_dims(apool, axis=1)) ** 2) * step_mask
        spool = tf.sqrt(tf.math.divide_no_nan(tf.reduce_sum(squared_dev, axis=1), nonzero_count) + 1e-9)
        return tf.concat([apool, mpool, spool], axis=-1)

    def call(self, x):
        """Forward pass; see the class docstring for the input layout."""
        # The last channel carries the token type ids; channel -2 is not read
        # in this version.
        token_type_ids = tf.cast(x[:, :, -1], dtype = tf.int64)
        x = x[:, :, :1196]

        # Attention mask comes from the feature columns alone.
        x_mask = self.get_att_mask(x)

        xy = self.xy_embeddings(x[:, :, :153])
        motion = self.motion_embeddings(x[:, :, 153:306])
        dist = self.hdist_embeddings(x[:, :, 306:516])
        pdist = self.pdist_embeddings(x[:, :, 516:816])
        oldist = self.oldist_embeddings(x[:, :, 816:1006])
        ildist = self.ildist_embeddings(x[:, :, 1006:1196])

        x = tf.concat([xy, motion, dist, pdist, oldist, ildist], axis=-1)
        x = self.relu(x)
        x = self.content_embeddings(x)
        x = self.encoder(input_ids = None, inputs_embeds=x, attention_mask=x_mask, token_type_ids = token_type_ids).last_hidden_state

        x = self.get_pool(x, x_mask)

        x = self.fc(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.drop(x)

        x = self.out(x)
        return x


# Instantiate the v3 model with the 'tfrobertaprelayernorm' encoder option.
# This rebinds the notebook-level names `model`, `input_shape` and `inputs`.
model = CustomModelV3('tfrobertaprelayernorm')

input_shape = (None, 1198)  # first axis left unspecified (None); 1196 feature columns + 2 extra channels

# Call the model once on a symbolic Keras Input so that its layers are built;
# the value of this last expression is what the cell displays.
inputs = keras.layers.Input(shape=input_shape, name='input')
model(inputs)

<KerasTensor: shape=(None, 250) dtype=float32 (created by layer 'custom_model_v3')>

In [None]:
from transformers import TFRobertaPreLayerNormModel, TFDebertaV2Model, TFGPT2Model, RobertaPreLayerNormConfig, DebertaV2Config, GPT2Config

class CustomModelV4(keras.Model):
    """Sequence classifier: six Dense projections -> Dense "encoder" -> masked pooling -> MLP head.

    `call` reads 1196 entries on the input's last axis. The encoder is a
    single Dense layer applied to every step, so steps are only combined by
    the pooling stage. The output is a 250-way softmax.

    Args:
        args: encoder selector; only 'mlp' is supported.

    Raises:
        ValueError: if `args` is not 'mlp' (previously this surfaced as an
            AttributeError on the missing `encoder` attribute).
    """

    def __init__(self, args):
        super(CustomModelV4, self).__init__()

        self.args = args
        self.hidden = 512

        # One projection per feature group, all to `hidden` units.
        self.xy_embeddings = keras.layers.Dense(units=self.hidden, name="xy_embeddings")
        self.motion_embeddings = keras.layers.Dense(units=self.hidden, name="motion_embeddings")
        self.hdist_embeddings = keras.layers.Dense(units=self.hidden, name="hdist_embeddings")
        self.pdist_embeddings = keras.layers.Dense(units=self.hidden, name="pdist_embeddings")
        self.oldist_embeddings = keras.layers.Dense(units=self.hidden, name="oldist_embeddings")
        self.ildist_embeddings = keras.layers.Dense(units=self.hidden, name="ildist_embeddings")
        # Maps the concatenation of the six projections back to `hidden` units.
        self.content_embeddings = keras.layers.Dense(units=self.hidden, name="content_embeddings")

        if args == 'mlp':
            self.encoder = keras.layers.Dense(units=self.hidden, name="encoder")
        else:
            raise ValueError(f"unsupported encoder {args!r}; expected 'mlp'")

        # Classification head. `relu` is shared by the embedding stage and the head.
        self.fc = keras.layers.Dense(units=1024, name="fc")
        self.bn = keras.layers.BatchNormalization(name="bn")
        self.relu = keras.layers.ReLU()
        self.drop = keras.layers.Dropout(rate=0.4, name="drop")

        self.out = keras.layers.Dense(units=250, activation='softmax', name="out")

        # Kept from the original: set the kernel initializer identifier on every
        # Dense layer before any of them is built.
        for layer in (
            self.xy_embeddings,
            self.motion_embeddings,
            self.hdist_embeddings,
            self.pdist_embeddings,
            self.oldist_embeddings,
            self.ildist_embeddings,
            self.content_embeddings,
            self.encoder,
            self.fc,
            self.out,
        ):
            layer.kernel_initializer = 'glorot_uniform'

    def get_att_mask(self, x):
        """Return 1.0 where the last-axis sum of `x` is non-zero, else 0.0."""
        att_mask = tf.math.reduce_sum(x, axis=-1)
        att_mask = tf.cast(tf.math.not_equal(att_mask, 0), tf.float32)
        return att_mask

    def get_pool(self, x, x_mask):
        """Pool `x` over axis 1 into concatenated mean / max / std of the kept steps.

        `x_mask` is a float (batch, step) mask; 1.0 keeps a step. A row whose
        mask is entirely zero yields 0 for mean and max and sqrt(1e-9) for
        std; previously it produced NaN (0/0) for mean/std and -1e10 for max.
        """
        step_mask = tf.expand_dims(x_mask, axis=-1)
        x = x * step_mask  # apply mask
        nonzero_count = tf.reduce_sum(x_mask, axis=1, keepdims=True)  # kept steps per row
        max_discount = (1 - x_mask) * 1e10  # pushes dropped steps far below real values

        # divide_no_nan equals `/` for non-zero counts and returns 0 for a zero count.
        apool = tf.math.divide_no_nan(tf.reduce_sum(x, axis=1), nonzero_count)
        mpool = tf.reduce_max(x - tf.expand_dims(max_discount, axis=-1), axis=1)
        mpool = tf.where(nonzero_count > 0, mpool, tf.zeros_like(mpool))
        squared_dev = ((x - tf.expand_dims(apool, axis=1)) ** 2) * step_mask
        spool = tf.sqrt(tf.math.divide_no_nan(tf.reduce_sum(squared_dev, axis=1), nonzero_count) + 1e-9)
        return tf.concat([apool, mpool, spool], axis=-1)

    def call(self, x):
        """Forward pass over an input with 1196 entries on its last axis."""
        # The mask is only used for pooling; the Dense encoder sees every step.
        x_mask = self.get_att_mask(x)

        xy = self.xy_embeddings(x[:, :, :153])
        motion = self.motion_embeddings(x[:, :, 153:306])
        dist = self.hdist_embeddings(x[:, :, 306:516])
        pdist = self.pdist_embeddings(x[:, :, 516:816])
        oldist = self.oldist_embeddings(x[:, :, 816:1006])
        ildist = self.ildist_embeddings(x[:, :, 1006:1196])

        x = tf.concat([xy, motion, dist, pdist, oldist, ildist], axis=-1)
        x = self.relu(x)
        x = self.content_embeddings(x)
        x = self.relu(x)
        x = self.encoder(x)

        x = self.get_pool(x, x_mask)

        x = self.fc(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.drop(x)

        x = self.out(x)
        return x

# Instantiate the v4 model with the 'mlp' encoder option.
# This rebinds the notebook-level names `model`, `input_shape` and `inputs`.
model = CustomModelV4('mlp')

input_shape = (None, 1196)  # first axis left unspecified (None); 1196 values on the last axis

# Call the model once on a symbolic Keras Input so that its layers are built;
# the value of this last expression is what the cell displays.
inputs = keras.layers.Input(shape=input_shape, name='input')
model(inputs)

<KerasTensor: shape=(None, 250) dtype=float32 (created by layer 'custom_model_v4')>

In [None]:
from transformers import TFRobertaPreLayerNormModel, TFDebertaV2Model, TFGPT2Model, RobertaPreLayerNormConfig, DebertaV2Config, GPT2Config

class CustomModelV5(keras.Model):
    """Sequence classifier: six Dense projections -> GRU encoder -> masked pooling -> MLP head.

    `call` reads 1196 entries on the input's last axis. The GRU returns its
    full output sequence, which is then pooled with the feature-derived mask.
    The output is a 250-way softmax.

    Args:
        args: encoder selector; only 'gru' is supported.

    Raises:
        ValueError: if `args` is not 'gru' (previously the model was created
            without an `encoder` attribute and only failed on the first call).
    """

    def __init__(self, args):
        super(CustomModelV5, self).__init__()

        self.args = args
        self.hidden = 384

        # One projection per feature group, all to `hidden` units.
        self.xy_embeddings = keras.layers.Dense(units=self.hidden, name="xy_embeddings")
        self.motion_embeddings = keras.layers.Dense(units=self.hidden, name="motion_embeddings")
        self.hdist_embeddings = keras.layers.Dense(units=self.hidden, name="hdist_embeddings")
        self.pdist_embeddings = keras.layers.Dense(units=self.hidden, name="pdist_embeddings")
        self.oldist_embeddings = keras.layers.Dense(units=self.hidden, name="oldist_embeddings")
        self.ildist_embeddings = keras.layers.Dense(units=self.hidden, name="ildist_embeddings")
        # Maps the concatenation of the six projections back to `hidden` units.
        self.content_embeddings = keras.layers.Dense(units=self.hidden, name="content_embeddings")

        if args == 'gru':
            # return_state=True makes the layer return (sequence, final_state).
            self.encoder = keras.layers.GRU(self.hidden, return_sequences=True, return_state=True)
        else:
            raise ValueError(f"unsupported encoder {args!r}; expected 'gru'")

        # Classification head. `relu` is shared by the embedding stage and the head.
        self.fc = keras.layers.Dense(units=1024, name="fc")
        self.bn = keras.layers.BatchNormalization(name="bn")
        self.relu = keras.layers.ReLU()
        self.drop = keras.layers.Dropout(rate=0.4, name="drop")

        self.out = keras.layers.Dense(units=250, activation='softmax', name="out")

        # Kept from the original: set the kernel initializer identifier on every
        # Dense layer before any of them is built (the GRU is left untouched).
        for layer in (
            self.xy_embeddings,
            self.motion_embeddings,
            self.hdist_embeddings,
            self.pdist_embeddings,
            self.oldist_embeddings,
            self.ildist_embeddings,
            self.content_embeddings,
            self.fc,
            self.out,
        ):
            layer.kernel_initializer = 'glorot_uniform'

    def get_att_mask(self, x):
        """Return 1.0 where the last-axis sum of `x` is non-zero, else 0.0."""
        att_mask = tf.math.reduce_sum(x, axis=-1)
        att_mask = tf.cast(tf.math.not_equal(att_mask, 0), tf.float32)
        return att_mask

    def get_pool(self, x, x_mask):
        """Pool `x` over axis 1 into concatenated mean / max / std of the kept steps.

        `x_mask` is a float (batch, step) mask; 1.0 keeps a step. A row whose
        mask is entirely zero yields 0 for mean and max and sqrt(1e-9) for
        std; previously it produced NaN (0/0) for mean/std and -1e10 for max.
        """
        step_mask = tf.expand_dims(x_mask, axis=-1)
        x = x * step_mask  # apply mask
        nonzero_count = tf.reduce_sum(x_mask, axis=1, keepdims=True)  # kept steps per row
        max_discount = (1 - x_mask) * 1e10  # pushes dropped steps far below real values

        # divide_no_nan equals `/` for non-zero counts and returns 0 for a zero count.
        apool = tf.math.divide_no_nan(tf.reduce_sum(x, axis=1), nonzero_count)
        mpool = tf.reduce_max(x - tf.expand_dims(max_discount, axis=-1), axis=1)
        mpool = tf.where(nonzero_count > 0, mpool, tf.zeros_like(mpool))
        squared_dev = ((x - tf.expand_dims(apool, axis=1)) ** 2) * step_mask
        spool = tf.sqrt(tf.math.divide_no_nan(tf.reduce_sum(squared_dev, axis=1), nonzero_count) + 1e-9)
        return tf.concat([apool, mpool, spool], axis=-1)

    def call(self, x):
        """Forward pass over an input with 1196 entries on its last axis."""
        # The mask is only used for pooling; it is not passed to the GRU.
        x_mask = self.get_att_mask(x)

        xy = self.xy_embeddings(x[:, :, :153])
        motion = self.motion_embeddings(x[:, :, 153:306])
        dist = self.hdist_embeddings(x[:, :, 306:516])
        pdist = self.pdist_embeddings(x[:, :, 516:816])
        oldist = self.oldist_embeddings(x[:, :, 816:1006])
        ildist = self.ildist_embeddings(x[:, :, 1006:1196])

        x = tf.concat([xy, motion, dist, pdist, oldist, ildist], axis=-1)
        x = self.relu(x)
        x = self.content_embeddings(x)
        x = self.relu(x)
        x, _ = self.encoder(x)  # keep the output sequence, drop the final state

        x = self.get_pool(x, x_mask)

        x = self.fc(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.drop(x)

        x = self.out(x)
        return x

# Instantiate the v5 model with the 'gru' encoder option.
# This rebinds the notebook-level names `model`, `input_shape` and `inputs`.
model = CustomModelV5('gru')

input_shape = (None, 1196)  # first axis left unspecified (None); 1196 values on the last axis

# Call the model once on a symbolic Keras Input so that its layers are built;
# the value of this last expression is what the cell displays.
inputs = keras.layers.Input(shape=input_shape, name='input')
model(inputs)

<KerasTensor: shape=(None, 250) dtype=float32 (created by layer 'custom_model_v5')>

## scheduler

In [None]:
# Disabled alternative: a TensorFlow-op version of the warmup + cosine-decay
# schedule, wrapped as a keras LearningRateSchedule. It is kept inside a bare
# string literal so none of it executes; the trailing `pass` is the only
# statement this cell runs.
'''def lr_warmup_cosine_decay(global_step,
                           warmup_steps,
                           hold = 0,
                           total_steps=0,
                           start_lr=0.0,
                           target_lr=1e-3):
    # Cosine decay
    # There is no tf.pi so we wrap np.pi as a TF constant
    learning_rate = 0.5 * target_lr * (1 + tf.cos(tf.constant(np.pi) * (global_step - warmup_steps - hold) / float(total_steps - warmup_steps - hold)))

    # Target LR * progress of warmup (=1 at the final warmup step)
    warmup_lr = target_lr * (global_step / warmup_steps)

    # Choose between `warmup_lr`, `target_lr` and `learning_rate` based on whether `global_step < warmup_steps` and we're still holding.
    # i.e. warm up if we're still warming up and use cosine decayed lr otherwise
    if hold > 0:
        learning_rate = tf.where(global_step > warmup_steps + hold,
                                 learning_rate, target_lr)
    
    learning_rate = tf.where(global_step < warmup_steps, warmup_lr, learning_rate)
    return learning_rate

class WarmUpCosineDecay(keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, start_lr, target_lr, warmup_steps, total_steps, hold):
        super().__init__()
        self.start_lr = start_lr
        self.target_lr = target_lr
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.hold = hold

    def __call__(self, step):
        lr = lr_warmup_cosine_decay(global_step=step,
                                    total_steps=self.total_steps,
                                    warmup_steps=self.warmup_steps,
                                    start_lr=self.start_lr,
                                    target_lr=self.target_lr,
                                    hold=self.hold)

        return tf.where(
            step > self.total_steps, 0.0, lr, name="learning_rate"
        )'''

pass

In [None]:
def lr_warmup_cosine_decay(global_step,
                           warmup_steps,
                           hold = 0,
                           total_steps=0,
                           start_lr=0.0,
                           target_lr=1e-3):
    """Learning rate for linear warmup, optional hold, then cosine decay.

    Phases, by `global_step`:
      * `[0, warmup_steps)`: linear ramp from `start_lr` to `target_lr`.
      * `[warmup_steps, warmup_steps + hold]`: constant `target_lr`.
      * up to `total_steps`: half-cosine from `target_lr` down to 0.
      * beyond `total_steps`: stays at 0 (the cosine is not allowed to wrap
        around and rise again).

    Args:
        global_step: current step; a number or a numpy array of steps.
        warmup_steps: length of the warmup ramp; 0 disables warmup.
        hold: number of steps to stay at `target_lr` after warmup.
        total_steps: step at which the decay reaches 0. If it leaves no room
            for a decay phase (`total_steps <= warmup_steps + hold`), the rate
            stays at `target_lr` after warmup.
        start_lr: rate at step 0 of the warmup ramp. With the default 0.0 the
            ramp equals `target_lr * global_step / warmup_steps`.
        target_lr: peak learning rate.

    Returns:
        numpy array (0-d for a scalar step) holding the learning rate.
    """
    step = np.asarray(global_step, dtype=np.float64)
    decay_start = warmup_steps + hold
    decay_steps = total_steps - decay_start

    if decay_steps > 0:
        # Clamping progress to [0, 1] yields target_lr before the decay starts
        # (which covers the hold phase) and 0 once total_steps is passed.
        progress = np.clip((step - decay_start) / float(decay_steps), 0.0, 1.0)
        learning_rate = 0.5 * target_lr * (1 + np.cos(np.pi * progress))
    else:
        # No decay window: avoid dividing by zero and stay at the peak rate.
        learning_rate = np.full_like(step, target_lr)

    if warmup_steps > 0:
        warmup_lr = start_lr + (target_lr - start_lr) * (step / warmup_steps)
    else:
        # Never selected below (step < 0 is false); avoids dividing by zero.
        warmup_lr = learning_rate

    return np.where(step < warmup_steps, warmup_lr, learning_rate)

class WarmupCosineDecay(keras.callbacks.Callback):
    """Keras callback that sets the optimizer learning rate before every batch.

    The rate comes from `lr_warmup_cosine_decay`, evaluated at an internal
    batch counter that starts at 0 and is incremented after each batch. The
    rate actually in effect for each batch is recorded in `self.lrs`.
    """

    def __init__(self, total_steps=0, warmup_steps=0, start_lr=0.0, target_lr=1e-3, hold=0):
        super().__init__()
        # Schedule parameters, forwarded unchanged to lr_warmup_cosine_decay.
        self.total_steps = total_steps
        self.warmup_steps = warmup_steps
        self.start_lr = start_lr
        self.target_lr = target_lr
        self.hold = hold
        # Running state.
        self.global_step = 0
        self.lrs = []

    def on_batch_begin(self, batch, logs=None):
        """Compute the rate for the current step and write it to the optimizer."""
        schedule_args = dict(
            global_step=self.global_step,
            total_steps=self.total_steps,
            warmup_steps=self.warmup_steps,
            start_lr=self.start_lr,
            target_lr=self.target_lr,
            hold=self.hold,
        )
        K.set_value(self.model.optimizer.lr, lr_warmup_cosine_decay(**schedule_args))

    def on_batch_end(self, batch, logs=None):
        """Advance the step counter and record the rate the optimizer holds."""
        self.global_step += 1
        self.lrs.append(self.model.optimizer.lr.numpy())

class TrainLoggerCallback(tf.keras.callbacks.Callback):
    """Keras callback that appends training progress to a plain-text log file.

    Writes a start marker when training begins and one summary line per epoch
    with the current learning rate and the loss / accuracy values from `logs`.

    Args:
        log_file: path of the text file to append to.
        model: the model being trained; stored on the callback so the
            optimizer's learning rate can be read.
    """

    def __init__(self, log_file, model):
        super().__init__()
        self.log_file = log_file
        self.model = model

    def on_train_begin(self, logs=None):
        """Append the start-of-training marker."""
        with open(self.log_file, 'a+') as f:
            f.write("train start! \n")

    def on_epoch_end(self, epoch, logs=None):
        """Append one summary line for the finished epoch.

        Metrics missing from `logs` (for example when no validation data is
        used) are written as None instead of raising KeyError and aborting
        the run; `logs=None` is treated as an empty dict.
        """
        logs = logs or {}
        lr = self.model.optimizer.lr.numpy()
        # The "val_loss" field reports the 'val_categorical_crossentropy'
        # metric, not the compiled training loss.
        line = (
            f"epoch : {epoch+1}, lr : {lr}, "
            f"loss : {logs.get('loss')}, accuracy : {logs.get('accuracy')}, "
            f"val_loss : {logs.get('val_categorical_crossentropy')}, "
            f"val_accuracy : {logs.get('val_accuracy')}\n"
        )
        with open(self.log_file, 'a') as f:
            f.write(line)


## train

In [None]:
def train(train_df, val_df, seed, path, model_name, version, epoch):
    """Train one model variant and write checkpoints and a text log under `path`.

    Besides its arguments this function reads the notebook-level names `args`
    and `data`, which are passed to `Dataloader`.

    Args:
        train_df: training rows; passed to `Dataloader`, and its length sets
            the schedule length.
        val_df: validation rows; passed to `Dataloader`.
        seed: value handed to `seed_everything`.
        path: output directory; receives `log.txt` and a `weights/` folder.
            Works with or without a trailing slash.
        model_name: encoder selector forwarded to the model constructor.
        version: one of 'v1'..'v5'; selects the model class and input width.
        epoch: number of training epochs.

    Raises:
        ValueError: if `version` is not one of the known versions. This is
            checked before any data is loaded; previously it surfaced only
            after the loaders were built, as an unbound-variable error.
    """
    # version -> (model factory, width of the input's last axis). The factories
    # are lambdas so only the requested class needs to exist in the kernel.
    model_builders = {
        'v1': (lambda: CustomModel(model_name), 1196),
        'v2': (lambda: CustomModelV2(model_name), 1198),
        'v3': (lambda: CustomModelV3(model_name), 1198),
        'v4': (lambda: CustomModelV4(model_name), 1196),
        'v5': (lambda: CustomModelV5(model_name), 1196),
    }
    if version not in model_builders:
        raise ValueError(
            f"unknown version {version!r}; expected one of {sorted(model_builders)}")

    seed_everything(seed)
    print('number of train data : ', len(train_df))
    print('number of val data : ', len(val_df))
    print('seed : ', seed)

    num_train_data = len(train_df)
    batch_size = 128
    num_epochs = epoch
    warmup_ratio = 0.2
    lr = 1e-3
    smoothing = 0.75
    # os.path.join instead of string concatenation, so a `path` without a
    # trailing slash still points inside the output directory.
    log_file = os.path.join(path, "log.txt")

    train_loader = Dataloader(args, train_df, data, batch_size, version, shuffle=True)
    val_loader = Dataloader(args, val_df, data, batch_size, version, shuffle=True)

    os.makedirs(path, exist_ok=True)

    # Weights are saved after every epoch (save_best_only is not set); the
    # epoch number and validation accuracy are embedded in the file name.
    weights_name = "weights/epoch_{epoch:02d}-val_acc_{val_accuracy:.4f}.h5"
    checkpoint = ModelCheckpoint(os.path.join(path, weights_name),
                                 monitor='val_accuracy',
                                 verbose=1,
                                 save_weights_only=True,
                                 mode='max')

    build_model, feature_dim = model_builders[version]
    model = build_model()

    # Call once on a symbolic input so the layers are built before compile/fit.
    inputs = keras.layers.Input(shape=(None, feature_dim), name='input')
    model(inputs)

    # NOTE(review): assumes the loader yields len(train_df) // batch_size batches
    # per epoch - confirm against Dataloader.__len__.
    total_steps = (num_train_data // batch_size) * num_epochs
    warmup_steps = int(warmup_ratio*total_steps)
    print('total_steps: ', total_steps)
    print('warmup_steps: ', warmup_steps)

    callback = WarmupCosineDecay(total_steps=total_steps,
                                 warmup_steps=warmup_steps,
                                 hold=0,
                                 start_lr=0.0,
                                 target_lr=lr)

    logger = TrainLoggerCallback(log_file, model)

    optimizer = AdamW(learning_rate=lr)
    # Training optimizes the label-smoothed loss; the unsmoothed cross-entropy
    # is tracked as an extra metric.
    loss = CategoricalCrossentropy(label_smoothing=smoothing, from_logits=False)
    val_loss = CategoricalCrossentropy(label_smoothing=0.0, from_logits=False)

    model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy', val_loss]
    )

    model.fit(
        train_loader,
        validation_data = val_loader,
        epochs = num_epochs,
        batch_size = batch_size,
        workers = 12,
        verbose = 1,
        callbacks = [checkpoint, callback, logger]
    )

    # Drop the model reference and collect garbage before the next run.
    del model
    gc.collect()


## run

In [None]:
!nvidia-smi

Mon May  1 11:11:30 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    48W / 400W |    817MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Version 1: train the v1 architecture twice, once per encoder name
# ('tfrobertaprelayernorm', then 'tfdebertav2'), 40 epochs each, seed 1.
# Training uses all of `df`; validation is the first 1000 rows of folds[1][1].
# NOTE(review): if the folds are drawn from `df`, these validation rows are
# also in the training set, so val metrics are not held-out - confirm.
folds, df = preprocess(args)
i = 1
train_df, val_df = df, folds[i][1][:1000]
epoch = 40

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/version-ensemble/keras-robertaprelm-ls0.75-adamw-divide-head-emb300-v1/', 
      model_name = 'tfrobertaprelayernorm',
      version = 'v1',
      epoch = epoch)

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/version-ensemble/keras-debertav2-ls0.75-adamw-divide-head-emb300-v1/', 
      model_name = 'tfdebertav2',
      version = 'v1',
      epoch = epoch)

In [None]:
# Version 2: train the v2 architecture twice, once per encoder name
# ('tfrobertaprelayernorm', then 'tfdebertav2'), 40 epochs each, seed 1.
# Training uses all of `df`; validation is the first 1000 rows of folds[1][1].
# NOTE(review): if the folds are drawn from `df`, these validation rows are
# also in the training set, so val metrics are not held-out - confirm.
folds, df = preprocess(args)
i = 1
train_df, val_df = df, folds[i][1][:1000]
epoch = 40

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/version-ensemble/keras-robertaprelm-ls0.75-adamw-divide-head-emb300-v2/', 
      model_name = 'tfrobertaprelayernorm',
      version = 'v2',
      epoch = epoch)

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/version-ensemble/keras-debertav2-ls0.75-adamw-divide-head-emb300-v2/', 
      model_name = 'tfdebertav2',
      version = 'v2',
      epoch = epoch)

In [None]:
# Version 3: train the v3 architecture twice, once per encoder name
# ('tfrobertaprelayernorm', then 'tfdebertav2'), 40 epochs each, seed 1.
# Training uses all of `df`; validation is the first 1000 rows of folds[1][1].
# NOTE(review): if the folds are drawn from `df`, these validation rows are
# also in the training set, so val metrics are not held-out - confirm.
folds, df = preprocess(args)
i = 1
train_df, val_df = df, folds[i][1][:1000]
epoch = 40

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/version-ensemble/keras-robertaprelm-ls0.75-adamw-divide-head-emb300-v3/', 
      model_name = 'tfrobertaprelayernorm',
      version = 'v3',
      epoch = epoch)

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/version-ensemble/keras-debertav2-ls0.75-adamw-divide-head-emb300-v3/', 
      model_name = 'tfdebertav2',
      version = 'v3',
      epoch = epoch)

In [None]:
# Version 4: train the v4 architecture with the 'mlp' encoder, 40 epochs, seed 1.
# Training uses all of `df`; validation is the first 1000 rows of folds[1][1].
# NOTE(review): if the folds are drawn from `df`, these validation rows are
# also in the training set, so val metrics are not held-out - confirm.
folds, df = preprocess(args)
i = 1
train_df, val_df = df, folds[i][1][:1000]
epoch = 40

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/keras-mlp-ls0.75-adamw-divide-head-emb512/', 
      model_name = 'mlp',
      version = 'v4',
      epoch = epoch)

In [None]:
# Version 5: train the v5 architecture with the 'gru' encoder, 40 epochs, seed 1.
# Training uses all of `df`; validation is the first 1000 rows of folds[1][1].
# NOTE(review): if the folds are drawn from `df`, these validation rows are
# also in the training set, so val metrics are not held-out - confirm.
folds, df = preprocess(args)
i = 1
train_df, val_df = df, folds[i][1][:1000]
epoch = 40

train(train_df = train_df, 
      val_df = val_df,
      seed = 1,
      path = f'/content/drive/MyDrive/Kaggle/model/keras-gru-ls0.75-adamw-divide-head-emb384/', 
      model_name = 'gru',
      version = 'v5',
      epoch = epoch)

number of train data :  94477
number of val data :  1000
seed :  1


100%|██████████| 94477/94477 [04:46<00:00, 329.90it/s]
100%|██████████| 1000/1000 [00:03<00:00, 287.81it/s]


total_steps:  29520
warmup_steps:  5904
Epoch 1/40
Epoch 1: saving model to /content/drive/MyDrive/Kaggle/model/keras-gru-ls0.75-adamw-divide-head-emb384/weights/epoch_01-val_acc_0.2230.h5
Epoch 2/40
Epoch 2: saving model to /content/drive/MyDrive/Kaggle/model/keras-gru-ls0.75-adamw-divide-head-emb384/weights/epoch_02-val_acc_0.4720.h5
Epoch 3/40
Epoch 3: saving model to /content/drive/MyDrive/Kaggle/model/keras-gru-ls0.75-adamw-divide-head-emb384/weights/epoch_03-val_acc_0.5410.h5
Epoch 4/40
Epoch 4: saving model to /content/drive/MyDrive/Kaggle/model/keras-gru-ls0.75-adamw-divide-head-emb384/weights/epoch_04-val_acc_0.5740.h5
Epoch 5/40
Epoch 5: saving model to /content/drive/MyDrive/Kaggle/model/keras-gru-ls0.75-adamw-divide-head-emb384/weights/epoch_05-val_acc_0.6510.h5
Epoch 6/40
Epoch 6: saving model to /content/drive/MyDrive/Kaggle/model/keras-gru-ls0.75-adamw-divide-head-emb384/weights/epoch_06-val_acc_0.6700.h5
Epoch 7/40
Epoch 7: saving model to /content/drive/MyDrive/Kaggle/

In [None]:
# Final cell: hand the Colab runtime back once the cells above have finished.
# NOTE(review): runtime.unassign() presumably disconnects and discards the VM,
# so anything not already written to Drive would be lost - confirm before use.
from google.colab import runtime
runtime.unassign()