In [1]:
import numpy as np   
import pandas as pd
import random
from sklearn.model_selection import train_test_split, GroupKFold, StratifiedGroupKFold

from typing import Tuple
import torch
from torch import nn
import json
from torch.utils.data import Dataset, DataLoader, SequentialSampler, RandomSampler

from transformers import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup

# import asl_utils
import torch.nn.functional as F
from timeit import default_timer as timer
from scipy.special import softmax
from sklearn.metrics import accuracy_score

import os, gc, pickle, math, time, random, copy, json

data_dir = '/home/rashmi/Documents/kaggle/asl_signs/'
root_dir = '/home/rashmi/Documents/kaggle/asl_signs/input/'
feature_dir = '/home/rashmi/Documents/kaggle/asl_signs/src/exp25/'
ext_dir = '/home/rashmi/Documents/kaggle/asl_signs/input/ext_data/'


In [2]:
# ====================================================
# Directory settings
# ====================================================
import os
EXP_NAME = 'exp81'
OUTPUT_DIR = f'{data_dir}/src/models_' + EXP_NAME + "/"
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

sign_to_label = json.load(open(f"{root_dir}sign_to_prediction_index_map.json", "r"))

np.seterr(invalid='ignore')


LHAND = np.arange(468, 489).tolist()
RHAND = np.arange(522, 543).tolist()
REYE = [
    33, 7, 163, 144, 145, 153, 154, 155, 133,
    246, 161, 160, 159, 158, 157, 173,
]
LEYE = [
    263, 249, 390, 373, 374, 380, 381, 382, 362,
    466, 388, 387, 386, 385, 384, 398,
]


LIP = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]
SPOSE = [504, 502, 500, 501, 503, 505, 512, 513]
NOSE=[
    1,2,98,327
]

max_length = 256 #512 #256  ## Number of frames to be considered for each video
num_point  = 1050 #960 #82

embed_dim  = 384
num_head   = 8
num_block  = 1

start_lr   = 1e-3

skip_save_epoch = 0
# num_epoch = 50 #200


DEBUG = False

class Config:
    seed = 42
    n_fold = 21
    trn_fold = [0,4,8,9,10,18] #[0,1,2,3,4]
    train = True
    TRAIN_CSV = f'{data_dir}input/folds_data/asl_train_21folds_sgkf.csv'
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    epochs = 50
    apex = False #True
    val_strategy = 'batch'
    val_steps = 1000
    scheduler = 'cosine'
    batch_scheduler=True
    batch_size = 1024 #768 #64 
    num_warmup_steps = 0.02
    num_cycles=0.5
    dropout1 = 0.0

CFG = Config()
df_folds  = pd.read_csv(CFG.TRAIN_CSV) #, nrows=1000)
df_folds.loc[:, 'label'] = df_folds.sign.map(sign_to_label)

true_cols = ['label']
pred_cols = ['pred_label']

df_folds['sequence_id'] = df_folds.index



In [3]:

ROWS_PER_FRAME = 543

def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

#assum zero-mean one-std, input
def do_random_affine(xyz,
    scale  = (0.8,1.5),
    shift  = (-0.1,0.1),
    degree = (-15,15),
    p=0.5
):

    if np.random.rand()<p:
        if scale is not None:
            scale = np.random.uniform(*scale)
            xyz = scale*xyz

        if shift is not None:
            shift = np.random.uniform(*shift)
            xyz = xyz + shift

        if degree is not None:
            degree = np.random.uniform(*degree)
            radian = degree/180*np.pi
            c = np.cos(radian)
            s = np.sin(radian)
            rotate = np.array([
                [c,-s],
                [s, c],
            ]).T
            xyz[...,:2] = xyz[...,:2] @rotate

    return xyz

#-----------------------------------------------------
def train_augment(xyz):
    xyz = do_random_affine(
        xyz,
        scale  = (0.7,1.3),
        shift  = (-0.08,0.08),
        degree = (-20,20),
        p=0.5
    )
    return xyz


def pre_process(xyz):
    #xyz = xyz - xyz[~torch.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
    #xyz = xyz / xyz[~torch.isnan(xyz)].std(0, keepdims=True)
    lip   = xyz[:, LIP]
    lhand = xyz[:, LHAND]
    rhand = xyz[:, RHAND]

    xyz = torch.cat([ #(none, 82, 3)
        lip,
        lhand,
        rhand,
    ],1)
    xyz[torch.isnan(xyz)] = 0
    xyz = xyz[:max_length]  ## Limiting number of frames to max_length
    return xyz


    ...
## assume zero mean xyz. so flip can be implement by multuiplication of -1
def do_hflip_hand(lhand, rhand):
    rhand[...,0] *= -1
    lhand[...,0] *= -1
    rhand, lhand = lhand,rhand
    return lhand, rhand

def do_hflip_spose(spose):
    spose[...,0] *= -1
    spose = spose[:,[3,4,5,0,1,2,7,6]]
    return spose

def do_hflip_slip(slip):
    slip[...,0] *= -1
    slip = slip[:,[10,9,8,7,6,5,4,3,2,1,0]+[19,18,17,16,15,14,13,12,11]]
    return slip



In [4]:
offset = (np.arange(1000)- max_length)//2
offset = np.clip(offset,0, 1000).tolist()

triu_index= [
			1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
			14, 15, 16, 17, 18, 19, 20, 23, 24, 25, 26, 27, 28,
			29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
			45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
			58, 59, 60, 61, 62, 67, 68, 69, 70, 71, 72, 73, 74,
			75, 76, 77, 78, 79, 80, 81, 82, 83, 89, 90, 91, 92,
			93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 111,
			112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124,
			125, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
			145, 146, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165,
			166, 167, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187,
			188, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 221,
			222, 223, 224, 225, 226, 227, 228, 229, 230, 243, 244, 245, 246,
			247, 248, 249, 250, 251, 265, 266, 267, 268, 269, 270, 271, 272,
			287, 288, 289, 290, 291, 292, 293, 309, 310, 311, 312, 313, 314,
			331, 332, 333, 334, 335, 353, 354, 355, 356, 375, 376, 377, 397,
			398, 419,
		]


def do_normalise_by_ref(xyz, ref):  
    K = xyz.shape[-1]
    xyz_flat = ref.reshape(-1,K)

    m = np.nanmean(xyz_flat,0).reshape(1,1,K)
    s = np.nanstd(xyz_flat, 0).reshape(1,1,K) # .mean() 
    xyz = xyz - m
    xyz = xyz / s
    return xyz


## assume zero mean xyz. so flip can be implement by multuiplication of -1
def do_hflip_hand(lhand, rhand):
    rhand[...,0] *= -1
    lhand[...,0] *= -1
    rhand, lhand = lhand,rhand
    return lhand, rhand

def do_hflip_spose(spose):
    spose[...,0] *= -1
    spose = spose[:,[3,4,5,0,1,2,7,6]]
    return spose

def do_hflip_slip(slip):
    slip[...,0] *= -1
    slip = slip[:,[10,9,8,7,6,5,4,3,2,1,0]+[19,18,17,16,15,14,13,12,11]]
    return slip

FLIP_LABELS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, \
	27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, \
	55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, \
	84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, \
	110, 113, 114, 115, 116, 117, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, \
	136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 155, 156, 157, 158, \
	159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, \
	181, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 198, 199, 200, 201, 203, 205, 206, \
	207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, \
	230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249]


In [5]:
class SignDataset(Dataset):
    def __init__(self, df, augment=None):
        self.df = df
        self.augment = augment
        self.length = len(self.df)

    def __str__(self):
        # num_participant_id = self.df.participant_id.nunique()
        # string = ''
        # string += f'\tlen = {len(self)}\n'
        # string += f'\tnum_participant_id = {num_participant_id}\n'
        # return string
        return "Not working yet"

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        d = self.df.iloc[index]
        
        root_dir_new=root_dir
        pq_file = f'{root_dir_new}{d.path}'

        xyz = load_relevant_data_subset(pq_file)
        xyz = torch.from_numpy(xyz).float()

       
        L = len(xyz)
        if L>max_length:
            #xyz = xyz[:self.max_length] #first
            #xyz = xyz[-self.max_length:] #last
            i = offset[L]
            xyz = xyz[i:i+max_length] #center
        
        L = len(xyz)
        REF = LIP + SPOSE + LHAND + RHAND 
        xyz = do_normalise_by_ref(xyz, xyz[:,REF])

      
        lhand = xyz[:,LHAND]
        rhand = xyz[:,RHAND]
      
        if self.augment is not None:
            # if d.label in FLIP_LABELS:
            if np.random.rand()<0.5:
                lhand, rhand = do_hflip_hand(lhand, rhand)

                xyz[:,LHAND] = lhand
                xyz[:,RHAND] = rhand
        


        # add distance
        lhand2 = lhand[:, :21, :2]
        ld = lhand2.reshape(-1, 21, 1, 2) - lhand2.reshape(-1, 1, 21, 2)
        ld = np.sqrt((ld ** 2).sum(-1))
        ld = ld.reshape(L, -1)
        ld = ld[:,triu_index]
        
        rhand2 = rhand[:, :21, :2]
        rd = rhand2.reshape(-1, 21, 1, 2) - rhand2.reshape(-1, 1, 21, 2)
        rd = np.sqrt((rd ** 2).sum(-1))
        rd = rd.reshape(L, -1)
        rd = rd[:,triu_index]

        xyz = torch.cat([  # (none, 82, 3)
            lhand,
            rhand,
            xyz[:, LIP],
            xyz[:, SPOSE],
            
        ], 1).contiguous()
        
        dxyz = F.pad(xyz[:-1] - xyz[1:], [0, 0, 0, 0, 0, 1])

        a = xyz[1:,:,0] - xyz[:-1,:,0]
        b = xyz[1:,:,1] - xyz[:-1,:,1]
        axyz = ((a**2 + b**2)**0.5)
        axyz = F.pad(axyz, [0, 0, 0, 1])

        x = torch.cat([
            xyz.reshape(L,-1),
            dxyz.reshape(L,-1),
            axyz.reshape(L,-1),  
            rd.reshape(L,-1),
            ld.reshape(L,-1),
        ], -1)
        x[torch.isnan(x)] = 0
                    
       
        r = {}
        r['index'] = d.index
        r['d'    ] = d
        r['xyz'  ] = x
        r['label'] = d.label
        return r
    
    
tensor_key = ['xyz', 'label', 'index']
def null_collate(batch):
    batch_size = len(batch)
    d = {}
    key = batch[0].keys()
    for k in key:
        d[k] = [b[k] for b in batch]
    d['label'] = torch.LongTensor(d['label'])
    return d

In [6]:
def run_check_dataset():

    fold = 0 
    train_df = df_folds[df_folds.fold!=fold].reset_index(drop=True)
    valid_df = df_folds[df_folds.fold==fold].reset_index(drop=True) 

    dataset = SignDataset(train_df, augment=True)
    print(dataset)

    # for i in range(12):
    #     r = dataset[i]
    #     print(r['index'], '--------------------')
    #     print(r["d"], '\n')
    #     for k in tensor_key:
    #         if k =='label': continue
    #         v = r[k]
    #         print(k)
    #         print('\t', 'dtype:', v.dtype)
    #         print('\t', 'shape:', v.shape)
    #         if len(v)!=0:
    #             print('\t', 'min/max:', v.min().item(),'/', v.max().item())
    #             print('\t', 'is_contiguous:', v.is_contiguous())
    #             print('\t', 'values:')
    #             print('\t\t', v.reshape(-1)[:5].data.numpy().tolist(), '...')
    #             print('\t\t', v.reshape(-1)[-5:].data.numpy().tolist())
    #     print('')
       

    loader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=8,
        drop_last=True,
        num_workers=0,
        pin_memory=False,
        worker_init_fn=lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn=null_collate,
    )
    print(f'batch_size   : {loader.batch_size}')
    print(f'len(loader)  : {len(loader)}')
    print(f'len(dataset) : {len(dataset)}')
    print('')

    for t, batch in enumerate(loader):
        if t > 0: break
        print('batch ', t, '===================')
        print('index', batch['index'])

        for k in tensor_key:
            v = batch[k]

            if k =='label':
                print('label:')
                print('\t', v.data.numpy().tolist())

            if k =='x':
                print('x:')
                print('\t', v.data.shape)

            if k =='xyz':
                print('xyz:')
                for i in range(len(v)):
                    print('\t', v[i].shape)

        if 1:
            pass
        print('')


# main #################################################################
if 1: #DEBUG:
    run_check_dataset()

Not working yet
batch_size   : 8
len(loader)  : 11227
len(dataset) : 89821

index [Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object'), Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object'), Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object'), Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object'), Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object'), Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object'), Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object'), Index(['path', 'participant_id', 'sequence_id', 'sign', 'fold', 'label'], dtype='object')]
xyz:
	 torch.Size([23, 1050])
	 torch.Size([11, 1050])
	 torch.Size([105, 1050])
	 torch.Size([12, 1050])
	 torch.Size([18, 1050])
	 torch.Size([30, 1050])
	 torch.Size([23, 1050])
	 torch.

In [7]:
num_class  = 250
point_dim = num_point

def pack_seq(
    seq,):
    length = [min(len(s), max_length)  for s in seq]
    batch_size = len(seq)
    K = seq[0].shape[1]
    L = max(length)

    x = torch.zeros((batch_size, L, point_dim)).to(seq[0].device)
    x_mask = torch.zeros((batch_size, L)).to(seq[0].device)
    for b in range(batch_size):
        l = length[b]
        x[b, :l] = seq[b][:l,:]
        x_mask[b, l:] = 1
    x_mask = (x_mask>0.5)

    return x, x_mask

def positional_encoding(length, embed_dim):
    dim = embed_dim//2
    position = np.arange(length)[:, np.newaxis]     # (seq, 1)
    dim = np.arange(dim)[np.newaxis, :]/dim   # (1, dim)
    angle = 1 / (10000**dim)         # (1, dim)
    angle = position * angle    # (pos, dim)
    pos_embed = np.concatenate(
        [np.sin(angle), np.cos(angle)],
        axis=-1
    )
    pos_embed = torch.from_numpy(pos_embed).float()
    return pos_embed


class XEmbed(nn.Module):
    def __init__(self,
    ):
        super().__init__()
        self.v = nn.Sequential(
            nn.Linear(point_dim, embed_dim*2, bias=True),
            nn.LayerNorm(embed_dim*2),
            nn.ReLU(inplace=True),
            nn.Linear(embed_dim*2, embed_dim, bias=True),
            nn.LayerNorm(embed_dim),
            nn.ReLU(inplace=True),
        )
    def forward(self, x, x_mask):
        B,L,_ = x.shape
        v = self.v(x)
        x = v
        return x, x_mask

class TransformerBlock(nn.Module):
    def __init__(self,
        embed_dim,
        num_head,
        out_dim,
    ):
        super().__init__()
        self.attn  = MyMultiHeadAttention(
            embed_dim=embed_dim,
            out_dim=embed_dim,
            qk_dim=embed_dim // num_head,
            v_dim=embed_dim // num_head,
            num_head=num_head,

        )
        self.ffn   = FeedForward(embed_dim, out_dim)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(out_dim)

    def forward(self, x, x_mask=None):
        x = x + self.attn((self.norm1(x)), x_mask)
        x = x + self.ffn((self.norm2(x)))
        return x
    
class MyMultiHeadAttention(nn.Module):
    def __init__(self,
            embed_dim,
            out_dim,
            qk_dim,
            v_dim,
            num_head,
        ):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_head  = num_head
        self.qk_dim = qk_dim
        self.v_dim  = v_dim

        self.q = nn.Linear(embed_dim, qk_dim*num_head)
        self.k = nn.Linear(embed_dim, qk_dim*num_head)
        self.v = nn.Linear(embed_dim, v_dim*num_head)

        self.out = nn.Linear(v_dim*num_head, out_dim)
        self.scale = 1/(qk_dim**0.5)

    #https://github.com/pytorch/pytorch/issues/40497
    def forward(self, x, x_mask):
        B,L,dim = x.shape
        #out, _ = self.mha(x,x,x, key_padding_mask=x_mask)
        num_head = self.num_head
        qk_dim = self.qk_dim
        v_dim = self.v_dim

        q = self.q(x)
        k = self.k(x)
        v = self.v(x)
        q = q.reshape(B, L, num_head, qk_dim).permute(0,2,1,3).contiguous()
        k = k.reshape(B, L, num_head, qk_dim).permute(0,2,3,1).contiguous()
        v = v.reshape(B, L, num_head, v_dim ).permute(0,2,1,3).contiguous()

        dot = torch.matmul(q, k) *self.scale  # H L L
        x_mask = x_mask.reshape(B,1,1,L).expand(-1,num_head,L,-1)
        #dot[x_mask]= -1e4
        dot.masked_fill_(x_mask, -1e4)
        attn = F.softmax(dot, -1)    # L L

        v = torch.matmul(attn, v)  # L H dim
        v = v.permute(0,2,1,3).reshape(B,L, v_dim*num_head).contiguous()
        out = self.out(v)

        return out


class FeedForward(nn.Module):
    def __init__(self, embed_dim, hidden_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.LayerNorm(embed_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, embed_dim),
        )
    def forward(self, x):
        return self.mlp(x)
    
class Net(nn.Module):

    def __init__(self, num_class=num_class):
        super().__init__()
        self.output_type = ['inference', 'loss']

        self.x_embed = XEmbed()

        pos_embed = positional_encoding(max_length,16) #max_length, embed_dim)
        self.pos_embed = nn.Parameter(pos_embed) 

        # Linear layer to map pos_enc_dim to emb_dim
        self.pos_enc_linear = nn.Linear(16, embed_dim)

        self.cls_embed = nn.Parameter(torch.zeros((1, embed_dim)))

        self.encoder = nn.ModuleList([
            TransformerBlock(
                embed_dim,
                num_head,
                embed_dim,
            ) for i in range(num_block)
        ])

        self.seq_layer = nn.Sequential(
            nn.Linear(embed_dim*2, embed_dim),
            nn.BatchNorm1d(embed_dim),
            nn.ReLU()
        )
        self.class_layer = nn.Linear(embed_dim, num_class)
        

    def forward(self, batch):
        xyz = batch['xyz']

        #----
        x, x_mask = pack_seq(xyz)
        x, x_mask = self.x_embed(x, x_mask)
        B,L,_ = x.shape

        x = x + self.pos_enc_linear(self.pos_embed[:L].unsqueeze(0))

        x = torch.cat([
            self.cls_embed.unsqueeze(0).repeat(B,1,1),
            x
        ],1)
        x_mask = torch.cat([
            torch.zeros(B,1).to(x_mask),
            x_mask
        ],1)
        
        for block in self.encoder:
            x = block(x,x_mask)
        x = F.dropout(x,p=CFG.dropout1,training=self.training)

        #---
        #mask pool
        x_mask = x_mask.unsqueeze(-1)
        x_mask = 1-x_mask.float()
        last = (x*x_mask).sum(1)/x_mask.sum(1)
       
        
        x = torch.cat([x[:,0],last],1)
        
        x = self.seq_layer(x)
        x = F.dropout(x,p=CFG.dropout1, training=self.training)
        logit = self.class_layer(x)

        output = {}
        output['logit'] = logit
 
        if 'inference' in self.output_type:
            output['sign'] = torch.softmax(logit,-1)

        return output


def run_check_net():

    length = [12,16,20,180]
    batch_size = len(length)
    xyz = [
        np.random.uniform(-1,1,(length[b],num_point)) for b in range(batch_size)
    ]
    #---
    batch = {
        'label' : torch.from_numpy( np.random.choice(250,(batch_size))).to(CFG.device).long(),
        'xyz' : [torch.from_numpy(x).to(CFG.device).float() for x in xyz]
    }

    net = Net().to(CFG.device)
    output = net(batch)


    #---

    print('batch')
    for k, v in batch.items():
        if k in ['label','x']:
            print(f'{k:>32} : {v.shape} ')
        if k=='xyz':
            print(f'{k:>32} : {v[0].shape} ')
            for i in range(1,len(v)):
                print(f'{" ":>32} : {v[i].shape} ')

    print('output')
    for k, v in output.items():
        if 'loss' not in k:
            print(f'{k:>32} : {v.shape} ')
    print('loss')
    for k, v in output.items():
        if 'loss' in k:
            print(f'{k:>32} : {v.item()} ')



# main #################################################################
if 1: #DEBUG:
    run_check_net()


batch
                           label : torch.Size([4]) 
                             xyz : torch.Size([12, 1050]) 
                                 : torch.Size([16, 1050]) 
                                 : torch.Size([20, 1050]) 
                                 : torch.Size([180, 1050]) 
output
                           logit : torch.Size([4, 250]) 
                            sign : torch.Size([4, 250]) 
loss


In [8]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))

"""# Random seed"""

def seed_everything(seed, use_cuda = True):
    np.random.seed(seed) # cpu vars
    torch.manual_seed(seed) # cpu  vars
    random.seed(seed) # Python
    os.environ['PYTHONHASHSEED'] = str(seed) 
    if use_cuda:
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True  
        torch.backends.cudnn.benchmark = False


def get_logger(filename=OUTPUT_DIR+'train'):
    from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

seed_everything(CFG.seed)

In [9]:
def get_score(outputs,targets): #outputs=preds, targets=groundtruth
    return accuracy_score(targets, outputs)


# ====================================================
# scheduler
# ====================================================
def get_scheduler(cfg, optimizer, num_train_steps):
    if cfg.scheduler == 'linear':
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
        )
    elif cfg.scheduler == 'cosine':
        scheduler = get_cosine_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
        )
    return scheduler

In [10]:

def np_cross_entropy(probability, truth):
	p = np.clip(probability,1e-4,1-1e-4)
	logp = -np.log(p)
	loss = logp[np.arange(len(logp)),truth]
	loss = loss.mean()
	return loss

def valid_fn(model, valid_loader, valid_idx, iteration):

	valid_num = 0
	valid_sign = []
	valid_loss = 0

	model = model.eval()
	start_timer = timer()
	for t, batch in enumerate(valid_loader):
		
		model.output_type = ['inference']
		with torch.no_grad():
			with torch.cuda.amp.autocast(enabled = True):
				
				batch_size = len(batch['index'])
				batch['xyz'] = [xyz.to(CFG.device) for xyz in batch['xyz']]
				output = model(batch)  

		valid_sign.append(output['sign'].cpu().numpy())
		valid_num += batch_size



	assert(valid_num == len(valid_loader.dataset))
	#------
	truth = valid_loader.dataset.df.label.values
	sign = np.concatenate(valid_sign)
	predict = np.argsort(-sign, -1)
	correct = predict==truth.reshape(valid_num,1)
	topk = correct.cumsum(-1).mean(0)[:5]

	loss = np_cross_entropy(sign, truth)
	
	output_map = {}
	for x, y in zip(valid_idx, sign):
		output_map[x] = y

	return [loss, topk[0], topk[1],  topk[4], sign, output_map]


if DEBUG:
	fold  = 4
	valid_df = df_folds[df_folds.fold==fold].reset_index(drop=True) 

	valid_idx = valid_df.sequence_id.tolist()
	valid_dataset = SignDataset(valid_df,)

	valid_loader = DataLoader(
		valid_dataset, shuffle=False,
		sampler = SequentialSampler(valid_dataset),
		batch_size  = 64,
		drop_last   = False,
		num_workers = 8,
		pin_memory  = False,
		collate_fn = null_collate,
	)

	cfile = '/home/rashmi/Documents/kaggle/asl_signs/src/models_exp4/asl_model_fold1.pth'
	cfile = '/home/rashmi/Documents/kaggle/asl_signs/src/models_exp18/asl_model_fold4.pth'
	f = torch.load(cfile, map_location=lambda storage, loc: storage)
	state_dict = f['model']
	# state_dict['pos_embed'] = state_dict['pos_embed'][:max_length]

	model = Net().to(CFG.device)
	model.load_state_dict(state_dict, strict=False) 
	model.eval()
	valid_info= valid_fn(model, valid_loader, valid_idx, iteration=100)
	print(accuracy_score(valid_df.label,np.argmax(valid_info[4],axis=1)))

In [11]:
def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, \
             valid_loader=None, valid_idx=None, best_score=0, len_train=94000):
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0

    train_loss_sum = 0.
    train_correct = 0
    train_total = 0
    
    LOGGER.info(f"========== Epoch: {epoch} training ==========")

    for step, batch in enumerate(train_loader):
        model.train()
        gc.collect()
        
        xyz = batch['xyz']
        x = [torch.Tensor(x).to(CFG.device).float() for x in xyz]
        y = torch.Tensor(batch['label']).long().to(CFG.device)
        b = {'label' : y, 'xyz': x}

        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_pred = model(b)
            loss = criterion(y_pred['logit'], y)
        
        if CFG.apex:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        
        optimizer.zero_grad()
        global_step += 1
        if CFG.batch_scheduler:
            scheduler.step()

        train_loss_sum += loss.item()
        train_correct += np.sum((np.argmax(y_pred['logit'].detach().cpu().numpy(), axis=1) == y.cpu().numpy()))
        train_total += 1

    end = time.time()
    LOGGER.info(f"Epoch:{epoch} > Train Loss: {(train_loss_sum/train_total):.04f}, Train Acc: {train_correct/len_train:0.04f}")
    return train_loss_sum, best_score

    

In [12]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
     
    seed_everything(seed=CFG.seed)

    LOGGER.info(f"========== fold: {fold} training ==========")

    device = CFG.device
    
    train_df = folds[folds.fold!=fold].reset_index(drop=True)
    valid_df = folds[folds.fold==fold].reset_index(drop=True) 

    valid_idx = valid_df.sequence_id.tolist()
    train_dataset = SignDataset(train_df,train_augment)
    valid_dataset = SignDataset(valid_df,)

    len_train = len(train_dataset)
    train_loader  = DataLoader(
        train_dataset,
        sampler = RandomSampler(train_dataset),
        #sampler = BalanceSampler(train_dataset),
        batch_size  = CFG.batch_size,
        drop_last   = True,
        num_workers = 8,
        pin_memory  = True,
        worker_init_fn = lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn = null_collate,
    )

    valid_loader = DataLoader(
        valid_dataset, shuffle=False,
        sampler = SequentialSampler(valid_dataset),
        batch_size  = 64,
        drop_last   = False,
        num_workers = 8,
        pin_memory  = True,
        collate_fn = null_collate,
    )

    model = Net().to(CFG.device)
    
    optimizer = torch.optim.AdamW( model.parameters(), lr=start_lr, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.75)
    num_train_steps = int(len(train_df) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)
    
    num_iteration = CFG.epochs*len(train_loader)
    scaler = torch.cuda.amp.GradScaler(enabled = CFG.apex)	

    best_score = 0

    for epoch in range(CFG.epochs):
        start_time = time.time()
        
        if epoch < 15:
            CFG.dropout1=0.0
        else:
            if epoch >=15 and epoch <=35:
                CFG.dropout1=0.4
            else:
                CFG.dropout1=0.2
        
        # train and validate
        avg_loss, best_score = train_fn(fold, train_loader, model, criterion, optimizer, epoch, \
                                        scheduler, device, valid_loader, valid_idx, best_score, len_train)

        # eval
        valid_info =  valid_fn(model, valid_loader, valid_idx, iteration=100) 
        # scoring
        valid_df = df_folds[df_folds.sequence_id.isin(valid_idx)].copy()
        output_map = valid_info[5]
        valid_df.loc[:, 'pred_label'] = valid_df['sequence_id'].apply(lambda x: np.argmax(output_map[x]))

        valid_labels = valid_df[true_cols].values
        valid_preds = valid_df[pred_cols].values
        score = get_score(np.argmax(valid_info[4],axis=1),valid_labels)
        save_preds = valid_df[pred_cols].values 
        elapsed = time.time() - start_time

        LOGGER.info(f"Epoch:{epoch} > Valid Loss: {(valid_info[0]):.04f}, top(1): {(valid_info[1]):.04f},top(2): {(valid_info[2]):.04f}, top(5): {(valid_info[3]):.04f}, time: {elapsed:.0f}s ")
    
        if score > best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': save_preds},
                        OUTPUT_DIR+f"asl_model_fold{fold}.pth")
                                        
    predictions = torch.load(OUTPUT_DIR+f"asl_model_fold{fold}.pth", 
                             map_location=torch.device('cpu'))['predictions']
    
    valid_df['pred_label'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_df


In [13]:
if __name__ == '__main__':   
    
    def get_result(oof_df):
        labels = oof_df[true_cols].values
        preds = oof_df[pred_cols].values
        score = get_score(preds, labels)
        LOGGER.info(f'Score: {score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(df_folds, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                _oof_df.to_csv(OUTPUT_DIR+f'oof_df{fold}.csv',index=False)
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_DIR+'oof_df.pkl')
        oof_df.to_csv(OUTPUT_DIR+'oof_df.csv',index=False)
        

Epoch:0 > Train Loss: 5.3758, Train Acc: 0.2016
Epoch:0 > Valid Loss: 3.6831, top(1): 0.4141,top(2): 0.5595, top(5): 0.7255, time: 86s 
Epoch 0 - Save Best Score: 0.4141 Model
Epoch:1 > Train Loss: 5.1370, Train Acc: 0.5305
Epoch:1 > Valid Loss: 3.0141, top(1): 0.5788,top(2): 0.7079, top(5): 0.8288, time: 88s 
Epoch 1 - Save Best Score: 0.5788 Model
Epoch:2 > Train Loss: 5.0393, Train Acc: 0.6387
Epoch:2 > Valid Loss: 2.8763, top(1): 0.6130,top(2): 0.7395, top(5): 0.8578, time: 86s 
Epoch 2 - Save Best Score: 0.6128 Model
Epoch:3 > Train Loss: 4.9923, Train Acc: 0.6921
Epoch:3 > Valid Loss: 2.6777, top(1): 0.6366,top(2): 0.7657, top(5): 0.8737, time: 87s 
Epoch 3 - Save Best Score: 0.6362 Model
Epoch:4 > Train Loss: 4.9647, Train Acc: 0.7230
Epoch:4 > Valid Loss: 2.7192, top(1): 0.6641,top(2): 0.7788, top(5): 0.8784, time: 86s 
Epoch 4 - Save Best Score: 0.6643 Model
Epoch:5 > Train Loss: 4.9421, Train Acc: 0.7504
Epoch:5 > Valid Loss: 2.5910, top(1): 0.6845,top(2): 0.7998, top(5): 0.8