### **Version 3**: adding inference for saved models
### **Version 4**: adding fine-tuning pipeline

In [2]:
import os

import json
from tqdm import tqdm
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.nn as nn

In [3]:
max_length = 80
num_point  = 82

embed_dim  = 512
num_head   = 4
num_block  = 1
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
import numpy as np
import torch

num_class  = 250
num_landmark = 543

class HardSwish(nn.Module):
    def __init__(self,):
        super().__init__()
    def forward(self, x):
        return x * F.relu6(x+3) * 0.16666667

class FeedForward(nn.Module):
    def __init__(self, embed_dim, hidden_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, embed_dim),
        )
    def forward(self, x):
        return self.mlp(x)

#https://pytorch.org/docs/stable/generated/torch.nn.MultiheadAttention.html
class MultiHeadAttention(nn.Module):
    def __init__(self,
            embed_dim,
            num_head,
            batch_first,
        ):
        super().__init__()
        self.mha = nn.MultiheadAttention(
            embed_dim,
            num_heads=num_head,
            bias=True,
            add_bias_kv=False,
            kdim=None,
            vdim=None,
            dropout=0.0,
            batch_first=batch_first,
        )

    def forward(self, x, x_mask):
        out, _ = self.mha(x,x,x, key_padding_mask=x_mask)
        return out

class TransformerBlock(nn.Module):
    def __init__(self,
        embed_dim,
        num_head,
        out_dim,
        batch_first=True,
    ):
        super().__init__()
        self.attn  = MultiHeadAttention(embed_dim, num_head,batch_first)
        self.ffn   = FeedForward(embed_dim, out_dim)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(out_dim)

    def forward(self, x, x_mask=None):
        x = x + self.attn((self.norm1(x)), x_mask)
        x = x + self.ffn((self.norm2(x)))
        return x


def positional_encoding(length, embed_dim):
    dim = embed_dim//2
    position = np.arange(length)[:, np.newaxis]     # (seq, 1)
    dim = np.arange(dim)[np.newaxis, :]/dim   # (1, dim)
    angle = 1 / (10000**dim)         # (1, dim)
    angle = position * angle    # (pos, dim)
    pos_embed = np.concatenate(
        [np.sin(angle), np.cos(angle)],
        axis=-1
    )
    pos_embed = torch.from_numpy(pos_embed).float()
    return pos_embed

def pack_seq(
    seq,
):
    length = [min(s.shape[0], max_length)  for s in seq]
    batch_size = len(seq)
    K = seq[0].shape[1]
    L = max(length)

    x = torch.zeros((batch_size, L, K, 3)).to(seq[0].device)
    x_mask = torch.zeros((batch_size, L)).to(seq[0].device)
    for b in range(batch_size):
        l = length[b]
        x[b, :l] = seq[b][:l]
        x_mask[b, l:] = 1
    x_mask = (x_mask>0.5)
    x = x.reshape(batch_size,-1,K*3)
    return x, x_mask

#########################################################################

class Net(nn.Module):

    def __init__(self, num_class=num_class):
        super().__init__()
        self.output_type = ['inference', 'loss']

        pos_embed = positional_encoding(max_length, embed_dim)
        # self.register_buffer('pos_embed', pos_embed)
        self.pos_embed = nn.Parameter(pos_embed)

        self.cls_embed = nn.Parameter(torch.zeros((1, embed_dim)))
        self.x_embed = nn.Sequential(
            nn.Linear(num_point * 3, embed_dim, bias=False),
        )

        self.encoder = nn.ModuleList([
            TransformerBlock(
                embed_dim,
                num_head,
                embed_dim,
            ) for i in range(num_block)
        ])
        self.logit = nn.Linear(embed_dim, num_class)

    def forward(self, batch):
        xyz = batch['xyz']
        x, x_mask = pack_seq(xyz)
        #print(x.shape, x_mask.shape)
        B,L,_ = x.shape
        x = self.x_embed(x)
        x = x + self.pos_embed[:L].unsqueeze(0)

        x = torch.cat([
            self.cls_embed.unsqueeze(0).repeat(B,1,1),
            x
        ],1)
        x_mask = torch.cat([
            torch.zeros(B,1).to(x_mask),
            x_mask
        ],1)


        #x = F.dropout(x,p=0.25,training=self.training)
        for block in self.encoder:
            x = block(x,x_mask)

        cls = x[:,0]
        cls = F.dropout(cls,p=0.4,training=self.training)
        logit = self.logit(cls)

        output = {}
        if 'loss' in self.output_type:
            output['label_loss'] = F.cross_entropy(logit, batch['label'])

        if 'inference' in self.output_type:
            output['sign'] = torch.softmax(logit,-1)

        return output





def run_check_net():

    length = [3,4]
    batch_size = len(length)
    xyz = [
        np.random.uniform(-1,1,(length[b],num_point,3)) for b in range(batch_size)
    ]
    #---
    batch = {
        'label' : torch.from_numpy( np.random.choice(250,(batch_size))).long(),
        'xyz' : [torch.from_numpy(x).float() for x in xyz]
    }

    net = Net()
    output = net(batch)


    #---
    """

    print('batch')
    for k, v in batch.items():
        if k in ['label','x']:
            print(f'{k:>32} : {v.shape} ')
        if k=='xyz':
            print(f'{k:>32} : {v[0].shape} ')
            for i in range(1,len(v)):
                print(f'{" ":>32} : {v[i].shape} ')

    print('output')
    for k, v in output.items():
        if 'loss' not in k:
            print(f'{k:>32} : {v.shape} ')
    print(output['sign'])
    print('loss')
    for k, v in output.items():
        if 'loss' in k:
            print(f'{k:>32} : {v.item()} ')

"""

# main #################################################################
if __name__ == '__main__':
    run_check_net()

# Dataset

In [5]:
# additional helper functions
ROWS_PER_FRAME = 543
def load_relevant_data_subset(pq_path, type='parquet'):
    data_columns = ['x', 'y', 'z']
    if type=='parquet':    
        data = pd.read_parquet(pq_path, columns=data_columns)
    else:
        data = pd.read_csv(pq_path, usecols=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

In [6]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedGroupKFold
from torch.utils.data import SequentialSampler, RandomSampler

def read_kaggle_csv_by_random(fold=0):
    num_fold = 5

    kaggle_df = pd.read_csv('/kaggle/input/asl-demo/train_prepared.csv')
    train_df = kaggle_df[kaggle_df.fold!=fold].reset_index(drop=True)
    valid_df = kaggle_df[kaggle_df.fold==fold].reset_index(drop=True)
    return train_df, valid_df

def read_kaggle_csv_by_part(fold=0):
    num_fold = 5

    kaggle_df = pd.read_csv('/kaggle/input/asl-demo/train_prepared.csv')
    kaggle_df.loc[:, 'fold' ] = -1

    sgkf = StratifiedGroupKFold(n_splits=num_fold, random_state=123, shuffle=True)
    for i, (train_index, valid_index) in enumerate(sgkf.split(kaggle_df.path, kaggle_df.label, kaggle_df.participant_id)):
        kaggle_df.loc[valid_index,'fold'] = i

    #kaggle_df.loc[:, 'fold'] = np.arange(len(kaggle_df))%num_fold
    train_df = kaggle_df[kaggle_df.fold!=fold].reset_index(drop=True)
    valid_df = kaggle_df[kaggle_df.fold==fold].reset_index(drop=True)
    return train_df, valid_df

def read_christ_csv_by_part(fold=0):
    kaggle_df = pd.read_csv('/kaggle/input/asl-demo/train_prepared.csv')
    christ_df = kaggle_df
    
    christ_df = christ_df.merge(kaggle_df[['path']], on='path',validate='1:1') # also kaggle_df['num_frame'] was there removed it
    valid_df = christ_df[christ_df.fold==fold].reset_index(drop=True)
    train_df = christ_df[christ_df.fold!=fold].reset_index(drop=True)
    return train_df, valid_df


def pre_process(xyz):
    #xyz = xyz - xyz[~torch.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
    #xyz = xyz / xyz[~torch.isnan(xyz)].std(0, keepdims=True)
    
    LIP = [
            61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
            291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
            78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
            95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
        ]
    
    lip   = xyz[:, LIP]
    lhand = xyz[:, 468:489]
    rhand = xyz[:, 522:543]
    xyz = torch.cat([ #(none, 82, 3)
        lip,
        lhand,
        rhand,
    ],1)
    xyz[torch.isnan(xyz)] = 0
    xyz = xyz[:max_length]
    return xyz


#-----------------------------------------------------
def train_augment(xyz):
    xyz = do_random_affine(
        xyz,
        scale  = (0.7,1.3),
        shift  = (-0.08,0.08),
        degree = (-20,20),
        p=0.8
    )
    return xyz


class SignDataset(Dataset):
    def __init__(self, df, augment=None):
        self.df = df
        self.augment = augment
        self.length = len(self.df)

    def __str__(self):
        num_participant_id = self.df.participant_id.nunique()
        string = ''
        string += f'\tlen = {len(self)}\n'
        string += f'\tnum_participant_id = {num_participant_id}\n'
        return string

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        d = self.df.iloc[index]

        pq_file = f'/kaggle/input/asl-signs/{d.path}'
        xyz = load_relevant_data_subset(pq_file)
#         print(xyz)
        xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
        xyz = xyz / xyz[~np.isnan(xyz)].std(0, keepdims=True)

        #--
#         if self.augment is not None:
#             xyz = self.augment(xyz)
#         print(xyz)
        #--
        xyz = torch.from_numpy(xyz).float()
        xyz = pre_process(xyz)

        r = {}
        r['index'] = index
        r['d'    ] = d
        r['xyz'  ] = xyz
        r['label'] = d.label
        return r


tensor_key = ['xyz', 'label']
def null_collate(batch):
    batch_size = len(batch)
    d = {}
    key = batch[0].keys()
    for k in key:
        d[k] = [b[k] for b in batch]
    d['label'] = torch.LongTensor(d['label'])
    return d




#################################################################################

def run_check_dataset():

    train_df, valid_df = read_kaggle_csv_by_part(fold=0)
    dataset = SignDataset(valid_df)
    print(dataset)

    for i in range(12):
        r = dataset[i]
        print(r['index'], '--------------------')
        print(r["d"], '\n')
        for k in tensor_key:
            if k =='label': continue
            v = r[k]
            print(k)
            print('\t', 'dtype:', v.dtype)
            print('\t', 'shape:', v.shape)
            if len(v)!=0:
                print('\t', 'min/max:', v.min().item(),'/', v.max().item())
                print('\t', 'is_contiguous:', v.is_contiguous())
                print('\t', 'values:')
                print('\t\t', v.reshape(-1)[:5].data.numpy().tolist(), '...')
                print('\t\t', v.reshape(-1)[-5:].data.numpy().tolist())
        print('')
        if 0:
            #draw
            cv2.waitKey(1)



    loader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=8,
        drop_last=True,
        num_workers=0,
        pin_memory=False,
        worker_init_fn=lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn=null_collate,
    )
    print(f'batch_size   : {loader.batch_size}')
    print(f'len(loader)  : {len(loader)}')
    print(f'len(dataset) : {len(dataset)}')
    print('')

    for t, batch in enumerate(loader):
        if t > 5: break
        print('batch ', t, '===================')
        print('index', batch['index'])

        for k in tensor_key:
            v = batch[k]

            if k =='label':
                print('label:')
                print('\t', v.data.numpy().tolist())

            if k =='x':
                print('x:')
                print('\t', v.data.shape)

            if k =='xyz':
                print('xyz:')
                for i in range(len(v)):
                    print('\t', v[i].shape)

        if 1:
            pass
        print('')


# main #################################################################
if __name__ == '__main__':
    run_check_dataset()

  kaggle_df.loc[:, 'fold' ] = -1


	len = 22959
	num_participant_id = 5

0 --------------------
path                      train_landmark_files/49445/1000397667.parquet
participant_id                                                    49445
sequence_id                                                  1000397667
sign                                                             vacuum
landmark_file_path    /home/user/Data/asl-signs/train_landmark_files...
npy_file_path         /home/user/Data/asl-signs/train_features_npy_f...
label                                                               231
fold                                                                  0
Name: 0, dtype: object 

xyz
	 dtype: torch.float32
	 shape: torch.Size([33, 82, 3])
	 min/max: -1.4137358665466309 / 1.5335360765457153
	 is_contiguous: True
	 values:
		 [0.4290144741535187, 0.43524935841560364, -0.8092206120491028, 0.4351654648780823, 0.4245232045650482] ...
		 [1.5183755159378052, -0.9306944608688354, 0.30079182982444763, 1.5102177858352661

# Train

In [7]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES']='0'

In [8]:
# additional helper functions 2
#assum zero-mean one-std, input
def do_random_affine(xyz, scale=(0.8, 1.5), shift=(-0.1, 0.1), degree=(-15, 15), p=0.5):
    transformed_xyz = np.copy(xyz)  # Create a copy of xyz to store transformed values

    if np.random.rand() < p:
        if scale is not None:
            scale_factor = np.random.uniform(*scale)
            transformed_xyz = scale_factor * transformed_xyz

        if shift is not None:
            shift_value = np.random.uniform(*shift)
            transformed_xyz = transformed_xyz + shift_value

        if degree is not None:
            degree_value = np.random.uniform(*degree)
            radian = degree_value / 180 * np.pi
            c = np.cos(radian)
            s = np.sin(radian)
            rotate = np.array([[c, -s], [s, c]]).T
            transformed_xyz[..., :2] = transformed_xyz[..., :2] @ rotate

    return transformed_xyz
            
def get_learning_rate(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']
def time_to_str(t, mode='min'):
    if mode=='min':
        t  = int(t)/60
        hr = t//60
        min = t%60
        return '%2d hr %02d min'%(hr,min)

    elif mode=='sec':
        t   = int(t)
        min = t//60
        sec = t%60
        return '%2d min %02d sec'%(min,sec)

    else:
        raise NotImplementedError


### RAdam and Lookahead

In [9]:
import torch
from torch.optim import Optimizer
from collections import defaultdict
import math
from timeit import default_timer as timer

In [10]:
def np_cross_entropy(probability, truth):
    p = np.clip(probability,1e-4,1-1e-4)
    logp = -np.log(p)
    loss = logp[np.arange(len(logp)),truth]
    loss = loss.mean()
    return loss

In [11]:
from torch.optim import AdamW
from tqdm import tqdm

fold_type = 'christ-part'
fold     = 2
out_dir  = '/kaggle/input' + '/result/run10/transfomer-80-256-lip-hand-3a'
fold_dir = out_dir+ f'/fold-{fold}-{fold_type}'

start_lr = 1e-4
batch_size = 64
num_epoch = 16
save_iter = 2

train_df, valid_df = read_christ_csv_by_part(fold)
train_dataset = SignDataset(train_df,train_augment)
valid_dataset = SignDataset(valid_df,)

train_loader  = DataLoader(
        train_dataset,
        sampler = RandomSampler(train_dataset),
        #sampler = BalanceSampler(train_dataset),
        batch_size  = batch_size,
        drop_last   = True,
        num_workers = 2,
        pin_memory  = False,
        worker_init_fn = lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn = null_collate,
    )

valid_loader = DataLoader(
        valid_dataset,
        sampler = SequentialSampler(valid_dataset),
        batch_size  = 64,
        drop_last   = False,
        num_workers = 2,
        pin_memory  = False,
        collate_fn = null_collate,
    )

net = Net()
#net = nn.DataParallel(net)
scaler = torch.cuda.amp.GradScaler(enabled = True) # To scale the extremely small gradients
net.to(device)
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, net.parameters()),lr=start_lr)


# Load the model from a checkpoint: 

In [12]:
def load_checkpoint():
    start_epoch = 0
    initial_checkpoint = r"/kaggle/input/islr-self/00000030.model.pth"
    f = torch.load(initial_checkpoint, map_location=lambda storage, loc: storage)
    start_epoch = f.get('epoch',0)+1
    state_dict = f['state_dict']
    net.load_state_dict(state_dict,strict=False)
    optimizer.load_state_dict(f['optimizer_dict'])
    scaler.load_state_dict(f['scheduler_dict'])
    print(f"Starting from Epoch: {start_epoch}")

# if __name__=="main":
#     load_checkpoint()

In [13]:
def run_train():
    for i in tqdm(range(start_epoch , num_epoch+start_epoch)): # change to num_epochs
        net.train()
        train_sign = []
        train_truth = []
        train_num = 0
        train_loss_sum = 0
        train_total = 0
        for t, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
            rate = get_learning_rate(optimizer)

            with torch.cuda.amp.autocast(enabled = True):
                batch_size = len(batch['index'])
                batch['xyz'] = [xyz.cuda() for xyz in batch['xyz']]
                batch['label'] = batch['label'].cuda()
                #y = batch['label']
                #x = batch['xyz']
                net.output_type = ['loss', 'inference']
                output = net(batch)
                loss0  = output['label_loss'].mean()


            train_sign.append(output['sign'].detach().cpu().numpy())
            train_truth.append(batch['label'].detach().cpu().numpy())
            train_num += batch_size

            optimizer.zero_grad()
            scaler.scale(loss0).backward()

            scaler.step(optimizer)
            scaler.update()

            train_loss_sum += loss0.item()
            train_total += 1

        train_truth = np.concatenate(train_truth)
        sign = np.concatenate(train_sign)
        topk_indices = np.argsort(sign, -1)[:,-5:]
        correct_topk = np.any(np.equal(topk_indices, train_truth.reshape(-1, 1)), axis=1)
        topk_accuracy_train = np.mean(correct_topk)

        val_loss_sum = 0
        val_total = 0
        valid_num = 0
        valid_sign=[]
        net.eval()

        for t, batch in tqdm(enumerate(valid_loader),total=len(valid_loader)):

            net.output_type = ['inference']
            with torch.no_grad():
                with torch.cuda.amp.autocast(enabled = True):
                    batch_size = len(batch['index'])
                    batch['xyz'] = [xyz.cuda() for xyz in batch['xyz']]
                    batch['label'] = batch['label'].cuda()
                    output = net(batch)
                    valid_loss = np_cross_entropy(output['sign'].detach().cpu(), batch['label'].detach().cpu())
                    val_loss_sum += valid_loss
                    val_total +=1
                valid_sign.append(output['sign'].detach().cpu().numpy())
                valid_num += batch_size

        valid_truth = valid_loader.dataset.df.label.values
        sign = np.concatenate(valid_sign)
        topk_indices = np.argsort(sign, -1)[:,-5:]
        correct_topk = np.any(np.equal(topk_indices, valid_truth.reshape(-1, 1)), axis=1)
        topk_accuracy_valid = np.mean(correct_topk)

        print(f"Epoch:{i} => Train Loss: {(train_loss_sum/train_total):.04f}, Train Acc: {topk_accuracy_train:0.04f}")
        print(f"Epoch:{i} => Val Loss: {(val_loss_sum/val_total):.04f}, Val Acc: {topk_accuracy_valid:0.04f}")
        print("="*50)

        if i%save_iter==0:
            if i != start_epoch:
                n = i
                torch.save({
                           'state_dict': net.state_dict(),
                            'epoch': i,
                            'optimizer_dict': optimizer.state_dict(),
                            'scheduler_dict':scaler.state_dict(),
                            }, f'{n:08d}.model.pth')
        torch.cuda.empty_cache()
        
# if __name__ == "__main__":
#     run_train()

In [15]:
net = Net()
net.load_state_dict(torch.load('/kaggle/input/gislr-saved-models/00000038.model.pth', map_location= torch.device('cuda'))['state_dict'])

<All keys matched successfully>

In [16]:
def run_inference_on_random_example():
    kaggle_df = pd.read_csv('/kaggle/input/asl-demo/train_prepared.csv')
    randno = np.random.randint(len(kaggle_df))
    valid_df = kaggle_df[kaggle_df.index==randno].reset_index(drop=True)
    valid_dataset = SignDataset(valid_df,)
    print("Actual label: ",valid_df.iloc[0]['label'])
    valid_loader = DataLoader(
        valid_dataset,
        sampler = SequentialSampler(valid_dataset),
        batch_size  = 1,
        drop_last   = False,
        num_workers = 2,
        pin_memory  = False,
        collate_fn = null_collate,
    )
    
    for t, batch in enumerate(valid_loader):
        net.output_type = ['inference']
        with torch.no_grad():
            with torch.cuda.amp.autocast(enabled = True):
                output = net(batch)
                top_values, top_indices = torch.topk(output['sign'].detach().cpu(), k=5)
                print(top_indices)
                print(top_values)

if __name__=="__main__":
    run_inference_on_random_example()

Actual label:  53
tensor([[ 53, 192, 180,  74, 184]])
tensor([[0.7807, 0.0864, 0.0636, 0.0144, 0.0139]])


#### **Inference on a single file (dataframe)**

In [17]:
class InferenceDataset(Dataset):
    def __init__(self, csv_path):
        self.csv_path = csv_path
        self.length = 1  # Set length to 1 for inference on a single file

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        xyz = load_relevant_data_subset(self.csv_path, type='csv')
        xyz = xyz - xyz[~np.isnan(xyz)].mean(0, keepdims=True)  # Normalization to common mean
        xyz = xyz / xyz[~np.isnan(xyz)].std(0, keepdims=True)
        xyz = torch.from_numpy(xyz).float()
        xyz = pre_process(xyz)

        r = {}
        r['xyz'] = xyz
        return r

In [18]:
sign_path = '/kaggle/input/blow-dataset/cat.csv'
pd.read_csv('/kaggle/input/blow-dataset/cat.csv').head()

Unnamed: 0,x,y,z
0,0.436362,0.356953,-0.048455
1,0.43071,0.321346,-0.08646
2,0.433086,0.333287,-0.047363
3,0.413351,0.289695,-0.062578
4,0.429143,0.310761,-0.091302


In [19]:
json_file_path = '/kaggle/input/asl-signs/sign_to_prediction_index_map.json'
with open(json_file_path, 'r') as json_file:
    sign2label = json.load(json_file)
    
vd = InferenceDataset(sign_path,)
print(vd)
valid_loader = DataLoader(
    vd,
    batch_size  = 1,
    shuffle=False,
    num_workers = 0,
)
for t, batch in enumerate(valid_loader):
    net.output_type = ['inference']
    with torch.no_grad():
        with torch.cuda.amp.autocast(enabled = True):
            output = net(batch)
            top_values, top_indices = torch.topk(output['sign'].detach().cpu(), k=5)
            print(top_indices)
            print(top_values)
            
            for value in top_indices[0]:
                corresponding_key = None
                for key, json_value in sign2label.items():
                    if json_value == value:
                        corresponding_key = key
                        break
                        
                print(f"Sign: {corresponding_key}")

<__main__.InferenceDataset object at 0x7b254a5f7700>
tensor([[ 38, 129,  78, 104, 249]])
tensor([[0.6334, 0.2779, 0.0526, 0.0098, 0.0057]])
Sign: cat
Sign: kitty
Sign: find
Sign: hair
Sign: zipper


In [20]:
from IPython.display import HTML

HTML("<h1 style='background-color: #FFEBBA; color: #1e1e1e; font-size: 32px; font-family: garamond; padding:12px 50%'>INCLUDE DATASET</h1>")

In [21]:
data = pd.read_csv('/kaggle/input/include-dataset/train-preprocessed.csv')
data.head()

Unnamed: 0,path,sign,category,label
0,/kaggle/input/include-dataset/islr-xyz/islr-xy...,I,Pronouns,19
1,/kaggle/input/include-dataset/islr-xyz/islr-xy...,I,Pronouns,19
2,/kaggle/input/include-dataset/islr-xyz/islr-xy...,I,Pronouns,19
3,/kaggle/input/include-dataset/islr-xyz/islr-xy...,I,Pronouns,19
4,/kaggle/input/include-dataset/islr-xyz/islr-xy...,I,Pronouns,19


In [22]:
def read_csvs_by_fold(fold=0):
    num_fold = 5
    data = pd.read_csv('/kaggle/input/include-dataset/train-preprocessed.csv')
    data.loc[:, 'fold' ] = -1
    sgkf = StratifiedGroupKFold(n_splits=num_fold, random_state=123, shuffle=True)
    for i, (train_index, valid_index) in enumerate(sgkf.split(data[['path', 'sign']], data.label, data.category)):
        data.loc[valid_index,'fold'] = i
    train_df = data[data.fold!=fold].reset_index(drop=True)
    valid_df = data[data.fold==fold].reset_index(drop=True)
    return train_df, valid_df

In [23]:
class IncludeDataset(Dataset):
    def __init__(self, df, augment=None):
        self.df = df
        self.augment = augment
        self.length = len(self.df)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        d = self.df.iloc[index]

        csv_file = d.path
        xyz = load_relevant_data_subset(csv_file, type="csv")
        xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
        xyz = xyz / xyz[~np.isnan(xyz)].std(0, keepdims=True)

        if self.augment is not None:
            xyz = self.augment(xyz)
        xyz = torch.from_numpy(xyz).float()
        xyz = pre_process(xyz)

        r = {}
        r['index'] = index
        r['d'    ] = d
        r['xyz'  ] = xyz
        r['label'] = d.label
        r['category'] = d.category
        return r

In [24]:
def run_check_dataset():
    train_df, valid_df = read_csvs_by_fold(fold=0)
    dataset = IncludeDataset(valid_df)

    loader = DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=8,
        drop_last=True,
        num_workers=0,
        pin_memory=False,
        worker_init_fn=lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn=null_collate,
    )
    for t, batch in enumerate(loader):
        if t > 0: break
        print(batch['index'])

if __name__ == '__main__':
    run_check_dataset()

[0, 1, 2, 3, 4, 5, 6, 7]


<h2 style="color:red; font-weight:600; font-family: badoni;">Dataloaders</h2>
<p style="color:green; font-weight:300; font-family:verdana;">Common mistake : 1. Not changing "SignDataset"->"IncludeDataset" 2. "net"->"net_plus</p>

In [25]:
from torch.optim.lr_scheduler import LambdaLR
fold     = 2
out_dir  = '/kaggle/input' + '/result/run10/transfomer-80-256-lip-hand-3a'
fold_dir = out_dir+ f'/fold-{fold}-{fold_type}'

start_lr = 5e-5 # 1e-4
batch_size = 64
num_epoch = 16
save_iter = 2

train_df, valid_df = read_csvs_by_fold(fold)
train_dataset = IncludeDataset(train_df,train_augment)
valid_dataset = IncludeDataset(valid_df,)

train_loader  = DataLoader(
        train_dataset,
        sampler = RandomSampler(train_dataset),
        batch_size  = batch_size,
        drop_last   = True,
        num_workers = 2,
        pin_memory  = False,
        worker_init_fn = lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id),
        collate_fn = null_collate,
    )

valid_loader = DataLoader(
        valid_dataset,
        sampler = SequentialSampler(valid_dataset),
        batch_size  = 64,
        drop_last   = False,
        num_workers = 2,
        pin_memory  = False,
        collate_fn = null_collate,
    )

net_plus = Net(num_class=64)
scaler = torch.cuda.amp.GradScaler(enabled = True) # To scale the extremely small gradients
net_plus.to(device)
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, net_plus.parameters()),lr=start_lr)
scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: epoch / num_epoch)

In [27]:
state_dict = torch.load("/kaggle/input/gislr-saved-models/00000038.model.pth", map_location=torch.device('cuda'))['state_dict']
state_dict = {k: v for k, v in state_dict.items() if not k.startswith('logit')}
net_plus.load_state_dict(state_dict, strict=False)

_IncompatibleKeys(missing_keys=['logit.weight', 'logit.bias'], unexpected_keys=[])

<h2 style="color:red; font-weight:600; font-family: badoni;">Load the model from check point</h2>

In [28]:
def load_checkpoint():
    start_epoch = 0
    initial_checkpoint = r"/kaggle/input/islr-self/lasilfjiogitoiet"
    f = torch.load(initial_checkpoint, map_location=lambda storage, loc: storage)
    start_epoch = f.get('epoch',0)+1
    state_dict = f['state_dict']
    net_plus.load_state_dict(state_dict,strict=False)
    optimizer.load_state_dict(f['optimizer_dict'])
    scaler.load_state_dict(f['scheduler_dict'])
    print(f"Starting from Epoch: {start_epoch}")

# if __name__=="main":
#     load_checkpoint()

<p style="color:green; font-weight:300; font-family:verdana; padding-left:20px; font-size:40px">Run train</p>

In [29]:
# Freezing the weights of the layers
for name, param in net_plus.named_parameters():
    if not name.startswith('logit'):
        param.requires_grad = False

In [30]:
start_epoch=0
def run_train():
    for i in tqdm(range(start_epoch , num_epoch+start_epoch)): # change to num_epochs
        net_plus.train()
        train_sign = []
        train_truth = []
        train_num = 0
        train_loss_sum = 0
        train_total = 0
        for t, batch in tqdm(enumerate(train_loader), total=len(train_loader)):
            rate = get_learning_rate(optimizer)

            with torch.cuda.amp.autocast(enabled = True):
                batch_size = len(batch['index'])
                batch['xyz'] = [xyz.cuda() for xyz in batch['xyz']]
                batch['label'] = batch['label'].cuda()
                net_plus.output_type = ['loss', 'inference']
                output = net_plus(batch)
                loss0  = output['label_loss'].mean()


            train_sign.append(output['sign'].detach().cpu().numpy())
            train_truth.append(batch['label'].detach().cpu().numpy())
            train_num += batch_size
            
            
            optimizer.zero_grad()
            scaler.scale(loss0).backward()

            scaler.step(optimizer)
            scaler.update()

            train_loss_sum += loss0.item()
            train_total += 1

        scheduler.step()
        train_truth = np.concatenate(train_truth)
        sign = np.concatenate(train_sign)
        topk_indices = np.argsort(sign, -1)[:,-5:]
        correct_topk = np.any(np.equal(topk_indices, train_truth.reshape(-1, 1)), axis=1)
        topk_accuracy_train = np.mean(correct_topk)

        val_loss_sum = 0
        val_total = 0
        valid_num = 0
        valid_sign=[]
        net_plus.eval()

        for t, batch in tqdm(enumerate(valid_loader),total=len(valid_loader)):

            net_plus.output_type = ['inference']
            with torch.no_grad():
                with torch.cuda.amp.autocast(enabled = True):
                    batch_size = len(batch['index'])
                    batch['xyz'] = [xyz.cuda() for xyz in batch['xyz']]
                    batch['label'] = batch['label'].cuda()
                    output = net_plus(batch)
                    valid_loss = np_cross_entropy(output['sign'].detach().cpu(), batch['label'].detach().cpu())
                    val_loss_sum += valid_loss
                    val_total +=1
                valid_sign.append(output['sign'].detach().cpu().numpy())
                valid_num += batch_size

        valid_truth = valid_loader.dataset.df.label.values
        sign = np.concatenate(valid_sign)
        topk_indices = np.argsort(sign, -1)[:,-5:]
        correct_topk = np.any(np.equal(topk_indices, valid_truth.reshape(-1, 1)), axis=1)
        topk_accuracy_valid = np.mean(correct_topk)

        print(f"Epoch:{i} => Train Loss: {(train_loss_sum/train_total):.04f}, Train Acc: {topk_accuracy_train:0.04f}")
        print(f"Epoch:{i} => Val Loss: {(val_loss_sum/val_total):.04f}, Val Acc: {topk_accuracy_valid:0.04f}")
        print("="*50)

        if i%save_iter==0:
            if i != start_epoch:
                n = i
                torch.save({
                           'state_dict': net_plus.state_dict(),
                            'epoch': i,
                            'optimizer_dict': optimizer.state_dict(),
                            'scheduler_dict':scaler.state_dict(),
                            }, f'{n:08d}.model.pth')
        torch.cuda.empty_cache()
        
if __name__ == "__main__":
    run_train()

  0%|          | 0/16 [00:00<?, ?it/s]
  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:06<01:12,  6.56s/it][A
 25%|██▌       | 3/12 [00:08<00:21,  2.38s/it][A
 42%|████▏     | 5/12 [00:12<00:14,  2.12s/it][A
 58%|█████▊    | 7/12 [00:15<00:09,  1.94s/it][A
 75%|███████▌  | 9/12 [00:19<00:05,  1.86s/it][A
100%|██████████| 12/12 [00:22<00:00,  1.85s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:04<00:23,  4.76s/it][A
 50%|█████     | 3/6 [00:08<00:07,  2.47s/it][A
100%|██████████| 6/6 [00:10<00:00,  1.78s/it][A
  6%|▋         | 1/16 [00:32<08:14, 32.97s/it]

Epoch:0 => Train Loss: 4.8194, Train Acc: 0.0768
Epoch:0 => Val Loss: 4.7308, Val Acc: 0.0278



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:03<00:36,  3.33s/it][A
 25%|██▌       | 3/12 [00:05<00:16,  1.82s/it][A
 42%|████▏     | 5/12 [00:08<00:10,  1.52s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 58%|█████▊    | 7/12 [00:10<00:07,  1.50s/it][A
 75%|███████▌  | 9/12 [00:13<00:04,  1.38s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.32s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.17s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.76s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.21s/it][A
 12%|█▎        | 2/16 [00:56<06:21, 27.27s/it]

Epoch:1 => Train Loss: 4.7879, Train Acc: 0.0664
Epoch:1 => Val Loss: 4.7387, Val Acc: 0.0278



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:28,  2.59s/it][A
 17%|█▋        | 2/12 [00:02<00:11,  1.14s/it][A
 25%|██▌       | 3/12 [00:05<00:15,  1.70s/it][A
 33%|███▎      | 4/12 [00:05<00:08,  1.11s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.54s/it][A
 50%|█████     | 6/12 [00:07<00:06,  1.09s/it][A
 58%|█████▊    | 7/12 [00:09<00:07,  1.44s/it][A
 67%|██████▋   | 8/12 [00:10<00:04,  1.06s/it][A
 75%|███████▌  | 9/12 [00:13<00:04,  1.61s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 92%|█████████▏| 11/12 [00:15<00:01,  1.49s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.32s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.18s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.69s/it][

Epoch:2 => Train Loss: 4.8019, Train Acc: 0.0716
Epoch:2 => Val Loss: 4.7544, Val Acc: 0.0278



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:27,  2.47s/it][A
 17%|█▋        | 2/12 [00:02<00:12,  1.26s/it][A
 25%|██▌       | 3/12 [00:05<00:16,  1.78s/it][A
 33%|███▎      | 4/12 [00:05<00:09,  1.21s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.54s/it][A
 50%|█████     | 6/12 [00:08<00:06,  1.11s/it][A
 58%|█████▊    | 7/12 [00:10<00:07,  1.44s/it][A
 67%|██████▋   | 8/12 [00:10<00:04,  1.03s/it][A
 75%|███████▌  | 9/12 [00:12<00:04,  1.41s/it][A
 83%|████████▎ | 10/12 [00:12<00:02,  1.04s/it][A
 92%|█████████▏| 11/12 [00:15<00:01,  1.41s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.27s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.13s/it][A
 50%|█████     | 3/6 [00:06<00:05,  1.95s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.30s/it][A
 25%|██▌       | 4/16 [01:42<04:54, 24.50s/it]

Epoch:3 => Train Loss: 4.8102, Train Acc: 0.0638
Epoch:3 => Val Loss: 4.7788, Val Acc: 0.0278



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:30,  2.75s/it][A
 17%|█▋        | 2/12 [00:02<00:12,  1.25s/it][A
 25%|██▌       | 3/12 [00:05<00:14,  1.66s/it][A
 33%|███▎      | 4/12 [00:05<00:08,  1.11s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.44s/it][A
 50%|█████     | 6/12 [00:07<00:06,  1.07s/it][A
 58%|█████▊    | 7/12 [00:09<00:07,  1.40s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 75%|███████▌  | 9/12 [00:12<00:04,  1.40s/it][A
 83%|████████▎ | 10/12 [00:12<00:02,  1.07s/it][A
 92%|█████████▏| 11/12 [00:14<00:01,  1.43s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.26s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.09s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.71s/it]

Epoch:4 => Train Loss: 4.7745, Train Acc: 0.0716
Epoch:4 => Val Loss: 4.8120, Val Acc: 0.0247



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:28,  2.55s/it][A
 25%|██▌       | 3/12 [00:05<00:14,  1.59s/it][A
 42%|████▏     | 5/12 [00:08<00:11,  1.60s/it][A
 58%|█████▊    | 7/12 [00:10<00:07,  1.46s/it][A
 75%|███████▌  | 9/12 [00:13<00:04,  1.40s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.31s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:16,  3.23s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.69s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.18s/it][A
 38%|███▊      | 6/16 [02:27<03:54, 23.47s/it]

Epoch:5 => Train Loss: 4.6993, Train Acc: 0.0677
Epoch:5 => Val Loss: 4.8539, Val Acc: 0.0216



  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

  8%|▊         | 1/12 [00:02<00:29,  2.69s/it][A
 25%|██▌       | 3/12 [00:05<00:14,  1.57s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.45s/it][A
 58%|█████▊    | 7/12 [00:10<00:06,  1.36s/it][A
 75%|███████▌  | 9/12 [00:12<00:03,  1.32s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.25s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:19,  3.85s/it][A
 50%|█████     | 3/6 [00:06<00:05,  1.88s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.30s/it][A
 44%|████▍     | 7/16 [02:50<03:29, 23.31s/it]

Epoch:6 => Train Loss: 4.6228, Train Acc: 0.0924
Epoch:6 => Val Loss: 4.9047, Val Acc: 0.0216



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:30,  2.81s/it][A
 25%|██▌       | 3/12 [00:05<00:14,  1.62s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 58%|█████▊    | 7/12 [00:10<00:07,  1.44s/it][A
 75%|███████▌  | 9/12 [00:12<00:04,  1.33s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.27s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:14,  3.00s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.67s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.20s/it][A
 50%|█████     | 8/16 [03:13<03:04, 23.08s/it]

Epoch:7 => Train Loss: 4.5647, Train Acc: 0.0938
Epoch:7 => Val Loss: 4.9644, Val Acc: 0.0216



  0%|          | 0/12 [00:00<?, ?it/s][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 25%|██▌       | 3/12 [00:05<00:15,  1.76s/it][A
 42%|████▏     | 5/12 [00:08<00:10,  1.46s/it][A
 50%|█████     | 6/12 [00:08<00:06,  1.11s/it][A
 58%|█████▊    | 7/12 [00:10<00:07,  1.50s/it][A
 67%|██████▋   | 8/12 [00:10<00:04,  1.14s/it][A
 75%|███████▌  | 9/12 [00:13<00:04,  1.44s/it][A
 83%|████████▎ | 10/12 [00:13<00:02,  1.12s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.31s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.12s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.76s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.21s/it][A
 56%|█████▋    | 9/16 [03:36<02:41, 23.10s/it]

Epoch:8 => Train Loss: 4.4763, Train Acc: 0.1003
Epoch:8 => Val Loss: 5.0325, Val Acc: 0.0185



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:26,  2.42s/it][A
 25%|██▌       | 3/12 [00:04<00:13,  1.49s/it][A
 42%|████▏     | 5/12 [00:07<00:09,  1.38s/it][A
 50%|█████     | 6/12 [00:07<00:06,  1.05s/it][A
 58%|█████▊    | 7/12 [00:09<00:07,  1.44s/it][A
 67%|██████▋   | 8/12 [00:10<00:04,  1.06s/it][A
 75%|███████▌  | 9/12 [00:12<00:04,  1.62s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

100%|██████████| 12/12 [00:15<00:00,  1.30s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.20s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.75s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.20s/it][A
 62%|██████▎   | 10/16 [03:59<02:18, 23.05s/it]

Epoch:9 => Train Loss: 4.4510, Train Acc: 0.1146
Epoch:9 => Val Loss: 5.1071, Val Acc: 0.0154



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:27,  2.46s/it][A
 17%|█▋        | 2/12 [00:02<00:10,  1.08s/it][A
 25%|██▌       | 3/12 [00:05<00:15,  1.70s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.43s/it][A
 50%|█████     | 6/12 [00:07<00:06,  1.05s/it][A
 58%|█████▊    | 7/12 [00:09<00:07,  1.44s/it][A
 67%|██████▋   | 8/12 [00:10<00:04,  1.08s/it][A
 75%|███████▌  | 9/12 [00:12<00:04,  1.38s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 92%|█████████▏| 11/12 [00:14<00:01,  1.33s/it][A
100%|██████████| 12/12 [00:14<00:00,  1.25s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.06s/it][A
 50%|█████     | 3/6 [00:06<00:05,  1.88s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.29s/it][A

Epoch:10 => Train Loss: 4.2759, Train Acc: 0.1354
Epoch:10 => Val Loss: 5.1904, Val Acc: 0.0123



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:28,  2.60s/it][A
 17%|█▋        | 2/12 [00:02<00:11,  1.14s/it][A
 25%|██▌       | 3/12 [00:04<00:14,  1.63s/it][A
 33%|███▎      | 4/12 [00:05<00:08,  1.07s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.49s/it][A
 50%|█████     | 6/12 [00:07<00:06,  1.05s/it][A
 58%|█████▊    | 7/12 [00:09<00:07,  1.45s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 75%|███████▌  | 9/12 [00:12<00:04,  1.47s/it][A
100%|██████████| 12/12 [00:14<00:00,  1.24s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.09s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.72s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.19s/it][A
 75%|███████▌  | 12/16 [04:44<01:30, 22.73s/it]

Epoch:11 => Train Loss: 4.2259, Train Acc: 0.1484
Epoch:11 => Val Loss: 5.2788, Val Acc: 0.0123



  0%|          | 0/12 [00:00<?, ?it/s][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 25%|██▌       | 3/12 [00:05<00:14,  1.58s/it][A
 33%|███▎      | 4/12 [00:05<00:08,  1.07s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.57s/it][A
 50%|█████     | 6/12 [00:07<00:06,  1.14s/it][A
 58%|█████▊    | 7/12 [00:10<00:08,  1.64s/it][A
 67%|██████▋   | 8/12 [00:11<00:04,  1.25s/it][A
 75%|███████▌  | 9/12 [00:12<00:04,  1.45s/it][A
 83%|████████▎ | 10/12 [00:13<00:02,  1.14s/it][A
 92%|█████████▏| 11/12 [00:15<00:01,  1.38s/it][A
100%|██████████| 12/12 [00:15<00:00,  1.30s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:16,  3.32s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.73s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.21s/it][

Epoch:12 => Train Loss: 4.2357, Train Acc: 0.1341
Epoch:12 => Val Loss: 5.3708, Val Acc: 0.0123



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:27,  2.50s/it][A
 25%|██▌       | 3/12 [00:04<00:13,  1.49s/it][A
 33%|███▎      | 4/12 [00:04<00:08,  1.02s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.48s/it][A
 50%|█████     | 6/12 [00:07<00:06,  1.06s/it][A
 58%|█████▊    | 7/12 [00:09<00:07,  1.49s/it][A
 75%|███████▌  | 9/12 [00:12<00:04,  1.37s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

100%|██████████| 12/12 [00:15<00:00,  1.25s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:19,  3.94s/it][A
 50%|█████     | 3/6 [00:06<00:05,  1.92s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.31s/it][A
 88%|████████▊ | 14/16 [05:30<00:45, 22.89s/it]

Epoch:13 => Train Loss: 4.0973, Train Acc: 0.1836
Epoch:13 => Val Loss: 5.4671, Val Acc: 0.0123



  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

  8%|▊         | 1/12 [00:02<00:27,  2.46s/it][A
 17%|█▋        | 2/12 [00:02<00:11,  1.15s/it][A
 25%|██▌       | 3/12 [00:04<00:14,  1.60s/it][A
 33%|███▎      | 4/12 [00:04<00:08,  1.01s/it][A
 42%|████▏     | 5/12 [00:07<00:10,  1.55s/it][A
 58%|█████▊    | 7/12 [00:09<00:06,  1.34s/it][A
 67%|██████▋   | 8/12 [00:10<00:04,  1.07s/it][A
 75%|███████▌  | 9/12 [00:12<00:04,  1.40s/it][A
 83%|████████▎ | 10/12 [00:12<00:02,  1.07s/it][A
100%|██████████| 12/12 [00:14<00:00,  1.24s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.18s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.68s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.17s/it][A
 94%|█████████▍| 15/16 [05:52<00:22, 22.6

Epoch:14 => Train Loss: 4.0094, Train Acc: 0.2044
Epoch:14 => Val Loss: 5.5700, Val Acc: 0.0031



  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:02<00:28,  2.60s/it][A
 17%|█▋        | 2/12 [00:02<00:11,  1.14s/it][A
  xyz = xyz - xyz[~np.isnan(xyz)].mean(0,keepdims=True) #noramlisation to common maen
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = um.true_divide(

 42%|████▏     | 5/12 [00:08<00:10,  1.55s/it][A
 50%|█████     | 6/12 [00:08<00:07,  1.27s/it][A
 58%|█████▊    | 7/12 [00:10<00:07,  1.51s/it][A
 75%|███████▌  | 9/12 [00:13<00:04,  1.44s/it][A
 83%|████████▎ | 10/12 [00:13<00:02,  1.13s/it][A
 92%|█████████▏| 11/12 [00:15<00:01,  1.40s/it][A
100%|██████████| 12/12 [00:16<00:00,  1.35s/it][A

  0%|          | 0/6 [00:00<?, ?it/s][A
 17%|█▋        | 1/6 [00:03<00:15,  3.08s/it][A
 50%|█████     | 3/6 [00:05<00:05,  1.70s/it][A
100%|██████████| 6/6 [00:07<00:00,  1.18s/it][A
100%|██████████| 16/16 [06:16<00:00, 23.50s/it]

Epoch:15 => Train Loss: 3.9287, Train Acc: 0.2031
Epoch:15 => Val Loss: 5.6739, Val Acc: 0.0031





In [32]:
net_plus.load_state_dict(torch.load('/kaggle/working/00000014.model.pth', map_location= torch.device('cuda'))['state_dict'])

<All keys matched successfully>

<p style="color:red; font-weight:600; font-size:24px;">Inference on one example from validation set</p>
<p style="padding-left:60px">> Modify collate function, or take just one example from the dataset and set up inference</p>

In [None]:
def inf_null_collate(batch):
    batch_size = len(batch)
    d = {}
    key = batch.keys()
    for k in key:
        d[k] = [b[k] for b in batch]
    d['label'] = torch.LongTensor(d['label'])
    return d

for t, batch in tqdm(enumerate(valid_loader), total=len(valid_loader)):
        print(batch)

In [None]:
def ft_run_inference_on_random_example():
    kaggle_df = pd.read_csv('/kaggle/input/include-dataset/train-preprocessed.csv')
    randno = np.random.randint(len(kaggle_df))
    valid_df = kaggle_df[kaggle_df.index==randno].reset_index(drop=True)
    valid_dataset = IncludeDataset(valid_df,)
    print("Actual label: ",valid_df.iloc[0]['label'])
    valid_loader = DataLoader(
        valid_dataset,
        sampler = SequentialSampler(valid_dataset),
        batch_size  = 1,
        drop_last   = False,
        num_workers = 0,
        pin_memory  = False,
        collate_fn = null_collate,
    )
    
    for t, batch in enumerate(valid_loader):
        net_plus.output_type = ['inference']
        with torch.no_grad():
            with torch.cuda.amp.autocast(enabled = True):
                output = net_plus(batch)
                top_values, top_indices = torch.topk(output['sign'].detach().cpu(), k=5)
                print(top_indices)
                print(top_values)

if __name__=="__main__":
    ft_run_inference_on_random_example()