# First text experimental model

This notebook contains a first attempt to create a model for the 

In [1]:
import sys
sys.path.insert(0, '../src')

In [19]:
import torch
import torch.nn as nn
import os
import json
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import pyarrow.parquet as pq
import numpy as np
import torch.nn.functional as F
from tqdm import tqdm
import pytorch_lightning as pl
from sklearn import *
from torchmetrics.classification import accuracy

from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

from config import *

Now that we have a seperate class for the data, we can just load it

In [3]:
COLUMNS_TO_USE = ["x","y"]

In [4]:
class ASL_DATSET(Dataset):
    def __init__(self, transform=None, max_seq_length=MAX_SEQUENCES,):
        super().__init__()
        
        self.transform = transform
        
        
        #[TODO] get this from data
        self.max_seq_length = max_seq_length
        
        self.n_features =  HAND_FEATURES *2 + 12 + len(FACE_INDICES)
        
        self.total_length = self.max_seq_length * self.n_features
        self.load_data()
        
    def load_data(self):
        
        # Load Processed data
        self.df_train = pd.read_csv(os.path.join(ROOT_PATH,RAW_DATA_DIR,"train.csv"))
        self.label_dict =  json.load(open(os.path.join(ROOT_PATH,RAW_DATA_DIR,MAP_JSON_FILE)))
        
        # Generate Absolute path to locate landmark files
        self.file_paths = np.array([os.path.join(ROOT_PATH,RAW_DATA_DIR,x) for x in self.df_train["path"].values])
        self.labels = self.df_train.sign.map(self.label_dict).values
        
        # Store individual metadata lists
        # [TODO] Cleanup unnecessary files, do we need these?
        self.participant_ids = self.df_train["participant_id"].values
        self.sequence_ids = self.df_train["sequence_id"].values
        
    def __len__(self):
        return len(self.df_train)
    
    def __getitem__(self, idx):
        
        if torch.is_tensor(idx):
            idx = idx.item()
        
        # Get the processed data for the single index
        landmark_path = self.file_paths[idx]
        target = self.labels[idx]
        
        # Read in the processed file
        df_in = pd.read_parquet(landmark_path).fillna(0)
        
        #get number of frames
        n_frames = df_in.frame.nunique()

        
        #select the landmarks
        landmarks = df_in.loc[(
                    ((df_in.type == "pose")&(df_in.landmark_index.isin(list(range(11,23)))))|

                   ( (df_in.type == "face")&(df_in.landmark_index.isin(FACE_INDICES)))|
                    ((df_in.type == "right_hand"))|
                    ((df_in.type == "left_hand"))
                ),COLUMNS_TO_USE
        ].values

         

        #print(n_frames)
        #pad or crop series to max_seq_length
        if n_frames <= self.max_seq_length:
            landmarks = np.append(landmarks,np.zeros(((MAX_SEQUENCES-n_frames)*self.n_features,2)),axis = 0)
        else:
            #crop
            landmarks = landmarks[:self.total_length,:]

        
        
        
        
        #landmark_file = torch.load(landmark_file)

        
        # Get the processed landmarks and target for the data
        # landmarks = landmark_file['landmarks']
        # target = landmark_file['target']
        # size = landmark_file['size']
        
        # Pad the landmark data
        # pad_len = max(0, self.max_seq_length - len(landmarks))
        # landmarks = landmarks + [[0]*len(landmarks[0])] * pad_len
        
        # if self.transform:
         #    sample = self.transform(landmarks)
            
        #create tensor
        lm = torch.from_numpy(landmarks).reshape(self.max_seq_length,self.n_features*2)
      
        
        return {'landmarks': lm, 'target': torch.Tensor([target])}
    
    def __repr__(self):
        return f'ASL_DATSET(Participants: {len(set(self.participant_ids))}, Length: {len(self.df_train)}, Number of Features: {self.n_features}, " Number of Frames: {self.max_seq_length}"'

In [5]:
dL = DataLoader(ASL_DATSET(),shuffle = True,batch_size = 16)

In [6]:
next(iter(dL))["landmarks"].shape

torch.Size([16, 537, 188])

In [7]:
dataset = ASL_DATSET()

In [8]:
dataset[500]

{'landmarks': tensor([[0.4768, 0.4567, 0.4794,  ..., 0.0000, 0.0000, 0.0000],
         [0.4794, 0.4544, 0.4819,  ..., 0.0000, 0.0000, 0.0000],
         [0.4795, 0.4549, 0.4821,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
        dtype=torch.float64),
 'target': tensor([50.])}

In [9]:
pd.set_option("display.max_rows", 543)

In [10]:
next(iter(dataset))

{'landmarks': tensor([[0.4944, 0.3805, 0.4983,  ..., 0.3996, 0.3858, 0.4011],
         [0.5012, 0.3806, 0.5033,  ..., 0.3803, 0.4164, 0.3829],
         [0.4985, 0.3795, 0.5016,  ..., 0.3777, 0.4391, 0.3799],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
        dtype=torch.float64),
 'target': tensor([25.])}

In [11]:
MAX_SEQUENCES = 150
dataset = DataLoader(ASL_DATSET(),shuffle = True,batch_size = 32)
dataset

<torch.utils.data.dataloader.DataLoader at 0x2e6270e11f0>

In [12]:
%%timeit
sample = next(iter(dataset))

RuntimeError: shape '[537, 188]' is invalid for input of size 28200

In [None]:
sample = next(iter(dataset))

In [None]:
sample["landmarks"].shape

In [15]:
28200/537

52.513966480446925

In [None]:
sample["landmarks"][::2,0].shape



# Model

In [None]:
class ASLDataModule(pl.LightningDataModule):
    def __init__(self,batch_size = 16,num_workers = 0):
        self.batch_size = batch_size
        self.num_workers = num_workers

        
      
        
    def prepare_data(self):
        pass
    
    
    def setup(self, stage=None):
        self.train_dataset = ASL_DATSET()
        
    
    def train_dataloader(self):   
        
        train_loader = DataLoader(self.train_dataset, 
                                  batch_size = self.batch_size, 
                                  shuffle = True, 
                                  num_workers = self.num_workers)
        
        return train_loader
    
    def val_dataloader(self):
        return None
        


In [None]:
dM = ASLDataModule()
dM.setup()
train_loader = dM.train_dataloader()

In [None]:
sample = next(iter(train_loader))
sample["landmarks"].shape

In [None]:

class LSTM_Model(nn.Module):
    def __init__(self,n_features, n_classes = 250,n_hidden = 256 ,num_layers =3):
        super().__init__()
        
        self.lstm = nn.LSTM(
            input_size = n_features,
            hidden_size = n_hidden,
            num_layers = num_layers,
            batch_first = True,
            dropout = .3)
        
        self.fc = nn.Linear(n_features,n_classes)
        
    def forward(self,x):
        
        self.lstm.flatten_parameters()
        _, (hidden,_) = self.lstm(x)
        
        out = hidden[-1]
        
        return self.fc(out)
        



In [None]:
class LSTM_Predictor(pl.LightningModule):
    def __init__(self, 
                 n_features: int, 
                 n_classes:int = 250, 
                 num_layers:int = 3):
        super().__init__()
        
        
        self.model = LSTM_Model(n_features = n_features,
                                n_classes = n_classes, 
                                num_layers = num_layers)
        #Define criterion
        self.criterion = nn.CrossEntropyLoss()
        
        
                
        self.accuracy = accuracy.Accuracy(task = "multiclass",
                                          num_classes=n_classes
                                     )
        
    def forward(self,x,labels):
        y_hat = self.model(x)
        loss = 0
        if labels is not None:
            loss = self.criterion(y_hat,labels)
        return loss, y_hat
    
    def training_step(self, batch, batch_idx):
        landmarks = batch["landmarks"]
        labels = batch["target"]
        
        #forward pass through the model
        loss, out = self(landmarks,labels)
        y_hat = torch.argmax(out,dim = 1)
        step_accuracy = self.accuracy(y_hat,labels)
        
        
        self.log("train_loss", loss, prog_bar = True, logger = True)
        self.log("train_accuracy", step_accuracy, prog_bar = True, logger = True)
        return {"loss":loss, "train_accuracy":step_accuracy}
        
        
    def validation_step(self,batch,batch_idx):
        pass
        
        
    def configure_optimizers(self,):
        return torch.optim.Adam(self.parameters(), lr = 0.0001)
        
        


In [None]:
#get the model
model = LSTM_Predictor(n_features=188,num_layers = 3)

In [None]:
%load_ext tensorboard
%tensorboard --logdir ./../checkpoints

In [18]:
checkpoint_callback = ModelCheckpoint(
        dirpath = os.path.join(ROOT_PATH,"checkpoints"),
        filename = "best_checkpoint",
    save_top_k = 1,
    monitor = "train_loss",
    verbose = True,
    mode = "min"
        )

In [20]:
tb_logger = TensorBoardLogger(save_dir = os.path.join(ROOT_PATH,"checkpoints"),
                              name = "lightning_logs"
                         )

In [21]:
trainer = pl.Trainer(accelerator = "gpu",
                     logger = tb_logger,
                     callbacks=[checkpoint_callback],
                     max_epochs=250,
                     
                )

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [22]:
trainer.fit(model = model,
            datamodule=dM
       )

NameError: name 'model' is not defined

In [None]:
landmark_path = dataset.file_paths[0]
        
COLUMNS_TO_USE = ["x","y"]


# Read in the processed file
df_in = pd.read_parquet(landmark_path).fillna(0)

#get number of frames
n_frames = df_in.frame.nunique()


landmarks = df_in.loc[df_in.landmark_index.isin(LANDMARK_INDICES)][COLUMNS_TO_USE].values
print(landmarks.shape)

#pad or crop series to max_seq_length
if n_frames <= dataset.max_seq_length:
    landmarks = np.append(landmarks,np.zeros(((MAX_SEQUENCES-n_frames+1)*len(LANDMARK_INDICES),2)),axis = 0)
else:
    #crop
    landmarks = landmarks[:dataset.total_length,:]
landmarks.shape




In [None]:
df_in.loc[df_in.frame == 20];

In [None]:
#%%timeit
#selection of landmarks
dataset = ASL_DATSET()
dataset


In [None]:
len(FACE_INDICES) + HAND_FEATURES *2 + 12

In [None]:
len(LANDMARK_INDICES) * MAX_SEQUENCES

In [None]:
len(set(LANDMARK_INDICES))

In [None]:
39201 - (MAX_SEQUENCES - n_frames)*len(LANDMARK_INDICES) + 1196

In [None]:
next(iter(dataset))

In [None]:
n_frames = df_in.frame.nunique()
n_frames
np.zeros(((MAX_SEQUENCES-n_frames)*len(LANDMARK_INDICES),2)).shape

In [None]:
MAX_SEQUENCES * (len(LANDMARK_INDICES))

In [None]:
df_in.loc[df_in.frame == 20];

In [None]:
LANDMARK_INDICES
;

In [None]:
# check the dataloader

Get dataloader:

In [None]:
dm = ASLDataModule(batch_size = 16)
dm.setup()