In [1]:
import warnings
warnings.simplefilter(action='ignore')

import glob
import json
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics
import tensorflow as tf
import tensorflow_io as tfio
import tqdm
from sklearn.model_selection import train_test_split

In [2]:
def seed_it_all(seed=7):
    """Seed the random number generators used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds Python's
    ``random`` module, NumPy's global generator and TensorFlow's global
    generator with the same value.

    Args:
        seed (int): Value passed to every seeding call.
    """
    # NOTE: PYTHONHASHSEED is read by the interpreter at start-up, so setting it
    # here only influences processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)


# Seed once, before any data splitting or shuffling happens.
seed_it_all()

In [3]:
class CFG:
    """Constants shared by the data loaders and the evaluation cells."""

    # Number of clips fed to the model per batch.
    batch_size = 32
    # Number of frames every clip is resized to before batching.
    sequence_length = 20
    # Rows per frame for a single hand; the same value is used for both hands.
    left_ROWS_per_frame = 21

# Sign name -> integer class index. A context manager is used so the file
# handle is closed as soon as the JSON has been parsed.
with open('sign_to_prediction_index_map.json', 'r') as labels_file:
    labels = json.load(labels_file)

complete_df = pd.read_csv('train.csv')
extended_df = pd.read_csv('extended_train.csv')

# Hold out 20% of the extended set. No random_state is passed, so the split
# draws from NumPy's global generator (seeded earlier by seed_it_all()).
train_df, test_df = train_test_split(extended_df, test_size=0.2)

In [4]:
def build_loader(with_labels=True):
    """Build a function that loads one landmark parquet file as hand tensors.

    The file is read with pandas. ``tfio.IODataset.from_parquet`` returns a
    dataset object rather than a DataFrame, so it cannot be filtered with the
    boolean-mask / column indexing used below.

    Args:
        with_labels (bool): When True the returned callable takes
            ``(path, label)`` and also returns ``labels[label]``; otherwise it
            takes only the path.

    Returns:
        callable: ``load_video_with_labels`` if ``with_labels`` else
        ``load_video``.
    """
    def extract_hand(video_df, hand_type):
        # Keep only this hand's x/y/z values, regroup them into
        # (frames, rows_per_frame, 3) and resize to a fixed number of frames.
        hand_df = video_df[video_df.type == hand_type]
        hand_values = hand_df[['x', 'y', 'z']].values
        hand_values = hand_values.reshape(-1, CFG.left_ROWS_per_frame, 3)
        return tf.image.resize(hand_values, (CFG.sequence_length, CFG.left_ROWS_per_frame))

    def load_video(video_path):
        """Return ``[left_hand_array, right_hand_array]`` for one parquet file."""
        video_df = pd.read_parquet(video_path, engine='pyarrow')
        # Missing coordinates become 0 so they do not propagate as NaN through
        # the resize. NOTE(review): assumes undetected hands are stored as NaN
        # rows rather than omitted — confirm against the data.
        video_df = video_df.fillna(0)
        return [extract_hand(video_df, 'left_hand'), extract_hand(video_df, 'right_hand')]

    def load_video_with_labels(path, label):
        """Return ``(hands, class_index)`` using the module-level ``labels`` map."""
        return load_video(path), labels[label]

    return load_video_with_labels if with_labels else load_video

In [5]:
class CustomData(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of normalised hand-landmark clips.

    Each item is ``([left_hand_batch, right_hand_batch], label_indices)``. Both
    hand batches have shape
    ``(batch, num_frames, CFG.left_ROWS_per_frame, 2)`` and hold x/y values.

    Args:
        df: DataFrame with a ``path`` column (one parquet file per clip) and a
            ``sign`` column.
        num_frames (int): Number of frames every clip is resized to.
        batch_size (int): Clips per batch; a trailing partial batch is dropped.
        shuffle (bool): Shuffle the row order at construction and after every
            epoch.
        labels_path (str): JSON file mapping sign name to integer class index.
    """

    def __init__(self, df, num_frames=20, batch_size=8, shuffle=True,
                 labels_path='sign_to_prediction_index_map.json'):
        # Newer Keras versions expect the base initialiser to run; it is a
        # no-op on versions where Sequence defines no __init__.
        super().__init__()
        self.df = df
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_frames = num_frames
        # Honour labels_path (it used to be ignored) and close the file promptly.
        with open(labels_path, 'r') as labels_file:
            self.labels = json.load(labels_file)
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild the row order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        """Return batch ``index`` as ``([left, right], labels)``."""
        batches = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_shape = (len(batches), self.num_frames, CFG.left_ROWS_per_frame, 2)
        left_hand_input = np.zeros(shape=batch_shape)
        right_hand_input = np.zeros(shape=batch_shape)
        labels = []
        for i, row_val in enumerate(batches):
            row = self.df.iloc[row_val]
            left_hand, right_hand = self.load_video(row['path'])
            left_hand_input[i, :] = left_hand
            right_hand_input[i, :] = right_hand
            labels.append(self.labels[row['sign']])
        return [left_hand_input, right_hand_input], np.asarray(labels)

    def _load_hand(self, video_df, hand_type):
        """Return one hand's x/y values as a ``(num_frames, rows, 2)`` tensor."""
        hand_df = video_df[video_df.type == hand_type]
        coords = hand_df[['x', 'y']].values
        coords = coords.reshape(-1, CFG.left_ROWS_per_frame, 2)
        if len(coords) == 0:
            # Hand absent from the whole clip: feed zeros.
            return tf.zeros(shape=(self.num_frames, CFG.left_ROWS_per_frame, 2), dtype=tf.float32)
        # Min-max scale x and y independently over every frame and row.
        lowest = coords.min(axis=(0, 1))
        span = coords.max(axis=(0, 1)) - lowest
        # A constant coordinate would divide by zero; scale it to 0 instead.
        span[span == 0] = 1
        coords = (coords - lowest) / span
        # Resize along the frame axis to the length the batch arrays expect.
        return tf.image.resize(coords, (self.num_frames, CFG.left_ROWS_per_frame))

    def load_video(self, video_path):
        """Load one parquet file and return ``(left_hand_array, right_hand_array)``.

        Rows containing NaN are dropped before the per-hand arrays are built.
        """
        video_df = pd.read_parquet(video_path, engine='pyarrow')
        video_df = video_df.dropna()
        left_hand_array = self._load_hand(video_df, 'left_hand')
        right_hand_array = self._load_hand(video_df, 'right_hand')
        return left_hand_array, right_hand_array

    def __len__(self):
        """Number of full batches per epoch."""
        return len(self.df) // self.batch_size

In [6]:
def read_json_file(file_path):
    """Load the JSON document stored at ``file_path``.

    Args:
        file_path (str): Location of the JSON file.

    Returns:
        dict: The parsed JSON content.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
        ValueError: If the file content cannot be parsed as JSON.
    """
    try:
        with open(file_path, 'r') as handle:
            parsed = json.loads(handle.read())
    except FileNotFoundError:
        # Re-raise with a message that names the offending path.
        raise FileNotFoundError(f"File not found: {file_path}")
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        raise ValueError(f"Invalid JSON data in file: {file_path}")
    else:
        return parsed
# Lookup tables between sign names and prediction indices. `s2p_map` was
# referenced by `encoder` but never defined; it is built here with lower-cased
# keys so that it matches the `x.lower()` lookup.
_sign_to_index = read_json_file("sign_to_prediction_index_map.json")
s2p_map = {k.lower(): v for k, v in _sign_to_index.items()}
p2s_map = {v: k for k, v in _sign_to_index.items()}


def encoder(x):
    """Return the prediction index for sign name ``x`` (case-insensitive), or None."""
    return s2p_map.get(x.lower())


def decoder(x):
    """Return the sign name for prediction index ``x``, or None if unknown."""
    return p2s_map.get(x)

In [7]:
# Load the saved Keras model from its HDF5 checkpoint; later cells use `model`
# for prediction and further training.
model = tf.keras.models.load_model('models/030923_00_26.h5')

In [8]:
# Add an empty `pred` column to both frames (every row starts as None).
extended_df['pred'] = None
test_df['pred'] = None

In [9]:
# Add an empty `topk` column; the evaluation loop writes True/False into it.
extended_df['topk'] = None

In [10]:
def topk(label, preds, k=3):
    """Tell whether ``label`` is among the ``k`` highest-scoring signs.

    Args:
        label: Expected sign name.
        preds: 1-D array of per-class scores for one sample.
        k (int): How many of the best-scoring classes to consider.

    Returns:
        bool: True when ``label`` is one of the decoded top-``k`` signs.
    """
    # argsort is ascending, so reverse it to get the best classes first.
    best_first = np.argsort(preds)[::-1]
    candidates = [decoder(class_index) for class_index in best_first[:k]]
    return label in candidates

In [12]:
# Score every clip in `extended_df` and record, per row, the top-1 predicted
# sign (`pred`) and whether the true sign is among the top-k predictions (`topk`).
#
# A dedicated loader is built here so that this cell does not depend on
# `train_datagen`, which is only created in a later cell.
eval_loader = CustomData(extended_df, num_frames=CFG.sequence_length,
                         batch_size=CFG.batch_size, shuffle=False)

topk_hits = []
top1_signs = []
# Step over the whole frame, including the final partial batch, so that no row
# is left without a prediction.
for start in tqdm.tqdm(range(0, len(extended_df), CFG.batch_size)):
    batched_df = extended_df.iloc[start:start + CFG.batch_size]
    batch_shape = (len(batched_df), CFG.sequence_length, CFG.left_ROWS_per_frame, 2)
    left_hand_input = np.zeros(shape=batch_shape)
    right_hand_input = np.zeros(shape=batch_shape)
    for index, path in enumerate(batched_df.path.to_list()):
        # Stored paths carry the Kaggle input prefix; strip it to read locally.
        left_hand_array, right_hand_array = eval_loader.load_video(
            path.replace('/kaggle/input/asl-signs/', ''))
        left_hand_input[index, :] = left_hand_array
        right_hand_input[index, :] = right_hand_array
    preds = model.predict([left_hand_input, right_hand_input], verbose=0)
    topk_hits.extend(topk(sign, scores)
                     for sign, scores in zip(batched_df.sign.to_list(), preds))
    top1_signs.extend(decoder(class_index) for class_index in preds.argmax(axis=1))

# Assign whole columns at once instead of chained `.topk.loc[...] = ...`, which
# relies on label positions and may write to a temporary copy.
extended_df['topk'] = topk_hits
extended_df['pred'] = top1_signs

100%|██████████| 2952/2952 [21:16<00:00,  2.31it/s]


In [21]:
# Per-sign count of rows whose label differs from `pred` and whose `topk` flag
# is False.
extended_df[(extended_df.sign != extended_df.pred) & (extended_df.topk == False)].sign.value_counts()

beside    43
there     38
before    38
fast      37
give      35
          ..
aunt       4
uncle      4
gum        3
clown      3
flower     2
Name: sign, Length: 250, dtype: int64

In [15]:
# Share of all rows whose `topk` flag is True (rows without a flag count as misses).
int((extended_df.topk == True).sum()) / len(extended_df)

0.9565502714946494

In [11]:
# Batch generators over the train and held-out splits, both using the frame
# count and batch size from CFG (shuffle is left at its default of True).
train_datagen = CustomData(train_df,num_frames=CFG.sequence_length,batch_size=CFG.batch_size)
test_datagen = CustomData(test_df,num_frames=CFG.sequence_length,batch_size=CFG.batch_size)

In [24]:
# Keep only rows with no missing value in any column.
new_df = extended_df.dropna()

In [48]:
# Confusion matrix of true vs. predicted sign over the rows kept in `new_df`.
# The row/column order follows the order in which signs first appear in
# `extended_df`. `sklearn.metrics` is imported in the notebook's import cell.
cd = sklearn.metrics.confusion_matrix(
    new_df.sign.to_list(),
    new_df.pred.to_list(),
    labels=extended_df.sign.unique(),
)

In [10]:
# Checkpoint path: the best model is written here during training and read
# back once training has finished.
file_name = "models/030923_00_26.h5"

callbacks = [
    # Save the model only when validation accuracy reaches a new maximum.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=file_name,
        monitor="val_accuracy",
        mode="max",
        save_best_only=True,
        verbose=1,
    ),
    # Scale the learning rate by 0.1 after 3 epochs without a validation
    # accuracy improvement, never going below 1e-6.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_accuracy',
        mode='max',
        factor=0.1,
        patience=3,
        min_lr=1e-6,
        verbose=1,
    ),
]

model.fit(
    train_datagen,
    validation_data=test_datagen,
    epochs=30,
    callbacks=callbacks,
)

# Continue with the best checkpoint rather than the weights of the last epoch.
model = tf.keras.models.load_model(file_name)

Epoch 1/30
Epoch 1: val_accuracy improved from -inf to 0.38692, saving model to models/030923_00_26.h5
Epoch 2/30
Epoch 2: val_accuracy improved from 0.38692 to 0.46944, saving model to models/030923_00_26.h5
Epoch 3/30
Epoch 3: val_accuracy improved from 0.46944 to 0.51404, saving model to models/030923_00_26.h5
Epoch 4/30
Epoch 4: val_accuracy improved from 0.51404 to 0.54386, saving model to models/030923_00_26.h5
Epoch 5/30
Epoch 5: val_accuracy improved from 0.54386 to 0.56112, saving model to models/030923_00_26.h5
Epoch 6/30
Epoch 6: val_accuracy improved from 0.56112 to 0.57436, saving model to models/030923_00_26.h5
Epoch 7/30
Epoch 7: val_accuracy improved from 0.57436 to 0.59248, saving model to models/030923_00_26.h5
Epoch 8/30
Epoch 8: val_accuracy improved from 0.59248 to 0.59576, saving model to models/030923_00_26.h5
Epoch 9/30
Epoch 9: val_accuracy improved from 0.59576 to 0.60201, saving model to models/030923_00_26.h5
Epoch 10/30
Epoch 10: val_accuracy improved from 

Epoch 25/30
Epoch 25: val_accuracy did not improve from 0.63437

Epoch 25: ReduceLROnPlateau reducing learning rate to 3.330000035930425e-05.
Epoch 26/30
Epoch 26: val_accuracy improved from 0.63437 to 0.65095, saving model to models/030923_00_26.h5
Epoch 27/30
Epoch 27: val_accuracy improved from 0.65095 to 0.65445, saving model to models/030923_00_26.h5
Epoch 28/30
Epoch 28: val_accuracy improved from 0.65445 to 0.65546, saving model to models/030923_00_26.h5
Epoch 29/30
Epoch 29: val_accuracy improved from 0.65546 to 0.65720, saving model to models/030923_00_26.h5
Epoch 30/30
Epoch 30: val_accuracy did not improve from 0.65720


In [11]:
# Write the current `model` to a separate HDF5 file.
model.save('models/030523_00_09.h5')