In [1]:
import warnings
warnings.simplefilter(action='ignore')
from focal_loss import SparseCategoricalFocalLoss
import pandas as pd
import tensorflow as tf
import glob
import math
import matplotlib.pyplot as plt
import json
import numpy as np
import random
from tqdm import tqdm
import tensorflow_addons as tfa
import os
import tensorflow_io as tfio

In [2]:
def seed_it_all(seed=7):
    """ Attempt to be Reproducible """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

seed_it_all()

In [3]:
ROWS_PER_FRAME = 543
def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

#test_data  = load_relevant_data_subset('train_landmark_files/16069/100015657.parquet')

In [4]:
LIP = [
            61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
            291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
            78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
            95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]


In [12]:
class CFG:
    left_ROWS_per_frame = 21
    sequence_length = 20
    batch_size = 32
    face_ROWS_per_frame = 468
    lip_ROWS_per_frame = 40

labels  = json.load(open('sign_to_prediction_index_map.json','r'))
complete_df = pd.read_csv('extended_train.csv')
complete_df = complete_df[complete_df['participant_id']!='37055']
from sklearn.model_selection import train_test_split
y = complete_df['sign']
train_df, test_df = train_test_split(complete_df, test_size=0.2,stratify=y)

In [13]:
def build_loader(with_labels=True):
    def load_video(video_path):
        #print('herer')
        video_df = tfio.IODataset.from_parquet(video_path)
        #video_df = pd.read_parquet(video_path, engine='pyarrow')
        #video_df.fillna(0,inplace=True)
        left_df = video_df[video_df.type=='left_hand']
        left_values = left_df[['x','y','z']].values
        left_values = left_values.reshape(-1,CFG.left_ROWS_per_frame,3)
        left_hand_array =  tf.image.resize(left_values, (CFG.sequence_length, CFG.left_ROWS_per_frame))
        right_df = video_df[video_df.type=='right_hand']
        right_values = right_df[['x','y','z']].values
        right_values = right_values.reshape(-1,CFG.left_ROWS_per_frame,3)
        right_hand_array =  tf.image.resize(right_values, (CFG.sequence_length, CFG.left_ROWS_per_frame))
        return [left_hand_array, right_hand_array]
    
    def load_video_with_labels(path, label):
        return load_video(path), labels[label]
    
    return load_video_with_labels if with_labels else load_video

In [14]:
class CustomData(tf.keras.utils.Sequence):
    def __init__(self,df,num_frames=20,batch_size=8,shuffle=True,\
                 labels_path='sign_to_prediction_index_map.json'):
        self.df = df
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_frames = num_frames
        self.labels  = json.load(open('sign_to_prediction_index_map.json','r'))
        self.on_epoch_end()
        
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indexes)
    
    def __getitem__(self,index):
        batches = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        combined = np.zeros(shape=(self.batch_size,self.num_frames,\
                                        CFG.left_ROWS_per_frame+CFG.lip_ROWS_per_frame,3))
        labels = []
        for i,row_val in enumerate(batches):
            row = self.df.iloc[row_val]
            left_hand,lip = self.load_video(row['path'])
            combined[i,:,:21,:] = left_hand
            combined[i,:,21:,:] = lip
            labels.append(self.labels[row['sign']])
        return combined,np.asarray(labels)
            
    def load_video(self,video_path):
        video_df = pd.read_parquet(video_path, engine='pyarrow')
        #video_df.dropna(inplace=True)
        
        if video_df[video_df['type']=='left_hand']['x'].isna().mean() <= \
      video_df[video_df['type']=='right_hand']['x'].isna().mean():
            left_df = video_df[video_df.type=='left_hand']
            left_df.dropna(inplace=True)
            if len(left_df) != 0:
                left_values = left_df[['x','y','z']].values
                left_values = left_values.reshape(-1,CFG.left_ROWS_per_frame,3)
                left_hand_array =  tf.image.resize(left_values, (CFG.sequence_length, CFG.left_ROWS_per_frame))
            else:
                left_hand_array = tf.zeros(shape=(CFG.sequence_length, CFG.left_ROWS_per_frame,2),dtype=tf.float32)
        else:
            left_df = video_df[video_df.type=='right_hand']
            left_df.dropna(inplace=True)
            if len(left_df) != 0:
                left_values = left_df[['x','y','z']].values
                left_values = left_values.reshape(-1,CFG.left_ROWS_per_frame,3)
                left_values[:,:,:1] = np.max(left_values[:,:,:1]) - left_values[:,:,:1] 
                left_hand_array =  tf.image.resize(left_values, (CFG.sequence_length, CFG.left_ROWS_per_frame))
            else:
                left_hand_array = tf.zeros(shape=(CFG.sequence_length, CFG.left_ROWS_per_frame,3),dtype=tf.float32)
        
        face_df = video_df[video_df.type=='face']
        face_df.dropna(inplace=True)
        face_df = face_df[['x','y','z']].values
        face_df = face_df.reshape(-1,CFG.face_ROWS_per_frame,3)
        lip_values = face_df[:,LIP,:]
        if len(lip_values) != 0:
            lip_values_array =  tf.image.resize(lip_values, (CFG.sequence_length, CFG.lip_ROWS_per_frame))
        else:
            lip_values_array = tf.zeros(shape=(CFG.sequence_length, CFG.lip_ROWS_per_frame,3),dtype=tf.float32)
        
        return left_hand_array,lip_values_array
    
    def __len__(self):
        return len(self.df)//self.batch_size

In [15]:
train_datagen = CustomData(train_df,num_frames=CFG.sequence_length,batch_size=256)
test_datagen = CustomData(test_df,num_frames=CFG.sequence_length,batch_size=256)

In [37]:
import tensorflow as tf

# define the graph convolution layer
class GraphConv(tf.keras.layers.Layer):
    def __init__(self, units, activation=None, use_bias=True):
        super(GraphConv, self).__init__()
        self.units = units
        self.activation = activation
        self.use_bias = use_bias

    def build(self, input_shape):
        self.kernel = self.add_weight(name='kernel', shape=(input_shape[-1], self.units),
                                      initializer='glorot_uniform', trainable=True)
        if self.use_bias:
            self.bias = self.add_weight(name='bias', shape=(self.units,),
                                        initializer='zeros', trainable=True)

    def call(self, inputs, adj):
        x = tf.matmul(adj, inputs)
        x = tf.matmul(x, self.kernel)
        if self.use_bias:
            x = x + self.bias
        if self.activation is not None:
            x = self.activation(x)
        return x

# define the graph pooling layer
class GraphPool(tf.keras.layers.Layer):
    def __init__(self, activation=None):
        super(GraphPool, self).__init__()
        self.activation = activation

    def call(self, inputs, adj):
        x = tf.matmul(adj, inputs)
        if self.activation is not None:
            x = self.activation(x)
        return x
def build_gcn(input_shape, num_classes):
    inputs = tf.keras.layers.Input(shape=input_shape)
    x = tf.slice(inputs, [0,0,0,0], [-1,20, 61, 2])
    # adjacency matrix
    adj = tf.linalg.diag(tf.ones(shape=(input_shape[1],)))
    adj = tf.expand_dims(adj, axis=0)
    adj = tf.tile(adj, [input_shape[0], 1, 1])
    
    # first GCN layer
    x = GraphConv(units=64, activation=tf.nn.relu)(x, adj)
    
    # second GCN layer
    x = GraphConv(units=128, activation=tf.nn.relu)(x, adj)
    
    # graph pooling layer
    x = GraphPool(activation=tf.nn.relu)(x, adj)
    # Output layer
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(NUM_CLASSES, activation=tf.keras.activations.softmax)(x)
    outputs = x
    # compile the model
    model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [33]:
model = build_gcn((20,61,2),250)

In [34]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 20, 61, 2)]       0         
                                                                 
 tf.slice_3 (TFOpLambda)     (None, 20, 61, 2)         0         
                                                                 
 graph_conv_6 (GraphConv)    (None, 20, 61, 64)        192       
                                                                 
 graph_conv_7 (GraphConv)    (None, 20, 61, 128)       8320      
                                                                 
 graph_pool_3 (GraphPool)    (None, 20, 61, 128)       0         
                                                                 
 global_average_pooling2d_1   (None, 128)              0         
 (GlobalAveragePooling2D)                                        
                                                           

In [19]:
# If True, processing data from scratch
# If False, loads preprocessed data
PREPROCESS_DATA = False
TRAIN_MODEL = True
# True: use 10% of participants as validation set
# False: use all data for training -> gives better LB result
USE_VAL = False
N_ROWS = 543
N_DIMS = 3
DIM_NAMES = ['x', 'y', 'z']
SEED = 42
NUM_CLASSES = 250
INPUT_SIZE = 64
BATCH_ALL_SIGNS_N = 4
BATCH_SIZE = 256
N_EPOCHS = 100
LR_MAX = 1e-3
N_WARMUP_EPOCHS = 0
WD_RATIO = 0.05
MASK_VAL = 4237
N_COLS = 61

In [20]:
# Custom callback to update weight decay with learning rate
class WeightDecayCallback(tf.keras.callbacks.Callback):
    def __init__(self, wd_ratio=WD_RATIO):
        self.step_counter = 0
        self.wd_ratio = wd_ratio
    
    def on_epoch_begin(self, epoch, logs=None):
        model.optimizer.weight_decay = model.optimizer.learning_rate * self.wd_ratio
        print(f'learning rate: {model.optimizer.learning_rate.numpy():.2e}, weight decay: {model.optimizer.weight_decay.numpy():.2e}')

In [21]:
def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, num_training_steps=N_EPOCHS):
    
    if current_step < num_warmup_steps:
        if WARMUP_METHOD == 'log':
            return lr_max * 0.10 ** (num_warmup_steps - current_step)
        else:
            return lr_max * 2 ** -(num_warmup_steps - current_step)
    else:
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))

        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) * lr_max

In [22]:
# Learning rate for encoder
LR_SCHEDULE = [lrfn(step, num_warmup_steps=N_WARMUP_EPOCHS, lr_max=LR_MAX, num_cycles=0.50) for step in range(N_EPOCHS)]
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda step: LR_SCHEDULE[step], verbose=1)


In [23]:
x_train = np.load('X_train_20x61_left.npy')
y_train = np.load('y_train_20x61_left.npy')
x_test = np.load('X_test_20x61_left.npy')
y_test = np.load('y_test_20x61_left.npy')

In [39]:
tf.keras.backend.clear_session()

# Get new fresh model
model = build_gcn((20,61,3),250)
file_name = 'models/weights_042323_18_47.h5'
#model = tf.keras.models.load_model('models/041423_21_02.h5')

# Sanity Check
model.summary()

# Actual Training
history = model.fit(
        x=x_train,
        y=y_train,
        epochs=100,
        # Only used for validation data since training data is a generator
        batch_size=256,
        validation_data=(x_test,y_test),
        callbacks=[
            tf.keras.callbacks.ModelCheckpoint(
            file_name,
            save_weights_only = True,
            save_best_only=True, 
            monitor="val_accuracy",
            mode="max",
            verbose = 1),
            lr_callback,
            WeightDecayCallback(),
        ],
        verbose = 1,
    )

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 20, 61, 3)]       0         
                                                                 
 tf.slice (TFOpLambda)       (None, 20, 61, 2)         0         
                                                                 
 graph_conv (GraphConv)      (None, 20, 61, 64)        192       
                                                                 
 graph_conv_1 (GraphConv)    (None, 20, 61, 128)       8320      
                                                                 
 graph_pool (GraphPool)      (None, 20, 61, 128)       0         
                                                                 
 global_average_pooling2d (G  (None, 128)              0         
 lobalAveragePooling2D)                                          
                                                             


Epoch 15: LearningRateScheduler setting learning rate to 0.0009524135262330098.
learning rate: 9.52e-04, weight decay: 4.76e-05
Epoch 15/100
Epoch 15: val_accuracy improved from 0.02429 to 0.02461, saving model to models/weights_042323_18_47.h5

Epoch 16: LearningRateScheduler setting learning rate to 0.0009455032620941839.
learning rate: 9.46e-04, weight decay: 4.73e-05
Epoch 16/100
Epoch 16: val_accuracy did not improve from 0.02461

Epoch 17: LearningRateScheduler setting learning rate to 0.0009381533400219318.
learning rate: 9.38e-04, weight decay: 4.69e-05
Epoch 17/100
Epoch 17: val_accuracy improved from 0.02461 to 0.02697, saving model to models/weights_042323_18_47.h5

Epoch 18: LearningRateScheduler setting learning rate to 0.0009303710135019718.
learning rate: 9.30e-04, weight decay: 4.65e-05
Epoch 18/100
Epoch 18: val_accuracy did not improve from 0.02697

Epoch 19: LearningRateScheduler setting learning rate to 0.0009221639627510075.
learning rate: 9.22e-04, weight decay: 


Epoch 34: LearningRateScheduler setting learning rate to 0.0007545207078751857.
learning rate: 7.55e-04, weight decay: 3.77e-05
Epoch 34/100
Epoch 34: val_accuracy improved from 0.03114 to 0.03146, saving model to models/weights_042323_18_47.h5

Epoch 35: LearningRateScheduler setting learning rate to 0.0007408768370508576.
learning rate: 7.41e-04, weight decay: 3.70e-05
Epoch 35/100
Epoch 35: val_accuracy improved from 0.03146 to 0.03216, saving model to models/weights_042323_18_47.h5

Epoch 36: LearningRateScheduler setting learning rate to 0.0007269952498697733.
learning rate: 7.27e-04, weight decay: 3.63e-05
Epoch 36/100
Epoch 36: val_accuracy did not improve from 0.03216

Epoch 37: LearningRateScheduler setting learning rate to 0.0007128896457825364.
learning rate: 7.13e-04, weight decay: 3.56e-05
Epoch 37/100
Epoch 37: val_accuracy did not improve from 0.03216

Epoch 38: LearningRateScheduler setting learning rate to 0.0006985739453173903.
learning rate: 6.99e-04, weight decay: 

Epoch 52: val_accuracy did not improve from 0.03398

Epoch 53: LearningRateScheduler setting learning rate to 0.0004686047402353433.
learning rate: 4.69e-04, weight decay: 2.34e-05
Epoch 53/100
Epoch 53: val_accuracy did not improve from 0.03398

Epoch 54: LearningRateScheduler setting learning rate to 0.00045294584334074284.
learning rate: 4.53e-04, weight decay: 2.26e-05
Epoch 54/100
Epoch 54: val_accuracy did not improve from 0.03398

Epoch 55: LearningRateScheduler setting learning rate to 0.00043733338321784784.
learning rate: 4.37e-04, weight decay: 2.19e-05
Epoch 55/100
Epoch 55: val_accuracy did not improve from 0.03398

Epoch 56: LearningRateScheduler setting learning rate to 0.0004217827674798845.
learning rate: 4.22e-04, weight decay: 2.11e-05
Epoch 56/100
Epoch 56: val_accuracy did not improve from 0.03398

Epoch 57: LearningRateScheduler setting learning rate to 0.0004063093427071376.
learning rate: 4.06e-04, weight decay: 2.03e-05
Epoch 57/100
Epoch 57: val_accuracy impro


Epoch 71: LearningRateScheduler setting learning rate to 0.00020610737385376348.
learning rate: 2.06e-04, weight decay: 1.03e-05
Epoch 71/100
Epoch 71: val_accuracy did not improve from 0.03542

Epoch 72: LearningRateScheduler setting learning rate to 0.00019354647317351188.
learning rate: 1.94e-04, weight decay: 9.68e-06
Epoch 72/100
Epoch 72: val_accuracy did not improve from 0.03542

Epoch 73: LearningRateScheduler setting learning rate to 0.00018128800512565513.
learning rate: 1.81e-04, weight decay: 9.06e-06
Epoch 73/100
Epoch 73: val_accuracy did not improve from 0.03542

Epoch 74: LearningRateScheduler setting learning rate to 0.00016934406733817414.
learning rate: 1.69e-04, weight decay: 8.47e-06
Epoch 74/100
Epoch 74: val_accuracy did not improve from 0.03542

Epoch 75: LearningRateScheduler setting learning rate to 0.00015772644703565563.
learning rate: 1.58e-04, weight decay: 7.89e-06
Epoch 75/100
Epoch 75: val_accuracy did not improve from 0.03542

Epoch 76: LearningRateSc

learning rate: 2.96e-05, weight decay: 1.48e-06
Epoch 90/100
Epoch 90: val_accuracy did not improve from 0.03542

Epoch 91: LearningRateScheduler setting learning rate to 2.4471741852423235e-05.
learning rate: 2.45e-05, weight decay: 1.22e-06
Epoch 91/100
Epoch 91: val_accuracy did not improve from 0.03542

Epoch 92: LearningRateScheduler setting learning rate to 1.985315716152847e-05.
learning rate: 1.99e-05, weight decay: 9.93e-07
Epoch 92/100
Epoch 92: val_accuracy did not improve from 0.03542

Epoch 93: LearningRateScheduler setting learning rate to 1.5708419435684463e-05.
learning rate: 1.57e-05, weight decay: 7.85e-07
Epoch 93/100
Epoch 93: val_accuracy did not improve from 0.03542

Epoch 94: LearningRateScheduler setting learning rate to 1.2041619030626282e-05.
learning rate: 1.20e-05, weight decay: 6.02e-07
Epoch 94/100
Epoch 94: val_accuracy improved from 0.03542 to 0.03553, saving model to models/weights_042323_18_47.h5

Epoch 95: LearningRateScheduler setting learning rate t

In [40]:
y_train[:256].shape

(256,)

In [31]:
test_df.head()

Unnamed: 0,path,participant_id,sequence_id,sign
28332,train_landmark_files/34503/2167338153.parquet,34503,2167338153,zipper
85588,train_landmark_files/4718/633430216.parquet,4718,633430216,finger
5325,train_landmark_files/2044/1220910156.parquet,2044,1220910156,nap
17262,train_landmark_files/26734/1712048579.parquet,26734,1712048579,if
77761,train_landmark_files/37779/4182573073.parquet,37779,4182573073,fine


In [33]:
model = tf.keras.models.load_model('models/042223_20_52.h5')

In [45]:
test_df['prediction'] = None

In [46]:
for index,row in test_df.iterrows():
    label = row['sign']
    left_hand,lips = train_datagen.load_video(row['path'])
    pred = model.predict(np.expand_dims(np.concatenate([left_hand,lips],axis=-2),axis=0),verbose=0)
    test_df.loc[index,'prediction'] = decoder(np.argmax(pred[0], axis=-1))

FileNotFoundError: [Errno 2] No such file or directory: 'yes'

In [173]:
complete_df['path'] = complete_df['path'].str.replace('/kaggle/input/asl-signs/','')

In [183]:
sum(seg_df['sign_x'] == seg_df['prediction'])/(len(seg_df))

0.7631244707874683

In [174]:
combined = test_df.merge(complete_df,on=['path'],how='outer',indicator=True)

In [175]:
len(combined)

94477

In [177]:
seg_df = combined[combined['_merge']=='both']

In [182]:
seg_df['sign_x']

0         zipper
1         finger
2            nap
3             if
4           fine
          ...   
18891    fireman
18892       home
18893      clean
18894      taste
18895       loud
Name: sign_x, Length: 18896, dtype: object

In [188]:
seg_df[seg_df['total_frames']==6]['participant_id_x'].value_counts()

37779    299
30680    196
25571    184
18796    177
53618    162
2044     138
28656    125
16069    111
36257    103
37055     94
4718      93
34503     81
27610     74
62590     72
29302     72
22343     72
55372     69
26734     66
49445     59
32319     50
61333     36
Name: participant_id_x, dtype: int64

In [186]:
seg_df[seg_df['sign_x'] != seg_df['prediction']]['total_frames'].value_counts()[:10]

6     1128
16     103
8      102
7       96
15      90
13      89
9       87
14      83
12      82
17      81
Name: total_frames, dtype: int64

In [28]:
def read_json_file(file_path):
    """Read a JSON file and parse it into a Python object.

    Args:
        file_path (str): The path to the JSON file to read.

    Returns:
        dict: A dictionary object representing the JSON data.
        
    Raises:
        FileNotFoundError: If the specified file path does not exist.
        ValueError: If the specified file path does not contain valid JSON data.
    """
    try:
        # Open the file and load the JSON data into a Python object
        with open(file_path, 'r') as file:
            json_data = json.load(file)
        return json_data
    except FileNotFoundError:
        # Raise an error if the file path does not exist
        raise FileNotFoundError(f"File not found: {file_path}")
    except ValueError:
        # Raise an error if the file does not contain valid JSON data
        raise ValueError(f"Invalid JSON data in file: {file_path}")
p2s_map = {v:k for k,v in read_json_file("sign_to_prediction_index_map.json").items()}
encoder = lambda x: s2p_map.get(x.lower())
decoder = lambda x: p2s_map.get(x)

In [29]:
preds = [decoder(np.argmax(i, axis=-1)) for i in pred]

NameError: name 'pred' is not defined

In [30]:
labels = [decoder(i) for i in y_test]

In [33]:
cnt = 0
for i , j in zip(preds,labels):
    if i == j:
        cnt+=1
print(cnt/len(preds)) 

0.7688356164383562
