# Isolated Sign Language Recognition with STGCN

In this notebook, I will create Sign Language Recognition model using STGCN. To build an efficient training pipeline, I will use TFRecord Dataset from https://www.kaggle.com/datasets/lonnieqin/islr-12-time-steps-tfrecords created by notebook https://www.kaggle.com/code/lonnieqin/islr-create-tfrecord for training.
The ST-GCN model archetecture was adapated from https://github.com/kdkalvik/ST-GCN
It will take about 1 hour to finish runing this notebook using GPU.

## Configuration

In [1]:
class CFG:
    data_path = ""
    tf_record_path = "/kaggle/input/islr-12-time-steps-tfrecords/"
    sequence_length = 12
    rows_per_frame = 543
    is_training = True

## Import Libraries

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tqdm import tqdm
import json
import time
import os
import math
np.random.seed(16)
tf.random.set_seed(16)

## Utilities

In [3]:
ROWS_PER_FRAME = 543  # number of landmarks per frame

def load_relevant_data_subset_with_imputation(pq_path):
    data_columns = ['x', 'y']
    data = pd.read_parquet(pq_path, columns=data_columns)
    data.replace(np.nan, 0, inplace=True)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float16)

def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

def read_dict(file_path):
    path = os.path.expanduser(file_path)
    with open(path, "r") as f:
        dic = json.load(f)
    return dic

## Load data

In [4]:
train = pd.read_csv(f"{CFG.data_path}train.csv")
train.head()

Unnamed: 0,path,participant_id,sequence_id,sign
0,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow
1,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait
2,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud
3,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird
4,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie


There are 21 participants. Each of them created about 3000 to 5000 training records.

In [5]:
train.participant_id.nunique()

21

In [6]:
train.participant_id.value_counts().plot(kind="bar")

<AxesSubplot: >

There are 94477 training samples in total.

In [7]:
len(train)

94477

There are 250 kinds of sign languages that we need to make prediction on.

In [8]:
label_index = read_dict(f"{CFG.data_path}sign_to_prediction_index_map.json")
index_label = dict([(label_index[key], key) for key in label_index])
print(label_index)
train["label"] = train["sign"].map(lambda sign: label_index[sign])
train.head()

{'TV': 0, 'after': 1, 'airplane': 2, 'all': 3, 'alligator': 4, 'animal': 5, 'another': 6, 'any': 7, 'apple': 8, 'arm': 9, 'aunt': 10, 'awake': 11, 'backyard': 12, 'bad': 13, 'balloon': 14, 'bath': 15, 'because': 16, 'bed': 17, 'bedroom': 18, 'bee': 19, 'before': 20, 'beside': 21, 'better': 22, 'bird': 23, 'black': 24, 'blow': 25, 'blue': 26, 'boat': 27, 'book': 28, 'boy': 29, 'brother': 30, 'brown': 31, 'bug': 32, 'bye': 33, 'callonphone': 34, 'can': 35, 'car': 36, 'carrot': 37, 'cat': 38, 'cereal': 39, 'chair': 40, 'cheek': 41, 'child': 42, 'chin': 43, 'chocolate': 44, 'clean': 45, 'close': 46, 'closet': 47, 'cloud': 48, 'clown': 49, 'cow': 50, 'cowboy': 51, 'cry': 52, 'cut': 53, 'cute': 54, 'dad': 55, 'dance': 56, 'dirty': 57, 'dog': 58, 'doll': 59, 'donkey': 60, 'down': 61, 'drawer': 62, 'drink': 63, 'drop': 64, 'dry': 65, 'dryer': 66, 'duck': 67, 'ear': 68, 'elephant': 69, 'empty': 70, 'every': 71, 'eye': 72, 'face': 73, 'fall': 74, 'farm': 75, 'fast': 76, 'feet': 77, 'find': 78, '

Unnamed: 0,path,participant_id,sequence_id,sign,label
0,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow,25
1,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait,232
2,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud,48
3,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird,23
4,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie,164


## Create Tensorflow Dataset

In [10]:
def decode_function(record_bytes):
    return tf.io.parse_single_example(
          # Data
          record_bytes,
          # Schema
          {
              "feature": tf.io.FixedLenFeature([12 * 543 * 3], dtype=tf.float32),
              "label": tf.io.FixedLenFeature([], dtype=tf.int64)
          }
      )
def preprocess(item):
    features = item["feature"]
#     features = tf.reshape(features, (1,CFG.sequence_length, 543,3))
    features=tf.reshape(features, (1,12, 543, 3))
#         "face"       : np.arange(0, 468),
#     "left_hand"  : np.arange(468, 489),
#     "pose"       : np.arange(489, 522),
#     "right_hand" : np.arange(522, 543),
    features=tf.transpose(features, perm=[3, 1, 2, 0])
    features1=features[:,-5:,468:489,:]
    features2=features[:,-5:,522:543,:]
    features=tf.concat([features1, features2],2)
    print(features.shape)
    return features, item["label"]         
def make_dataset(file_paths, batch_size=128, mode="train"):
    ds = tf.data.TFRecordDataset(file_paths)
    ds = ds.map(decode_function)
    ds = ds.map(preprocess)
    options = tf.data.Options()
    if mode == "train":
        ds = ds.shuffle(1024)
        options.experimental_deterministic = False
    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.with_options(options) 
    ds = ds.cache().prefetch(tf.data.AUTOTUNE)
    return ds

In [11]:
USE_TYPES = ['left_hand', 'pose', 'right_hand']
START_IDX = 468
LIPS_IDXS0 = np.array([
        61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
        291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
        78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
        95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
    ])
# Landmark indices in original data
LEFT_HAND_IDXS0 = np.arange(468,489)
RIGHT_HAND_IDXS0 = np.arange(522,543)
LEFT_POSE_IDXS0 = np.array([502, 504, 506, 508, 510])
RIGHT_POSE_IDXS0 = np.array([503, 505, 507, 509, 511])

In [12]:
x_train = np.load('X_train_20x61_left.npy')
y_train = np.load('y_train_20x61_left.npy')
x_test = np.load('X_test_20x61_left.npy')
y_test = np.load('y_test_20x61_left.npy')

In [13]:
LIPS_reset = np.arange(len(LIPS_IDXS0))

In [14]:
LIPS_reset

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39])

In [15]:
FACEMESH_LIPS = frozenset([(61, 146), (146, 91), (91, 181), (181, 84), (84, 17),
                           (17, 314), (314, 405), (405, 321), (321, 375),
                           (375, 291), (61, 185), (185, 40), (40, 39), (39, 37),
                           (37, 0), (0, 267),
                           (267, 269), (269, 270), (270, 409), (409, 291),
                           (78, 95), (95, 88), (88, 178), (178, 87), (87, 14),
                           (14, 317), (317, 402), (402, 318), (318, 324),
                           (324, 308), (78, 191), (191, 80), (80, 81), (81, 82),
                           (82, 13), (13, 312), (312, 311), (311, 310),
                           (310, 415), (415, 308)])

In [16]:
lips_index = []
for link in FACEMESH_LIPS:
    i,j = link
    lips_index.append((21+LIPS_reset[np.where(LIPS_IDXS0 == i)[0][0]],21+LIPS_reset[np.where(LIPS_IDXS0 == j)[0][0]]))

In [17]:
x_train = x_train[:,:,:,:2]
x_test = x_test[:,:,:,:2]
x_train = np.transpose(x_train,(0, 3,1, 2))
x_test = np.transpose(x_test,(0,3, 1, 2))
#x_train = np.expand_dims(x_train,axis=-1)
#x_test = np.expand_dims(x_test,axis=-1)

In [18]:
unique_ids = np.array(sorted(train.participant_id.unique()))

In [19]:
import numpy as np

def edge2mat(link, num_node):
    A = np.zeros((num_node, num_node))
    for i, j in link:
        A[j, i] = 1
    return A


def normalize_digraph(A):  # 除以每列的和
    Dl = np.sum(A, 0)
    h, w = A.shape
    Dn = np.zeros((w, w))
    for i in range(w):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i] ** (-1)
    AD = np.dot(A, Dn)
    return AD


def get_spatial_graph(num_node, self_link, inward, outward):
    I = edge2mat(self_link, num_node)
    In = normalize_digraph(edge2mat(inward, num_node))
    Out = normalize_digraph(edge2mat(outward, num_node))
    A = np.stack((I, In, Out))
    return A


![handlandmark](https://developers.google.com/static/mediapipe/images/solutions/hand-landmarks.png) create the node graph for hand landmarks 

In [20]:
num_node = 61
self_link = [(i, i) for i in range(num_node)]
inward_ori_index = [(1, 2), (2, 3), (3, 4), (4, 5), (1, 6), (6, 7), (7, 8),
                    (8, 9), (6, 10), (10, 11), (11, 12), (12, 13), (10, 14),
                    (14, 15), (15, 16), (16, 17), (14, 18), (18, 19), (19, 20),
                    (20, 21), (18, 1)]
inward_ori_index2=[(1+21, 2+21), (2+21, 3+21), (3+21, 4+21), (4+21, 5+21), (1+21, 6), 
                   (6+21, 7+21), (7+21, 8+21), (8+21, 9+21), (6+21, 10+21), 
                    (10+21, 11+21), (11+21, 12+21), (12+21, 13+21), (10+21, 14+21),
                    (14+21, 15+21), (15+21, 16+21), (16+21, 17+21), (14+21, 18+21), (18+21, 19+21), (19+21, 20+21),
                    (20+21, 21+21), (18+21, 1+21)]
inward_ori_index.extend(lips_index)
inward = [(i - 1, j - 1) for (i, j) in inward_ori_index]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward

In [21]:
len(outward)

61

In [22]:

class Graph:
    def __init__(self, labeling_mode='spatial'):
        self.A = self.get_adjacency_matrix(labeling_mode)
        self.num_node = num_node
        self.self_link = self_link
        self.inward = inward
        self.outward = outward
        self.neighbor = neighbor

    def get_adjacency_matrix(self, labeling_mode=None):
        if labeling_mode is None:
            return self.A
        if labeling_mode == 'spatial':
            A = get_spatial_graph(num_node, self_link, inward, outward)
        else:
            raise ValueError()
        return A


## Modeling

In [77]:
REGULARIZER = tf.keras.regularizers.l2(l=0.001)
INITIALIZER = tf.keras.initializers.VarianceScaling(scale=2.,
                                                    mode="fan_out",
                                                    distribution="truncated_normal")
def SGCN(old_filters,filters,kernel_size,INITIALIZER,REGULARIZER,graph_A):
    #(1, 2, 20, 61) (3, 61, 61)
    
    input_tensor = tf.keras.layers.Input(shape=[old_filters,None,61], dtype=tf.float32)
    A = tf.Variable(graph_A, dtype=tf.float32, trainable=False, name='adjacency_matrix')
    conv = tf.keras.layers.Conv2D(filters*kernel_size,
                                  kernel_size=1,
                                  padding='same',
                                  kernel_initializer=INITIALIZER,
                                  data_format='channels_first',
                                  kernel_regularizer=REGULARIZER)(input_tensor)

    N = tf.shape(conv)[0]
    C = tf.shape(conv)[1]
    T = tf.shape(conv)[2]
    V = tf.shape(conv)[3]
    x = tf.reshape(conv,[N,kernel_size, C//kernel_size, T, V])

    x = tf.keras.layers.Lambda(lambda y: tf.einsum('nkctv,kvw->nctw', y[0], y[1]))([x, A])
    model = tf.keras.Model(inputs=input_tensor, outputs=x, name='SGCN')
    return model


#(1, 2, 20, 61) (3, 61, 61)
def STGCN(old_filters=2,filters=64, kernel_size=[9, 3], stride=1, activation='relu', residual=True, downsample=False,\
          INITIALIZER=INITIALIZER,REGULARIZER=REGULARIZER,graph_A=graph_A):
    input_tensor = tf.keras.layers.Input(shape=[old_filters, None, 61])

    sgcn_output = SGCN(old_filters,filters, kernel_size=kernel_size[1],INITIALIZER=INITIALIZER,\
                                     REGULARIZER=REGULARIZER,graph_A=graph_A)(input_tensor)
    tgcn_output = tf.keras.layers.BatchNormalization(axis=1)(sgcn_output)
    tgcn_output = tf.keras.layers.Activation(activation)(tgcn_output)
    tgcn_output = tf.keras.layers.Conv2D(filters, kernel_size=[kernel_size[0], 1], strides=[stride, 1], padding='same',
                                          kernel_initializer=INITIALIZER, data_format='channels_first', \
                                         kernel_regularizer=REGULARIZER)(tgcn_output)
    tgcn_output = tf.keras.layers.BatchNormalization(axis=1)(tgcn_output)
    if not residual:
        res = tf.zeros_like(tgcn_output)
    elif residual and stride != 1 or downsample:
        res = tf.keras.layers.Conv2D(filters, kernel_size=[1, 1], strides=[stride, 1], padding='same', kernel_initializer=INITIALIZER,
                                   data_format='channels_first', kernel_regularizer=REGULARIZER)(input_tensor)
        res = tf.keras.layers.BatchNormalization(axis=1)(res)
    else:
        res = input_tensor
    x = tf.keras.layers.add([tgcn_output, res])
    x = tf.keras.layers.Activation(activation)(x)
    model = tf.keras.models.Model(inputs=input_tensor, outputs=x)
    return model

def MainModel(num_classes=250,graph_A=graph_A):
    #(N, in_channels, T_{in}, V_{in}, M_{in})
    #graph = Graph()
    input_tensor = tf.keras.layers.Input(shape=[2, 20, 61,1],dtype=tf.float32)
    
    x = tf.keras.layers.BatchNormalization(axis=1, input_shape=(2, 20, 61))(input_tensor)
    x = STGCN(old_filters=2,filters=64, residual=False,graph_A=graph_A)(x)
    x = STGCN(old_filters=64,filters=128, stride=2, downsample=True,graph_A=graph_A)(x)
    x = STGCN(old_filters=128,filters=256, stride=2, downsample=True,graph_A=graph_A)(x)
    x = tf.keras.layers.GlobalAveragePooling2D(data_format='channels_first')(x)
    x = tf.keras.layers.Reshape((1,256,1,1))(x)
    x = tf.keras.layers.Conv2D(num_classes,
                           kernel_size=1,
                           padding='same',
                           kernel_initializer=INITIALIZER,
                           data_format='channels_first',
                           kernel_regularizer=REGULARIZER)(x)
    x = tf.keras.layers.Reshape((-1,))(x)
    x = tf.keras.layers.Softmax(axis=-1)(x)
    model = tf.keras.models.Model(inputs=input_tensor, outputs=x)
    return model


In [78]:
tf.keras.backend.clear_session()
graph_A = graph.A
model = MainModel(graph_A=graph_A)
model.summary()
model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=[
            "accuracy",
        ]
    )

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2, 20, 61, 1)]    0         
                                                                 
 batch_normalization (BatchN  (None, 2, 20, 61, 1)     8         
 ormalization)                                                   
                                                                 
 model (Functional)          (None, 64, None, 61)      38016     
                                                                 
 model_1 (Functional)        (None, 128, None, 61)     182400    
                                                                 
 model_2 (Functional)        (None, 256, None, 61)     725248    
                                                                 
 global_average_pooling2d (G  (None, 256)              0         
 lobalAveragePooling2D)                                    

In [79]:
# If True, processing data from scratch
# If False, loads preprocessed data
PREPROCESS_DATA = False
TRAIN_MODEL = True
# True: use 10% of participants as validation set
# False: use all data for training -> gives better LB result
USE_VAL = False
N_ROWS = 543
N_DIMS = 3
DIM_NAMES = ['x', 'y', 'z']
SEED = 42
NUM_CLASSES = 250
INPUT_SIZE = 64
BATCH_ALL_SIGNS_N = 4
BATCH_SIZE = 256
N_EPOCHS = 100
LR_MAX = 1e-3
N_WARMUP_EPOCHS = 0
WD_RATIO = 0.05
MASK_VAL = 4237
N_COLS = 61
# Custom callback to update weight decay with learning rate
class WeightDecayCallback(tf.keras.callbacks.Callback):
    def __init__(self, wd_ratio=WD_RATIO):
        self.step_counter = 0
        self.wd_ratio = wd_ratio
    
    def on_epoch_begin(self, epoch, logs=None):
        model.optimizer.weight_decay = model.optimizer.learning_rate * self.wd_ratio
        print(f'learning rate: {model.optimizer.learning_rate.numpy():.2e}, weight decay: {model.optimizer.weight_decay.numpy():.2e}')

def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, num_training_steps=N_EPOCHS):
    
    if current_step < num_warmup_steps:
        if WARMUP_METHOD == 'log':
            return lr_max * 0.10 ** (num_warmup_steps - current_step)
        else:
            return lr_max * 2 ** -(num_warmup_steps - current_step)
    else:
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))

        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) * lr_max
# Learning rate for encoder
LR_SCHEDULE = [lrfn(step, num_warmup_steps=N_WARMUP_EPOCHS, lr_max=LR_MAX, num_cycles=0.50) for step in range(N_EPOCHS)]
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda step: LR_SCHEDULE[step], verbose=1)


In [80]:
tf.keras.backend.clear_session()

# Get new fresh model
file_name = 'models/weights_stgcn6.h5'
#model = tf.keras.models.load_model('models/041423_21_02.h5')
model.summary()

# Actual Training
history = model.fit(
        x=x_train,
        y=y_train,
        epochs=100,
        # Only used for validation data since training data is a generator
        batch_size=128,
        validation_data=(x_test,y_test),
        callbacks=[
            tf.keras.callbacks.ModelCheckpoint(
            file_name,
            save_weights_only = True,
            save_best_only=True, 
            monitor="val_accuracy",
            mode="max",
            verbose = 1),
            lr_callback,
            WeightDecayCallback(),
        ],
        verbose = 1,
    )

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2, 20, 61, 1)]    0         
                                                                 
 batch_normalization (BatchN  (None, 2, 20, 61, 1)     8         
 ormalization)                                                   
                                                                 
 model (Functional)          (None, 64, None, 61)      38016     
                                                                 
 model_1 (Functional)        (None, 128, None, 61)     182400    
                                                                 
 model_2 (Functional)        (None, 256, None, 61)     725248    
                                                                 
 global_average_pooling2d (G  (None, 256)              0         
 lobalAveragePooling2D)                                    


Epoch 33: LearningRateScheduler setting learning rate to 0.0007679133974894983.
learning rate: 7.68e-04, weight decay: 3.84e-05
Epoch 33/100
Epoch 33: val_accuracy improved from 0.64876 to 0.66160, saving model to models/weights_stgcn6.h5

Epoch 34: LearningRateScheduler setting learning rate to 0.0007545207078751857.
learning rate: 7.55e-04, weight decay: 3.77e-05
Epoch 34/100
Epoch 34: val_accuracy did not improve from 0.66160

Epoch 35: LearningRateScheduler setting learning rate to 0.0007408768370508576.
learning rate: 7.41e-04, weight decay: 3.70e-05
Epoch 35/100
Epoch 35: val_accuracy improved from 0.66160 to 0.66165, saving model to models/weights_stgcn6.h5

Epoch 36: LearningRateScheduler setting learning rate to 0.0007269952498697733.
learning rate: 7.27e-04, weight decay: 3.63e-05
Epoch 36/100
Epoch 36: val_accuracy did not improve from 0.66165

Epoch 37: LearningRateScheduler setting learning rate to 0.0007128896457825364.
learning rate: 7.13e-04, weight decay: 3.56e-05
Epo


Epoch 52: LearningRateScheduler setting learning rate to 0.00048429462046093585.
learning rate: 4.84e-04, weight decay: 2.42e-05
Epoch 52/100
Epoch 52: val_accuracy improved from 0.69628 to 0.71110, saving model to models/weights_stgcn6.h5

Epoch 53: LearningRateScheduler setting learning rate to 0.0004686047402353433.
learning rate: 4.69e-04, weight decay: 2.34e-05
Epoch 53/100
Epoch 53: val_accuracy improved from 0.71110 to 0.71597, saving model to models/weights_stgcn6.h5

Epoch 54: LearningRateScheduler setting learning rate to 0.00045294584334074284.
learning rate: 4.53e-04, weight decay: 2.26e-05
Epoch 54/100
Epoch 54: val_accuracy did not improve from 0.71597

Epoch 55: LearningRateScheduler setting learning rate to 0.00043733338321784784.
learning rate: 4.37e-04, weight decay: 2.19e-05
Epoch 55/100
Epoch 55: val_accuracy did not improve from 0.71597

Epoch 56: LearningRateScheduler setting learning rate to 0.0004217827674798845.
learning rate: 4.22e-04, weight decay: 2.11e-05



Epoch 70: LearningRateScheduler setting learning rate to 0.0002189583110739348.
learning rate: 2.19e-04, weight decay: 1.09e-05
Epoch 70/100
Epoch 70: val_accuracy improved from 0.75594 to 0.76156, saving model to models/weights_stgcn6.h5

Epoch 71: LearningRateScheduler setting learning rate to 0.00020610737385376348.
learning rate: 2.06e-04, weight decay: 1.03e-05
Epoch 71/100
Epoch 71: val_accuracy did not improve from 0.76156

Epoch 72: LearningRateScheduler setting learning rate to 0.00019354647317351188.
learning rate: 1.94e-04, weight decay: 9.68e-06
Epoch 72/100
Epoch 72: val_accuracy improved from 0.76156 to 0.76215, saving model to models/weights_stgcn6.h5

Epoch 73: LearningRateScheduler setting learning rate to 0.00018128800512565513.
learning rate: 1.81e-04, weight decay: 9.06e-06
Epoch 73/100
Epoch 73: val_accuracy did not improve from 0.76215

Epoch 74: LearningRateScheduler setting learning rate to 0.00016934406733817414.
learning rate: 1.69e-04, weight decay: 8.47e-06


Epoch 88: LearningRateScheduler setting learning rate to 4.112268715800943e-05.
learning rate: 4.11e-05, weight decay: 2.06e-06
Epoch 88/100
Epoch 88: val_accuracy improved from 0.77654 to 0.77724, saving model to models/weights_stgcn6.h5

Epoch 89: LearningRateScheduler setting learning rate to 3.5111757055874326e-05.
learning rate: 3.51e-05, weight decay: 1.76e-06
Epoch 89/100
Epoch 89: val_accuracy improved from 0.77724 to 0.77740, saving model to models/weights_stgcn6.h5

Epoch 90: LearningRateScheduler setting learning rate to 2.9559615522887274e-05.
learning rate: 2.96e-05, weight decay: 1.48e-06
Epoch 90/100
Epoch 90: val_accuracy improved from 0.77740 to 0.77959, saving model to models/weights_stgcn6.h5

Epoch 91: LearningRateScheduler setting learning rate to 2.4471741852423235e-05.
learning rate: 2.45e-05, weight decay: 1.22e-06
Epoch 91/100
Epoch 91: val_accuracy did not improve from 0.77959

Epoch 92: LearningRateScheduler setting learning rate to 1.985315716152847e-05.
le

In [46]:
x_train.shape

(75520, 2, 20, 61)

In [31]:
model.load_weights('models/weights_stgcn5.h5')

In [32]:
preds = model.predict(x_test)

(32, 250)


In [35]:
def read_json_file(file_path):
    """Read a JSON file and parse it into a Python object.

    Args:
        file_path (str): The path to the JSON file to read.

    Returns:
        dict: A dictionary object representing the JSON data.
        
    Raises:
        FileNotFoundError: If the specified file path does not exist.
        ValueError: If the specified file path does not contain valid JSON data.
    """
    try:
        # Open the file and load the JSON data into a Python object
        with open(file_path, 'r') as file:
            json_data = json.load(file)
        return json_data
    except FileNotFoundError:
        # Raise an error if the file path does not exist
        raise FileNotFoundError(f"File not found: {file_path}")
    except ValueError:
        # Raise an error if the file does not contain valid JSON data
        raise ValueError(f"Invalid JSON data in file: {file_path}")
p2s_map = {v:k for k,v in read_json_file(os.path.join('', "sign_to_prediction_index_map.json")).items()}
encoder = lambda x: s2p_map.get(x.lower())
decoder = lambda x: p2s_map.get(x)

In [49]:
cnt = 0
for i , j in zip(preds,y_test):
    i = np.argmax(i, axis=-1)
    if i == j:
        cnt+=1
print(cnt/len(preds))

0.7854773116438356


In [None]:
complete_df = pd.read_csv('/kaggle/input/asl-signs/train.csv')
complete_df = complete_df[complete_df['participant_id']!='37055']
from sklearn.model_selection import train_test_split
y = complete_df['sign']
train_df, test_df = train_test_split(complete_df, test_size=0.2,stratify=y)
all_preds = []
all_labels = []
for index,row in test_df.iterrows():
    demo_output = tflite_keras_model(load_relevant_data_subset('/kaggle/input/asl-signs/'+row['path']))["outputs"]
    all_preds.append(decoder(np.argmax(demo_output.numpy(), axis=-1)))
    all_labels.append(row['sign'])

In [46]:
# https://towardsdatascience.com/model-sub-classing-and-custom-training-loop-from-scratch-in-tensorflow-2-cc1d4f10fb4e
optimizer = tf.keras.optimizers.Adam()
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
val_acc_metric   = tf.keras.metrics.CategoricalAccuracy()
# Instantiate a loss function
# loss_fn = tf.keras.losses.CategoricalCrossentropy()
loss_fn=tf.keras.losses.SparseCategoricalCrossentropy()
for epoch in range(30): # <----- start for loop, step 1

  # <-------- start for loop, step 2
  # Iterate over the batches of the dataset.
  for step, (x_batch_train, y_batch_train) in enumerate(train_ds):

    # <-------- start gradient tape scope, step 3
    # Open a GradientTape to record the operations run
    # during the forward pass, which enables auto-differentiation.
    with tf.GradientTape() as tape:

       # Run the forward pass of the layer.
       # The operations that the layer applies
       # to its inputs are going to be recorded
       # on the GradientTape.
       logits = model(x_batch_train, training=True) 

       # Compute the loss value for this minibatch.
       loss_value = loss_fn(y_batch_train, logits)  
       print(loss_value )

    # compute the gradient of weights w.r.t. loss  <-------- step 5
    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss.
    grads = tape.gradient(loss_value, model.trainable_weights)

    # update the weight based on gradient  <---------- step 6
    # Run one step of gradient descent by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_acc_metric.update_state(y_batch_train, logits)
    print(train_acc_metric)

NameError: name 'train_ds' is not defined

In [37]:
x_train.shape

(3, 20, 61, 75520, 1)

In [None]:
if CFG.is_training:
    file_name = "model.h5"
#     callbacks = [
#         tf.keras.callbacks.ModelCheckpoint(
#             file_name, 
#             save_best_only=True, 
#             restore_best_weights=True, 
#             monitor="val_accuracy",
#             mode="max"
#         ),
#         tf.keras.callbacks.EarlyStopping(
#             patience=5, 
#             monitor="val_accuracy",
#             mode="max"
#         )
#     ]
    model.fit(train_ds, epochs=1, validation_data=valid_ds)
    model.save('/kaggle/input/islr-convlstm1d/model.h5',save_format='tf')
    model = tf.keras.models.load_model(file_name)
# else:
#     model = tf.keras.models.load_model("/kaggle/input/islr-convlstm1d/model.h5")
model.evaluate(valid_ds)   

## Create Model for inference

In [None]:
# def get_inference_model(model):
#     inputs = tf.keras.Input((543, 3), dtype=tf.float32, name="inputs")
#     vector = tf.image.resize(inputs, (CFG.sequence_length, 543))
#     vector = tf.where(tf.math.is_nan(vector), tf.zeros_like(vector), vector)
#     vector = tf.expand_dims(vector, axis=0)
#     vector = model(vector)
#     output = tf.keras.layers.Activation(activation="linear", name="outputs")(vector)
#     inference_model = tf.keras.Model(inputs=inputs, outputs=output) 
#     inference_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])
#     return inference_model

In [None]:
# inference_model = get_inference_model(model)
# inference_model.summary()
# tf.keras.utils.plot_model(inference_model, show_shapes=True)

## Create submission file

In [None]:
# converter = tf.lite.TFLiteConverter.from_keras_model(inference_model)
# tflite_model = converter.convert()
# model_path = "model.tflite"
# # Save the model.
# with open(model_path, 'wb') as f:
#     f.write(tflite_model)
# !zip submission.zip $model_path

## Making Predictions

In [None]:
# !pip install tflite-runtime

In [None]:
# import tflite_runtime.interpreter as tflite
# interpreter = tflite.Interpreter(model_path)
# found_signatures = list(interpreter.get_signature_list().keys())
# prediction_fn = interpreter.get_signature_runner("serving_default")
# for i in tqdm(range(10000)):
#     frames = load_relevant_data_subset(f'/kaggle/input/asl-signs/{train.iloc[i].path}')
#     output = prediction_fn(inputs=frames)
#     if i < 100:
#         sign = np.argmax(output["outputs"])
#         print(f"Predicted label: {index_label[sign]}, Actual Label: {train.iloc[i].sign}")