# Isolated Sign Language Recognition with STGCN

In this notebook, I will create Sign Language Recognition model using STGCN. To build an efficient training pipeline, I will use TFRecord Dataset from https://www.kaggle.com/datasets/lonnieqin/islr-12-time-steps-tfrecords created by notebook https://www.kaggle.com/code/lonnieqin/islr-create-tfrecord for training.
The ST-GCN model archetecture was adapated from https://github.com/kdkalvik/ST-GCN
It will take about 1 hour to finish runing this notebook using GPU.

## Configuration

In [1]:
class CFG:
    data_path = ""
    tf_record_path = "/kaggle/input/islr-12-time-steps-tfrecords/"
    sequence_length = 12
    rows_per_frame = 543
    is_training = True

## Import Libraries

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tqdm import tqdm
import json
import time
import os
import math
np.random.seed(16)
tf.random.set_seed(16)

## Utilities

In [3]:
ROWS_PER_FRAME = 543  # number of landmarks per frame

def load_relevant_data_subset_with_imputation(pq_path):
    data_columns = ['x', 'y']
    data = pd.read_parquet(pq_path, columns=data_columns)
    data.replace(np.nan, 0, inplace=True)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float16)

def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)

def read_dict(file_path):
    path = os.path.expanduser(file_path)
    with open(path, "r") as f:
        dic = json.load(f)
    return dic

## Load data

In [4]:
train = pd.read_csv(f"{CFG.data_path}train.csv")
train.head()

Unnamed: 0,path,participant_id,sequence_id,sign
0,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow
1,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait
2,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud
3,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird
4,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie


There are 21 participants. Each of them created about 3000 to 5000 training records.

In [5]:
train.participant_id.nunique()

21

In [6]:
train.participant_id.value_counts().plot(kind="bar")

<AxesSubplot: >

There are 94477 training samples in total.

In [8]:
len(train)

94477

There are 250 kinds of sign languages that we need to make prediction on.

In [7]:
label_index = read_dict(f"{CFG.data_path}sign_to_prediction_index_map.json")
index_label = dict([(label_index[key], key) for key in label_index])
print(label_index)
train["label"] = train["sign"].map(lambda sign: label_index[sign])
train.head()

{'TV': 0, 'after': 1, 'airplane': 2, 'all': 3, 'alligator': 4, 'animal': 5, 'another': 6, 'any': 7, 'apple': 8, 'arm': 9, 'aunt': 10, 'awake': 11, 'backyard': 12, 'bad': 13, 'balloon': 14, 'bath': 15, 'because': 16, 'bed': 17, 'bedroom': 18, 'bee': 19, 'before': 20, 'beside': 21, 'better': 22, 'bird': 23, 'black': 24, 'blow': 25, 'blue': 26, 'boat': 27, 'book': 28, 'boy': 29, 'brother': 30, 'brown': 31, 'bug': 32, 'bye': 33, 'callonphone': 34, 'can': 35, 'car': 36, 'carrot': 37, 'cat': 38, 'cereal': 39, 'chair': 40, 'cheek': 41, 'child': 42, 'chin': 43, 'chocolate': 44, 'clean': 45, 'close': 46, 'closet': 47, 'cloud': 48, 'clown': 49, 'cow': 50, 'cowboy': 51, 'cry': 52, 'cut': 53, 'cute': 54, 'dad': 55, 'dance': 56, 'dirty': 57, 'dog': 58, 'doll': 59, 'donkey': 60, 'down': 61, 'drawer': 62, 'drink': 63, 'drop': 64, 'dry': 65, 'dryer': 66, 'duck': 67, 'ear': 68, 'elephant': 69, 'empty': 70, 'every': 71, 'eye': 72, 'face': 73, 'fall': 74, 'farm': 75, 'fast': 76, 'feet': 77, 'find': 78, '

Unnamed: 0,path,participant_id,sequence_id,sign,label
0,train_landmark_files/26734/1000035562.parquet,26734,1000035562,blow,25
1,train_landmark_files/28656/1000106739.parquet,28656,1000106739,wait,232
2,train_landmark_files/16069/100015657.parquet,16069,100015657,cloud,48
3,train_landmark_files/25571/1000210073.parquet,25571,1000210073,bird,23
4,train_landmark_files/62590/1000240708.parquet,62590,1000240708,owie,164


## Create Tensorflow Dataset

In [8]:
def decode_function(record_bytes):
    return tf.io.parse_single_example(
          # Data
          record_bytes,
          # Schema
          {
              "feature": tf.io.FixedLenFeature([12 * 543 * 3], dtype=tf.float32),
              "label": tf.io.FixedLenFeature([], dtype=tf.int64)
          }
      )
def preprocess(item):
    features = item["feature"]
#     features = tf.reshape(features, (1,CFG.sequence_length, 543,3))
    features=tf.reshape(features, (1,12, 543, 3))
#         "face"       : np.arange(0, 468),
#     "left_hand"  : np.arange(468, 489),
#     "pose"       : np.arange(489, 522),
#     "right_hand" : np.arange(522, 543),
    features=tf.transpose(features, perm=[3, 1, 2, 0])
    features1=features[:,-5:,468:489,:]
    features2=features[:,-5:,522:543,:]
    features=tf.concat([features1, features2],2)
    print(features.shape)
    return features, item["label"]         
def make_dataset(file_paths, batch_size=128, mode="train"):
    ds = tf.data.TFRecordDataset(file_paths)
    ds = ds.map(decode_function)
    ds = ds.map(preprocess)
    options = tf.data.Options()
    if mode == "train":
        ds = ds.shuffle(1024)
        options.experimental_deterministic = False
    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.with_options(options) 
    ds = ds.cache().prefetch(tf.data.AUTOTUNE)
    return ds

In [11]:
USE_TYPES = ['left_hand', 'pose', 'right_hand']
START_IDX = 468
LIPS_IDXS0 = np.array([
        61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
        291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
        78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
        95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
    ])
# Landmark indices in original data
LEFT_HAND_IDXS0 = np.arange(468,489)
RIGHT_HAND_IDXS0 = np.arange(522,543)
LEFT_POSE_IDXS0 = np.array([502, 504, 506, 508, 510])
RIGHT_POSE_IDXS0 = np.array([503, 505, 507, 509, 511])

In [12]:
x_train = np.load('X_train_20x61_left.npy')
y_train = np.load('y_train_20x61_left.npy')
x_test = np.load('X_test_20x61_left.npy')
y_test = np.load('y_test_20x61_left.npy')

In [16]:
LIPS_reset = np.arange(len(LIPS_IDXS0))

In [17]:
LIPS_reset

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39])

In [24]:
lips_index = []
for link in FACEMESH_LIPS:
    i,j = link
    lips_index.append((21+LIPS_reset[np.where(LIPS_IDXS0 == i)[0][0]],21+LIPS_reset[np.where(LIPS_IDXS0 == j)[0][0]]))

In [13]:
FACEMESH_LIPS = frozenset([(61, 146), (146, 91), (91, 181), (181, 84), (84, 17),
                           (17, 314), (314, 405), (405, 321), (321, 375),
                           (375, 291), (61, 185), (185, 40), (40, 39), (39, 37),
                           (37, 0), (0, 267),
                           (267, 269), (269, 270), (270, 409), (409, 291),
                           (78, 95), (95, 88), (88, 178), (178, 87), (87, 14),
                           (14, 317), (317, 402), (402, 318), (318, 324),
                           (324, 308), (78, 191), (191, 80), (80, 81), (81, 82),
                           (82, 13), (13, 312), (312, 311), (311, 310),
                           (310, 415), (415, 308)])

In [25]:
lips_index

[(29, 30),
 (56, 57),
 (44, 45),
 (33, 34),
 (25, 26),
 (35, 36),
 (28, 29),
 (39, 40),
 (58, 59),
 (47, 48),
 (50, 60),
 (36, 37),
 (21, 32),
 (41, 51),
 (26, 27),
 (45, 46),
 (37, 38),
 (53, 54),
 (27, 28),
 (21, 22),
 (55, 56),
 (52, 53),
 (22, 23),
 (38, 39),
 (46, 47),
 (59, 60),
 (30, 31),
 (32, 33),
 (54, 55),
 (41, 42),
 (51, 52),
 (48, 49),
 (24, 25),
 (23, 24),
 (57, 58),
 (42, 43),
 (43, 44),
 (49, 50),
 (34, 35),
 (40, 31)]

In [26]:
#x_train = x_train[:,:,:21,:]
#x_test = x_test[:,:,:21,:]

x_train = np.transpose(x_train,(0, 3,1, 2))
x_test = np.transpose(x_test,(0,3, 1, 2))
x_train = np.expand_dims(x_train,axis=-1)
x_test = np.expand_dims(x_test,axis=-1)

In [27]:
unique_ids = np.array(sorted(train.participant_id.unique()))

In [28]:
import numpy as np

def edge2mat(link, num_node):
    A = np.zeros((num_node, num_node))
    for i, j in link:
        A[j, i] = 1
    return A


def normalize_digraph(A):  # 除以每列的和
    Dl = np.sum(A, 0)
    h, w = A.shape
    Dn = np.zeros((w, w))
    for i in range(w):
        if Dl[i] > 0:
            Dn[i, i] = Dl[i] ** (-1)
    AD = np.dot(A, Dn)
    return AD


def get_spatial_graph(num_node, self_link, inward, outward):
    I = edge2mat(self_link, num_node)
    In = normalize_digraph(edge2mat(inward, num_node))
    Out = normalize_digraph(edge2mat(outward, num_node))
    A = np.stack((I, In, Out))
    return A


![handlandmark](https://developers.google.com/static/mediapipe/images/solutions/hand-landmarks.png) create the node graph for hand landmarks 

In [34]:
num_node = 61
self_link = [(i, i) for i in range(num_node)]
inward_ori_index = [(1, 2), (2, 3), (3, 4), (4, 5), (1, 6), (6, 7), (7, 8),
                    (8, 9), (6, 10), (10, 11), (11, 12), (12, 13), (10, 14),
                    (14, 15), (15, 16), (16, 17), (14, 18), (18, 19), (19, 20),
                    (20, 21), (18, 1)]
inward_ori_index2=[(1+21, 2+21), (2+21, 3+21), (3+21, 4+21), (4+21, 5+21), (1+21, 6), 
                   (6+21, 7+21), (7+21, 8+21), (8+21, 9+21), (6+21, 10+21), 
                    (10+21, 11+21), (11+21, 12+21), (12+21, 13+21), (10+21, 14+21),
                    (14+21, 15+21), (15+21, 16+21), (16+21, 17+21), (14+21, 18+21), (18+21, 19+21), (19+21, 20+21),
                    (20+21, 21+21), (18+21, 1+21)]
inward_ori_index.extend(lips_index)
inward = [(i - 1, j - 1) for (i, j) in inward_ori_index]
outward = [(j, i) for (i, j) in inward]
neighbor = inward + outward

In [35]:
len(outward)

61

In [36]:

class Graph:
    def __init__(self, labeling_mode='spatial'):
        self.A = self.get_adjacency_matrix(labeling_mode)
        self.num_node = num_node
        self.self_link = self_link
        self.inward = inward
        self.outward = outward
        self.neighbor = neighbor

    def get_adjacency_matrix(self, labeling_mode=None):
        if labeling_mode is None:
            return self.A
        if labeling_mode == 'spatial':
            A = get_spatial_graph(num_node, self_link, inward, outward)
        else:
            raise ValueError()
        return A


## Modeling

In [37]:
REGULARIZER = tf.keras.regularizers.l2(l=0.001)
INITIALIZER = tf.keras.initializers.VarianceScaling(scale=2.,
                                                    mode="fan_out",
                                                    distribution="truncated_normal")
class SGCN(tf.keras.Model):
    def __init__(self, filters, kernel_size=3):
        super().__init__()
        self.kernel_size = kernel_size
        self.conv = tf.keras.layers.Conv2D(filters*kernel_size,
                                           kernel_size=1,
                                           padding='same',
                                           kernel_initializer=INITIALIZER,
                                           data_format='channels_first',
                                           kernel_regularizer=REGULARIZER)

    # N, C, T, V
    def call(self, x, A, training):
        x = self.conv(x)

        N = tf.shape(x)[0]
        C = tf.shape(x)[1]
        T = tf.shape(x)[2]
        V = tf.shape(x)[3]

        x = tf.reshape(x, [N, self.kernel_size, C//self.kernel_size, T, V])
        x = tf.einsum('nkctv,kvw->nctw', x, A)
        return x, A


"""Applies a spatial temporal graph convolution over an input graph sequence.
    Args:
        filters (int): Number of channels produced by the convolution
        kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel
        stride (int, optional): Stride of the temporal convolution. Default: 1
        activation (activation function/name, optional): activation function to use
        residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True``
        downsample (bool, optional): If ``True``, applies a downsampling residual mechanism. Default: ``True``
                                     the value is used only when residual is ``True``
    Shape:
        - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
        - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
        - Output[0]: Outpu graph sequence in :math:`(N, out_channels, T_{out}, V)` format
        - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
        where
            :math:`N` is a batch size,
            :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
            :math:`T_{in}/T_{out}` is a length of input/output sequence,
            :math:`V` is the number of graph nodes.
"""
class STGCN(tf.keras.Model):
    def __init__(self, filters, kernel_size=[9, 3], stride=1, activation='relu',
                 residual=True, downsample=False):
        super().__init__()
        self.sgcn = SGCN(filters, kernel_size=kernel_size[1])

        self.tgcn = tf.keras.Sequential()
        self.tgcn.add(tf.keras.layers.BatchNormalization(axis=1))
        self.tgcn.add(tf.keras.layers.Activation(activation))
        self.tgcn.add(tf.keras.layers.Conv2D(filters,
                                                kernel_size=[kernel_size[0], 1],
                                                strides=[stride, 1],
                                                padding='same',
                                                kernel_initializer=INITIALIZER,
                                                data_format='channels_first',
                                                kernel_regularizer=REGULARIZER))
        self.tgcn.add(tf.keras.layers.BatchNormalization(axis=1))

        self.act = tf.keras.layers.Activation(activation)

        if not residual:
            self.residual = lambda x, training=False: 0
        elif residual and stride == 1 and not downsample:
            self.residual = lambda x, training=False: x
        else:
            self.residual = tf.keras.Sequential()
            self.residual.add(tf.keras.layers.Conv2D(filters,
                                                        kernel_size=[1, 1],
                                                        strides=[stride, 1],
                                                        padding='same',
                                                        kernel_initializer=INITIALIZER,
                                                        data_format='channels_first',
                                                        kernel_regularizer=REGULARIZER))
            self.residual.add(tf.keras.layers.BatchNormalization(axis=1))

    def call(self, x, A, training=True):
        res = self.residual(x, training=training)
        x, A = self.sgcn(x, A, training=training)
        x = self.tgcn(x, training=training)
        x += res
        x = self.act(x)
        return x, A


"""Spatial temporal graph convolutional networks.
    Args:
        num_class (int): Number of classes for the classification task
    Shape:(3, 5, 42, 1)
        - Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})`
        - Output: :math:`(N, num_class)` where
            :math:`N` is a batch size,
            :math:`T_{in}` is a length of input sequence,
            :math:`V_{in}` is the number of graph nodes,
            :math:`M_{in}` is the number of instance in a frame.
"""
class Model(tf.keras.Model):
    def __init__(self, num_classes=250):
        super().__init__()

        graph = Graph()
        self.A = tf.Variable(graph.A,
                             dtype=tf.float32,
                             trainable=False,
                             name='adjacency_matrix')

        self.data_bn = tf.keras.layers.BatchNormalization(axis=1)

        self.STGCN_layers = []
        self.STGCN_layers.append(STGCN(64, residual=False))
        #self.STGCN_layers.append(STGCN(64))
        #self.STGCN_layers.append(STGCN(64))
        #self.STGCN_layers.append(STGCN(64))
        self.STGCN_layers.append(STGCN(128, stride=2, downsample=True))
        #self.STGCN_layers.append(STGCN(128))
        #self.STGCN_layers.append(STGCN(128))
        self.STGCN_layers.append(STGCN(256, stride=2, downsample=True))
        #self.STGCN_layers.append(STGCN(256))
        #self.STGCN_layers.append(STGCN(256))

        self.pool = tf.keras.layers.GlobalAveragePooling2D(data_format='channels_first')

        self.logits = tf.keras.layers.Conv2D(num_classes,
                                             kernel_size=1,
                                             padding='same',
                                             kernel_initializer=INITIALIZER,
                                             data_format='channels_first',
                                             kernel_regularizer=REGULARIZER)

    def call(self, x, training):
        N = tf.shape(x)[0]
        C = tf.shape(x)[1]
        T = tf.shape(x)[2]
        V = tf.shape(x)[3]
        M = tf.shape(x)[4]

        x = tf.transpose(x, perm=[0, 4, 3, 1, 2])
        x = tf.reshape(x, [N * M, V * C, T])
        x = self.data_bn(x, training=training)
        x = tf.reshape(x, [N, M, V, C, T])
        x = tf.transpose(x, perm=[0, 1, 3, 4, 2])
        x = tf.reshape(x, [N * M, C, T, V])

        A = self.A
        for layer in self.STGCN_layers:
            x, A = layer(x, A, training=training)

        # N*M,C,T,V
        x = self.pool(x)
        x = tf.reshape(x, [N, M, -1, 1, 1])
        x = tf.reduce_mean(x, axis=1)
        x = self.logits(x)
        x = tf.reshape(x, [N, -1])
        x = tf.nn.softmax(x,axis=-1)
        print(x.shape)
        return x


In [38]:
model =  Model(num_classes=250)

In [39]:
def get_model():
          
    model =  Model(num_classes=250)
    model.build((128,3,20,61,1))
    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=[
            "accuracy",
        ]
    )
    return model

In [40]:
model = get_model()
model.summary()

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
(128, 250)
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_9 (Batc  multiple                 732       
 hNormalization)                                                 
                                                                 
 stgcn_3 (STGCN)             multiple                  38208     
                                                                 
 stgcn_4 (STGCN)             multiple                  182400    
                                                                 
 stgcn_5 (STGCN)             multiple                  725248    
                                                                 
 global_average_pooling2d_1   multiple       



In [41]:
# If True, processing data from scratch
# If False, loads preprocessed data
PREPROCESS_DATA = False
TRAIN_MODEL = True
# True: use 10% of participants as validation set
# False: use all data for training -> gives better LB result
USE_VAL = False
N_ROWS = 543
N_DIMS = 3
DIM_NAMES = ['x', 'y', 'z']
SEED = 42
NUM_CLASSES = 250
INPUT_SIZE = 64
BATCH_ALL_SIGNS_N = 4
BATCH_SIZE = 256
N_EPOCHS = 100
LR_MAX = 1e-3
N_WARMUP_EPOCHS = 0
WD_RATIO = 0.05
MASK_VAL = 4237
N_COLS = 61
# Custom callback to update weight decay with learning rate
class WeightDecayCallback(tf.keras.callbacks.Callback):
    def __init__(self, wd_ratio=WD_RATIO):
        self.step_counter = 0
        self.wd_ratio = wd_ratio
    
    def on_epoch_begin(self, epoch, logs=None):
        model.optimizer.weight_decay = model.optimizer.learning_rate * self.wd_ratio
        print(f'learning rate: {model.optimizer.learning_rate.numpy():.2e}, weight decay: {model.optimizer.weight_decay.numpy():.2e}')

def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, num_training_steps=N_EPOCHS):
    
    if current_step < num_warmup_steps:
        if WARMUP_METHOD == 'log':
            return lr_max * 0.10 ** (num_warmup_steps - current_step)
        else:
            return lr_max * 2 ** -(num_warmup_steps - current_step)
    else:
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))

        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) * lr_max
# Learning rate for encoder
LR_SCHEDULE = [lrfn(step, num_warmup_steps=N_WARMUP_EPOCHS, lr_max=LR_MAX, num_cycles=0.50) for step in range(N_EPOCHS)]
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda step: LR_SCHEDULE[step], verbose=1)


In [42]:
tf.keras.backend.clear_session()

# Get new fresh model
file_name = 'models/weights_stgcn4.h5'
#model = tf.keras.models.load_model('models/041423_21_02.h5')

# Sanity Check
model.summary(expand_nested=True)

# Actual Training
history = model.fit(
        x=x_train,
        y=y_train,
        epochs=100,
        # Only used for validation data since training data is a generator
        batch_size=128,
        validation_data=(x_test,y_test),
        callbacks=[
            tf.keras.callbacks.ModelCheckpoint(
            file_name,
            save_weights_only = True,
            save_best_only=True, 
            monitor="val_accuracy",
            mode="max",
            verbose = 1),
            lr_callback,
            WeightDecayCallback(),
        ],
        verbose = 1,
    )

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization_9 (Batc  multiple                 732       
 hNormalization)                                                 
                                                                 
 stgcn_3 (STGCN)             multiple                  38208     
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| sgcn_3 (SGCN)             multiple                  768       |
||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||
|| conv2d_9 (Conv2D)       multiple                  768       ||
|¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯|
| sequential_5 (Sequential)  (128, 64, 20, 61)        37440     |
||¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯||
|| batch_normalization_10 (Bat  (128, 64, 20, 61)    256       ||
|| chNormalization)                                        

learning rate: 9.96e-04, weight decay: 4.98e-05
Epoch 5/100
Epoch 5: val_accuracy improved from 0.43413 to 0.49283, saving model to models/weights_stgcn4.h5

Epoch 6: LearningRateScheduler setting learning rate to 0.0009938441702975688.
learning rate: 9.94e-04, weight decay: 4.97e-05
Epoch 6/100
Epoch 6: val_accuracy improved from 0.49283 to 0.50615, saving model to models/weights_stgcn4.h5

Epoch 7: LearningRateScheduler setting learning rate to 0.0009911436253643444.
learning rate: 9.91e-04, weight decay: 4.96e-05
Epoch 7/100
Epoch 7: val_accuracy did not improve from 0.50615

Epoch 8: LearningRateScheduler setting learning rate to 0.0009879583809693738.
learning rate: 9.88e-04, weight decay: 4.94e-05
Epoch 8/100
Epoch 8: val_accuracy improved from 0.50615 to 0.53542, saving model to models/weights_stgcn4.h5

Epoch 9: LearningRateScheduler setting learning rate to 0.0009842915805643156.
learning rate: 9.84e-04, weight decay: 4.92e-05
Epoch 9/100
Epoch 9: val_accuracy improved from 0.

Epoch 23: val_accuracy improved from 0.63420 to 0.63431, saving model to models/weights_stgcn4.h5

Epoch 24: LearningRateScheduler setting learning rate to 0.0008750555348152298.
learning rate: 8.75e-04, weight decay: 4.38e-05
Epoch 24/100
Epoch 24: val_accuracy improved from 0.63431 to 0.64426, saving model to models/weights_stgcn4.h5

Epoch 25: LearningRateScheduler setting learning rate to 0.0008644843137107057.
learning rate: 8.64e-04, weight decay: 4.32e-05
Epoch 25/100
Epoch 25: val_accuracy did not improve from 0.64426

Epoch 26: LearningRateScheduler setting learning rate to 0.0008535533905932737.
learning rate: 8.54e-04, weight decay: 4.27e-05
Epoch 26/100
Epoch 26: val_accuracy improved from 0.64426 to 0.64796, saving model to models/weights_stgcn4.h5

Epoch 27: LearningRateScheduler setting learning rate to 0.0008422735529643444.
learning rate: 8.42e-04, weight decay: 4.21e-05
Epoch 27/100
Epoch 27: val_accuracy did not improve from 0.64796

Epoch 28: LearningRateScheduler s


Epoch 42: LearningRateScheduler setting learning rate to 0.0006394955530196147.
learning rate: 6.39e-04, weight decay: 3.20e-05
Epoch 42/100
Epoch 42: val_accuracy did not improve from 0.70120

Epoch 43: LearningRateScheduler setting learning rate to 0.0006243449435824276.
learning rate: 6.24e-04, weight decay: 3.12e-05
Epoch 43/100
Epoch 43: val_accuracy improved from 0.70120 to 0.71046, saving model to models/weights_stgcn4.h5

Epoch 44: LearningRateScheduler setting learning rate to 0.0006090716206982714.
learning rate: 6.09e-04, weight decay: 3.05e-05
Epoch 44/100
Epoch 44: val_accuracy did not improve from 0.71046

Epoch 45: LearningRateScheduler setting learning rate to 0.0005936906572928624.
learning rate: 5.94e-04, weight decay: 2.97e-05
Epoch 45/100
Epoch 45: val_accuracy did not improve from 0.71046

Epoch 46: LearningRateScheduler setting learning rate to 0.0005782172325201155.
learning rate: 5.78e-04, weight decay: 2.89e-05
Epoch 46/100
Epoch 46: val_accuracy did not impro


Epoch 61: LearningRateScheduler setting learning rate to 0.00034549150281252633.
learning rate: 3.45e-04, weight decay: 1.73e-05
Epoch 61/100
Epoch 61: val_accuracy did not improve from 0.73721

Epoch 62: LearningRateScheduler setting learning rate to 0.0003306310398773543.
learning rate: 3.31e-04, weight decay: 1.65e-05
Epoch 62/100
Epoch 62: val_accuracy did not improve from 0.73721

Epoch 63: LearningRateScheduler setting learning rate to 0.00031593772365766105.
learning rate: 3.16e-04, weight decay: 1.58e-05
Epoch 63/100
Epoch 63: val_accuracy did not improve from 0.73721

Epoch 64: LearningRateScheduler setting learning rate to 0.00030142605468260977.
learning rate: 3.01e-04, weight decay: 1.51e-05
Epoch 64/100
Epoch 64: val_accuracy improved from 0.73721 to 0.73775, saving model to models/weights_stgcn4.h5

Epoch 65: LearningRateScheduler setting learning rate to 0.00028711035421746366.
learning rate: 2.87e-04, weight decay: 1.44e-05
Epoch 65/100
Epoch 65: val_accuracy improved 

Epoch 79: val_accuracy did not improve from 0.77371

Epoch 80: LearningRateScheduler setting learning rate to 0.00010492249381215479.
learning rate: 1.05e-04, weight decay: 5.25e-06
Epoch 80/100
Epoch 80: val_accuracy did not improve from 0.77371

Epoch 81: LearningRateScheduler setting learning rate to 9.549150281252633e-05.
learning rate: 9.55e-05, weight decay: 4.77e-06
Epoch 81/100
Epoch 81: val_accuracy improved from 0.77371 to 0.77868, saving model to models/weights_stgcn4.h5

Epoch 82: LearningRateScheduler setting learning rate to 8.645971286271903e-05.
learning rate: 8.65e-05, weight decay: 4.32e-06
Epoch 82/100
Epoch 82: val_accuracy improved from 0.77868 to 0.77879, saving model to models/weights_stgcn4.h5

Epoch 83: LearningRateScheduler setting learning rate to 7.783603724899258e-05.
learning rate: 7.78e-05, weight decay: 3.89e-06
Epoch 83/100
Epoch 83: val_accuracy improved from 0.77879 to 0.78168, saving model to models/weights_stgcn4.h5

Epoch 84: LearningRateScheduler 

Epoch 97: val_accuracy did not improve from 0.78574

Epoch 98: LearningRateScheduler setting learning rate to 2.219017698460002e-06.
learning rate: 2.22e-06, weight decay: 1.11e-07
Epoch 98/100
Epoch 98: val_accuracy did not improve from 0.78574

Epoch 99: LearningRateScheduler setting learning rate to 9.866357858642206e-07.
learning rate: 9.87e-07, weight decay: 4.93e-08
Epoch 99/100
Epoch 99: val_accuracy did not improve from 0.78574

Epoch 100: LearningRateScheduler setting learning rate to 2.467198171342e-07.
learning rate: 2.47e-07, weight decay: 1.23e-08
Epoch 100/100
Epoch 100: val_accuracy did not improve from 0.78574


In [46]:
# https://towardsdatascience.com/model-sub-classing-and-custom-training-loop-from-scratch-in-tensorflow-2-cc1d4f10fb4e
optimizer = tf.keras.optimizers.Adam()
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
val_acc_metric   = tf.keras.metrics.CategoricalAccuracy()
# Instantiate a loss function
# loss_fn = tf.keras.losses.CategoricalCrossentropy()
loss_fn=tf.keras.losses.SparseCategoricalCrossentropy()
for epoch in range(30): # <----- start for loop, step 1

  # <-------- start for loop, step 2
  # Iterate over the batches of the dataset.
  for step, (x_batch_train, y_batch_train) in enumerate(train_ds):

    # <-------- start gradient tape scope, step 3
    # Open a GradientTape to record the operations run
    # during the forward pass, which enables auto-differentiation.
    with tf.GradientTape() as tape:

       # Run the forward pass of the layer.
       # The operations that the layer applies
       # to its inputs are going to be recorded
       # on the GradientTape.
       logits = model(x_batch_train, training=True) 

       # Compute the loss value for this minibatch.
       loss_value = loss_fn(y_batch_train, logits)  
       print(loss_value )

    # compute the gradient of weights w.r.t. loss  <-------- step 5
    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss.
    grads = tape.gradient(loss_value, model.trainable_weights)

    # update the weight based on gradient  <---------- step 6
    # Run one step of gradient descent by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    train_acc_metric.update_state(y_batch_train, logits)
    print(train_acc_metric)

NameError: name 'train_ds' is not defined

In [37]:
x_train.shape

(3, 20, 61, 75520, 1)

In [None]:
if CFG.is_training:
    file_name = "model.h5"
#     callbacks = [
#         tf.keras.callbacks.ModelCheckpoint(
#             file_name, 
#             save_best_only=True, 
#             restore_best_weights=True, 
#             monitor="val_accuracy",
#             mode="max"
#         ),
#         tf.keras.callbacks.EarlyStopping(
#             patience=5, 
#             monitor="val_accuracy",
#             mode="max"
#         )
#     ]
    model.fit(train_ds, epochs=1, validation_data=valid_ds)
    model.save('/kaggle/input/islr-convlstm1d/model.h5',save_format='tf')
    model = tf.keras.models.load_model(file_name)
# else:
#     model = tf.keras.models.load_model("/kaggle/input/islr-convlstm1d/model.h5")
model.evaluate(valid_ds)   

## Create Model for inference

In [None]:
# def get_inference_model(model):
#     inputs = tf.keras.Input((543, 3), dtype=tf.float32, name="inputs")
#     vector = tf.image.resize(inputs, (CFG.sequence_length, 543))
#     vector = tf.where(tf.math.is_nan(vector), tf.zeros_like(vector), vector)
#     vector = tf.expand_dims(vector, axis=0)
#     vector = model(vector)
#     output = tf.keras.layers.Activation(activation="linear", name="outputs")(vector)
#     inference_model = tf.keras.Model(inputs=inputs, outputs=output) 
#     inference_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=["accuracy"])
#     return inference_model

In [None]:
# inference_model = get_inference_model(model)
# inference_model.summary()
# tf.keras.utils.plot_model(inference_model, show_shapes=True)

## Create submission file

In [None]:
# converter = tf.lite.TFLiteConverter.from_keras_model(inference_model)
# tflite_model = converter.convert()
# model_path = "model.tflite"
# # Save the model.
# with open(model_path, 'wb') as f:
#     f.write(tflite_model)
# !zip submission.zip $model_path

## Making Predictions

In [None]:
# !pip install tflite-runtime

In [None]:
# import tflite_runtime.interpreter as tflite
# interpreter = tflite.Interpreter(model_path)
# found_signatures = list(interpreter.get_signature_list().keys())
# prediction_fn = interpreter.get_signature_runner("serving_default")
# for i in tqdm(range(10000)):
#     frames = load_relevant_data_subset(f'/kaggle/input/asl-signs/{train.iloc[i].path}')
#     output = prediction_fn(inputs=frames)
#     if i < 100:
#         sign = np.argmax(output["outputs"])
#         print(f"Predicted label: {index_label[sign]}, Actual Label: {train.iloc[i].sign}")