In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedGroupKFold, GroupKFold, KFold
import gc
# !pip install ipywidgets
from tqdm.auto import tqdm
import Levenshtein
import time
import os

In [70]:
DEBUG = True #False for submission
N = -1 #-1 for all samples
MODEL_PATH=['model/aslfr-fp16-192d-17l-ctcattjoint-seed42-foldall-last.h5']

In [5]:
class CFG:
    seed = 42
    n_splits = 5

In [6]:
import json
with open('character_to_prediction_index.json') as json_file:
    CHAR_TO_NUM = json.load(json_file)
NUM_TO_CHAR = dict([(y+1,x) for x,y in CHAR_TO_NUM.items()] )
NUM_TO_CHAR[60] = 'S'
NUM_TO_CHAR[61] = 'E'
NUM_TO_CHAR[0] = 'P'

In [7]:
CHAR_TO_NUM

{' ': 0,
 '!': 1,
 '#': 2,
 '$': 3,
 '%': 4,
 '&': 5,
 "'": 6,
 '(': 7,
 ')': 8,
 '*': 9,
 '+': 10,
 ',': 11,
 '-': 12,
 '.': 13,
 '/': 14,
 '0': 15,
 '1': 16,
 '2': 17,
 '3': 18,
 '4': 19,
 '5': 20,
 '6': 21,
 '7': 22,
 '8': 23,
 '9': 24,
 ':': 25,
 ';': 26,
 '=': 27,
 '?': 28,
 '@': 29,
 '[': 30,
 '_': 31,
 'a': 32,
 'b': 33,
 'c': 34,
 'd': 35,
 'e': 36,
 'f': 37,
 'g': 38,
 'h': 39,
 'i': 40,
 'j': 41,
 'k': 42,
 'l': 43,
 'm': 44,
 'n': 45,
 'o': 46,
 'p': 47,
 'q': 48,
 'r': 49,
 's': 50,
 't': 51,
 'u': 52,
 'v': 53,
 'w': 54,
 'x': 55,
 'y': 56,
 'z': 57,
 '~': 58}

In [8]:
#for the lip_lr function. LEFT[i] is matching with RIGHT[i](i.e LEFT[i](x) == -RIGHT[i](x)).
#computed from https://github.com/google/mediapipe/blob/master/mediapipe/modules/face_geometry/data/canonical_face_model.obj

LEFT = [
         248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264,
         265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281,
         282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
         299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
         316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332,
         333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,
         350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366,
         367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383,
         384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400,
         401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417,
         418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
         435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451,
         452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467,  #LFACE
         468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, #LHAND
         493, 494, 495, 497, 499, 501, 503, 505, 507, 509, 511, 513, #LPOSE
         515, 517, 519, 521, #LLEG
         ]

RIGHT = [
         3, 7, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
         39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
         60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
         81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 100, 101, 102,
         103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
         121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
         139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 153, 154, 155, 156, 157, 158,
         159, 160, 161, 162, 163, 165, 166, 167, 169, 170, 171, 172, 173, 174, 176, 177, 178, 179,
         180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 196, 198, 201,
         202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
         220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
         238, 239, 240, 241, 242, 243, 244, 245, 246, 247, #RFACE
        522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, #RHAND
        490, 491, 492, 496, 498, 500, 502, 504, 506, 508, 510, 512, #RPOSE
        514, 516, 518, 520, #RLEG
        ]

CENTRE = [
          0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 94, 151, 152, 164, 168, 175, 195, 197, 199, 200, #FACE
          489, #POSE
          ]

print(len(LEFT+RIGHT+CENTRE))

543


In [9]:
ROWS_PER_FRAME = 543
MAX_LEN = 768
CROP_LEN = MAX_LEN
NUM_CLASSES  = len(NUM_TO_CHAR.values()) #62
PAD = -100.

LHAND = np.arange(468, 489).tolist()
RHAND = np.arange(522, 543).tolist()
POINT_LANDMARKS = list(range(543))

NUM_NODES = len(POINT_LANDMARKS)
CHANNELS = 3*NUM_NODES

print(NUM_NODES)
print(CHANNELS)

def interp1d_(x, target_len, method='random'):
    length = tf.shape(x)[1]
    target_len = tf.maximum(1,target_len)
    if method == 'random':
        if tf.random.uniform(()) < 0.33:
            x = tf.image.resize(x, (target_len,tf.shape(x)[1]),'bilinear')
        else:
            if tf.random.uniform(()) < 0.5:
                x = tf.image.resize(x, (target_len,tf.shape(x)[1]),'bicubic')
            else:
                x = tf.image.resize(x, (target_len,tf.shape(x)[1]),'nearest')
    else:
        x = tf.image.resize(x, (target_len,tf.shape(x)[1]),method)
    return x

def tf_nan_mean(x, axis=0, keepdims=False):
    return tf.reduce_sum(tf.where(tf.math.is_nan(x), tf.zeros_like(x), x), axis=axis, keepdims=keepdims) / tf.reduce_sum(tf.where(tf.math.is_nan(x), tf.zeros_like(x), tf.ones_like(x)), axis=axis, keepdims=keepdims)

def tf_nan_std(x, center=None, axis=0, keepdims=False):
    if center is None:
        center = tf_nan_mean(x, axis=axis,  keepdims=True)
    d = x - center
    return tf.math.sqrt(tf_nan_mean(d * d, axis=axis, keepdims=keepdims))

def filter_nans_tf(x, ref_point=POINT_LANDMARKS):
    print("INSIDE FILTER",x.shape)
    mask = tf.math.logical_not(tf.reduce_all(tf.math.is_nan(tf.gather(x,ref_point,axis=1)), axis=[-2,-1]))
    x = tf.boolean_mask(x, mask, axis=0)
    return x

def is_left_handed(x, left=LHAND, right=RHAND):
    lhand = tf.gather(x, left, axis=1)
    rhand = tf.gather(x, right, axis=1)
    lhand_nans = tf.reduce_sum(tf.cast(tf.math.is_nan(lhand), tf.int32))
    rhand_nans = tf.reduce_sum(tf.cast(tf.math.is_nan(rhand), tf.int32))
    return lhand_nans < rhand_nans

def flip_lr(x):
    x,y,z = tf.unstack(x, axis=-1)
    x = 1-x
    new_x = tf.stack([x,y,z], -1)
    new_x = tf.transpose(new_x, [1,0,2])
    l_x = tf.gather(new_x, LEFT, axis=0)
    r_x = tf.gather(new_x, RIGHT, axis=0)
    c_x = tf.gather(new_x, CENTRE, axis=0)
#     new_x = tf.tensor_scatter_nd_update(new_x, tf.constant(left)[...,None], r_x) <-weird behavior in tflite!!!:(
#     new_x = tf.tensor_scatter_nd_update(new_x, tf.constant(right)[...,None], l_x)
    new_xr = tf.scatter_nd(tf.constant(LEFT)[...,None], r_x, tf.shape(new_x))
    new_xl = tf.scatter_nd(tf.constant(RIGHT)[...,None], l_x, tf.shape(new_x))
    new_xc = tf.scatter_nd(tf.constant(CENTRE)[...,None], c_x, tf.shape(new_x))
    new_x = new_xr + new_xl + new_xc
    new_x = tf.transpose(new_x, [1,0,2])
    return new_x

class Preprocess(tf.keras.layers.Layer):
    def __init__(self, max_len=MAX_LEN, point_landmarks=POINT_LANDMARKS, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.point_landmarks = point_landmarks

    def call(self, inputs):
        # if tf.rank(inputs) == 3:
        #     x = inputs[None,...]
        # else:
        #     x = inputs
        x = inputs
#         print(inputs)
#         print(inputs.shape)
        print(inputs)
        print("\n----\n",inputs.shape)
        x = filter_nans_tf(x)
        x = tf.cond(is_left_handed(x), lambda:flip_lr(x), lambda:x)
        x = x[None,...]

        if self.max_len is not None:
            x = x[:,:self.max_len]
        length = tf.shape(x)[1]

        mean = tf_nan_mean(tf.gather(x, self.point_landmarks, axis=2), axis=[1,2], keepdims=True)
        mean = tf.where(tf.math.is_nan(mean), tf.constant([0.5,0.5,0.],x.dtype), mean)
        x = tf.gather(x, self.point_landmarks, axis=2) #N,T,P,C
        std = tf_nan_std(x, center=mean, axis=[1,2], keepdims=True)

        x = (x - mean)/std

        x = tf.concat([
            tf.reshape(x, (-1,length,3*len(self.point_landmarks))),
            # tf.reshape(dx, (-1,length,3*len(self.point_landmarks))),
        ], axis = -1)

        x = tf.where(tf.math.is_nan(x),tf.constant(0.,x.dtype),x)
        print(x)
        return x

543
1629


In [10]:
class ECA(tf.keras.layers.Layer):
    def __init__(self, kernel_size=5, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.kernel_size = kernel_size
        self.conv = tf.keras.layers.Conv1D(1, kernel_size=kernel_size, strides=1, padding="same", use_bias=False)

    def call(self, inputs, mask=None):
        nn = tf.keras.layers.GlobalAveragePooling1D()(inputs, mask=mask)
        nn = tf.expand_dims(nn, -1)
        nn = self.conv(nn)
        nn = tf.squeeze(nn, -1)
        nn = tf.nn.sigmoid(nn)
        nn = nn[:,None,:]
        return inputs * nn

class LateDropout(tf.keras.layers.Layer):
    def __init__(self, rate, noise_shape=None, start_step=0, **kwargs):
        super().__init__(**kwargs)
        self.supports_masking = True
        self.rate = rate
        self.start_step = start_step
        self.dropout = tf.keras.layers.Dropout(rate, noise_shape=noise_shape)

    def build(self, input_shape):
        super().build(input_shape)
        agg = tf.VariableAggregation.ONLY_FIRST_REPLICA
        self._train_counter = tf.Variable(0, dtype="int64", aggregation=agg, trainable=False)

    def call(self, inputs, training=False):
        x = tf.cond(self._train_counter < self.start_step, lambda:inputs, lambda:self.dropout(inputs, training=training))
        if training:
            self._train_counter.assign_add(1)
        return x

class MaskingConv1D(tf.keras.layers.Layer):
    def __init__(self, filters, kernel_size, groups=1, strides=1,
        dilation_rate=1,
        padding='same',
        use_bias=False,
        kernel_initializer='glorot_uniform',**kwargs):
        super().__init__(**kwargs)
        assert padding == 'same'
        self.filters = filter_dataset_eager_fallback
        self.strides = strides
        self.groups = groups
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.use_bias = use_bias
        self.padding = padding
        self.conv = tf.keras.layers.Conv1D(
                            filters,
                            kernel_size,
                            strides=strides,
                            groups=groups,
                            dilation_rate=dilation_rate,
                            padding=padding,
                            use_bias=use_bias,
                            kernel_initializer=kernel_initializer)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
      if mask is not None:
        if self.strides > 1:
          mask = mask[:,::self.strides]
      return mask

    def call(self, inputs, mask=None):
        x = inputs
        if mask is not None:
            x = tf.where(mask[...,None], x, tf.constant(0., dtype=x.dtype))
        x = self.conv(x)
        return x

class MaskingDWConv1D(tf.keras.layers.Layer):
    def __init__(self, kernel_size, strides=1,
        dilation_rate=1,
        padding='same',
        use_bias=False,
        kernel_initializer='glorot_uniform',**kwargs):
        super().__init__(**kwargs)
        assert padding == 'same'
        self.strides = strides
        self.kernel_size = kernel_size
        self.dilation_rate = dilation_rate
        self.use_bias = use_bias
        self.padding = padding
        self.conv = tf.keras.layers.DepthwiseConv1D(
                            kernel_size,
                            strides=strides,
                            dilation_rate=dilation_rate,
                            padding=padding,
                            use_bias=use_bias,
                            kernel_initializer=kernel_initializer)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
      if mask is not None:
        if self.strides > 1:
          mask = mask[:,::self.strides]
      return mask

    def call(self, inputs, mask=None):
        x = inputs
        if mask is not None:
            x = tf.where(mask[...,None], x, tf.constant(0., dtype=x.dtype))
        x = self.conv(x)
        return x

def Conv1DBlock(channel_size,
          kernel_size,
          dilation_rate=1,
          strides=1,
          drop_rate=0.0,
          expand_ratio=2,
          se_ratio=0.25,
          activation='swish',
          name=None):
    '''
    efficient conv1d block, @hoyso48
    '''
    if name is None:
        name = str(tf.keras.backend.get_uid("mbblock"))
    # Expansion phase
    def apply(inputs):
        channels_in = tf.keras.backend.int_shape(inputs)[-1]
        channels_expand = channels_in * expand_ratio

        skip = inputs
        
        x = tf.keras.layers.BatchNormalization(momentum=0.95, name=name + 'pre_bn')(inputs)
        
        x = tf.keras.layers.Dense(
            channels_expand,
            use_bias=True,
            activation=activation,
            name=name + '_expand_conv')(x)

        # Depthwise Convolution
        x = MaskingDWConv1D(kernel_size,
            dilation_rate=dilation_rate,
            strides=strides,
            use_bias=False,
            name=name + '_dwconv')(x)
        # x = MaskingConv1D(channels_expand,kernel_size,
        #     dilation_rate=dilation_rate,
        #     groups=8,
        #     use_bias=False,
        #     name=name + '_dwconv')(x)

        x = tf.keras.layers.BatchNormalization(momentum=0.95, name=name + 'conv_bn')(x)

        x  = ECA()(x)

        x = tf.keras.layers.Dense(
            channel_size,
            use_bias=True,
            name=name + '_project_conv')(x)

        if drop_rate > 0:
            x = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1), name=name + '_drop')(x)

        if (channels_in == channel_size) and (strides == 1):
            x = tf.keras.layers.add([x, skip], name=name + '_add')
        return x

    return apply

def Conv2DBlock(channel_size,
          kernel_size,
          strides=1,
          dilation_rate=1,
          drop_rate=0.0,
          expand_ratio=2,
          se_ratio=0.25,
          activation='gelu',
          name=None):
    '''
    efficient conv1d block, @hoyso48
    '''
    if name is None:
        name = str(tf.keras.backend.get_uid("mbblock"))
    # Expansion phase
    def apply(inputs):
        channels_in = tf.keras.backend.int_shape(inputs)[-1]
        channels_expand = channels_in * expand_ratio

        skip = inputs

        x = tf.keras.layers.Dense(
            channels_expand,
            use_bias=True,
            activation=activation,
            name=name + '_expand_conv')(inputs)

        # Depthwise Convolution
        # x = MaskingDWConv2D(kernel_size,
        #     strides=strides,
        #     dilation_rate=dilation_rate,
        #     use_bias=False,
        #     name=name + '_dwconv')(x)
        x = MaskingConv2D(
            channels_expand,
            kernel_size,
            strides=strides,
            dilation_rate=dilation_rate,
            use_bias=False,
            groups=channels_expand,
            name=name+'_dwconv',
        )(x)

        # x = tf.keras.layers.BatchNormalization(momentum=0.95, name=name + '_bn', fused=False)(x)
        # x = MaskingBatchNorm2D()(x)
        # x = ReduceMask()(x)
        x = tf.keras.layers.LayerNormalization()(x)


        # x  = ECA()(x)

        x = tf.keras.layers.Dense(
            channel_size,
            use_bias=True,
            name=name + '_project_conv')(x)

        if drop_rate > 0:
            x = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1,1), name=name + '_drop')(x)

        if (channels_in == channel_size) and (strides == 1):
            x = tf.keras.layers.add([x, skip], name=name + '_add')
        return x

    return apply

In [11]:
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, dim=256, num_heads=4, dropout=0, **kwargs):
        super().__init__(**kwargs)
        self.dim = dim
        self.scale = self.dim ** -0.5
        self.num_heads = num_heads
        # self.qkv = tf.keras.layers.Dense(3 * dim, use_bias=False)
        self.q = tf.keras.layers.Dense(dim, use_bias=False)
        self.k = tf.keras.layers.Dense(dim, use_bias=False)
        self.v = tf.keras.layers.Dense(dim, use_bias=False)
        self.drop1 = tf.keras.layers.Dropout(dropout)
        self.proj = tf.keras.layers.Dense(dim, use_bias=False)
        self.supports_masking = True

    def get_causal_mask(self, q, k):
        q_len = tf.shape(q)[1]
        k_len = tf.shape(k)[1]
        i = tf.range(q_len)[:, None]
        j = tf.range(k_len)
        mask = i >= j
        mask = tf.reshape(mask, (q_len, k_len))
        return mask

    def merge_input_state(self, input, state, layer):
        if input is not None and state is not None:
            return tf.keras.layers.Concatenate(axis=1)([state, layer(input)])
        elif input is not None and state is None:
            return layer(input)
        elif input is None and state is not None:
            return state
        else:
            raise ValueError
        # return out

    def call(self, q, k=None, v=None, key_state=None, value_state=None, return_states=False, use_causal_mask=False):
        q = self.q(q)
        k = self.merge_input_state(k, key_state, self.k)
        v = self.merge_input_state(v, value_state, self.v)
        mask = getattr(k, '_keras_mask', None)
        if mask is not None:
            mask = mask[:,None,None,:]
        if use_causal_mask:
            if mask is not None:
                mask = tf.logical_and(mask, self.get_causal_mask(q,k)[None,None,:,:])
            else:
                mask = self.get_causal_mask(q,k)[None,None,:,:]
        q_ = tf.keras.layers.Permute((2, 1, 3))(tf.keras.layers.Reshape((-1, self.num_heads, self.dim // self.num_heads))(q))
        k_ = tf.keras.layers.Permute((2, 1, 3))(tf.keras.layers.Reshape((-1, self.num_heads, self.dim // self.num_heads))(k))
        v_ = tf.keras.layers.Permute((2, 1, 3))(tf.keras.layers.Reshape((-1, self.num_heads, self.dim // self.num_heads))(v))
        attn = tf.matmul(q_, k_, transpose_b=True) * self.scale

        attn = tf.keras.layers.Softmax(axis=-1)(attn, mask=mask)
        attn = self.drop1(attn)

        x = attn @ v_
        x = tf.keras.layers.Reshape((-1, self.dim))(tf.keras.layers.Permute((2, 1, 3))(x))
        x = self.proj(x)
        if return_states:
            return x, k, v
        else:
            return x

class PosEmbedding(tf.keras.layers.Layer):
    def __init__(self, dim=64, max_len=64, **kwargs):
        super().__init__(**kwargs)
        self.pos_emb = tf.keras.layers.Embedding(input_dim=max_len, output_dim=dim)
        self.supports_masking = True

    def call(self, x, positions=None):
        if positions is None:
            maxlen = tf.shape(x)[1]
            positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        return x + positions
    
def TransformerDecoderBlock(dim=256, num_heads=4, expand=4, attn_dropout=0.2, drop_rate=0.2, activation='swish', name=''):
    def apply(q,k,v):
        x = q
        # key_mask=None
        x = tf.keras.layers.BatchNormalization(momentum=0.95, name=name + '_bn1')(x)
        x = MultiHeadAttention(dim=dim,num_heads=num_heads,dropout=attn_dropout, name=name + '_self_attn')(x,x,x,use_causal_mask=True)
        # print(x.shape, q.shape)
        # x = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1))(x)
        x = tf.keras.layers.Add(name=name + '_add1')([q, x])
        attn_out1 = x

        x = tf.keras.layers.BatchNormalization(momentum=0.95, name=name + '_bn2')(x)
        x = MultiHeadAttention(dim=dim,num_heads=num_heads,dropout=attn_dropout, name=name + '_cross_attn')(x,k,v)
        # x = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1))(x)
        x = tf.keras.layers.Add(name=name + '_add2')([attn_out1, x])
        attn_out2 = x

        x = tf.keras.layers.BatchNormalization(momentum=0.95, name=name + '_bn3')(x)
        x = tf.keras.layers.Dense(dim*expand, use_bias=False, activation=activation, name=name + '_fc1')(x)
        x = tf.keras.layers.Dense(dim, use_bias=False, name=name + '_fc2')(x)
        # x = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1))(x)
        x = tf.keras.layers.Add(name=name + '_add3')([attn_out2, x])
        return x
    return apply

In [12]:
class MultiHeadSelfAttention(tf.keras.layers.Layer):
    def __init__(self, dim=256, num_heads=4, dropout=0, **kwargs):
        super().__init__(**kwargs)
        self.dim = dim
        self.scale = self.dim ** -0.5
        self.num_heads = num_heads
        self.qkv = tf.keras.layers.Dense(3 * dim, use_bias=False)
        self.drop1 = tf.keras.layers.Dropout(dropout)
        self.proj = tf.keras.layers.Dense(dim, use_bias=False)
        self.supports_masking = True

    def call(self, inputs, mask=None):
        qkv = self.qkv(inputs)
        qkv = tf.keras.layers.Permute((2, 1, 3))(tf.keras.layers.Reshape((-1, self.num_heads, self.dim * 3 // self.num_heads))(qkv))
        q, k, v = tf.split(qkv, [self.dim // self.num_heads] * 3, axis=-1)

        attn = tf.matmul(q, k, transpose_b=True) * self.scale

        if mask is not None:
            mask = mask[:, None, None, :]
            # print('selfattn mask', mask.shape)

        attn = tf.keras.layers.Softmax(axis=-1)(attn, mask=mask)
        attn = self.drop1(attn)

        x = attn @ v
        x = tf.keras.layers.Reshape((-1, self.dim))(tf.keras.layers.Permute((2, 1, 3))(x))
        x = self.proj(x)
        return x

def TransformerBlock(dim=256, num_heads=4, expand=4, attn_dropout=0.2, drop_rate=0.2, activation='swish'):
    def apply(inputs):
        x = inputs
        x = tf.keras.layers.BatchNormalization(momentum=0.95)(x)
        x = MultiHeadSelfAttention(dim=dim,num_heads=num_heads,dropout=attn_dropout)(x)
        x = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1))(x)
        x = tf.keras.layers.Add()([inputs, x])
        attn_out = x

        x = tf.keras.layers.BatchNormalization(momentum=0.95)(x)
        x = tf.keras.layers.Dense(dim*expand, use_bias=False, activation=activation)(x)
        x = tf.keras.layers.Dense(dim, use_bias=False)(x)
        x = tf.keras.layers.Dropout(drop_rate, noise_shape=(None,1,1))(x)
        x = tf.keras.layers.Add()([attn_out, x])
        return x
    return apply

In [13]:
def get_model(max_len=MAX_LEN, target_len=64, dim=192, dtype='float32'):
    ################# ENCODER #################
    inp1 = tf.keras.Input((max_len,CHANNELS),dtype=dtype)
#     x = tf.keras.layers.Masking(mask_value=PAD,input_shape=(max_len,CHANNELS))(inp1)
    x = inp1
    ksize = 17
    drop_rate = 0.2
    x = tf.keras.layers.Dense(dim,use_bias=False,name='stem_conv')(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = TransformerBlock(dim,expand=2,num_heads=4,drop_rate=drop_rate,attn_dropout=0.2)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = TransformerBlock(dim,expand=2,num_heads=4,drop_rate=drop_rate,attn_dropout=0.2)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=0,strides=2)(x) #drop_rate=0 since we don't want to drop the whole output
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = TransformerBlock(dim,expand=2,num_heads=4,drop_rate=drop_rate,attn_dropout=0.2)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = Conv1DBlock(dim,ksize,expand_ratio=4,drop_rate=drop_rate)(x)
    x = TransformerBlock(dim,expand=2,num_heads=4,drop_rate=drop_rate,attn_dropout=0.2)(x)
    x = tf.keras.layers.BatchNormalization(momentum=0.95)(x)

    encoder = tf.keras.Model(inp1,x,name='encoder')

    ################# CTC DECDODER #################
    inp3 = tf.keras.Input((x.shape[1],dim),name='ctc_decoder_inp2',dtype=dtype)
    x = inp3
    x = tf.keras.layers.RNN(tf.keras.layers.GRUCell(dim), return_sequences=True)(x)
    x = tf.keras.layers.Dense(dim*2)(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    x = tf.keras.layers.Dense(NUM_CLASSES,name='ctc_classifier')(x) #include sos, eos token
    ctc_decoder = tf.keras.Model(inp3,x,name='ctc_decoder')

    ################# ATT DECODER #################
    inp2 = tf.keras.Input((None,),name='att_decoder_inp1',dtype='int32')
    inp3 = tf.keras.Input((x.shape[1],dim),name='att_decoder_inp2',dtype=dtype)

    x = inp3
#     y = tf.keras.layers.Masking(mask_value=0,input_shape=(None,),name='att_decoder_input_masking')(inp2)
    y = inp2
    y = tf.keras.layers.Embedding(NUM_CLASSES,dim,name='att_decoder_token_emb')(y) #include sos token
    y = PosEmbedding(dim,max_len=target_len,name='att_decoder_pos_emb')(y)
    y = TransformerDecoderBlock(dim,expand=2,num_heads=4,attn_dropout=0.2,name='att_decoder_block1')(y,x,x)
    y = tf.keras.layers.Dropout(0.5)(y)
    y = tf.keras.layers.Dense(NUM_CLASSES,name='att_decoder_classifier')(y)

    decoder = tf.keras.Model([inp2,inp3],y,name='att_decoder')

    ################### MODEL #####################
    inp1 = tf.keras.Input((max_len,CHANNELS),dtype=dtype)
    inp2 = tf.keras.Input((None,),dtype='int32')

    x = inp1
    enc_out = encoder(x)
    y = inp2
    dec_out = decoder([y, enc_out])
    ctc_out = ctc_decoder(enc_out)
    model = tf.keras.Model([inp1,inp2], [dec_out,ctc_out])

    return model

model = get_model()
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 768, 1629)]          0         []                            
                                                                                                  
 input_3 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 encoder (Functional)        (None, 384, 192)             5565377   ['input_2[0][0]']             
                                                                                                  
 att_decoder (Functional)    (None, None, 62)             480830    ['input_3[0][0]',             
                                                                     'encoder[0][0]']         

In [14]:
model.load_weights('model/aslfr-fp16-192d-17l-ctcattjoint-seed42-foldall-last.h5')

In [15]:
class CTCGreedyDecoder(tf.keras.layers.Layer):
    def __init__(self, model, pad_token_idx=0, **kwargs):
        super().__init__(**kwargs)
        self.encoder = model.get_layer('encoder')
        self.ctc_decoder = model.get_layer('ctc_decoder')
        self.pad_token_idx = pad_token_idx
        
    def decode_phrase(self, pred):
        x = tf.argmax(pred, axis=1, output_type=tf.int32)
        diff = tf.not_equal(x[:-1], x[1:])
        adjacent_indices = tf.where(diff)[:, 0]
        x = tf.gather(x, adjacent_indices)
        mask = x != self.pad_token_idx
        x = tf.boolean_mask(x, mask, axis=0)
        return x
    
    def call(self, batch_x):
        encoder_out = self.encoder(batch_x)
        ctc_probs = self.ctc_decoder(encoder_out)
        return tf.identity([self.decode_phrase(ctc_probs[0])])

In [16]:
class ATTGreedyDecoder(tf.keras.layers.Layer):
    def __init__(self, model, max_output_length=64, input_strides=2, sos_token_idx=60, eos_token_idx=61, pad_token_idx=0, **kwargs):
        super().__init__(**kwargs)
        self.model = model
        self.encoder = self.model.get_layer('encoder')
        self.decoder = self.model.get_layer('att_decoder')
        self.max_output_length = max_output_length
        self.sos_token_idx = sos_token_idx
        self.eos_token_idx = eos_token_idx
        self.pad_token_idx = pad_token_idx
        self.input_strides = input_strides

    def att_inference_module(self, query, query_position, key_state, value_state, encoder_key_state, encoder_value_state):
        x = self.decoder.get_layer('att_decoder_inp1')(query)
        x = self.decoder.get_layer('att_decoder_token_emb')(x)
        x = self.decoder.get_layer('att_decoder_pos_emb')(x, positions=query_position)

        q = x
        x = self.decoder.get_layer('att_decoder_block1_bn1')(x)
        x, k, v = self.decoder.get_layer('att_decoder_block1_self_attn')(x, x, x, key_state=key_state, value_state=value_state, return_states=True)
        x = self.decoder.get_layer('att_decoder_block1_add1')([q,x])
        attn_out1 = x

        x = self.decoder.get_layer('att_decoder_block1_bn2')(x)
        x = self.decoder.get_layer('att_decoder_block1_cross_attn')(x, None, None, key_state=encoder_key_state, value_state=encoder_value_state)
        x = self.decoder.get_layer('att_decoder_block1_add2')([attn_out1,x])
        attn_out2 = x

        x = self.decoder.get_layer('att_decoder_block1_bn3')(x)
        x = self.decoder.get_layer('att_decoder_block1_fc1')(x)
        x = self.decoder.get_layer('att_decoder_block1_fc2')(x)
        x = self.decoder.get_layer('att_decoder_block1_add3')([attn_out2,x])
        out = self.decoder.get_layer('att_decoder_classifier')(x)
        return out, k, v

    def compute_input_length(self, batch_x):
        input_length = tf.cast(tf.shape(batch_x)[1], tf.float32)
        input_length = tf.math.ceil(input_length/self.input_strides)
        return tf.cast(input_length, tf.int32)
    
    def call(self, batch_x):
        encoder_out = self.encoder(batch_x)
        input_length = self.compute_input_length(batch_x)
        encoder_key_state = self.decoder.get_layer('att_decoder_block1_cross_attn').k(encoder_out)
        encoder_value_state = self.decoder.get_layer('att_decoder_block1_cross_attn').v(encoder_out)

        time = tf.constant(0, dtype=tf.int32)
        predictions = tf.ones((tf.shape(batch_x)[0],1), dtype=tf.int32) * self.sos_token_idx
        pad = tf.ones((tf.shape(batch_x)[0],), dtype=tf.int32) * self.pad_token_idx
        init = True
        key_state = tf.zeros((0,0,192))
        value_state = tf.zeros((0,0,192))

        def condition(_time, predictions, key_state, value_state, init):
            return tf.logical_and(tf.logical_and(_time < self.max_output_length, tf.logical_not(tf.reduce_all(tf.reduce_any(predictions==self.eos_token_idx, axis=1)))), tf.reduce_any(_time < input_length))

        def body(_time, predictions, key_state, value_state, init):
            if init:
                out, key_state, value_state = self.att_inference_module(predictions[:,-1:], _time, None, None, encoder_key_state, encoder_value_state)
                init = False
            else:
                out, key_state, value_state = self.att_inference_module(predictions[:,-1:],  _time, key_state, value_state, encoder_key_state, encoder_value_state)
            pred_curr = tf.where(tf.logical_or(tf.reduce_any(predictions==self.eos_token_idx, axis=1), _time >= input_length), pad, tf.argmax(out[:,-1], axis=-1, output_type=tf.int32))
            predictions = tf.concat([predictions, pred_curr[...,None]], axis=1)
            return _time+1, predictions, key_state, value_state, init

        _, predictions, _, _, _ = tf.while_loop(condition, body, 
                                                shape_invariants=[tf.TensorShape([]),
                                                                  tf.TensorShape([None,None]),
                                                                  tf.TensorShape([None,None,192]),
                                                                  tf.TensorShape([None,None,192]),
                                                                  tf.TensorShape([])], 
                                                loop_vars=[time, predictions, key_state, value_state, init])
        print("PREDICTIONS[ATT_Decoder]:\n",predictions,"\n---------------------\n")
        return predictions

In [17]:
def get_ctc_initial_states(log_probs, blank_idx=0):

    blank_probs = log_probs[...,blank_idx]
    states_n = tf.ones_like(blank_probs, dtype=tf.float32) * tf.float32.min
    states_b = tf.math.cumsum(blank_probs)
    
    return states_n, states_b
    
def compute_ctc_prefix_scores(beams, log_probs, states_n, states_b, eos_idx=61, blank_idx=0):
    # beams: (N=hypothesis_length)
    # probs: (L=(padded/strided)input_length,M=num_models,V=vocab_size)
    # states_n: (L,M)
    # states_b: (L,M)
    
    N = tf.shape(beams)[0]
    L = tf.shape(states_n)[0]
    V = tf.shape(log_probs)[-1]
    M = tf.shape(states_n)[1]
    new_states_n = tf.ones((L,M,V), dtype=tf.float32) * tf.float32.min
    new_states_b = tf.ones((L,M,V), dtype=tf.float32) * tf.float32.min
    new_states_n = tf.cond(N==1, lambda:log_probs, lambda:new_states_n)
    
    r_sum = tf.math.reduce_logsumexp([states_n, states_b], axis=0) #(B,N)
    last = beams[-1] #(1,)

    repeated_idx = last 
     
    log_phi_ = tf.repeat(r_sum[None,...], repeats=V, axis=0) #(V,L,M)
    log_phi = tf.tensor_scatter_nd_update(log_phi_, [[repeated_idx]], [states_b])
    log_phi = tf.transpose(log_phi, (1,2,0)) #(L,M,V)
    
    log_phi = tf.cond(N==1, lambda:tf.transpose(log_phi_, (1,2,0)), lambda:log_phi)
    
    def step_function(prev, inputs):
        prev_r_n, prev_r_b = prev
        current_log_phi, current_prob = inputs
        updated_r_n = tf.math.reduce_logsumexp([prev_r_n, current_log_phi], axis=0) + current_prob
        updated_r_b = tf.math.reduce_logsumexp([prev_r_b, prev_r_n], axis=0) + current_prob[...,blank_idx][...,None]
        return updated_r_n, updated_r_b
    
    start = 1
    log_psi = new_states_n[start-1]

    sequence_log_phi = log_phi[start-1:L-1]
    sequence_probs = log_probs[start-1+N:L-1+N]
    sequences = (sequence_log_phi, sequence_probs) #((L-start,M,V), (L-start,M,V))

    initial_state = (new_states_n[start-1], new_states_b[start-1]) #((M,V),(M,V),(M,V))
    
    log_psi = tf.math.reduce_logsumexp([tf.math.reduce_logsumexp(sequence_log_phi + sequence_probs, axis=0), log_psi], axis=0)

    new_states_n, new_states_b = tf.scan(step_function, sequences, initial_state)

    log_psi_eos = r_sum[-1]
    model_idx = tf.range(M)
    eos_idxs = tf.stack([model_idx, tf.fill((M,), eos_idx)], axis=-1)
    blank_idxs = tf.stack([model_idx, tf.fill((M,), blank_idx)], axis=-1)
    log_psi = tf.tensor_scatter_nd_update(log_psi, eos_idxs, log_psi_eos)
    log_psi = tf.tensor_scatter_nd_update(log_psi, blank_idxs, tf.fill((M,), tf.float32.min))
    
    print("\n Log_psi:",log_psi,"\nnew_States_n:",new_states_n,"\nnew_states_b:",new_states_b)
    return log_psi, new_states_n, new_states_b #(M,V), (L,M,V), (L,M,V)


class EnsembleCTCAttentionJointGreedyDecoder(tf.keras.layers.Layer):
    def __init__(self, model_list, ctc_weight=0.2, input_strides=2, max_output_length=64, blank_idx=0, pad_frame_idx=-100, sos_token_idx=60, eos_token_idx=61, pad_token_idx=0, from_logits=True, **kwargs):
        super().__init__(**kwargs)
        self.encoder_list = [m.get_layer('encoder') for m in model_list]
        self.decoder_list = [m.get_layer('att_decoder') for m in model_list]
        self.ctc_decoder_list = [m.get_layer('ctc_decoder') for m in model_list]
        self.ctc_weight = ctc_weight
        self.input_strides = input_strides
        self.max_output_length = max_output_length
        self.blank_idx = blank_idx
        self.pad_frame_idx = pad_frame_idx
        self.sos_token_idx = sos_token_idx
        self.eos_token_idx = eos_token_idx
        self.pad_token_idx = pad_token_idx
        self.from_logits = from_logits
        
    def compute_input_length(self, batch_x):
        input_length = tf.cast(tf.shape(batch_x)[1], tf.float32)#tf.reduce_sum(tf.cast(mask, tf.float32), axis=-1)
        input_length = tf.math.ceil(input_length/self.input_strides)
        return tf.cast(input_length, tf.int32)

    def att_inference_module(self, query, query_position, key_state_list, value_state_list, encoder_key_state_list, encoder_value_state_list):
        outputs = []
        key_states = []
        value_states = []
        for i in range(len(self.decoder_list)):
            decoder = self.decoder_list[i]
            key_state = key_state_list[i] if key_state_list is not None else None
            value_state = value_state_list[i] if value_state_list is not None else None
            encoder_key_state = encoder_key_state_list[i] if encoder_key_state_list is not None else None
            encoder_value_state = encoder_value_state_list[i] if encoder_value_state_list is not None else None
            x = decoder.get_layer('att_decoder_inp1')(query)
            x = decoder.get_layer('att_decoder_token_emb')(x)
            x = decoder.get_layer('att_decoder_pos_emb')(x, positions=query_position)

            q = x
            x = decoder.get_layer('att_decoder_block1_bn1')(x)
            x, k, v = decoder.get_layer('att_decoder_block1_self_attn')(x, x, x, key_state=key_state, value_state=value_state, return_states=True)
            x = decoder.get_layer('att_decoder_block1_add1')([q,x])
            attn_out1 = x

            x = decoder.get_layer('att_decoder_block1_bn2')(x)
            x = decoder.get_layer('att_decoder_block1_cross_attn')(x, None, None, key_state=encoder_key_state, value_state=encoder_value_state)
            x = decoder.get_layer('att_decoder_block1_add2')([attn_out1,x])
            attn_out2 = x

            x = decoder.get_layer('att_decoder_block1_bn3')(x)
            x = decoder.get_layer('att_decoder_block1_fc1')(x)
            x = decoder.get_layer('att_decoder_block1_fc2')(x)
            x = decoder.get_layer('att_decoder_block1_add3')([attn_out2,x])
            out = decoder.get_layer('att_decoder_classifier')(x)
            outputs.append(out)
            key_states.append(k)
            value_states.append(v)
        return tf.identity(outputs), tf.identity(key_states), tf.identity(value_states)

    def get_initial_states(self, batch_x):
        encoder_outputs = [enc(batch_x) for enc in self.encoder_list]
        encoder_key_states = [dec.get_layer('att_decoder_block1_cross_attn').k(x) for dec, x in zip(self.decoder_list, encoder_outputs)]
        encoder_value_states = [dec.get_layer('att_decoder_block1_cross_attn').v(x) for dec, x in zip(self.decoder_list, encoder_outputs)]
        key_states = [tf.zeros((0,0,192)) for _ in self.encoder_list]
        value_states = [tf.zeros((0,0,192)) for _ in self.encoder_list]
        ctc_probs = [dec(x)[0] for dec,x in zip(self.ctc_decoder_list, encoder_outputs)]
        
        encoder_key_states = tf.stack(encoder_key_states)
        encoder_value_states = tf.stack(encoder_value_states)
        key_states = tf.stack(key_states)
        value_states = tf.stack(value_states)
        encoder_outputs = tf.stack(encoder_outputs)
        
        if self.from_logits:
            ctc_probs = [tf.nn.softmax(x, axis=-1) for x in ctc_probs]
        ctc_probs = tf.stack([tf.math.log(x) for x in ctc_probs], axis=1)
        ctc_states_n, ctc_states_b = get_ctc_initial_states(ctc_probs, self.blank_idx)
        return encoder_key_states, encoder_value_states, key_states, value_states, ctc_probs, ctc_states_n, ctc_states_b

    def call(self, batch_x):

        encoder_key_state, encoder_value_state, key_state, value_state, ctc_log_probs, ctc_states_n, ctc_states_b = self.get_initial_states(batch_x)
        input_length = self.compute_input_length(batch_x)

        time = tf.constant(0, dtype=tf.int32)
        predictions = tf.ones((tf.shape(batch_x)[0],1), dtype=tf.int32) * self.sos_token_idx#tf.TensorArray(dtype=tf.int32,size=self.max_output_length)
        pad = tf.ones((tf.shape(batch_x)[0],), dtype=tf.int32) * self.pad_token_idx
        init = True

        def condition(_time, predictions, ctc_states_n, ctc_states_b, key_state, value_state, init):
            return tf.logical_and(_time < tf.minimum(self.max_output_length, input_length), tf.logical_not(tf.reduce_all(tf.reduce_any(predictions==self.eos_token_idx, axis=1))))

        def body(_time, predictions, ctc_states_n, ctc_states_b, key_state, value_state, init):
            if init:
                out, key_state, value_state = self.att_inference_module(predictions[:,-1:], _time, None, None, encoder_key_state, encoder_value_state)
                init = False
            else:
                out, key_state, value_state = self.att_inference_module(predictions[:,-1:],  _time, key_state, value_state, encoder_key_state, encoder_value_state)

            log_ctc, new_ctc_states_n, new_ctc_states_b = compute_ctc_prefix_scores(predictions[0], 
                                                                                   ctc_log_probs, 
                                                                                   ctc_states_n, 
                                                                                   ctc_states_b, 
                                                                                   self.eos_token_idx, 
                                                                                   self.blank_idx)
            log_ctc = tf.reduce_mean(log_ctc, axis=0) #log-prob ensemble
            out = out[:,0,0] #(M,V)
            if self.from_logits:
                out = tf.nn.softmax(out, axis=-1)
            out = tf.math.log(out)
            log_att = tf.reduce_mean(out, axis=0) #log-prob ensemble
            
            probs_final = (1-self.ctc_weight) * log_att + self.ctc_weight * log_ctc #tf.expand_dims(log_psi, axis=0)
            next_token = tf.argmax(probs_final, axis=-1, output_type=tf.int32)#[0]
            
            ctc_states_n = new_ctc_states_n[...,next_token]
            ctc_states_b = new_ctc_states_b[...,next_token]
            
            predictions = tf.concat([predictions, [next_token[...,None]]], axis=1)
            return _time+1, predictions, ctc_states_n, ctc_states_b, key_state, value_state, init

        _, predictions, _, _, _, _, _ = tf.while_loop(condition, body, 
                                                shape_invariants=[tf.TensorShape([]),
                                                                  tf.TensorShape([1,None]),
                                                                  tf.TensorShape([None,len(self.encoder_list)]),
                                                                  tf.TensorShape([None,len(self.encoder_list)]),
                                                                  tf.TensorShape([len(self.encoder_list),None,None,None]),
                                                                  tf.TensorShape([len(self.encoder_list),None,None,None]),
                                                                  tf.TensorShape([])], 
                                                loop_vars=[time, predictions, ctc_states_n, ctc_states_b, key_state, value_state, init])
        print("PREDICTIONS[CTC_Decoder]:\n",predictions,"\n---------------------\n")
        return predictions

In [20]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 768, 1629)]          0         []                            
                                                                                                  
 input_3 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 encoder (Functional)        (None, 384, 192)             5565377   ['input_2[0][0]']             
                                                                                                  
 att_decoder (Functional)    (None, None, 62)             480830    ['input_3[0][0]',             
                                                                     'encoder[0][0]']         

NameError: name 'model_list' is not defined

In [127]:
interpreter = tf.lite.Interpreter("model.tflite")

REQUIRED_SIGNATURE = "serving_default"
REQUIRED_OUTPUT = "outputs"

prediction_fn = interpreter.get_signature_runner(REQUIRED_SIGNATURE)

In [128]:
#INTEGRATION WITH CAM


In [129]:
import cv2
import numpy as np 
import mediapipe as mp
import tensorflow as tf

In [130]:
# OPENCV
cap = cv2.VideoCapture(0)
capture = cv2.VideoCapture(0)

In [131]:
# Initialize MediaPipe Holistic model
mp_holistic = mp.solutions.holistic
holistic_model = mp_holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

frame_counter = 0

all_landmarks_list = []

# Initialize drawing utilities
mp_drawing = mp.solutions.drawing_utils

In [132]:
while capture.isOpened() and frame_counter < 100:
    ret, frame = capture.read()

    if not ret:
        break

    frame = cv2.resize(frame, (800, 600))
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Use holistic model to detect landmarks
    image.flags.writeable = False
    results = holistic_model.process(image)
    image.flags.writeable = True

    # Convert back to BGR for rendering
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Draw landmarks
    mp_drawing.draw_landmarks(
        image,
        results.face_landmarks,
        mp_holistic.FACEMESH_CONTOURS,
        landmark_drawing_spec=mp_drawing.DrawingSpec(color=(0,255,255), thickness=1, circle_radius=1),
        connection_drawing_spec=mp_drawing.DrawingSpec(color=(255,0,255), thickness=1, circle_radius=1)
    )

    mp_drawing.draw_landmarks(
        image, 
        results.right_hand_landmarks, 
        mp_holistic.HAND_CONNECTIONS
    )

    mp_drawing.draw_landmarks(
        image, 
        results.left_hand_landmarks, 
        mp_holistic.HAND_CONNECTIONS
    )

    # Display the resulting image with landmarks
    cv2.imshow('Holistic Model Landmarks', image)

    all_landmarks = []

    # Extract pose landmarks
    if results.pose_landmarks:
        pose_landmarks = [[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark]
        all_landmarks.extend(pose_landmarks)

    # Extract face landmarks
    if results.face_landmarks:
        face_landmarks = [[lm.x, lm.y, lm.z] for lm in results.face_landmarks.landmark]
        all_landmarks.extend(face_landmarks)

    # Extract left hand landmarks
    if results.left_hand_landmarks:
        left_hand_landmarks = [[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]
        all_landmarks.extend(left_hand_landmarks)

    # Extract right hand landmarks
    if results.right_hand_landmarks:
        right_hand_landmarks = [[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]
        all_landmarks.extend(right_hand_landmarks)

    # Append the landmarks of this frame to the list
    all_landmarks_list.append(all_landmarks)

    frame_counter += 1

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

capture.release()
cv2.destroyAllWindows()

In [133]:
# Define the expected order of landmarks
expected_landmark_order = []

# Add face landmarks (assuming 468 landmarks)
for i in range(468):
    expected_landmark_order.append(i)

# Add right hand landmarks (assuming 21 landmarks)
for i in range(468, 468 + 21):
    expected_landmark_order.append(i)

# Add left hand landmarks (assuming 21 landmarks)
for i in range(468 + 21, 468 + 21 + 21):
    expected_landmark_order.append(i)

# Add pose landmarks (assuming 33 landmarks)
for i in range(468 + 21 + 21, 468 + 21 + 21 + 33):
    expected_landmark_order.append(i)

# Find the maximum number of landmarks
max_landmarks = max(len(landmarks) for landmarks in all_landmarks_list)
# Ensure that the shape is (100, 543, 3) by padding with NaN values
padded_landmarks = []
for landmarks in all_landmarks_list:
    padded_landmarks.append(landmarks + [[np.nan, np.nan, np.nan]] * (543 - len(landmarks)))

# Convert the list of landmarks to a TensorFlow tensor
all_landmarks_tensor = tf.convert_to_tensor(padded_landmarks, dtype=tf.float32)

print("Shape of all landmarks tensor before reshaping:", all_landmarks_tensor.shape)

# Reshape the tensor to have shape (100, 1629)
all_landmarks_tensor_reshaped = tf.reshape(all_landmarks_tensor, (100, -1))

print("Shape of all landmarks tensor after reshaping:", all_landmarks_tensor_reshaped.shape)

Shape of all landmarks tensor before reshaping: (100, 543, 3)
Shape of all landmarks tensor after reshaping: (100, 1629)


In [134]:
output = prediction_fn(inputs=all_landmarks_tensor_reshaped)
prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)])
prediction_str

'166 south road'

In [94]:
# rows = []

# for i in range(all_landmarks_tensor_reshaped.shape[0]):
#     # Extract the i-th row
#     row = all_landmarks_tensor_reshaped[i:i+1, :]
#     # Append the row to the list
#     print(row.shape)
#     predictions = model.predict(row)
#     rows.append(row)
# print(rows.shape)

In [95]:
# output_tensor = tf.expand_dims(all_landmarks_tensor_reshaped, axis=0)
# output_tensor

# predictions = model.predict(output_tensor)


In [96]:
print(model.input)

[<KerasTensor: shape=(None, 768, 1629) dtype=float32 (created by layer 'input_8')>, <KerasTensor: shape=(None, None) dtype=int32 (created by layer 'input_9')>]


In [66]:
SEL_COLS=['x_face_0', 'x_face_1', 'x_face_2', 'x_face_3', 'x_face_4', 'x_face_5', 'x_face_6', 'x_face_7', 'x_face_8', 'x_face_9', 'x_face_10', 'x_face_11', 'x_face_12', 'x_face_13', 'x_face_14', 'x_face_15', 'x_face_16', 'x_face_17', 'x_face_18', 'x_face_19', 'x_face_20', 'x_face_21', 'x_face_22', 'x_face_23', 'x_face_24', 'x_face_25', 'x_face_26', 'x_face_27', 'x_face_28', 'x_face_29', 'x_face_30', 'x_face_31', 'x_face_32', 'x_face_33', 'x_face_34', 'x_face_35', 'x_face_36', 'x_face_37', 'x_face_38', 'x_face_39', 'x_face_40', 'x_face_41', 'x_face_42', 'x_face_43', 'x_face_44', 'x_face_45', 'x_face_46', 'x_face_47', 'x_face_48', 'x_face_49', 'x_face_50', 'x_face_51', 'x_face_52', 'x_face_53', 'x_face_54', 'x_face_55', 'x_face_56', 'x_face_57', 'x_face_58', 'x_face_59', 'x_face_60', 'x_face_61', 'x_face_62', 'x_face_63', 'x_face_64', 'x_face_65', 'x_face_66', 'x_face_67', 'x_face_68', 'x_face_69', 'x_face_70', 'x_face_71', 'x_face_72', 'x_face_73', 'x_face_74', 'x_face_75', 'x_face_76', 'x_face_77', 'x_face_78', 'x_face_79', 'x_face_80', 'x_face_81', 'x_face_82', 'x_face_83', 'x_face_84', 'x_face_85', 'x_face_86', 'x_face_87', 'x_face_88', 'x_face_89', 'x_face_90', 'x_face_91', 'x_face_92', 'x_face_93', 'x_face_94', 'x_face_95', 'x_face_96', 'x_face_97', 'x_face_98', 'x_face_99', 'x_face_100', 'x_face_101', 'x_face_102', 'x_face_103', 'x_face_104', 'x_face_105', 'x_face_106', 'x_face_107', 'x_face_108', 'x_face_109', 'x_face_110', 'x_face_111', 'x_face_112', 'x_face_113', 'x_face_114', 'x_face_115', 'x_face_116', 'x_face_117', 'x_face_118', 'x_face_119', 'x_face_120', 'x_face_121', 'x_face_122', 'x_face_123', 'x_face_124', 'x_face_125', 'x_face_126', 'x_face_127', 'x_face_128', 'x_face_129', 'x_face_130', 'x_face_131', 'x_face_132', 'x_face_133', 'x_face_134', 'x_face_135', 'x_face_136', 'x_face_137', 'x_face_138', 'x_face_139', 'x_face_140', 'x_face_141', 'x_face_142', 'x_face_143', 'x_face_144', 'x_face_145', 'x_face_146', 'x_face_147', 'x_face_148', 'x_face_149', 'x_face_150', 'x_face_151', 'x_face_152', 'x_face_153', 'x_face_154', 'x_face_155', 'x_face_156', 'x_face_157', 'x_face_158', 'x_face_159', 'x_face_160', 'x_face_161', 'x_face_162', 'x_face_163', 'x_face_164', 'x_face_165', 'x_face_166', 'x_face_167', 'x_face_168', 'x_face_169', 'x_face_170', 'x_face_171', 'x_face_172', 'x_face_173', 'x_face_174', 'x_face_175', 'x_face_176', 'x_face_177', 'x_face_178', 'x_face_179', 'x_face_180', 'x_face_181', 'x_face_182', 'x_face_183', 'x_face_184', 'x_face_185', 'x_face_186', 'x_face_187', 'x_face_188', 'x_face_189', 'x_face_190', 'x_face_191', 'x_face_192', 'x_face_193', 'x_face_194', 'x_face_195', 'x_face_196', 'x_face_197', 'x_face_198', 'x_face_199', 'x_face_200', 'x_face_201', 'x_face_202', 'x_face_203', 'x_face_204', 'x_face_205', 'x_face_206', 'x_face_207', 'x_face_208', 'x_face_209', 'x_face_210', 'x_face_211', 'x_face_212', 'x_face_213', 'x_face_214', 'x_face_215', 'x_face_216', 'x_face_217', 'x_face_218', 'x_face_219', 'x_face_220', 'x_face_221', 'x_face_222', 'x_face_223', 'x_face_224', 'x_face_225', 'x_face_226', 'x_face_227', 'x_face_228', 'x_face_229', 'x_face_230', 'x_face_231', 'x_face_232', 'x_face_233', 'x_face_234', 'x_face_235', 'x_face_236', 'x_face_237', 'x_face_238', 'x_face_239', 'x_face_240', 'x_face_241', 'x_face_242', 'x_face_243', 'x_face_244', 'x_face_245', 'x_face_246', 'x_face_247', 'x_face_248', 'x_face_249', 'x_face_250', 'x_face_251', 'x_face_252', 'x_face_253', 'x_face_254', 'x_face_255', 'x_face_256', 'x_face_257', 'x_face_258', 'x_face_259', 'x_face_260', 'x_face_261', 'x_face_262', 'x_face_263', 'x_face_264', 'x_face_265', 'x_face_266', 'x_face_267', 'x_face_268', 'x_face_269', 'x_face_270', 'x_face_271', 'x_face_272', 'x_face_273', 'x_face_274', 'x_face_275', 'x_face_276', 'x_face_277', 'x_face_278', 'x_face_279', 'x_face_280', 'x_face_281', 'x_face_282', 'x_face_283', 'x_face_284', 'x_face_285', 'x_face_286', 'x_face_287', 'x_face_288', 'x_face_289', 'x_face_290', 'x_face_291', 'x_face_292', 'x_face_293', 'x_face_294', 'x_face_295', 'x_face_296', 'x_face_297', 'x_face_298', 'x_face_299', 'x_face_300', 'x_face_301', 'x_face_302', 'x_face_303', 'x_face_304', 'x_face_305', 'x_face_306', 'x_face_307', 'x_face_308', 'x_face_309', 'x_face_310', 'x_face_311', 'x_face_312', 'x_face_313', 'x_face_314', 'x_face_315', 'x_face_316', 'x_face_317', 'x_face_318', 'x_face_319', 'x_face_320', 'x_face_321', 'x_face_322', 'x_face_323', 'x_face_324', 'x_face_325', 'x_face_326', 'x_face_327', 'x_face_328', 'x_face_329', 'x_face_330', 'x_face_331', 'x_face_332', 'x_face_333', 'x_face_334', 'x_face_335', 'x_face_336', 'x_face_337', 'x_face_338', 'x_face_339', 'x_face_340', 'x_face_341', 'x_face_342', 'x_face_343', 'x_face_344', 'x_face_345', 'x_face_346', 'x_face_347', 'x_face_348', 'x_face_349', 'x_face_350', 'x_face_351', 'x_face_352', 'x_face_353', 'x_face_354', 'x_face_355', 'x_face_356', 'x_face_357', 'x_face_358', 'x_face_359', 'x_face_360', 'x_face_361', 'x_face_362', 'x_face_363', 'x_face_364', 'x_face_365', 'x_face_366', 'x_face_367', 'x_face_368', 'x_face_369', 'x_face_370', 'x_face_371', 'x_face_372', 'x_face_373', 'x_face_374', 'x_face_375', 'x_face_376', 'x_face_377', 'x_face_378', 'x_face_379', 'x_face_380', 'x_face_381', 'x_face_382', 'x_face_383', 'x_face_384', 'x_face_385', 'x_face_386', 'x_face_387', 'x_face_388', 'x_face_389', 'x_face_390', 'x_face_391', 'x_face_392', 'x_face_393', 'x_face_394', 'x_face_395', 'x_face_396', 'x_face_397', 'x_face_398', 'x_face_399', 'x_face_400', 'x_face_401', 'x_face_402', 'x_face_403', 'x_face_404', 'x_face_405', 'x_face_406', 'x_face_407', 'x_face_408', 'x_face_409', 'x_face_410', 'x_face_411', 'x_face_412', 'x_face_413', 'x_face_414', 'x_face_415', 'x_face_416', 'x_face_417', 'x_face_418', 'x_face_419', 'x_face_420', 'x_face_421', 'x_face_422', 'x_face_423', 'x_face_424', 'x_face_425', 'x_face_426', 'x_face_427', 'x_face_428', 'x_face_429', 'x_face_430', 'x_face_431', 'x_face_432', 'x_face_433', 'x_face_434', 'x_face_435', 'x_face_436', 'x_face_437', 'x_face_438', 'x_face_439', 'x_face_440', 'x_face_441', 'x_face_442', 'x_face_443', 'x_face_444', 'x_face_445', 'x_face_446', 'x_face_447', 'x_face_448', 'x_face_449', 'x_face_450', 'x_face_451', 'x_face_452', 'x_face_453', 'x_face_454', 'x_face_455', 'x_face_456', 'x_face_457', 'x_face_458', 'x_face_459', 'x_face_460', 'x_face_461', 'x_face_462', 'x_face_463', 'x_face_464', 'x_face_465', 'x_face_466', 'x_face_467', 'x_left_hand_0', 'x_left_hand_1', 'x_left_hand_2', 'x_left_hand_3', 'x_left_hand_4', 'x_left_hand_5', 'x_left_hand_6', 'x_left_hand_7', 'x_left_hand_8', 'x_left_hand_9', 'x_left_hand_10', 'x_left_hand_11', 'x_left_hand_12', 'x_left_hand_13', 'x_left_hand_14', 'x_left_hand_15', 'x_left_hand_16', 'x_left_hand_17', 'x_left_hand_18', 'x_left_hand_19', 'x_left_hand_20', 'x_pose_0', 'x_pose_1', 'x_pose_2', 'x_pose_3', 'x_pose_4', 'x_pose_5', 'x_pose_6', 'x_pose_7', 'x_pose_8', 'x_pose_9', 'x_pose_10', 'x_pose_11', 'x_pose_12', 'x_pose_13', 'x_pose_14', 'x_pose_15', 'x_pose_16', 'x_pose_17', 'x_pose_18', 'x_pose_19', 'x_pose_20', 'x_pose_21', 'x_pose_22', 'x_pose_23', 'x_pose_24', 'x_pose_25', 'x_pose_26', 'x_pose_27', 'x_pose_28', 'x_pose_29', 'x_pose_30', 'x_pose_31', 'x_pose_32', 'x_right_hand_0', 'x_right_hand_1', 'x_right_hand_2', 'x_right_hand_3', 'x_right_hand_4', 'x_right_hand_5', 'x_right_hand_6', 'x_right_hand_7', 'x_right_hand_8', 'x_right_hand_9', 'x_right_hand_10', 'x_right_hand_11', 'x_right_hand_12', 'x_right_hand_13', 'x_right_hand_14', 'x_right_hand_15', 'x_right_hand_16', 'x_right_hand_17', 'x_right_hand_18', 'x_right_hand_19', 'x_right_hand_20', 'y_face_0', 'y_face_1', 'y_face_2', 'y_face_3', 'y_face_4', 'y_face_5', 'y_face_6', 'y_face_7', 'y_face_8', 'y_face_9', 'y_face_10', 'y_face_11', 'y_face_12', 'y_face_13', 'y_face_14', 'y_face_15', 'y_face_16', 'y_face_17', 'y_face_18', 'y_face_19', 'y_face_20', 'y_face_21', 'y_face_22', 'y_face_23', 'y_face_24', 'y_face_25', 'y_face_26', 'y_face_27', 'y_face_28', 'y_face_29', 'y_face_30', 'y_face_31', 'y_face_32', 'y_face_33', 'y_face_34', 'y_face_35', 'y_face_36', 'y_face_37', 'y_face_38', 'y_face_39', 'y_face_40', 'y_face_41', 'y_face_42', 'y_face_43', 'y_face_44', 'y_face_45', 'y_face_46', 'y_face_47', 'y_face_48', 'y_face_49', 'y_face_50', 'y_face_51', 'y_face_52', 'y_face_53', 'y_face_54', 'y_face_55', 'y_face_56', 'y_face_57', 'y_face_58', 'y_face_59', 'y_face_60', 'y_face_61', 'y_face_62', 'y_face_63', 'y_face_64', 'y_face_65', 'y_face_66', 'y_face_67', 'y_face_68', 'y_face_69', 'y_face_70', 'y_face_71', 'y_face_72', 'y_face_73', 'y_face_74', 'y_face_75', 'y_face_76', 'y_face_77', 'y_face_78', 'y_face_79', 'y_face_80', 'y_face_81', 'y_face_82', 'y_face_83', 'y_face_84', 'y_face_85', 'y_face_86', 'y_face_87', 'y_face_88', 'y_face_89', 'y_face_90', 'y_face_91', 'y_face_92', 'y_face_93', 'y_face_94', 'y_face_95', 'y_face_96', 'y_face_97', 'y_face_98', 'y_face_99', 'y_face_100', 'y_face_101', 'y_face_102', 'y_face_103', 'y_face_104', 'y_face_105', 'y_face_106', 'y_face_107', 'y_face_108', 'y_face_109', 'y_face_110', 'y_face_111', 'y_face_112', 'y_face_113', 'y_face_114', 'y_face_115', 'y_face_116', 'y_face_117', 'y_face_118', 'y_face_119', 'y_face_120', 'y_face_121', 'y_face_122', 'y_face_123', 'y_face_124', 'y_face_125', 'y_face_126', 'y_face_127', 'y_face_128', 'y_face_129', 'y_face_130', 'y_face_131', 'y_face_132', 'y_face_133', 'y_face_134', 'y_face_135', 'y_face_136', 'y_face_137', 'y_face_138', 'y_face_139', 'y_face_140', 'y_face_141', 'y_face_142', 'y_face_143', 'y_face_144', 'y_face_145', 'y_face_146', 'y_face_147', 'y_face_148', 'y_face_149', 'y_face_150', 'y_face_151', 'y_face_152', 'y_face_153', 'y_face_154', 'y_face_155', 'y_face_156', 'y_face_157', 'y_face_158', 'y_face_159', 'y_face_160', 'y_face_161', 'y_face_162', 'y_face_163', 'y_face_164', 'y_face_165', 'y_face_166', 'y_face_167', 'y_face_168', 'y_face_169', 'y_face_170', 'y_face_171', 'y_face_172', 'y_face_173', 'y_face_174', 'y_face_175', 'y_face_176', 'y_face_177', 'y_face_178', 'y_face_179', 'y_face_180', 'y_face_181', 'y_face_182', 'y_face_183', 'y_face_184', 'y_face_185', 'y_face_186', 'y_face_187', 'y_face_188', 'y_face_189', 'y_face_190', 'y_face_191', 'y_face_192', 'y_face_193', 'y_face_194', 'y_face_195', 'y_face_196', 'y_face_197', 'y_face_198', 'y_face_199', 'y_face_200', 'y_face_201', 'y_face_202', 'y_face_203', 'y_face_204', 'y_face_205', 'y_face_206', 'y_face_207', 'y_face_208', 'y_face_209', 'y_face_210', 'y_face_211', 'y_face_212', 'y_face_213', 'y_face_214', 'y_face_215', 'y_face_216', 'y_face_217', 'y_face_218', 'y_face_219', 'y_face_220', 'y_face_221', 'y_face_222', 'y_face_223', 'y_face_224', 'y_face_225', 'y_face_226', 'y_face_227', 'y_face_228', 'y_face_229', 'y_face_230', 'y_face_231', 'y_face_232', 'y_face_233', 'y_face_234', 'y_face_235', 'y_face_236', 'y_face_237', 'y_face_238', 'y_face_239', 'y_face_240', 'y_face_241', 'y_face_242', 'y_face_243', 'y_face_244', 'y_face_245', 'y_face_246', 'y_face_247', 'y_face_248', 'y_face_249', 'y_face_250', 'y_face_251', 'y_face_252', 'y_face_253', 'y_face_254', 'y_face_255', 'y_face_256', 'y_face_257', 'y_face_258', 'y_face_259', 'y_face_260', 'y_face_261', 'y_face_262', 'y_face_263', 'y_face_264', 'y_face_265', 'y_face_266', 'y_face_267', 'y_face_268', 'y_face_269', 'y_face_270', 'y_face_271', 'y_face_272', 'y_face_273', 'y_face_274', 'y_face_275', 'y_face_276', 'y_face_277', 'y_face_278', 'y_face_279', 'y_face_280', 'y_face_281', 'y_face_282', 'y_face_283', 'y_face_284', 'y_face_285', 'y_face_286', 'y_face_287', 'y_face_288', 'y_face_289', 'y_face_290', 'y_face_291', 'y_face_292', 'y_face_293', 'y_face_294', 'y_face_295', 'y_face_296', 'y_face_297', 'y_face_298', 'y_face_299', 'y_face_300', 'y_face_301', 'y_face_302', 'y_face_303', 'y_face_304', 'y_face_305', 'y_face_306', 'y_face_307', 'y_face_308', 'y_face_309', 'y_face_310', 'y_face_311', 'y_face_312', 'y_face_313', 'y_face_314', 'y_face_315', 'y_face_316', 'y_face_317', 'y_face_318', 'y_face_319', 'y_face_320', 'y_face_321', 'y_face_322', 'y_face_323', 'y_face_324', 'y_face_325', 'y_face_326', 'y_face_327', 'y_face_328', 'y_face_329', 'y_face_330', 'y_face_331', 'y_face_332', 'y_face_333', 'y_face_334', 'y_face_335', 'y_face_336', 'y_face_337', 'y_face_338', 'y_face_339', 'y_face_340', 'y_face_341', 'y_face_342', 'y_face_343', 'y_face_344', 'y_face_345', 'y_face_346', 'y_face_347', 'y_face_348', 'y_face_349', 'y_face_350', 'y_face_351', 'y_face_352', 'y_face_353', 'y_face_354', 'y_face_355', 'y_face_356', 'y_face_357', 'y_face_358', 'y_face_359', 'y_face_360', 'y_face_361', 'y_face_362', 'y_face_363', 'y_face_364', 'y_face_365', 'y_face_366', 'y_face_367', 'y_face_368', 'y_face_369', 'y_face_370', 'y_face_371', 'y_face_372', 'y_face_373', 'y_face_374', 'y_face_375', 'y_face_376', 'y_face_377', 'y_face_378', 'y_face_379', 'y_face_380', 'y_face_381', 'y_face_382', 'y_face_383', 'y_face_384', 'y_face_385', 'y_face_386', 'y_face_387', 'y_face_388', 'y_face_389', 'y_face_390', 'y_face_391', 'y_face_392', 'y_face_393', 'y_face_394', 'y_face_395', 'y_face_396', 'y_face_397', 'y_face_398', 'y_face_399', 'y_face_400', 'y_face_401', 'y_face_402', 'y_face_403', 'y_face_404', 'y_face_405', 'y_face_406', 'y_face_407', 'y_face_408', 'y_face_409', 'y_face_410', 'y_face_411', 'y_face_412', 'y_face_413', 'y_face_414', 'y_face_415', 'y_face_416', 'y_face_417', 'y_face_418', 'y_face_419', 'y_face_420', 'y_face_421', 'y_face_422', 'y_face_423', 'y_face_424', 'y_face_425', 'y_face_426', 'y_face_427', 'y_face_428', 'y_face_429', 'y_face_430', 'y_face_431', 'y_face_432', 'y_face_433', 'y_face_434', 'y_face_435', 'y_face_436', 'y_face_437', 'y_face_438', 'y_face_439', 'y_face_440', 'y_face_441', 'y_face_442', 'y_face_443', 'y_face_444', 'y_face_445', 'y_face_446', 'y_face_447', 'y_face_448', 'y_face_449', 'y_face_450', 'y_face_451', 'y_face_452', 'y_face_453', 'y_face_454', 'y_face_455', 'y_face_456', 'y_face_457', 'y_face_458', 'y_face_459', 'y_face_460', 'y_face_461', 'y_face_462', 'y_face_463', 'y_face_464', 'y_face_465', 'y_face_466', 'y_face_467', 'y_left_hand_0', 'y_left_hand_1', 'y_left_hand_2', 'y_left_hand_3', 'y_left_hand_4', 'y_left_hand_5', 'y_left_hand_6', 'y_left_hand_7', 'y_left_hand_8', 'y_left_hand_9', 'y_left_hand_10', 'y_left_hand_11', 'y_left_hand_12', 'y_left_hand_13', 'y_left_hand_14', 'y_left_hand_15', 'y_left_hand_16', 'y_left_hand_17', 'y_left_hand_18', 'y_left_hand_19', 'y_left_hand_20', 'y_pose_0', 'y_pose_1', 'y_pose_2', 'y_pose_3', 'y_pose_4', 'y_pose_5', 'y_pose_6', 'y_pose_7', 'y_pose_8', 'y_pose_9', 'y_pose_10', 'y_pose_11', 'y_pose_12', 'y_pose_13', 'y_pose_14', 'y_pose_15', 'y_pose_16', 'y_pose_17', 'y_pose_18', 'y_pose_19', 'y_pose_20', 'y_pose_21', 'y_pose_22', 'y_pose_23', 'y_pose_24', 'y_pose_25', 'y_pose_26', 'y_pose_27', 'y_pose_28', 'y_pose_29', 'y_pose_30', 'y_pose_31', 'y_pose_32', 'y_right_hand_0', 'y_right_hand_1', 'y_right_hand_2', 'y_right_hand_3', 'y_right_hand_4', 'y_right_hand_5', 'y_right_hand_6', 'y_right_hand_7', 'y_right_hand_8', 'y_right_hand_9', 'y_right_hand_10', 'y_right_hand_11', 'y_right_hand_12', 'y_right_hand_13', 'y_right_hand_14', 'y_right_hand_15', 'y_right_hand_16', 'y_right_hand_17', 'y_right_hand_18', 'y_right_hand_19', 'y_right_hand_20', 'z_face_0', 'z_face_1', 'z_face_2', 'z_face_3', 'z_face_4', 'z_face_5', 'z_face_6', 'z_face_7', 'z_face_8', 'z_face_9', 'z_face_10', 'z_face_11', 'z_face_12', 'z_face_13', 'z_face_14', 'z_face_15', 'z_face_16', 'z_face_17', 'z_face_18', 'z_face_19', 'z_face_20', 'z_face_21', 'z_face_22', 'z_face_23', 'z_face_24', 'z_face_25', 'z_face_26', 'z_face_27', 'z_face_28', 'z_face_29', 'z_face_30', 'z_face_31', 'z_face_32', 'z_face_33', 'z_face_34', 'z_face_35', 'z_face_36', 'z_face_37', 'z_face_38', 'z_face_39', 'z_face_40', 'z_face_41', 'z_face_42', 'z_face_43', 'z_face_44', 'z_face_45', 'z_face_46', 'z_face_47', 'z_face_48', 'z_face_49', 'z_face_50', 'z_face_51', 'z_face_52', 'z_face_53', 'z_face_54', 'z_face_55', 'z_face_56', 'z_face_57', 'z_face_58', 'z_face_59', 'z_face_60', 'z_face_61', 'z_face_62', 'z_face_63', 'z_face_64', 'z_face_65', 'z_face_66', 'z_face_67', 'z_face_68', 'z_face_69', 'z_face_70', 'z_face_71', 'z_face_72', 'z_face_73', 'z_face_74', 'z_face_75', 'z_face_76', 'z_face_77', 'z_face_78', 'z_face_79', 'z_face_80', 'z_face_81', 'z_face_82', 'z_face_83', 'z_face_84', 'z_face_85', 'z_face_86', 'z_face_87', 'z_face_88', 'z_face_89', 'z_face_90', 'z_face_91', 'z_face_92', 'z_face_93', 'z_face_94', 'z_face_95', 'z_face_96', 'z_face_97', 'z_face_98', 'z_face_99', 'z_face_100', 'z_face_101', 'z_face_102', 'z_face_103', 'z_face_104', 'z_face_105', 'z_face_106', 'z_face_107', 'z_face_108', 'z_face_109', 'z_face_110', 'z_face_111', 'z_face_112', 'z_face_113', 'z_face_114', 'z_face_115', 'z_face_116', 'z_face_117', 'z_face_118', 'z_face_119', 'z_face_120', 'z_face_121', 'z_face_122', 'z_face_123', 'z_face_124', 'z_face_125', 'z_face_126', 'z_face_127', 'z_face_128', 'z_face_129', 'z_face_130', 'z_face_131', 'z_face_132', 'z_face_133', 'z_face_134', 'z_face_135', 'z_face_136', 'z_face_137', 'z_face_138', 'z_face_139', 'z_face_140', 'z_face_141', 'z_face_142', 'z_face_143', 'z_face_144', 'z_face_145', 'z_face_146', 'z_face_147', 'z_face_148', 'z_face_149', 'z_face_150', 'z_face_151', 'z_face_152', 'z_face_153', 'z_face_154', 'z_face_155', 'z_face_156', 'z_face_157', 'z_face_158', 'z_face_159', 'z_face_160', 'z_face_161', 'z_face_162', 'z_face_163', 'z_face_164', 'z_face_165', 'z_face_166', 'z_face_167', 'z_face_168', 'z_face_169', 'z_face_170', 'z_face_171', 'z_face_172', 'z_face_173', 'z_face_174', 'z_face_175', 'z_face_176', 'z_face_177', 'z_face_178', 'z_face_179', 'z_face_180', 'z_face_181', 'z_face_182', 'z_face_183', 'z_face_184', 'z_face_185', 'z_face_186', 'z_face_187', 'z_face_188', 'z_face_189', 'z_face_190', 'z_face_191', 'z_face_192', 'z_face_193', 'z_face_194', 'z_face_195', 'z_face_196', 'z_face_197', 'z_face_198', 'z_face_199', 'z_face_200', 'z_face_201', 'z_face_202', 'z_face_203', 'z_face_204', 'z_face_205', 'z_face_206', 'z_face_207', 'z_face_208', 'z_face_209', 'z_face_210', 'z_face_211', 'z_face_212', 'z_face_213', 'z_face_214', 'z_face_215', 'z_face_216', 'z_face_217', 'z_face_218', 'z_face_219', 'z_face_220', 'z_face_221', 'z_face_222', 'z_face_223', 'z_face_224', 'z_face_225', 'z_face_226', 'z_face_227', 'z_face_228', 'z_face_229', 'z_face_230', 'z_face_231', 'z_face_232', 'z_face_233', 'z_face_234', 'z_face_235', 'z_face_236', 'z_face_237', 'z_face_238', 'z_face_239', 'z_face_240', 'z_face_241', 'z_face_242', 'z_face_243', 'z_face_244', 'z_face_245', 'z_face_246', 'z_face_247', 'z_face_248', 'z_face_249', 'z_face_250', 'z_face_251', 'z_face_252', 'z_face_253', 'z_face_254', 'z_face_255', 'z_face_256', 'z_face_257', 'z_face_258', 'z_face_259', 'z_face_260', 'z_face_261', 'z_face_262', 'z_face_263', 'z_face_264', 'z_face_265', 'z_face_266', 'z_face_267', 'z_face_268', 'z_face_269', 'z_face_270', 'z_face_271', 'z_face_272', 'z_face_273', 'z_face_274', 'z_face_275', 'z_face_276', 'z_face_277', 'z_face_278', 'z_face_279', 'z_face_280', 'z_face_281', 'z_face_282', 'z_face_283', 'z_face_284', 'z_face_285', 'z_face_286', 'z_face_287', 'z_face_288', 'z_face_289', 'z_face_290', 'z_face_291', 'z_face_292', 'z_face_293', 'z_face_294', 'z_face_295', 'z_face_296', 'z_face_297', 'z_face_298', 'z_face_299', 'z_face_300', 'z_face_301', 'z_face_302', 'z_face_303', 'z_face_304', 'z_face_305', 'z_face_306', 'z_face_307', 'z_face_308', 'z_face_309', 'z_face_310', 'z_face_311', 'z_face_312', 'z_face_313', 'z_face_314', 'z_face_315', 'z_face_316', 'z_face_317', 'z_face_318', 'z_face_319', 'z_face_320', 'z_face_321', 'z_face_322', 'z_face_323', 'z_face_324', 'z_face_325', 'z_face_326', 'z_face_327', 'z_face_328', 'z_face_329', 'z_face_330', 'z_face_331', 'z_face_332', 'z_face_333', 'z_face_334', 'z_face_335', 'z_face_336', 'z_face_337', 'z_face_338', 'z_face_339', 'z_face_340', 'z_face_341', 'z_face_342', 'z_face_343', 'z_face_344', 'z_face_345', 'z_face_346', 'z_face_347', 'z_face_348', 'z_face_349', 'z_face_350', 'z_face_351', 'z_face_352', 'z_face_353', 'z_face_354', 'z_face_355', 'z_face_356', 'z_face_357', 'z_face_358', 'z_face_359', 'z_face_360', 'z_face_361', 'z_face_362', 'z_face_363', 'z_face_364', 'z_face_365', 'z_face_366', 'z_face_367', 'z_face_368', 'z_face_369', 'z_face_370', 'z_face_371', 'z_face_372', 'z_face_373', 'z_face_374', 'z_face_375', 'z_face_376', 'z_face_377', 'z_face_378', 'z_face_379', 'z_face_380', 'z_face_381', 'z_face_382', 'z_face_383', 'z_face_384', 'z_face_385', 'z_face_386', 'z_face_387', 'z_face_388', 'z_face_389', 'z_face_390', 'z_face_391', 'z_face_392', 'z_face_393', 'z_face_394', 'z_face_395', 'z_face_396', 'z_face_397', 'z_face_398', 'z_face_399', 'z_face_400', 'z_face_401', 'z_face_402', 'z_face_403', 'z_face_404', 'z_face_405', 'z_face_406', 'z_face_407', 'z_face_408', 'z_face_409', 'z_face_410', 'z_face_411', 'z_face_412', 'z_face_413', 'z_face_414', 'z_face_415', 'z_face_416', 'z_face_417', 'z_face_418', 'z_face_419', 'z_face_420', 'z_face_421', 'z_face_422', 'z_face_423', 'z_face_424', 'z_face_425', 'z_face_426', 'z_face_427', 'z_face_428', 'z_face_429', 'z_face_430', 'z_face_431', 'z_face_432', 'z_face_433', 'z_face_434', 'z_face_435', 'z_face_436', 'z_face_437', 'z_face_438', 'z_face_439', 'z_face_440', 'z_face_441', 'z_face_442', 'z_face_443', 'z_face_444', 'z_face_445', 'z_face_446', 'z_face_447', 'z_face_448', 'z_face_449', 'z_face_450', 'z_face_451', 'z_face_452', 'z_face_453', 'z_face_454', 'z_face_455', 'z_face_456', 'z_face_457', 'z_face_458', 'z_face_459', 'z_face_460', 'z_face_461', 'z_face_462', 'z_face_463', 'z_face_464', 'z_face_465', 'z_face_466', 'z_face_467', 'z_left_hand_0', 'z_left_hand_1', 'z_left_hand_2', 'z_left_hand_3', 'z_left_hand_4', 'z_left_hand_5', 'z_left_hand_6', 'z_left_hand_7', 'z_left_hand_8', 'z_left_hand_9', 'z_left_hand_10', 'z_left_hand_11', 'z_left_hand_12', 'z_left_hand_13', 'z_left_hand_14', 'z_left_hand_15', 'z_left_hand_16', 'z_left_hand_17', 'z_left_hand_18', 'z_left_hand_19', 'z_left_hand_20', 'z_pose_0', 'z_pose_1', 'z_pose_2', 'z_pose_3', 'z_pose_4', 'z_pose_5', 'z_pose_6', 'z_pose_7', 'z_pose_8', 'z_pose_9', 'z_pose_10', 'z_pose_11', 'z_pose_12', 'z_pose_13', 'z_pose_14', 'z_pose_15', 'z_pose_16', 'z_pose_17', 'z_pose_18', 'z_pose_19', 'z_pose_20', 'z_pose_21', 'z_pose_22', 'z_pose_23', 'z_pose_24', 'z_pose_25', 'z_pose_26', 'z_pose_27', 'z_pose_28', 'z_pose_29', 'z_pose_30', 'z_pose_31', 'z_pose_32', 'z_right_hand_0', 'z_right_hand_1', 'z_right_hand_2', 'z_right_hand_3', 'z_right_hand_4', 'z_right_hand_5', 'z_right_hand_6', 'z_right_hand_7', 'z_right_hand_8', 'z_right_hand_9', 'z_right_hand_10', 'z_right_hand_11', 'z_right_hand_12', 'z_right_hand_13', 'z_right_hand_14', 'z_right_hand_15', 'z_right_hand_16', 'z_right_hand_17', 'z_right_hand_18', 'z_right_hand_19', 'z_right_hand_20']


In [67]:
class TFLiteModel(tf.Module):
    def __init__(self, model):
        super(TFLiteModel, self).__init__()
        self.model = model
        self.preprocess = Preprocess()
    
    @tf.function(input_signature=[tf.TensorSpec(shape=[None, len(SEL_COLS)], dtype=tf.float32, name='inputs')])
    def __call__(self, inputs, training=False):
        # Preprocess Data
        x = tf.transpose(tf.reshape(inputs, (-1,3,543)), (0,2,1))
        x = tf.cond(tf.shape(x)[0] == 0, lambda: tf.zeros((1, 543, 3),dtype=tf.float32), lambda: tf.identity(x))
        x = self.preprocess(x)
        x = self.model(x)[0]
        x = x - 1
        idxs = tf.where((0<=x) & (x<=58))[...,0]
        x = tf.gather(x, idxs)
        x = tf.cond(tf.shape(x)[0] == 0, lambda: tf.zeros(1, tf.int32), lambda: tf.identity(x))
        x = tf.one_hot(x, 59)
        return {'outputs': x}

In [73]:
model_list = [get_model() for _ in MODEL_PATH]
for model, path in zip(model_list, MODEL_PATH):
    model.load_weights(path)

In [75]:
tflitemodel_base = TFLiteModel(EnsembleCTCAttentionJointGreedyDecoder(model_list[:3], ctc_weight=0.3))

In [76]:
keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflitemodel_base)
keras_model_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]#, tf.lite.OpsSet.SELECT_TF_OPS]
keras_model_converter._experimental_default_to_single_batch_in_tensor_list_ops = True
keras_model_converter.optimizations = [tf.lite.Optimize.DEFAULT]
keras_model_converter.target_spec.supported_types = [tf.float16]
tflite_model = keras_model_converter.convert()
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

Tensor("cond/Identity:0", shape=(None, 543, 3), dtype=float32)

----
 (None, 543, 3)
INSIDE FILTER (None, 543, 3)
Tensor("preprocess/SelectV2_5:0", shape=(None, None, 1629), dtype=float32)

 Log_psi: Tensor("ensemble_ctc_attention_joint_greedy_decoder_2/while/TensorScatterUpdate_2:0", shape=(1, 62), dtype=float32) 
new_States_n: Tensor("ensemble_ctc_attention_joint_greedy_decoder_2/while/scan/TensorArrayV2Stack/TensorListStack:0", shape=(None, 1, 62), dtype=float32) 
new_states_b: Tensor("ensemble_ctc_attention_joint_greedy_decoder_2/while/scan/TensorArrayV2Stack_1/TensorListStack:0", shape=(None, 1, 62), dtype=float32)
PREDICTIONS[CTC_Decoder]:
 Tensor("ensemble_ctc_attention_joint_greedy_decoder_2/while:3", shape=(1, None), dtype=int32) 
---------------------

INFO:tensorflow:Assets written to: C:\Users\admin\AppData\Local\Temp\tmpalxc85q3\assets


INFO:tensorflow:Assets written to: C:\Users\admin\AppData\Local\Temp\tmpalxc85q3\assets


In [121]:
output = prediction_fn(inputs=all_landmarks_tensor_reshaped)
output

{'outputs': array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.,

In [122]:
with open ("character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)
rev_character_map = {j:i for i,j in character_map.items()}
# pred = tflitemodel_base(frames)["outputs"].numpy().argmax(-1)
# ''.join([rev_character_map[x] for x in pred]), phrase

In [123]:
prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)])


In [124]:
prediction_str

'www.sanarane.com'

In [125]:
tf.keras.backend.clear_session()
gc.collect()

129

In [126]:
#TESTING
output = prediction_fn(inputs=all_landmarks_tensor_reshaped)
prediction_str = "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)])
prediction_str

'www.sanarane.com'