In [2]:
import os
import logging
import datetime

import numpy as np
import pandas as pd
import tensorflow as tf

logger = logging.getLogger(__name__)
logger.setLevel("INFO")

In [3]:
# Constants
IMG_SIZE = (512, 512, 3)
VAL_SIZE = 0.2

train_dataset_path = "train_landmark_files/"
save_dataset_path = "transformed_imgs/"
class_path = "train.csv"
class_to_label_path = "sign_to_prediction_index_map.json"
train_data_with_label = "train_data_with_label.csv"

df = pd.read_csv(train_data_with_label)
df = df.sample(frac = 1.0)
n_classes = df.y_label.nunique()

val_df = df.iloc[0:int(np.floor(df.shape[0] * VAL_SIZE)), :]
train_df = df.iloc[int(np.floor(df.shape[0] * VAL_SIZE)):df.shape[0], :]

In [54]:
class GISLRSequence(tf.keras.utils.Sequence):
    def __init__(self, df: pd.DataFrame, x_col: str, y_col: str, sample_size: int, batch_size: int, shuffle: bool = True) -> None:
        self.batch_size = batch_size
        self.sample_size = sample_size
        self.shuffle = shuffle

        self.x_files_path = df[x_col]
        self.y = df[y_col]
        
        self.nframes = df.nframes
        self.indices = df.index.to_list()

    def __len__(self):
        return (len(self.x_files_path) // self.batch_size)

    def __getitem__(self, idx):
        subset = self.indices[(idx * self.batch_size):((idx + 1) * self.batch_size)]
        batch_x = self.x_files_path[subset]
        batch_y = self.y[subset]

        X, y = self._get_data(batch_x, batch_y)

        return X, tf.one_hot(y, depth=n_classes)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def _get_data(self, batch_x, batch_y):
        temp_X, temp_y = [], []

        for idx, idy in zip(batch_x, batch_y):
            temp = self._get_transformed_data(idx)

            temp_X.append(temp)
            temp_y.append(idy)

        return np.array(temp_X), np.array(temp_y)

    def _get_transformed_data(self, dataset_file_path) -> np.ndarray:
        df = pd.read_parquet(dataset_file_path)
        logger.info(f"Found: {df.frame.nunique()} frames in {dataset_file_path}")

        all_imgs = []

        sample_frames = df.frame.unique()[:self.sample_size]

        for single_frame in sample_frames:
            temp = df[(df.frame == single_frame)]
            temp_feat = np.array(temp.loc[:, ["x", "y", "z"]])
            temp_feat = temp_feat.reshape((temp.shape[0] * 3))
            # temp_feat = np.where(np.isnan(temp_feat), 0, temp_feat)
            all_imgs.append(temp_feat)

        # if len(sample_frames) < self.sample_size:
        #     for i in range(self.sample_size-len(sample_frames)):
        #         all_imgs.append(np.zeros(temp.shape[0] * 3))

        return np.array(all_imgs)


In [5]:
train_dataset = GISLRSequence(
    train_df.sample(20000),
    x_col = "filename",
    y_col = "y_label",
    sample_size=32,
    batch_size = 16
)

val_dataset = GISLRSequence(
    val_df.sample(2000),
    x_col = "filename",
    y_col = "y_label",
    sample_size=32,
    batch_size = 16
)


In [6]:
class GISLRModelv1():
    def __init__(self, input_shape = (32, 1629)):
        model_input = tf.keras.Input(shape=input_shape)
        dense = tf.keras.layers.Dense(128)(model_input)

        value_input = dense

        # CNN layer.
        cnn_layer = tf.keras.layers.Conv1D(filters=512, kernel_size=4, padding='same')
        query_seq_encoding = cnn_layer(value_input)
        value_seq_encoding = cnn_layer(value_input)

        query_value_attention_seq, attn_score = tf.keras.layers.Attention()([query_seq_encoding, value_seq_encoding], return_attention_scores=True)

        query_encoding = tf.keras.layers.GlobalAveragePooling1D()(query_seq_encoding)
        query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(query_value_attention_seq)
        query_concat = tf.keras.layers.Concatenate()([query_encoding, query_value_attention])

        dense_attn = tf.keras.layers.Dense(512)(query_concat)
        dropout = tf.keras.layers.Dropout(0.2)(dense_attn)

        output = tf.keras.layers.Dense(n_classes, activation="softmax")(dropout)
        self.model = tf.keras.Model(model_input, output)

    def get_model(self):
        self.model.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            metrics=[tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.CategoricalCrossentropy()]
        )
        print(self.model.summary())


In [23]:
class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

class GlobalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x
        )
        # x = self.add([x, attn_output])
        # x = self.layernorm(x)
        return attn_output

class GISLRModelv2():
    def __init__(self, input_shape = (32, 1629)) -> None:
        # Multi head attention
        model_input = tf.keras.Input(shape=input_shape)
        # CNN layer.
        query_seq_encoding = tf.keras.layers.Conv1D(filters=128, kernel_size=4, padding='same')(model_input)

        mha = GlobalSelfAttention(num_heads=2, key_dim=128)(query_seq_encoding)

        # query_encoding = tf.keras.layers.GlobalAveragePooling1D()(query_seq_encoding)
        query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(mha)
        # layernorm = tf.keras.layers.LayerNormalization()(query_value_attention)

        dense_attn = tf.keras.layers.Dense(256, activation="relu")(query_value_attention)
        dropout = tf.keras.layers.Dropout(0.2)(dense_attn)

        output = tf.keras.layers.Dense(n_classes, activation="softmax")(dropout)

        self.model = tf.keras.Model(model_input, output)

    def get_model(self):
        self.model.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            metrics=[tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.CategoricalCrossentropy()]
        )
        print(self.model.summary())

        return self.model


In [24]:
model = GISLRModelv2().get_model()

Model: "model_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 32, 1629)]        0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 32, 128)           834176    
_________________________________________________________________
global_self_attention_8 (Glo (None, 32, 128)           131968    
_________________________________________________________________
global_average_pooling1d_8 ( (None, 128)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 256)               33024     
_________________________________________________________________
dropout_8 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 250)               6425

In [21]:
log_dir = os.path.join("./logs/fit/", datetime.datetime.now().strftime("%Y%m%d-%H%M"))

tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir, histogram_freq=1
)

history = model.fit(
    train_dataset,
    epochs=12,
    verbose=1,
    validation_data=val_dataset,
    callbacks=[tensorboard_callback]
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
  29/1250 [..............................] - ETA: 16:54 - loss: 5.5175 - categorical_accuracy: 0.0043 - categorical_crossentropy: 5.5175

KeyboardInterrupt: 

In [59]:
test = GISLRSequence(
    train_df,
    x_col="filename",
    y_col="y_label",
    sample_size=64,
    batch_size=16
)


In [60]:
for x, y in test:
    break



In [65]:
x[1].shape

(54, 1629)

In [67]:
zz = pd.DataFrame(x[1])

In [69]:
zz.shape

(54, 1629)