In [1]:
%load_ext autoreload
%autoreload 2
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


from src.constants import TARGET_MAX_LENGHT, MAX_LENGHT_SOURCE
from src.data_utils.dataset import build_datset_train_val, VOCAB_SIZE, LHAND_IDX, LHAND_IDX, start_token_idx, end_token_idx, pre_process, pad_token_idx, FEATURE_COLUMNS
from src.prod_models.builder import build_prod_transformer_model_v2
from src.callbacks import get_predefine_callbacks
import optuna
import tensorflow as tf
import numpy as np


TRIALS = 15
EPOCHS = 5000
EPOCHS_PER_TRIAL = 10
BATCH_SIZE = 128
TRAIN_SPLIT = 0.8
MODEL_NAME = "prod_v2"

In [2]:
FEATURE_COLUMNS.shape[0]/2

52.0

In [3]:
train_dataset, val_dataset = build_datset_train_val(split=TRAIN_SPLIT, batch_size=BATCH_SIZE)

train split: 28160 | val split: 6656


In [4]:
next(iter(train_dataset))

((<tf.Tensor: shape=(128, 128, 52), dtype=float32, numpy=
  array([[[0.27098814, 0.8101677 , 0.3654514 , ..., 0.75940263,
           0.4921588 , 0.7733124 ],
          [0.32206193, 0.8371788 , 0.41678107, ..., 0.7513371 ,
           0.47293442, 0.76303047],
          [0.        , 0.        , 0.        , ..., 0.72678065,
           0.5150777 , 0.7387048 ],
          ...,
          [0.18945673, 0.8469254 , 0.32516694, ..., 0.705916  ,
           0.39349455, 0.7400357 ],
          [0.18166085, 0.85682863, 0.3225292 , ..., 0.711607  ,
           0.3447126 , 0.74080074],
          [0.17465053, 0.85880864, 0.3080451 , ..., 0.7054042 ,
           0.33971542, 0.73193616]],
  
         [[0.25190622, 0.7471702 , 0.32096434, ..., 0.67004734,
           0.24290428, 0.69245994],
          [0.26125604, 0.7291572 , 0.33123383, ..., 0.6084038 ,
           0.29093763, 0.6359742 ],
          [0.        , 0.        , 0.        , ..., 0.5928458 ,
           0.28803307, 0.6307548 ],
          ...,
        

In [5]:
def objective(trial):
    tf.keras.backend.clear_session()
    model = build_prod_transformer_model_v2(trial=trial)
    model.build([(None, MAX_LENGHT_SOURCE, int(FEATURE_COLUMNS.shape[0]/2)), (None, TARGET_MAX_LENGHT)])
    model.fit(train_dataset, validation_data=val_dataset, epochs=EPOCHS_PER_TRIAL, callbacks=get_predefine_callbacks(model_name=MODEL_NAME, patience=3), verbose=0)
    levenshtein = model.evaluate(val_dataset)[-1]

    return  levenshtein

In [6]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=TRIALS, gc_after_trial=True, show_progress_bar=True)

[I 2023-08-24 23:39:54,369] A new study created in memory with name: no-name-31db08f6-208d-47d3-9ea6-06f12150a65d


  0%|          | 0/15 [00:00<?, ?it/s]

[I 2023-08-24 23:41:24,474] Trial 0 finished with value: 0.24664828181266785 and parameters: {'attention_heads': 3, 'learning_rate': 1.3466229815841302e-05, 'drop_out_out_decoder': 0.5, 'dense_layers': 5, 'drop_out': 0.2, 'encoder_kernel_size': 5}. Best is trial 0 with value: 0.24664828181266785.
[I 2023-08-24 23:43:16,439] Trial 1 finished with value: 0.21152471005916595 and parameters: {'attention_heads': 6, 'learning_rate': 0.0006269092262292042, 'drop_out_out_decoder': 0.05, 'dense_layers': 5, 'drop_out': 0.30000000000000004, 'encoder_kernel_size': 11}. Best is trial 1 with value: 0.21152471005916595.
[I 2023-08-24 23:45:13,637] Trial 2 finished with value: 0.22399476170539856 and parameters: {'attention_heads': 2, 'learning_rate': 0.0024734140958120538, 'drop_out_out_decoder': 0.5, 'dense_layers': 5, 'drop_out': 0.25, 'encoder_kernel_size': 11}. Best is trial 1 with value: 0.21152471005916595.
[I 2023-08-24 23:46:32,978] Trial 3 finished with value: 0.29194554686546326 and paramet

In [7]:
trials = study.best_trials

In [8]:
trials[0]

FrozenTrial(number=1, state=TrialState.COMPLETE, values=[0.21152471005916595], datetime_start=datetime.datetime(2023, 8, 24, 23, 41, 24, 606679), datetime_complete=datetime.datetime(2023, 8, 24, 23, 43, 16, 439325), params={'attention_heads': 6, 'learning_rate': 0.0006269092262292042, 'drop_out_out_decoder': 0.05, 'dense_layers': 5, 'drop_out': 0.30000000000000004, 'encoder_kernel_size': 11}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'attention_heads': IntDistribution(high=8, log=False, low=1, step=1), 'learning_rate': FloatDistribution(high=0.1, log=True, low=1e-05, step=None), 'drop_out_out_decoder': FloatDistribution(high=0.5, log=False, low=0.0, step=0.05), 'dense_layers': IntDistribution(high=5, log=False, low=1, step=1), 'drop_out': FloatDistribution(high=0.5, log=False, low=0.0, step=0.05), 'encoder_kernel_size': IntDistribution(high=12, log=False, low=3, step=1)}, trial_id=1, value=None)

In [9]:
tf.keras.backend.clear_session()
trials = study.best_trials

for index, trial in enumerate(trials):
    print(f"Best model: {index+1}")

    model = build_prod_transformer_model_v2(trial=trial)

    model.build([(None, MAX_LENGHT_SOURCE, int(FEATURE_COLUMNS.shape[0]/2)), (None, TARGET_MAX_LENGHT)])

    print(model.summary())
    model.fit(train_dataset, validation_data=val_dataset, epochs=EPOCHS, callbacks=get_predefine_callbacks(model_name=MODEL_NAME, patience=10))
   
    print('validation levenshtein distance: {}'.format(trial.value))
    print("Best hyperparameters: {}".format(trial.params))

    model.load_weights(f"../best_model/prototype/{MODEL_NAME}")

    print(f"Metrics in Validation: {model.evaluate(val_dataset)}")

Best model: 1
Model: "finger_spelling_v2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 landmark_embedding_v2 (Lan  multiple                  89984     
 dmarkEmbeddingV2)                                               
                                                                 
 basic_positional_embedding  multiple                  8064      
 s (BasicPositionalEmbeddin                                      
 gs)                                                             
                                                                 
 transformer_encoder (Trans  multiple                  51207     
 formerEncoder)                                                  
                                                                 
 transformer_decoder (Trans  multiple                  102093    
 formerDecoder)                                                  
                                  

In [10]:
from src.data_utils.dataset import char_to_num, num_to_char

In [11]:
# target_sequence = [char_to_num[w] for w in ["<"]]

# for batch_index, batch in enumerate(val_dataset):
#     batch = batch[0]

#     sources = batch[0] #batch["source"]
#     targets = batch[1] #batch["target"]
    
#     print(sources.shape)
#     print(targets.shape)

#     for index_sample, (source, target) in enumerate(zip(sources, targets)):
#         source = tf.expand_dims(source, axis=0)
#         target_sequence = [char_to_num[w] for w in ["<"]]
#         y_true = "".join([num_to_char[w] for w in target.numpy()])
    
#         for i in range(TARGET_MAX_LENGHT):
#             next_token = tf.expand_dims(tf.pad(tf.constant(target_sequence),
#              [[0, TARGET_MAX_LENGHT-len(target_sequence)]],
#               mode='CONSTANT',
#                constant_values=pad_token_idx,
#                 name=None),
#                  axis=0)

#             print("next target sequence to predict: ", next_token)
#             y_pred = model((source, next_token))

#             y_pred = tf.cast(tf.argmax(y_pred, axis=2), dtype=tf.int32)

#             print("argmax:", y_pred)

#             mask = tf.not_equal(y_pred, pad_token_idx)
#             next_token = y_pred[mask][-1].numpy()

#             target_sequence.append(next_token)

#             print("sequence so far: ", "".join([num_to_char[w] for w in target_sequence]))
#             print("Label: ", y_true)

#             if num_to_char[next_token]==">":
#                 break

#         print(f"================================={index_sample}=========================================")
#         if index_sample==1:
#             break

#     if batch_index==1:
#         break

In [12]:
model.summary()

Model: "finger_spelling_v2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 landmark_embedding_v2 (Lan  multiple                  89984     
 dmarkEmbeddingV2)                                               
                                                                 
 basic_positional_embedding  multiple                  8064      
 s (BasicPositionalEmbeddin                                      
 gs)                                                             
                                                                 
 transformer_encoder (Trans  multiple                  51207     
 formerEncoder)                                                  
                                                                 
 transformer_decoder (Trans  multiple                  102093    
 formerDecoder)                                                  
                                                

In [13]:
# Save model

model.save(f"../models/{MODEL_NAME}", save_format="tf")

INFO:tensorflow:Assets written to: ../models/prod_v2/assets


INFO:tensorflow:Assets written to: ../models/prod_v2/assets


# TF lite

In [14]:
class TFLiteModel(tf.Module):
    def __init__(self, model):
        super(TFLiteModel, self).__init__()
        self.target_start_token_idx = start_token_idx
        self.target_end_token_idx = end_token_idx
        # Load the feature generation and main models
        self.model = model

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, FEATURE_COLUMNS.shape[0]], dtype=tf.float32, name='inputs')])
    def __call__(self, inputs, training=False):
        # Preprocess Data
        x = tf.cast(inputs, tf.float32)

        x = x[None]

        x = tf.cond(tf.shape(x)[1] == 0, lambda: tf.zeros((1, 1, FEATURE_COLUMNS.shape[0])), lambda: tf.identity(x))

        x = x[0]

        x = pre_process(x)
        #shape after [MAX_LENGHT_SOURCE, FEATURE_SIZE]

        x = x[None]

        x = self.model.generate(x)

        x = x[0]
        idx = tf.argmax(tf.cast(tf.equal(x, self.target_end_token_idx), tf.int32))
        idx = tf.where(tf.math.less(idx, 1), tf.constant(2, dtype=tf.int64), idx)
        x = x[1:idx]

        x = tf.one_hot(x, 59)
        return {"outputs": x}

tflitemodel_base = TFLiteModel(model)

In [15]:
keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflitemodel_base)
keras_model_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
keras_model_converter.allow_custom_ops = True
keras_model_converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model = keras_model_converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmp_d6nmpai/assets


INFO:tensorflow:Assets written to: /tmp/tmp_d6nmpai/assets


In [16]:
import json

# with open('/kaggle/working/model.tflite', 'wb') as f:
with open("../models/model.tflite", "wb") as f:    
    f.write(tflite_model)

infargs = {"selected_columns" : list(FEATURE_COLUMNS)}

# with open("inference_args.json", "w") as json_file:
with open("../models/inference_args.json", "w") as json_file:
    json.dump(infargs, json_file)

In [17]:
!zip submission.zip  '../models/model.tflite' '../models/inference_args.json'

updating: ../models/model.tflite (deflated 78%)
updating: ../models/inference_args.json (deflated 84%)


# Test results

In [18]:
# from batch 1

source_batch, target_batch = next(iter(val_dataset))[0]

In [19]:
REQUIRED_SIGNATURE = "serving_default"
REQUIRED_OUTPUT = "outputs"

# interpreter = tf.lite.Interpreter("model.tflite")
interpreter = tf.lite.Interpreter("../models/model.tflite")

# with open ("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
with open ("../data/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)

rev_character_map = {j:i for i,j in character_map.items()}
found_signatures = list(interpreter.get_signature_list().keys())

if REQUIRED_SIGNATURE not in found_signatures:
    raise KernelEvalException('Required input signature not found.')

prediction_fn = interpreter.get_signature_runner(REQUIRED_SIGNATURE)

prediction_str = ""
for source_element, target_element in zip(source_batch, target_batch):
    # print(tf.expand_dims(target_element, axis=0).numpy())

    output = prediction_fn(inputs=source_element)

    # print(output[REQUIRED_OUTPUT])

    # break

    print("generated: ", "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)]))
    print("target: ", "".join([rev_character_map.get(s, "") for s in target_element.numpy()]))

generated:  www.curre
target:  144-421-3778
generated:  4494 bricre roort da
target:  annemarie vance
generated:  +655-10-15-1-35
target:  805283 little bald branch road
generated:  999
target:  423730 west tobacco road
generated:  www
target:  533-348-6983
generated:  briri cerr
target:  www.elg4ml.com
generated:  www.cecrasar
target:  lydia mullins
generated:  brie brer
target:  8107 tallagson lane northeast
generated:  sirie comans
target:  921 marc avenue
generated:  999 s fir
target:  completos66.rssing.com/8902
generated:  476 bre brer
target:  graphemica.com/
generated:  www.frercorce
target:  +7-1975-49
generated:  4547 co routcor
target:  www.runfengweixiu.com
generated:  brociri crisa
target:  848-463-2595
generated:  ribarrie ru
target:  +61-928-216
generated:  www.frer
target:  376 russell county
generated:  brerr cerr
target:  jerod cardenas
generated:  www.frrice
target:  iecoevent.com/thisandthatbajan
generated:  riri
target:  +44-0923-79-920
generated:  979 leice
target

In [20]:
interpreter.get_input_details()

[{'name': 'serving_default_inputs:0',
  'index': 0,
  'shape': array([128,  52], dtype=int32),
  'shape_signature': array([ -1, 104], dtype=int32),
  'dtype': numpy.float32,
  'quantization': (0.0, 0),
  'quantization_parameters': {'scales': array([], dtype=float32),
   'zero_points': array([], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]