In [1]:
%load_ext autoreload
%autoreload 2
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


from src.constants import TARGET_MAX_LENGHT, MAX_LENGHT_SOURCE, FEATURE_COLUMNS
from src.data_utils.dataset import build_datset_train_val, VOCAB_SIZE, LHAND_IDX, LHAND_IDX, start_token_idx, end_token_idx, pre_process, pad_token_idx
from src.prod_models.builder import build_prod_transformer_model_v2
from src.callbacks import get_predefine_callbacks
import optuna
import tensorflow as tf
import numpy as np


TRIALS = 80
EPOCHS = 5000
EPOCHS_PER_TRIAL = 10
BATCH_SIZE = 128
TRAIN_SPLIT = 0.8
MODEL_NAME = "prod_v2"

In [2]:
train_dataset, val_dataset = build_datset_train_val(split=TRAIN_SPLIT, batch_size=BATCH_SIZE)

train split: 28160 | val split: 6656


In [3]:
def objective(trial):
    tf.keras.backend.clear_session()
    model = build_prod_transformer_model_v2(trial=trial)
    model.build([(None, MAX_LENGHT_SOURCE, int(FEATURE_COLUMNS.shape[0]/2)), (None, TARGET_MAX_LENGHT)])
    model.fit(train_dataset, validation_data=val_dataset, epochs=EPOCHS_PER_TRIAL, callbacks=get_predefine_callbacks(model_name=MODEL_NAME, patience=3), verbose=0)
    levenshtein = model.evaluate(val_dataset)[-1]

    return  levenshtein

In [4]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=TRIALS, gc_after_trial=True, show_progress_bar=True)

[I 2023-08-24 16:09:13,866] A new study created in memory with name: no-name-dd6f015f-36a4-4f77-a50c-9eb5d9ad08b8


  0%|          | 0/80 [00:00<?, ?it/s]

[I 2023-08-24 16:10:54,821] Trial 0 finished with value: 0.22779132425785065 and parameters: {'attention_heads': 3, 'learning_rate': 8.487681652646925e-05, 'drop_out_out_decoder': 0.15000000000000002, 'dense_layers': 4, 'drop_out': 0.05, 'encoder_kernel_size': 9}. Best is trial 0 with value: 0.22779132425785065.
[I 2023-08-24 16:13:00,316] Trial 1 finished with value: 0.21052058041095734 and parameters: {'attention_heads': 5, 'learning_rate': 0.0008058347570333092, 'drop_out_out_decoder': 0.1, 'dense_layers': 3, 'drop_out': 0.0, 'encoder_kernel_size': 12}. Best is trial 1 with value: 0.21052058041095734.
[I 2023-08-24 16:14:52,580] Trial 2 finished with value: 0.22480256855487823 and parameters: {'attention_heads': 8, 'learning_rate': 0.00907519569562131, 'drop_out_out_decoder': 0.1, 'dense_layers': 1, 'drop_out': 0.30000000000000004, 'encoder_kernel_size': 8}. Best is trial 1 with value: 0.21052058041095734.
[I 2023-08-24 16:16:18,662] Trial 3 finished with value: 0.21985526382923126 

In [5]:
trials = study.best_trials

In [6]:
trials[0]

FrozenTrial(number=44, state=TrialState.COMPLETE, values=[0.18621140718460083], datetime_start=datetime.datetime(2023, 8, 24, 17, 47, 37, 114347), datetime_complete=datetime.datetime(2023, 8, 24, 17, 49, 45, 892933), params={'attention_heads': 7, 'learning_rate': 0.0027196542256953437, 'drop_out_out_decoder': 0.05, 'dense_layers': 3, 'drop_out': 0.0, 'encoder_kernel_size': 6}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'attention_heads': IntDistribution(high=8, log=False, low=1, step=1), 'learning_rate': FloatDistribution(high=0.1, log=True, low=1e-05, step=None), 'drop_out_out_decoder': FloatDistribution(high=0.5, log=False, low=0.0, step=0.05), 'dense_layers': IntDistribution(high=5, log=False, low=1, step=1), 'drop_out': FloatDistribution(high=0.5, log=False, low=0.0, step=0.05), 'encoder_kernel_size': IntDistribution(high=12, log=False, low=3, step=1)}, trial_id=44, value=None)

In [19]:
tf.keras.backend.clear_session()
trials = study.best_trials

for index, trial in enumerate(trials):
    print(f"Best model: {index+1}")

    model = build_prod_transformer_model_v2(trial=None)

    model.build([(None, MAX_LENGHT_SOURCE, int(FEATURE_COLUMNS.shape[0]/2)), (None, TARGET_MAX_LENGHT)])

    print(model.summary())
    model.fit(train_dataset, validation_data=val_dataset, epochs=EPOCHS, callbacks=get_predefine_callbacks(model_name=MODEL_NAME, patience=10))
   
    print('validation levenshtein distance: {}'.format(trial.value))
    print("Best hyperparameters: {}".format(trial.params))

    model.load_weights(f"../best_model/prototype/{MODEL_NAME}")

    print(f"Metrics in Validation: {model.evaluate(val_dataset)}")

Best model: 1
Model: "finger_spelling_v2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 landmark_embedding_v2 (Lan  multiple                  147200    
 dmarkEmbeddingV2)                                               
                                                                 
 basic_positional_embedding  multiple                  16128     
 s (BasicPositionalEmbeddin                                      
 gs)                                                             
                                                                 
 transformer_encoder (Trans  multiple                  233992    
 formerEncoder)                                                  
                                                                 
 transformer_decoder (Trans  multiple                  467343    
 formerDecoder)                                                  
                                  

In [20]:
from src.data_utils.dataset import char_to_num, num_to_char

In [21]:
# target_sequence = [char_to_num[w] for w in ["<"]]

# for batch_index, batch in enumerate(val_dataset):
#     batch = batch[0]

#     sources = batch[0] #batch["source"]
#     targets = batch[1] #batch["target"]
    
#     print(sources.shape)
#     print(targets.shape)

#     for index_sample, (source, target) in enumerate(zip(sources, targets)):
#         source = tf.expand_dims(source, axis=0)
#         target_sequence = [char_to_num[w] for w in ["<"]]
#         y_true = "".join([num_to_char[w] for w in target.numpy()])
    
#         for i in range(TARGET_MAX_LENGHT):
#             next_token = tf.expand_dims(tf.pad(tf.constant(target_sequence),
#              [[0, TARGET_MAX_LENGHT-len(target_sequence)]],
#               mode='CONSTANT',
#                constant_values=pad_token_idx,
#                 name=None),
#                  axis=0)

#             print("next target sequence to predict: ", next_token)
#             y_pred = model((source, next_token))

#             y_pred = tf.cast(tf.argmax(y_pred, axis=2), dtype=tf.int32)

#             print("argmax:", y_pred)

#             mask = tf.not_equal(y_pred, pad_token_idx)
#             next_token = y_pred[mask][-1].numpy()

#             target_sequence.append(next_token)

#             print("sequence so far: ", "".join([num_to_char[w] for w in target_sequence]))
#             print("Label: ", y_true)

#             if num_to_char[next_token]==">":
#                 break

#         print(f"================================={index_sample}=========================================")
#         if index_sample==1:
#             break

#     if batch_index==1:
#         break

In [22]:
model.summary()

Model: "finger_spelling_v2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 landmark_embedding_v2 (Lan  multiple                  147200    
 dmarkEmbeddingV2)                                               
                                                                 
 basic_positional_embedding  multiple                  16128     
 s (BasicPositionalEmbeddin                                      
 gs)                                                             
                                                                 
 transformer_encoder (Trans  multiple                  233992    
 formerEncoder)                                                  
                                                                 
 transformer_decoder (Trans  multiple                  467343    
 formerDecoder)                                                  
                                                

Total params: 878165 (3.35 MB)
Trainable params: 878165 (3.35 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
# Save model

model.save(f"../models/{MODEL_NAME}", save_format="tf")

INFO:tensorflow:Assets written to: ../models/prod_v2/assets


INFO:tensorflow:Assets written to: ../models/prod_v2/assets


# TF lite

In [36]:
class TFLiteModel(tf.Module):
    def __init__(self, model):
        super(TFLiteModel, self).__init__()
        self.target_start_token_idx = start_token_idx
        self.target_end_token_idx = end_token_idx
        # Load the feature generation and main models
        self.model = model

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, FEATURE_COLUMNS.shape[0]], dtype=tf.float32, name='inputs')])
    def __call__(self, inputs, training=False):
        # Preprocess Data
        x = tf.cast(inputs, tf.float32)

        x = x[None]

        x = tf.cond(tf.shape(x)[1] == 0, lambda: tf.zeros((1, 1, FEATURE_COLUMNS.shape[0])), lambda: tf.identity(x))

        x = x[0]

        x = pre_process(x)
        #shape after [MAX_LENGHT_SOURCE, FEATURE_SIZE]

        x = x[None]

        x = self.model.generate(x)

        x = x[0]
        idx = tf.argmax(tf.cast(tf.equal(x, self.target_end_token_idx), tf.int32))
        idx = tf.where(tf.math.less(idx, 1), tf.constant(2, dtype=tf.int64), idx)
        x = x[1:idx]

        x = tf.one_hot(x, 59)
        return {"outputs": x}

tflitemodel_base = TFLiteModel(model)

In [37]:
keras_model_converter = tf.lite.TFLiteConverter.from_keras_model(tflitemodel_base)
keras_model_converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
keras_model_converter.allow_custom_ops = True
keras_model_converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model = keras_model_converter.convert()

INFO:tensorflow:Assets written to: /tmp/tmpgbn0u_n9/assets


INFO:tensorflow:Assets written to: /tmp/tmpgbn0u_n9/assets


In [38]:
import json

# with open('/kaggle/working/model.tflite', 'wb') as f:
with open("../models/model.tflite", "wb") as f:    
    f.write(tflite_model)

infargs = {"selected_columns" : list(FEATURE_COLUMNS)}

# with open("inference_args.json", "w") as json_file:
with open("../models/inference_args.json", "w") as json_file:
    json.dump(infargs, json_file)

In [39]:
!zip submission.zip  '../models/model.tflite' '../models/inference_args.json'

updating: ../models/model.tflite (deflated 72%)
updating: ../models/inference_args.json (deflated 84%)


# Test results

In [40]:
# from batch 1

source_batch, target_batch = next(iter(val_dataset))[0]

In [41]:
REQUIRED_SIGNATURE = "serving_default"
REQUIRED_OUTPUT = "outputs"

# interpreter = tf.lite.Interpreter("model.tflite")
interpreter = tf.lite.Interpreter("../models/model.tflite")

# with open ("/kaggle/input/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
with open ("../data/asl-fingerspelling/character_to_prediction_index.json", "r") as f:
    character_map = json.load(f)

rev_character_map = {j:i for i,j in character_map.items()}
found_signatures = list(interpreter.get_signature_list().keys())

if REQUIRED_SIGNATURE not in found_signatures:
    raise KernelEvalException('Required input signature not found.')

prediction_fn = interpreter.get_signature_runner(REQUIRED_SIGNATURE)

prediction_str = ""
for source_element, target_element in zip(source_batch, target_batch):
    # print(tf.expand_dims(target_element, axis=0).numpy())

    output = prediction_fn(inputs=source_element)

    # print(output[REQUIRED_OUTPUT])

    # break

    print("generated: ", "".join([rev_character_map.get(s, "") for s in np.argmax(output[REQUIRED_OUTPUT], axis=1)]))
    print("target: ", "".join([rev_character_map.get(s, "") for s in target_element.numpy()]))

generated:  jespreltelto.com
target:  5065 west 8th street
generated:  jell co
target:  +974-124-1526-3338
generated:  jeliel conton
target:  510-316-6437
generated:  arclil ellon
target:  2151 east hopi trail
generated:  lililil stort
target:  www.summitortho.com/
generated:  jeil sonel
target:  modul-pelatihan-akuntansi
generated:  jesest conton
target:  +92-32-64-950-534
generated:  jelifa conton
target:  dewayne levy
generated:  jell forn
target:  ginecologa.altervista.org
generated:  coccelnellellonel.com
target:  nuveitech/jupiler-league
generated:  arrel janellon
target:  frankie ponce
generated:  kanla co
target:  921 shoemaker canyon
generated:  ililce branton
target:  george munoz
generated:  jelc finton
target:  292-989-6767
generated:  bill sannon
target:  247656
generated:  jicgie coun
target:  843-494-0857
generated:  jein co
target:  663 owen oaks drive
generated:  jiane count
target:  downloadables
generated:  seaccclillill
target:  https://m1.mingkyaa.com
generated:  j

In [42]:
interpreter.get_input_details()

[{'name': 'serving_default_inputs:0',
  'index': 0,
  'shape': array([128,  42], dtype=int32),
  'shape_signature': array([-1, 84], dtype=int32),
  'dtype': numpy.float32,
  'quantization': (0.0, 0),
  'quantization_parameters': {'scales': array([], dtype=float32),
   'zero_points': array([], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]