In [24]:
import os
import pickle
import keras
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt

In [23]:
from tensorflow import keras
from tensorflow.keras.layers import (
    Input, Dense, Dropout, BatchNormalization
)
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import (
    ModelCheckpoint, EarlyStopping
)

In [None]:
# Execute the helper notebooks inside this kernel so that their definitions become available here.
# Later cells call split_data, create_trip_dtset and get_use_embed, none of which is defined in
# this notebook; presumably they come from these two notebooks (TODO confirm).
%run make_datasets.ipynb
%run make_embeddings.ipynb

In [4]:
# Read the product table, discard rows containing any missing value and
# renumber the remaining rows from 0 so the index is contiguous.
df = (
    pd.read_csv('../data/df.csv')
    .dropna()
    .reset_index(drop=True)
)
print(df.shape)
df.head()

(6867, 5)


Unnamed: 0,name,price,category,brand,target
0,cimento forte multiuso cpii cimento forte,34.89,argamassas_rejuntes,CIMENTO FORTE,7
1,po de brita saco granumix,7.9,argamassas_rejuntes,GRANUMIX,7
2,areia vegetal saco granumix av,9.9,argamassas_rejuntes,GRANUMIX,7
3,areia industrial grossa saco granumix,9.9,argamassas_rejuntes,GRANUMIX,7
4,saibro saco granumix sb,7.9,argamassas_rejuntes,GRANUMIX,7


In [13]:
# Width of every input vector of the network; also forwarded to create_trip_dtset below.
embed_dim = 512

# split_data is not defined in this notebook (presumably provided by the helper
# notebooks run earlier); it is unpacked here as (train, test, valid).
train, test, valid = split_data(df)
print(train.shape, test.shape, valid.shape)

(4119, 5) (1374, 5) (1374, 5)


In [14]:
class L2NormalizeLayer(keras.Layer):
    """Stateless layer that L2-normalizes its input along axis 1."""

    def __init__(self, **kwargs):
        # No extra constructor arguments: everything is forwarded to the base layer.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return `inputs` L2-normalized along axis 1."""
        unit_vectors = tf.math.l2_normalize(inputs, axis=1)
        return unit_vectors

class TripletLossBlock(keras.Layer):
    """Layer that computes a triplet margin loss and registers it on the model.

    For an (anchor, positive, negative) triple of embeddings the loss is
    ``sum(max(||a - p||^2 - ||a - n||^2 + alpha, 0))``, where the squared
    distances are summed over the last axis and the hinge terms over axis 0.

    Args:
        alpha: Margin added to the distance difference before the hinge.
        **kwargs: Forwarded to the base layer (e.g. ``name``).
    """

    def __init__(self, alpha, **kwargs):
        # Initialise the base layer first so attribute tracking is set up
        # before any attribute is assigned on the instance.
        super().__init__(**kwargs)
        self.alpha = alpha

    def triplet_loss(self, inputs):
        """Return the summed hinge loss for ``inputs = (anchor, positive, negative)``."""
        a, p, n = inputs
        p_dist = keras.ops.sum(keras.ops.square(a - p), axis=-1)
        n_dist = keras.ops.sum(keras.ops.square(a - n), axis=-1)
        return keras.ops.sum(keras.ops.maximum(p_dist - n_dist + self.alpha, 0), axis=0)

    def call(self, inputs):
        """Compute the loss, attach it via ``add_loss`` and return it as the layer output."""
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

    def get_config(self):
        """Return the layer config including ``alpha`` so the layer can be re-created from it."""
        config = super().get_config()
        config.update({'alpha': self.alpha})
        return config

In [15]:
def build_model(hp):
    """Build and compile the triplet (siamese) network for one tuner trial.

    A shared encoder (Dense -> Dropout -> BatchNormalization -> Dense -> Dropout ->
    Dense(8) -> L2 normalisation) is applied to three inputs of width ``embed_dim``;
    the three embeddings are passed to ``TripletLossBlock``, which supplies the
    training loss through ``add_loss`` (hence ``loss=None`` in ``compile``).

    Args:
        hp: Hyperparameter container offering ``Int`` and ``Float``.

    Returns:
        The compiled three-input model whose output is the triplet loss.
    """
    # Hyperparameters are requested in the same order as they are registered
    # in the search space: dense1_units, dense2_units, dropout1, l2_reg,
    # dropout2, alpha, learning_rate.
    units_first = hp.Int('dense1_units', min_value=128, max_value=512, step=64)
    units_second = hp.Int('dense2_units', min_value=64, max_value=256, step=32)
    rate_first = hp.Float('dropout1', min_value=0.2, max_value=0.5, step=0.1)
    l2_strength = hp.Float('l2_reg', min_value=0.001, max_value=0.01, sampling='log')
    rate_second = hp.Float('dropout2', min_value=0.2, max_value=0.5, step=0.1)
    margin = hp.Float('alpha', min_value=0.2, max_value=0.8, step=0.2)
    step_size = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')

    # Shared encoder mapping one input vector to an 8-dimensional normalised embedding.
    encoder_in = Input(shape=(embed_dim,))
    hidden = Dense(units=units_first, activation='relu')(encoder_in)
    hidden = Dropout(rate_first)(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Dense(
        units=units_second,
        activation='relu',
        kernel_regularizer=keras.regularizers.l2(l2_strength),
    )(hidden)
    hidden = Dropout(rate_second)(hidden)
    projected = Dense(8, name='dense_layer')(hidden)
    unit_embedding = L2NormalizeLayer(name='norm_layer')(projected)
    base_model = Model(inputs=encoder_in, outputs=unit_embedding)

    # Anchor, positive and negative branches (in that order) reuse the encoder weights.
    triplet_inputs = [Input(shape=(embed_dim,)) for _ in range(3)]
    triplet_embeddings = [base_model(branch) for branch in triplet_inputs]

    loss_out = TripletLossBlock(alpha=margin, name='triplet_loss_block')(triplet_embeddings)
    snn_model = Model(triplet_inputs, loss_out)

    snn_model.compile(optimizer=keras.optimizers.Adam(step_size), loss=None)
    return snn_model

# Random search over the space declared in build_model, minimising validation loss.
# A fixed seed makes the sequence of sampled hyperparameter sets repeatable.
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=20,
    executions_per_trial=1,
    seed=42,
    directory='hyperparam_tuning_bert',
    project_name='siamese_bert_tuning'
)

tuner.search_space_summary()

Search space summary
Default search space size: 7
dense1_units (Int)
{'default': None, 'conditions': [], 'min_value': 128, 'max_value': 512, 'step': 64, 'sampling': 'linear'}
dense2_units (Int)
{'default': None, 'conditions': [], 'min_value': 64, 'max_value': 256, 'step': 32, 'sampling': 'linear'}
dropout1 (Float)
{'default': 0.2, 'conditions': [], 'min_value': 0.2, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
l2_reg (Float)
{'default': 0.001, 'conditions': [], 'min_value': 0.001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}
dropout2 (Float)
{'default': 0.2, 'conditions': [], 'min_value': 0.2, 'max_value': 0.5, 'step': 0.1, 'sampling': 'linear'}
alpha (Float)
{'default': 0.2, 'conditions': [], 'min_value': 0.2, 'max_value': 0.8, 'step': 0.2, 'sampling': 'linear'}
learning_rate (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


In [25]:
batch_size, colfeat, collabel = 64, 'name', 'target'

# Optional cap on the number of rows per split for a quick smoke test.
# None uses each split in full. The previous `.loc[:3]` slice was label-based
# and limited both datasets to a handful of rows while `steps` was still
# derived from the full training split.
debug_rows = None
train_rows = train if debug_rows is None else train.iloc[:debug_rows]
valid_rows = valid if debug_rows is None else valid.iloc[:debug_rows]

# create_trip_dtset and get_use_embed are not defined in this notebook
# (presumably provided by the helper notebooks run earlier).
dataset_train = create_trip_dtset(batch_size, train_rows, colfeat, collabel, get_use_embed, embed_dim)
dataset_val = create_trip_dtset(batch_size, valid_rows, colfeat, collabel, get_use_embed, embed_dim)

# Batches per training epoch, derived from the rows actually used (at least one).
steps = max(1, len(train_rows) // batch_size)

In [None]:
# Per-trial checkpoint. KerasTuner deep-copies callbacks for every trial, so the
# "best so far" tracked by this callback restarts with each trial and the file
# is overwritten by later trials; the overall best model is exported after the
# search below.
checkpoint = ModelCheckpoint(
    filepath='best_bert_siamese_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1
)

# Stop a trial after 5 epochs without val_loss improvement and roll back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    restore_best_weights=True
)

# Validation batches are derived from the validation split rather than reusing
# the training step count.
val_steps = max(1, len(valid) // batch_size)

tuner.search(
    dataset_train,
    epochs=20,
    steps_per_epoch=steps,
    validation_data=dataset_val,
    validation_steps=val_steps,
    callbacks=[checkpoint, early_stopping]
)

# Persist the best model across all trials (by the tuner objective) under the
# checkpoint path, replacing whatever the last trial left there.
best_model = tuner.get_best_models(num_models=1)[0]
best_model.save('best_bert_siamese_model.keras')