In [None]:
import itertools

from ner import *
from cnn_models import *
from transformer_prof import *
# Kept after the wildcard imports so this explicit binding wins if any of them also exports `tqdm`.
from tqdm import tqdm

In [None]:
# Hyperparameter grid for the CNN NER sweep: every combination of the lists
# below is trained once for each entry of `model_types`.
embedding_sizes = [1024]
vocab_size = [1000]
class_weights = [True]
paddings = ["post"]

# Materialise the grid once so the progress-bar total is derived from the exact
# sequence that gets iterated; the two can no longer drift apart.
# `model_types` is not defined in this cell; presumably it is provided by one of
# the wildcard imports — TODO confirm.
# The ordering is the same as the equivalent nested loops: `model_types` varies fastest.
sweep_grid = list(
    itertools.product(embedding_sizes, vocab_size, class_weights, paddings, model_types)
)

# Total combinations of hyperparameters for the tqdm progress bar
total_iterations = len(sweep_grid)

with tqdm(total=total_iterations, desc="Training Models") as pbar:
    for emb_size, voc_size, clss_w, pad, name in sweep_grid:
        # Fresh dicts for every run, so anything a run might mutate cannot
        # leak into the next configuration.
        train_config = {'use_sample_weights': clss_w}
        hyperparams = {
            'vocab_size': voc_size,
            'embedding_dim': emb_size,
            'epochs': 10,
            'batch_size': 32,
        }

        # NOTE(review): this dict is built but not passed to `create_model` or
        # `NamedEntityRecognition` in this cell, so as far as this cell shows the
        # swept `padding` value has no effect on training — confirm whether it
        # should be wired in.
        preprocessing_config = {
            'lemmatize': False,
            'stem': True,
            'remove_stopwords': False,
            'custom_stopwords': None,
            'padding': pad,
        }

        model = create_model(name)
        basic = NamedEntityRecognition(
            model,
            hyperparams,
            training_times=2,
            train_config=train_config,
            verbosing=0,
            name=name,
            results_file_name="./results/NER_cnn_f1",
        )
        basic.train_model()
        basic.print_training_information()

        # One tick per trained configuration.
        pbar.update(1)

Training Models:   0%|          | 0/64 [00:00<?, ?it/s]

[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 32ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


Training Models:   2%|▏         | 1/64 [01:55<2:01:25, 115.64s/it]



Average Training Accuracy: 0.986227422952652
Average Training F1: 0.29106631875038147
Average Training Loss: 0.04623243398964405
Average Validation Accuracy: 0.9802239835262299
Average Validation F1: 0.2612162232398987
Average Validation Loss: 0.10471392795443535
Best Model Validation Accuracy: 0.9813872575759888
Best Model Validation F1: 0.26301833987236023

[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 38ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step


Training Models:   3%|▎         | 2/64 [05:17<2:51:36, 166.07s/it]



Average Training Accuracy: 0.9903829991817474
Average Training F1: 0.2988511919975281
Average Training Loss: 0.021738569252192974
Average Validation Accuracy: 0.9841743409633636
Average Validation F1: 0.26710928976535797
Average Validation Loss: 0.08901358395814896
Best Model Validation Accuracy: 0.9843440651893616
Best Model Validation F1: 0.2664371430873871

[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


Training Models:   5%|▍         | 3/64 [07:22<2:29:49, 147.37s/it]



Average Training Accuracy: 0.9873869121074677
Average Training F1: 0.2915613353252411
Average Training Loss: 0.046520547941327095
Average Validation Accuracy: 0.9808661937713623
Average Validation F1: 0.26019057631492615
Average Validation Loss: 0.10251652076840401
Best Model Validation Accuracy: 0.9810479879379272
Best Model Validation F1: 0.25927603244781494

[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step
897
897
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step


Training Models:   6%|▋         | 4/64 [11:22<2:50:41, 170.69s/it]



Average Training Accuracy: 0.9896284937858582
Average Training F1: 0.2962684631347656
Average Training Loss: 0.022766927257180214
Average Validation Accuracy: 0.9839925467967987
Average Validation F1: 0.2653697431087494
Average Validation Loss: 0.09229683503508568
Best Model Validation Accuracy: 0.9844408631324768
Best Model Validation F1: 0.2659398019313812






In [None]:
# Grid search over the transformer architecture: number of blocks, attention
# heads, feed-forward multiplier and embedding dimension.
blocks = [1, 2]
heads = [1, 2, 4, 8]
ff_mult = [1, 2, 4, 8]
embeddings = [50, 100, 256, 512]

# itertools.product walks the grid in the same order as the equivalent nested
# loops (the right-most list, `embeddings`, varies fastest).
for num_blocks, num_heads, ff, emb in itertools.product(blocks, heads, ff_mult, embeddings):
    model = create_transformer(
        embed_dim=emb,
        number_of_blocks=num_blocks,
        num_heads=num_heads,
        ff_mult=ff,
    )
    train_config = {'use_sample_weights': True}
    hyperparams = {'vocab_size': 500, 'embedding_dim': emb, 'epochs': 10, 'batch_size': 32}

    # NOTE(review): every configuration is reported under the same name
    # ("Transformer") and the same results file; whether individual runs can
    # still be told apart depends on what NamedEntityRecognition records — confirm.
    basic = NamedEntityRecognition(
        model,
        hyperparams,
        is_transformer=True,
        training_times=2,
        train_config=train_config,
        verbosing=2,
        name="Transformer",
        results_file_name="./results/NER_bucle_trans",
    )
    basic.train_model()
    basic.print_training_information()

Number of classes in training data: 119
Training model 1/2Epoch 1/10

128/128 - 7s - 58ms/step - accuracy: 0.3814 - f1: 0.0904 - loss: 4.1586 - val_accuracy: 0.8170 - val_f1: 0.1441 - val_loss: 2.1999
Epoch 2/10
128/128 - 2s - 12ms/step - accuracy: 0.8021 - f1: 0.1632 - loss: 2.5686 - val_accuracy: 0.8070 - val_f1: 0.1591 - val_loss: 1.5895
Epoch 3/10
128/128 - 2s - 13ms/step - accuracy: 0.7373 - f1: 0.1760 - loss: 1.7805 - val_accuracy: 0.7326 - val_f1: 0.1615 - val_loss: 1.3967
Epoch 4/10
128/128 - 2s - 13ms/step - accuracy: 0.7233 - f1: 0.1742 - loss: 1.3970 - val_accuracy: 0.7603 - val_f1: 0.1580 - val_loss: 1.2999
Epoch 5/10
128/128 - 2s - 12ms/step - accuracy: 0.7275 - f1: 0.1804 - loss: 1.1304 - val_accuracy: 0.7203 - val_f1: 0.1543 - val_loss: 1.3360
Epoch 6/10
128/128 - 2s - 12ms/step - accuracy: 0.7233 - f1: 0.1861 - loss: 0.9242 - val_accuracy: 0.7634 - val_f1: 0.1659 - val_loss: 1.1506
Epoch 7/10
128/128 - 2s - 12ms/step - accuracy: 0.7573 - f1: 0.1939 - loss: 0.7988 - val_

KeyboardInterrupt: 

In [2]:
# Single transformer training run with one fixed configuration.
emb = 256
num_blocks = 1
num_heads = 2
ff = 1

model = create_transformer(
    embed_dim=emb,
    number_of_blocks=num_blocks,
    num_heads=num_heads,
    ff_mult=ff,
)
train_config = {'use_sample_weights': True}
hyperparams = {'vocab_size': 1000, 'embedding_dim': emb, 'epochs': 10, 'batch_size': 32}

# Derive the run label from the values above so it cannot go stale when one of
# them is edited. For the current settings this evaluates to
# "Transformer_256_1b_2h_1ff_lr". The trailing "_lr" tag is kept verbatim; its
# meaning is not visible from this cell.
run_name = f"Transformer_{emb}_{num_blocks}b_{num_heads}h_{ff}ff_lr"

basic = NamedEntityRecognition(
    model,
    hyperparams,
    is_transformer=True,
    training_times=1,
    train_config=train_config,
    verbosing=2,
    name=run_name,
    results_file_name="./results/NER_Transformer",
)
basic.train_model()
basic.print_training_information()

Number of classes in training data: 119
Training model 1/1Epoch 1/10

128/128 - 127s - 993ms/step - accuracy: 0.6927 - f1: 0.0480 - loss: 4.6981 - val_accuracy: 0.8719 - val_f1: 0.0939 - val_loss: 0.8835
Epoch 2/10
128/128 - 16s - 124ms/step - accuracy: 0.8906 - f1: 0.1283 - loss: 3.2928 - val_accuracy: 0.9097 - val_f1: 0.1454 - val_loss: 0.6452
Epoch 3/10
128/128 - 9s - 73ms/step - accuracy: 0.9177 - f1: 0.1696 - loss: 2.3694 - val_accuracy: 0.9216 - val_f1: 0.1671 - val_loss: 0.5230
Epoch 4/10
128/128 - 13s - 103ms/step - accuracy: 0.9264 - f1: 0.1882 - loss: 1.7331 - val_accuracy: 0.9246 - val_f1: 0.1725 - val_loss: 0.4977
Epoch 5/10
128/128 - 27s - 210ms/step - accuracy: 0.9306 - f1: 0.1961 - loss: 1.3391 - val_accuracy: 0.9269 - val_f1: 0.1747 - val_loss: 0.4014
Epoch 6/10
128/128 - 20s - 154ms/step - accuracy: 0.9319 - f1: 0.2027 - loss: 1.0806 - val_accuracy: 0.9289 - val_f1: 0.1792 - val_loss: 0.3541
Epoch 7/10
128/128 - 18s - 140ms/step - accuracy: 0.9348 - f1: 0.2087 - loss: 