# Encoder-only Transformer Architecture

In [1]:
%load_ext autoreload
%autoreload

import sys
import torch
from torch.utils.data import DataLoader

sys.path.append('../scripts')
#sys.path.append('/content/drive/MyDrive/PMDS/Notebooks')
import ml_helper as mlh
import encoder as e

In [2]:
# Root directory of the data sets and pickled training results.
data_path = '../data'

# Prefer the second GPU (index 1) as before, but fall back to GPU 0 on
# single-GPU machines and to the CPU when CUDA is unavailable. A hard-coded
# "cuda:1" raises an invalid-device-ordinal error as soon as a tensor or
# module is moved to it on a host with only one GPU.
if torch.cuda.is_available():
    gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")
print(device)

cuda:1


## Unit Tests for Model Architecture

In [3]:
# Load the E. coli train/validation split; the forward-pass check further
# down reads the resulting loader from the module as `e.train_loader`.
e.load_train_valid_data("E.Coli")

Länge train_dataset: 3555
Länge valid_dataset: 420


In [4]:
# Scalar hyperparameters for the single model built in the unit-test section.
# NOTE: EMBED_DIM, NUM_ENCODER_LAYERS and NUM_HEADS are rebound to *lists* in
# the hyperparameter-tuning cells below, so re-run this cell before
# re-running the unit tests.
EMBED_DIM = 64
NUM_ENCODER_LAYERS = 2
NUM_HEADS = 4
DROP_OUT = 0.5

In [5]:
# Build the encoder classifier used by the unit tests and move it to the
# selected device; printing it shows the module tree.
# NOTE(review): in the stored printout only pos_encoder reports
# Dropout(p=0.5), while the encoder layers report p=0.1 and a feed-forward
# width of 2048. That suggests `dropout` may not be forwarded to the
# transformer layers -- verify in encoder.py.
model = e.EncoderClassifier(
    embed_dim=EMBED_DIM,
    num_layers=NUM_ENCODER_LAYERS,
    num_heads=NUM_HEADS,
    dropout=DROP_OUT,
    pos_enc=True
).to(device)
print(model)

EncoderClassifier(
  (emb): Embedding(22, 64, padding_idx=21)
  (pos_encoder): PositionalEncoding(
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
    )
    (linear1): Linear(in_features=64, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=64, bias=True)
    (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (lin

In [6]:
def print_parameters(model):
    """Print the total and the trainable parameter count of ``model``.

    ``model`` must expose ``parameters()`` yielding objects with ``numel()``
    and a ``requires_grad`` flag. Both counts are printed with thousands
    separators; nothing is returned.
    """
    n_all = 0
    n_trainable = 0
    for param in model.parameters():
        size = param.numel()
        n_all += size
        # Only parameters that receive gradients count as trainable.
        if param.requires_grad:
            n_trainable += size
    print(f"{n_all:,} total parameters.")
    print(f"{n_trainable:,} training parameters.")

In [7]:
# Report the parameter counts of the unit-test model built above.
print_parameters(model)

849,089 total parameters.
849,089 training parameters.


In [8]:
def test_forward_pass(model, data_loader):
  batch_data, batch_label = next(iter(data_loader))
  print(f"input dim: {batch_data.shape}")
  output, attn_weights = model(batch_data, attn_weights_needed=True)
  output = model(batch_data)
  print(f"output dim: {output.shape}")
  print(f"attn_weights dim: {attn_weights}")

In [9]:
# Smoke-test the model on the first batch of the loaded training data.
test_forward_pass(model, e.train_loader)

input dim: torch.Size([32, 500])
output dim: torch.Size([32, 500, 65])
attn_weights dim: [tensor([[[[0.0000e+00, 1.3633e-03, 2.1592e-02,  ..., 1.9552e-03,
           1.7929e-03, 1.2477e-03],
          [8.4340e-04, 1.6851e-04, 1.4660e-04,  ..., 9.3138e-04,
           1.4485e-03, 5.2256e-04],
          [5.8149e-03, 7.5903e-03, 6.8564e-05,  ..., 3.0978e-03,
           2.7073e-03, 8.8844e-04],
          ...,
          [1.7655e-03, 7.5203e-03, 7.8223e-03,  ..., 1.9364e-03,
           3.0255e-03, 2.5741e-03],
          [2.0578e-03, 1.3046e-03, 2.3105e-03,  ..., 2.4812e-03,
           1.8086e-03, 1.7091e-03],
          [1.7676e-04, 9.6464e-04, 3.1486e-03,  ..., 1.0651e-03,
           2.7422e-03, 1.9212e-03]],

         [[0.0000e+00, 3.3813e-04, 1.3571e-03,  ..., 1.4421e-03,
           8.4447e-04, 6.2162e-04],
          [1.2478e-04, 2.3566e-03, 1.5089e-03,  ..., 1.7540e-03,
           3.3589e-03, 0.0000e+00],
          [6.6393e-03, 6.7954e-04, 2.2539e-03,  ..., 7.9258e-04,
           1.6922e-0

## Hyperparameter tuning

### E.Coli

In [7]:
# Select the organism for this tuning section and load its train/validation
# data into the `e` module.
organism = "E.Coli"
e.load_train_valid_data(organism)

Länge train_dataset: 3555
Länge valid_dataset: 420


In [8]:
# Train the best E. coli configuration for longer (400 epochs).
# Every hyperparameter is a one-element list, so exactly one model is trained.
# validation_stop=False presumably disables the validation-based early
# stopping seen in the shorter sweeps -- confirm in encoder.py.
# The call is unpacked as (final accuracies, per-epoch accuracies keyed by
# run name, best model state).
EMBED_DIM = [64]
NUM_ENCODER_LAYERS = [2]
NUM_HEADS = [4]
dropouts = [0.5]
POS_ENC = [False]
accuracies, all_accuracies, best_model_state = e.hyper_parameter_training(EMBED_DIM, NUM_ENCODER_LAYERS, NUM_HEADS, dropouts, POS_ENC, epochs=400, validation_stop=False)

----- Start Training: 64 emb, 2 layers, 4 heads, 0.5 dropout, positional encoding: False, 400 epochs -----


  from .autonotebook import tqdm as notebook_tqdm


Epoch [1/400], Loss: 1.7964, Eval Accuracy: 0.5155, Took 1.56 s
Epoch [2/400], Loss: 1.2462, Eval Accuracy: 0.5146, Took 1.14 s
Epoch [3/400], Loss: 1.1489, Eval Accuracy: 0.5153, Took 1.15 s
Epoch [4/400], Loss: 1.1064, Eval Accuracy: 0.5166, Took 1.14 s
Epoch [5/400], Loss: 1.083, Eval Accuracy: 0.5186, Took 1.14 s
Epoch [6/400], Loss: 1.0689, Eval Accuracy: 0.5166, Took 1.16 s
Epoch [7/400], Loss: 1.0594, Eval Accuracy: 0.5176, Took 1.16 s
Epoch [8/400], Loss: 1.0536, Eval Accuracy: 0.5196, Took 1.16 s
Epoch [9/400], Loss: 1.0486, Eval Accuracy: 0.5204, Took 1.13 s
Epoch [10/400], Loss: 1.0456, Eval Accuracy: 0.522, Took 1.13 s
Epoch [11/400], Loss: 1.0435, Eval Accuracy: 0.5184, Took 1.14 s
Epoch [12/400], Loss: 1.0409, Eval Accuracy: 0.5198, Took 1.15 s
Epoch [13/400], Loss: 1.0407, Eval Accuracy: 0.5218, Took 1.14 s
Epoch [14/400], Loss: 1.0382, Eval Accuracy: 0.5218, Took 1.14 s
Epoch [15/400], Loss: 1.0381, Eval Accuracy: 0.5199, Took 1.14 s
Epoch [16/400], Loss: 1.0368, Eval A

In [9]:
# Bundle the outcome of the 400-epoch E. coli run for pickling.
training_results = {
    # Training duration entered by hand; presumably seconds (about 400 epochs
    # at ~1.15 s each) -- TODO confirm.
    "time": 459,
    "best_model_state": best_model_state,
    # The key must match the run name of the configuration trained above
    # (64 emb, 2 layers, 4 heads, 0.5 dropout, 400 epochs).
    "all_accuracies": all_accuracies['encoder_64em_2l_4h_05dr_400ep']
}

In [10]:
# Persist the results under the data directory of the organism currently set
# on the `e` module (presumably set by e.load_train_valid_data -- confirm).
mlh.to_pickle(training_results, data_path+f'/{e.organism}/training_results_encoder.pkl')

#### Dropout

In [34]:
# Sweep the dropout rate with all other hyperparameters fixed. The stored
# output shows these runs using 50 epochs when `epochs` is not passed.
# NOTE(review): in the stored result all five dropout values end within
# 0.5174-0.5189. Worth confirming in encoder.py that `dropout` actually
# reaches the encoder layers when positional encoding is off.
EMBED_DIM = [256]
NUM_ENCODER_LAYERS = [4]
NUM_HEADS = [4]
dropouts = [0.1, 0.2, 0.3, 0.4, 0.5]
POS_ENC = [False]
e.hyper_parameter_training(EMBED_DIM, NUM_ENCODER_LAYERS, NUM_HEADS, dropouts, POS_ENC)

----- Start Training: 256 emb, 4 layers, 4 heads, 0.1 dropout, positional encoding: False, 50 epochs -----
Epoch [1/50], Loss: 1.1639, Eval Accuracy: 0.4962, Took 3.68 s
Epoch [2/50], Loss: 1.0538, Eval Accuracy: 0.5166, Took 3.68 s
Epoch [3/50], Loss: 1.0461, Eval Accuracy: 0.5178, Took 3.69 s
Epoch [4/50], Loss: 1.0421, Eval Accuracy: 0.52, Took 3.68 s
Epoch [5/50], Loss: 1.0397, Eval Accuracy: 0.5076, Took 3.71 s
Epoch [6/50], Loss: 1.0378, Eval Accuracy: 0.5178, Took 3.71 s
Epoch [7/50], Loss: 1.0361, Eval Accuracy: 0.5162, Took 3.71 s
Epoch [8/50], Loss: 1.0355, Eval Accuracy: 0.5191, Took 3.71 s
Epoch [9/50], Loss: 1.0326, Eval Accuracy: 0.5233, Took 3.71 s
Epoch [10/50], Loss: 1.0331, Eval Accuracy: 0.5229, Took 3.71 s
Epoch [11/50], Loss: 1.0323, Eval Accuracy: 0.5174, Took 3.71 s
Epoch [12/50], Loss: 1.0308, Eval Accuracy: 0.5192, Took 3.72 s
Epoch [13/50], Loss: 1.031, Eval Accuracy: 0.5226, Took 3.72 s
Epoch [14/50], Loss: 1.0299, Eval Accuracy: 0.5203, Took 3.71 s
Epoch [15

{'encoder_256em_4l_4h_01dr_50ep': 0.5183,
 'encoder_256em_4l_4h_02dr_50ep': 0.5189,
 'encoder_256em_4l_4h_03dr_50ep': 0.5182,
 'encoder_256em_4l_4h_04dr_50ep': 0.5182,
 'encoder_256em_4l_4h_05dr_50ep': 0.5174}

#### Embedding Dimension

In [35]:
# Sweep the embedding dimension with layers, heads, dropout and positional
# encoding fixed.
embed_dims = [16, 32, 64, 128, 256, 512]
NUM_ENCODER_LAYERS = [4]
NUM_HEADS = [4]
DROPOUTS = [0.5]
POS_ENC = [False]
e.hyper_parameter_training(embed_dims, NUM_ENCODER_LAYERS, NUM_HEADS, DROPOUTS, POS_ENC)

----- Start Training: 16 emb, 4 layers, 4 heads, 0.5 dropout, positional encoding: False, 50 epochs -----
Epoch [1/50], Loss: 3.0332, Eval Accuracy: 0.5129, Took 2.03 s
Epoch [2/50], Loss: 2.3796, Eval Accuracy: 0.5145, Took 2.02 s
Epoch [3/50], Loss: 1.9794, Eval Accuracy: 0.5145, Took 2.02 s
Epoch [4/50], Loss: 1.7237, Eval Accuracy: 0.5155, Took 2.03 s
Epoch [5/50], Loss: 1.5641, Eval Accuracy: 0.5155, Took 2.02 s
Epoch [6/50], Loss: 1.4599, Eval Accuracy: 0.5145, Took 2.02 s
Epoch [7/50], Loss: 1.3854, Eval Accuracy: 0.5155, Took 2.02 s
Epoch [8/50], Loss: 1.3323, Eval Accuracy: 0.5155, Took 2.02 s
Epoch [9/50], Loss: 1.291, Eval Accuracy: 0.5155, Took 2.03 s
Epoch [10/50], Loss: 1.2609, Eval Accuracy: 0.5155, Took 2.03 s
Epoch [11/50], Loss: 1.2362, Eval Accuracy: 0.5155, Took 2.03 s
Epoch [12/50], Loss: 1.2157, Eval Accuracy: 0.5155, Took 2.03 s
Epoch [13/50], Loss: 1.2004, Eval Accuracy: 0.5155, Took 2.03 s
Epoch [14/50], Loss: 1.1867, Eval Accuracy: 0.5155, Took 2.03 s
Epoch [1

{'encoder_16em_4l_4h_05dr_50ep': 0.5145,
 'encoder_32em_4l_4h_05dr_50ep': 0.5199,
 'encoder_64em_4l_4h_05dr_50ep': 0.5219,
 'encoder_128em_4l_4h_05dr_50ep': 0.5195,
 'encoder_256em_4l_4h_05dr_50ep': 0.5174,
 'encoder_512em_4l_4h_05dr_50ep': 0.506}

#### Number of Encoder Layers and Heads

In [36]:
# Grid over encoder depth and number of attention heads (4 x 4 = 16 runs)
# with embedding dimension, dropout and positional encoding fixed.
EMBED_DIM = [64]
num_encoder_layers = [1, 2, 4, 8]
num_heads = [1, 2, 4, 8]
DROPOUTS = [0.5]
POS_ENC = [False]
e.hyper_parameter_training(EMBED_DIM, num_encoder_layers, num_heads, DROPOUTS, POS_ENC)



----- Start Training: 64 emb, 1 layers, 1 heads, 0.5 dropout, positional encoding: False, 50 epochs -----
Epoch [1/50], Loss: 1.8965, Eval Accuracy: 0.5156, Took 0.51 s
Epoch [2/50], Loss: 1.2653, Eval Accuracy: 0.5155, Took 0.51 s
Epoch [3/50], Loss: 1.1597, Eval Accuracy: 0.5155, Took 0.51 s
Epoch [4/50], Loss: 1.1151, Eval Accuracy: 0.5155, Took 0.51 s
Epoch [5/50], Loss: 1.0909, Eval Accuracy: 0.5166, Took 0.51 s
Epoch [6/50], Loss: 1.0769, Eval Accuracy: 0.5145, Took 0.5 s
Epoch [7/50], Loss: 1.0671, Eval Accuracy: 0.5152, Took 0.51 s
Epoch [8/50], Loss: 1.0622, Eval Accuracy: 0.5154, Took 0.51 s
Stopped early after epoch 8 as validation accuracy was lower than average of the last 7 accuracies.
Last Loss: 1.0622, Last Eval Accuracy: 0.5154, Took 4.05 s
Model saved as 20240603143811_encoder_64em_1l_1h_05dr_8ep.pt
----- Start Training: 64 emb, 1 layers, 2 heads, 0.5 dropout, positional encoding: False, 50 epochs -----
Epoch [1/50], Loss: 1.8958, Eval Accuracy: 0.5162, Took 0.55 s
Ep

{'encoder_64em_1l_1h_05dr_50ep': 0.5154,
 'encoder_64em_1l_2h_05dr_50ep': 0.516,
 'encoder_64em_1l_4h_05dr_50ep': 0.5165,
 'encoder_64em_1l_8h_05dr_50ep': 0.5187,
 'encoder_64em_2l_1h_05dr_50ep': 0.5221,
 'encoder_64em_2l_2h_05dr_50ep': 0.5198,
 'encoder_64em_2l_4h_05dr_50ep': 0.5225,
 'encoder_64em_2l_8h_05dr_50ep': 0.5222,
 'encoder_64em_4l_1h_05dr_50ep': 0.5214,
 'encoder_64em_4l_2h_05dr_50ep': 0.5228,
 'encoder_64em_4l_4h_05dr_50ep': 0.5219,
 'encoder_64em_4l_8h_05dr_50ep': 0.5219,
 'encoder_64em_8l_1h_05dr_50ep': 0.5209,
 'encoder_64em_8l_2h_05dr_50ep': 0.5183,
 'encoder_64em_8l_4h_05dr_50ep': 0.5181,
 'encoder_64em_8l_8h_05dr_50ep': 0.5218}

#### Positional Encoding

In [39]:
# Compare the same configuration with and without positional encoding.
EMBED_DIM = [64]
NUM_ENCODER_LAYERS = [2]
NUM_HEADS = [4]
DROPOUTS = [0.5]
pos_enc = [True, False]
e.hyper_parameter_training(EMBED_DIM, NUM_ENCODER_LAYERS, NUM_HEADS, DROPOUTS, pos_enc)

----- Start Training: 64 emb, 2 layers, 4 heads, 0.5 dropout, positional encoding: True, 50 epochs -----
Epoch [1/50], Loss: 2.0597, Eval Accuracy: 0.5154, Took 1.17 s
Epoch [2/50], Loss: 1.2627, Eval Accuracy: 0.5155, Took 1.17 s
Epoch [3/50], Loss: 1.1491, Eval Accuracy: 0.5137, Took 1.16 s
Epoch [4/50], Loss: 1.1068, Eval Accuracy: 0.5155, Took 1.16 s
Epoch [5/50], Loss: 1.0852, Eval Accuracy: 0.5145, Took 1.17 s
Epoch [6/50], Loss: 1.0717, Eval Accuracy: 0.5153, Took 1.17 s
Epoch [7/50], Loss: 1.0629, Eval Accuracy: 0.5159, Took 1.17 s
Epoch [8/50], Loss: 1.0585, Eval Accuracy: 0.5163, Took 1.17 s
Epoch [9/50], Loss: 1.0538, Eval Accuracy: 0.517, Took 1.14 s
Epoch [10/50], Loss: 1.0511, Eval Accuracy: 0.5168, Took 1.14 s
Epoch [11/50], Loss: 1.0485, Eval Accuracy: 0.5164, Took 1.14 s
Epoch [12/50], Loss: 1.0462, Eval Accuracy: 0.5164, Took 1.15 s
Epoch [13/50], Loss: 1.0459, Eval Accuracy: 0.5161, Took 1.15 s
Stopped early after epoch 13 as validation accuracy was lower than averag

{'encoder_64em_2l_4h_05dr_posenc_50ep': 0.5161,
 'encoder_64em_2l_4h_05dr_50ep': 0.5225}

### Drosophila.Melanogaster

In [29]:
# Switch to the Drosophila melanogaster data set for the following cells.
organism = "Drosophila.Melanogaster"
e.load_train_valid_data(organism)

Länge train_dataset: 33040
Länge valid_dataset: 4073


In [44]:
# Coarse grid search on Drosophila: 2 x 2 x 2 x 2 = 16 configurations,
# each limited to 10 epochs.
embed_dims = [64, 128]
num_encoder_layers = [2, 4]
num_heads = [2, 4]
DROPOUTS = [0.2, 0.5]
POS_ENC = [False]
# The other cells of this notebook unpack the result as
# (accuracies, all_accuracies, best_model_state). Unpack here as well so that
# `accuracies` holds the accuracy summary rather than the whole tuple.
accuracies, _, _ = e.hyper_parameter_training(embed_dims, num_encoder_layers, num_heads, DROPOUTS, POS_ENC, epochs=10, print_epochs=True)

----- Start Training: 64 emb, 2 layers, 2 heads, 0.2 dropout, positional encoding: False, 10 epochs -----
Epoch [1/10], Loss: 1.1824, Eval Accuracy: 0.4967, Took 9.54 s
Epoch [2/10], Loss: 1.081, Eval Accuracy: 0.497, Took 9.79 s
Epoch [3/10], Loss: 1.076, Eval Accuracy: 0.4977, Took 9.09 s
Epoch [4/10], Loss: 1.0746, Eval Accuracy: 0.498, Took 9.1 s
Epoch [5/10], Loss: 1.0737, Eval Accuracy: 0.4975, Took 9.1 s
Epoch [6/10], Loss: 1.073, Eval Accuracy: 0.4972, Took 9.11 s
Epoch [7/10], Loss: 1.0725, Eval Accuracy: 0.4987, Took 9.1 s
Epoch [8/10], Loss: 1.0719, Eval Accuracy: 0.4989, Took 9.1 s
Epoch [9/10], Loss: 1.0715, Eval Accuracy: 0.4982, Took 10.15 s
Epoch [10/10], Loss: 1.071, Eval Accuracy: 0.4991, Took 9.1 s
Last Loss: 1.071, Last Eval Accuracy: 0.4991, Took 93.17 s
Model saved as 20240603150541_encoder_64em_2l_2h_02dr_10ep.pt
----- Start Training: 64 emb, 2 layers, 2 heads, 0.5 dropout, positional encoding: False, 10 epochs -----
Epoch [1/10], Loss: 1.2169, Eval Accuracy: 0.4

In [45]:
# Train the best-suited configuration for longer (100 epochs), once without
# and once with positional encoding. Only the final accuracies are kept.
embed_dims = [64]
num_encoder_layers = [4]
num_heads = [4]
DROPOUTS = [0.2]
POS_ENC = [False, True]
accuracies, _, _ = e.hyper_parameter_training(embed_dims, num_encoder_layers, num_heads, DROPOUTS, POS_ENC, epochs=100, print_epochs=True)

----- Start Training: 64 emb, 4 layers, 4 heads, 0.2 dropout, positional encoding: False, 100 epochs -----
Epoch [1/100], Loss: 1.1768, Eval Accuracy: 0.4964, Took 20.78 s
Epoch [2/100], Loss: 1.0809, Eval Accuracy: 0.4962, Took 20.48 s
Epoch [3/100], Loss: 1.0759, Eval Accuracy: 0.4978, Took 20.5 s
Epoch [4/100], Loss: 1.0744, Eval Accuracy: 0.498, Took 20.51 s
Epoch [5/100], Loss: 1.0738, Eval Accuracy: 0.4971, Took 20.5 s
Epoch [6/100], Loss: 1.0727, Eval Accuracy: 0.4968, Took 20.45 s
Epoch [7/100], Loss: 1.0719, Eval Accuracy: 0.4984, Took 20.78 s
Epoch [8/100], Loss: 1.0712, Eval Accuracy: 0.4991, Took 20.79 s
Epoch [9/100], Loss: 1.0712, Eval Accuracy: 0.4979, Took 20.52 s
Epoch [10/100], Loss: 1.0706, Eval Accuracy: 0.4993, Took 20.53 s
Epoch [11/100], Loss: 1.0698, Eval Accuracy: 0.4988, Took 20.77 s
Epoch [12/100], Loss: 1.0691, Eval Accuracy: 0.4987, Took 20.72 s
Epoch [13/100], Loss: 1.0687, Eval Accuracy: 0.4993, Took 20.61 s
Epoch [14/100], Loss: 1.0678, Eval Accuracy: 0.

In [42]:
# Train the best Drosophila model for even longer (400 epochs).
# NOTE(review): the stored output of this cell starts at "Epoch [301/400]",
# which does not match start_epoch=0 / existing_model=None below. The output
# appears to come from a resumed run -- confirm the arguments before
# re-running.
embed_dims = [64]
num_encoder_layers = [4]
num_heads = [4]
DROPOUTS = [0.2]
POS_ENC = [True]
accuracies, all_accuracies, best_model_state = e.hyper_parameter_training(embed_dims, num_encoder_layers, num_heads, DROPOUTS, POS_ENC, epochs=400, validation_stop=False, start_epoch=0, current_best_model_state=None, existing_model=None)

----- Start Training: 64 emb, 4 layers, 4 heads, 0.2 dropout, positional encoding: True, 400 epochs -----
Epoch [301/400], Loss: 0.9997, Eval Accuracy: 0.5293, Took 20.6 s
Epoch [302/400], Loss: 0.9994, Eval Accuracy: 0.5291, Took 20.61 s
Epoch [303/400], Loss: 0.9994, Eval Accuracy: 0.5288, Took 20.69 s
Epoch [304/400], Loss: 0.9993, Eval Accuracy: 0.5279, Took 20.45 s
Epoch [305/400], Loss: 0.9994, Eval Accuracy: 0.5287, Took 20.45 s
Epoch [306/400], Loss: 0.9991, Eval Accuracy: 0.5288, Took 20.47 s
Epoch [307/400], Loss: 0.9992, Eval Accuracy: 0.53, Took 20.48 s
Epoch [308/400], Loss: 0.9991, Eval Accuracy: 0.528, Took 20.57 s
Epoch [309/400], Loss: 0.9989, Eval Accuracy: 0.5275, Took 20.46 s
Epoch [310/400], Loss: 0.9989, Eval Accuracy: 0.5279, Took 20.48 s
Epoch [311/400], Loss: 0.9991, Eval Accuracy: 0.5296, Took 20.46 s
Epoch [312/400], Loss: 0.9987, Eval Accuracy: 0.5298, Took 20.49 s
Epoch [313/400], Loss: 0.9987, Eval Accuracy: 0.5298, Took 20.47 s
Epoch [314/400], Loss: 0.99

Total training time (seconds, summed over three separate runs): 4146 + 2060 + 2080

In [43]:
# Bundle the outcome of the 400-epoch Drosophila run for pickling.
training_results = {
    # Sum of the hand-recorded durations of the separate training sessions;
    # presumably seconds -- TODO confirm.
    "time": 4146 + 2060 + 2080,
    "best_model_state": best_model_state,
    # The key must match the run name of the configuration trained above
    # (64 emb, 4 layers, 4 heads, positional encoding, 0.2 dropout, 400 epochs).
    "all_accuracies": all_accuracies['encoder_64em_4l_4h_posenc_02dr_400ep']
}

In [45]:
# Persist the results under the data directory of the organism currently set
# on the `e` module (presumably set by e.load_train_valid_data -- confirm).
mlh.to_pickle(training_results, data_path+f'/{e.organism}/training_results_encoder.pkl')

### Homo.Sapiens

In [3]:
%%time

# Switch to the human data set; the stored timing shows this load taking
# about two minutes.
organism = "Homo.Sapiens"
e.load_train_valid_data(organism)

Länge train_dataset: 140711
Länge valid_dataset: 17784
CPU times: user 2min 10s, sys: 551 ms, total: 2min 10s
Wall time: 2min 10s


In [8]:
# Train the configuration chosen on Drosophila for 100 epochs on the human
# data, with and without positional encoding.
embed_dims = [64]
num_encoder_layers = [4]
num_heads = [4]
DROPOUTS = [0.2]
POS_ENC = [True, False]
e.hyper_parameter_training(embed_dims, num_encoder_layers, num_heads, DROPOUTS, POS_ENC, epochs=100)

----- Start Training: 64 emb, 4 layers, 4 heads, 0.2 dropout, positional encoding: True, 100 epochs -----


  from .autonotebook import tqdm as notebook_tqdm


Epoch [1/100], Loss: 1.1145, Eval Accuracy: 0.4759, Took 88.67 s
Epoch [2/100], Loss: 1.0813, Eval Accuracy: 0.4778, Took 87.75 s
Epoch [3/100], Loss: 1.0784, Eval Accuracy: 0.4809, Took 88.37 s
Epoch [4/100], Loss: 1.0759, Eval Accuracy: 0.4834, Took 87.4 s
Epoch [5/100], Loss: 1.0738, Eval Accuracy: 0.4837, Took 87.3 s
Epoch [6/100], Loss: 1.0719, Eval Accuracy: 0.4875, Took 87.73 s
Epoch [7/100], Loss: 1.0701, Eval Accuracy: 0.4889, Took 87.27 s
Epoch [8/100], Loss: 1.0681, Eval Accuracy: 0.4886, Took 87.55 s
Epoch [9/100], Loss: 1.0661, Eval Accuracy: 0.4922, Took 87.25 s
Epoch [10/100], Loss: 1.0642, Eval Accuracy: 0.4936, Took 87.25 s
Epoch [11/100], Loss: 1.0622, Eval Accuracy: 0.4928, Took 87.27 s
Epoch [12/100], Loss: 1.0605, Eval Accuracy: 0.496, Took 87.29 s
Epoch [13/100], Loss: 1.0588, Eval Accuracy: 0.4975, Took 87.23 s
Epoch [14/100], Loss: 1.057, Eval Accuracy: 0.4985, Took 87.34 s
Epoch [15/100], Loss: 1.0554, Eval Accuracy: 0.5002, Took 87.46 s
Epoch [16/100], Loss: 1

{'encoder_64em_4l_4h_posenc_02dr_100ep': 0.5213}

In [7]:
# Train the best model for longer (400 epochs), resuming at epoch 300 from
# an existing model and its best state so far.
# NOTE(review): `training_results_1` is not defined in any cell visible in
# this notebook, and the only visible `model` is the 2-layer unit-test model
# from the top, not a 4-layer one. Presumably both were restored from a
# saved checkpoint in a removed cell -- re-add that loading step, otherwise
# this cell fails (or resumes the wrong model) after Restart & Run All.
embed_dims = [64]
num_encoder_layers = [4]
num_heads = [4]
DROPOUTS = [0.2]
POS_ENC = [True]
accuracies, all_accuracies, best_model_state = e.hyper_parameter_training(embed_dims, num_encoder_layers, num_heads, DROPOUTS, POS_ENC, epochs=400, validation_stop=False, start_epoch=300, current_best_model_state=training_results_1["best_model_state"], existing_model=model)

----- Start Training: 64 emb, 4 layers, 4 heads, 0.2 dropout, positional encoding: True, 400 epochs -----


  from .autonotebook import tqdm as notebook_tqdm


Epoch [301/400], Loss: 1.0051, Eval Accuracy: 0.5313, Took 87.27 s
Epoch [302/400], Loss: 1.0049, Eval Accuracy: 0.5315, Took 87.05 s
Epoch [303/400], Loss: 1.005, Eval Accuracy: 0.5316, Took 87.22 s
Epoch [304/400], Loss: 1.005, Eval Accuracy: 0.5314, Took 87.19 s
Epoch [305/400], Loss: 1.0048, Eval Accuracy: 0.5316, Took 87.35 s
Epoch [306/400], Loss: 1.0049, Eval Accuracy: 0.5319, Took 87.17 s
Epoch [307/400], Loss: 1.0048, Eval Accuracy: 0.5309, Took 87.11 s
Epoch [308/400], Loss: 1.0049, Eval Accuracy: 0.5321, Took 87.17 s
Epoch [309/400], Loss: 1.0047, Eval Accuracy: 0.5321, Took 87.18 s
Epoch [310/400], Loss: 1.0047, Eval Accuracy: 0.5316, Took 87.16 s
Epoch [311/400], Loss: 1.0046, Eval Accuracy: 0.5314, Took 87.33 s
Epoch [312/400], Loss: 1.0048, Eval Accuracy: 0.532, Took 87.19 s
Epoch [313/400], Loss: 1.0046, Eval Accuracy: 0.5315, Took 87.22 s
Epoch [314/400], Loss: 1.0046, Eval Accuracy: 0.5315, Took 87.22 s
Epoch [315/400], Loss: 1.0045, Eval Accuracy: 0.5315, Took 87.15 

Total training time (seconds, summed over five separate runs): 4435 + 13280 + 5089 + 4376 + 8752

In [None]:
# Bundle the outcome of the 400-epoch human run for pickling.
# NOTE(review): this cell has no execution count in the stored notebook;
# make sure it is run before the pickle cell below so that the human results
# (and not a stale `training_results`) are written.
training_results = {
    # Sum of the hand-recorded durations of the separate training sessions;
    # presumably seconds -- TODO confirm.
    "time": 4435 + 13280 + 5089 + 4376 + 8752,
    "best_model_state": best_model_state,
    # The key must match the run name of the configuration trained above
    # (64 emb, 4 layers, 4 heads, positional encoding, 0.2 dropout, 400 epochs).
    "all_accuracies": all_accuracies['encoder_64em_4l_4h_posenc_02dr_400ep']
}

In [6]:
# Persist the human results. The target directory is spelled out here, so
# the former f-prefix (an f-string without placeholders) is dropped; the
# resulting path is unchanged.
mlh.to_pickle(training_results, data_path+'/Homo.Sapiens/training_results_encoder.pkl')