In [1]:
import pandas as pd
from tensorflow import keras
from os import path
import pickle


In [2]:
from proteinbert.finetuning import encode_train_and_valid_sets, encode_dataset
from proteinbert import OutputType, OutputSpec, evaluate_by_len, load_pretrained_model

In [3]:
from proteinbert import OutputType, OutputSpec, FinetuningModelGenerator, load_pretrained_model, \
finetune, evaluate_by_len

from proteinbert.conv_and_global_attention_model import get_model_with_hidden_layers_as_outputs

from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE

In [4]:
import wandb
from wandb.keras import WandbCallback

In [5]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [6]:
DATA_DIR = "../../data/"

In [7]:
OUTPUT_TYPE = OutputType(False, 'binary')
UNIQUE_LABELS = [0, 1]
OUTPUT_SPEC = OutputSpec(OUTPUT_TYPE, UNIQUE_LABELS)

In [8]:
pretrained_model_generator, input_encoder = load_pretrained_model("../../data/protein_bert/", "epoch_92400_sample_23500000.pkl")

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
train_set = pd.read_csv(path.join(DATA_DIR, "chen/deduplicated/chen_train.csv"), index_col=0)
test_set = pd.read_csv(path.join(DATA_DIR, "chen/deduplicated/chen_test.csv"), index_col=0)
train_set["seq"] = train_set["heavy"] + train_set["light"]
test_set["seq"] = test_set["heavy"] + test_set["light"]

In [11]:
len(train_set)

1291

In [12]:
len(test_set)

260

In [13]:
learning_rate = 1e-4
patience = (6, 4)

In [14]:
sizes = [0.5, 0.6, 0.7, 0.8, 0.9]

In [16]:
def train_and_save_model(train_data, valid_data, test_data, size):
    wandb.init(project=f"Dataset size exp", entity="kvetab")
    model_generator = FinetuningModelGenerator(pretrained_model_generator, OUTPUT_SPEC, pretraining_model_manipulation_function = \
            get_model_with_hidden_layers_as_outputs, dropout_rate = 0.5)

    training_callbacks = [
        keras.callbacks.ReduceLROnPlateau(patience = patience[1], factor = 0.25, min_lr = 1e-07, verbose = 1),
        keras.callbacks.EarlyStopping(patience = patience[0], restore_best_weights = True),
        WandbCallback()
    ]

    epoch_num = 100
    batch_size = 128
    #learning_rate = 1e-5
    wandb.config = {
          "learning_rate": learning_rate,
          "epochs": epoch_num * 2,
          "batch_size": batch_size
        }
    finetune(model_generator, input_encoder, OUTPUT_SPEC, train_data["seq"], train_data["Y"], valid_data['seq'], valid_data["Y"], \
            seq_len = 512, batch_size = batch_size, max_epochs_per_stage = epoch_num, lr = learning_rate, begin_with_frozen_pretrained_layers = True, \
            lr_with_frozen_pretrained_layers = 1e-02, n_final_epochs = 1, final_seq_len = 1024, final_lr = learning_rate / 10, callbacks = training_callbacks)
    mod = model_generator.create_model(seq_len = 512)
    mod_name = f"2022_04_22_size{size}"
    mod.save(path.join(DATA_DIR, f"protein_bert/by_data_size/{mod_name}"))

    results, confusion_matrix = evaluate_by_len(model_generator, input_encoder, OUTPUT_SPEC, test_data['seq'], test_data['Y'], \
            start_seq_len = 512, start_batch_size = 32)
    fn_fp = confusion_matrix.loc["0"][1] + confusion_matrix.loc["1"][0]
    f1 = confusion_matrix.loc["1"][1] / (confusion_matrix.loc["1"][1] + 0.5 * fn_fp)
    return confusion_matrix, f1

In [15]:
cms = {}
f1s = {}

In [17]:
for size in sizes:
    train, valid = train_test_split(train_set, test_size=1-size, random_state=42, stratify=train_set["Y"])
    #test = pd.concat([test, test_set])
    test = test_set
    #valid, test = train_test_split(test, test_size=0.5, random_state=333, stratify=test["Y"])
    print(len(train), len(valid), len(test))
    cm, f1_score = train_and_save_model(train, valid, test, size)
    cms[size] = cm
    f1s[size] = f1_score

645 646 260


[34m[1mwandb[0m: Currently logged in as: [33mkvetab[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_24-16:50:26] Training set: Filtered out 0 of 645 (0.0%) records of lengths exceeding 510.
[2022_04_24-16:50:27] Validation set: Filtered out 0 of 646 (0.0%) records of lengths exceeding 510.
[2022_04_24-16:50:27] Training with frozen pretrained layers...


2022-04-24 16:50:27.159831: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-04-24 16:50:27.728399: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9656 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:41:00.0, compute capability: 7.5
  "The `lr` argument is deprecated, use `learning_rate` instead.")
2022-04-24 16:50:29.565892: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/100


2022-04-24 16:50:37.412848: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 7605




[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100

Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100

Epoch 00046: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 47/100
Epoch 48/100
[2022_04_24-16:51:32] Training the entire fine-tuned model...
[2022_04_24-16:51:40] Incompatible number of optimizer weights - will not initialize them.


2022-04-24 16:53:02.290053: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.5/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.5/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


774 517 260


0,1
epoch,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇███▁▁▂▂▂▂▃▃▃▃▄▄▁
loss,█▇▄▃▄▅▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂
lr,██████▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▃▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_epoch,15.0
best_val_loss,0.39776
epoch,0.0
loss,0.34265
lr,1e-05
val_loss,0.40121


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_24-16:53:31] Training set: Filtered out 0 of 774 (0.0%) records of lengths exceeding 510.
[2022_04_24-16:53:31] Validation set: Filtered out 0 of 517 (0.0%) records of lengths exceeding 510.
[2022_04_24-16:53:31] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100

Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100

Epoch 00047: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100

Epoch 00057: ReduceLROnPlateau redu



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

Epoch 00012: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100

Epoch 00017: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100

Epoch 00034: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 35/100
Epoch 36/100
Epoch 37/100
[2022_04_24-16:56:34] Training on final epochs of sequence length 1024...
[2022_04_24-16:56:34] Training set: Filtered out 0 of 774 (0.0%) records of lengths exceeding 1022.
[2022_04_24-16:56:34] Validation set: Filtered out 0 of 517 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.6/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.6/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


903 388 260


0,1
epoch,▁▁▁▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▇▇▇▇██▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▁
loss,█▅▃▃▃▂▃▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁
lr,████████▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▂▁▂▁▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_epoch,30.0
best_val_loss,0.37393
epoch,0.0
loss,0.3256
lr,1e-05
val_loss,0.37983


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_24-16:57:29] Training set: Filtered out 0 of 903 (0.0%) records of lengths exceeding 510.
[2022_04_24-16:57:29] Validation set: Filtered out 0 of 388 (0.0%) records of lengths exceeding 510.
[2022_04_24-16:57:29] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 20/100
Epoch 21/100
[2022_04_24-16:58:04] Training the entire fine-tuned model...
[2022_04_24-16:58:12] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100

Epoch 00021: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 22/100
Epoch 23/100
[2022_04_24-16:59:11] Training on final epochs of sequence length 1024...
[2022_04_24-16:59:11] Training set: Filtered out 0 of 903 (0.0%) records of lengths exceeding 1022.
[2022_04_24-16:59:11] Validation set: Filtered out 0 of 388 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.7/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.7/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


1032 259 260


0,1
epoch,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇█▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██▁
loss,█▆▄▃▃▃▃▃▂▃▃▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁
lr,█████████████████▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,██▃▃▃▃▂▂▂▂▂▄▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁

0,1
best_epoch,16.0
best_val_loss,0.38567
epoch,0.0
loss,0.30687
lr,1e-05
val_loss,0.38759


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_24-17:00:07] Training set: Filtered out 0 of 1032 (0.0%) records of lengths exceeding 510.
[2022_04_24-17:00:07] Validation set: Filtered out 0 of 259 (0.0%) records of lengths exceeding 510.
[2022_04_24-17:00:07] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100

Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100

Epoch 00030: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 31/100
Epoch 32/100
[2022_04_24-17:00:54] Training the entire fine-tuned model...
[2022_04_24-17:01:02] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

Epoch 00012: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100

Epoch 00026: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100

Epoch 00032: ReduceLROnPlateau reducing learning rate to 1.56249996052793e-06.
Epoch 33/100
Epoch 34/100
[2022_04_24-17:02:31] Training on final epochs of sequence length 1024...
[2022_04_24-17:02:31] Training set: Filtered out 0 of 1032 (0.0%) records of lengths exceeding 1022.
[2022_04_24-17:02:31] Validation set: Filtered out 0 of 259 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.8/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.8/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


1161 130 260


0,1
epoch,▁▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇█▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▆▇▇██▁
loss,█▆▄▃▄▄▃▃▄▄▄▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▂
lr,███████████████▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▃▄▃▂▂▂▂▇▂▂▂▂▃▂▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_epoch,27.0
best_val_loss,0.36927
epoch,0.0
loss,0.30799
lr,1e-05
val_loss,0.38269


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_24-17:03:27] Training set: Filtered out 0 of 1161 (0.0%) records of lengths exceeding 510.
[2022_04_24-17:03:27] Validation set: Filtered out 0 of 130 (0.0%) records of lengths exceeding 510.
[2022_04_24-17:03:27] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

Epoch 00012: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100

Epoch 00040: ReduceLROnPlateau reducing learning rate to 3.9062499126885086e-05.
Epoch 41/100
[2022_04_24-17:04:23] Training the entire fine-tuned model...
[2022_04_24-17:04:32] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/1



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100

Epoch 00023: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 24/100
Epoch 25/100
[2022_04_24-17:05:44] Training on final epochs of sequence length 1024...
[2022_04_24-17:05:44] Training set: Filtered out 0 of 1161 (0.0%) records of lengths exceeding 1022.
[2022_04_24-17:06:01] Validation set: Filtered out 0 of 130 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.9/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size0.9/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [18]:
size = 1
train = train_set
valid, test = train_test_split(test_set, test_size=0.5, random_state=333, stratify=test_set["Y"])
print(len(train_set), len(valid), len(test))
cm, f1_score = train_and_save_model(train, valid, test, size)
cms[size] = cm
f1s[size] = f1_score

1291 130 130


0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██▁▂▂▂▂▃▃▃▃▄▄▄▅▅▁
loss,█▅▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▁▂
lr,████████▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▃▃▄▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▁▂▁▁▁▂▂▂▁

0,1
best_epoch,18.0
best_val_loss,0.32869
epoch,0.0
loss,0.2923
lr,1e-05
val_loss,0.34477


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_24-17:06:56] Training set: Filtered out 0 of 1291 (0.0%) records of lengths exceeding 510.
[2022_04_24-17:06:56] Validation set: Filtered out 0 of 130 (0.0%) records of lengths exceeding 510.
[2022_04_24-17:06:56] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100

Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100

Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 36/100
Epoch 37/100
[2022_04_24-17:07:52] Training the entire fine-tuned model...
[2022_04_24-17:08:35] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100

Epoch 00009: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 10/100
Epoch 11/100
[2022_04_24-17:09:13] Training on final epochs of sequence length 1024...
[2022_04_24-17:09:13] Training set: Filtered out 0 of 1291 (0.0%) records of lengths exceeding 1022.
[2022_04_24-17:09:14] Validation set: Filtered out 0 of 130 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size1/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/by_data_size/2022_04_22_size1/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [19]:
f1s

{0.5: 0.37894736842105264,
 0.6: 0.4864864864864865,
 0.7: 0.43010752688172044,
 0.8: 0.4807692307692308,
 0.9: 0.5094339622641509,
 1: 0.5531914893617021}

In [34]:
train_set

Unnamed: 0,Antibody_ID,heavy,light,Y,cluster,seq
535,2g60,EVQLQQSGGELAKPGASVKMSCKSSGYTFTAYAIHWAKQAAGAGLE...,DVLMTQAPLTLPVSLGDQASISCRSSQAIVHANGNTYLEWYLQKPG...,0,911,EVQLQQSGGELAKPGASVKMSCKSSGYTFTAYAIHWAKQAAGAGLE...
455,2a1w,DVKLVESGGGLVKPGGSLRLSCAASGFTFRNYGMSWVRQTPEKRLE...,DVLMTQSPLSLPVSLGDQASISCRCSQSIVKSNGHTYLEWYLQKPG...,0,723,DVKLVESGGGLVKPGGSLRLSCAASGFTFRNYGMSWVRQTPEKRLE...
459,2a77,DVKLVESGGGLVKPGGSLRLSCAASGFTFRNYGMSWVRQTPEKRLE...,DVLMTQSPLSLPVSLGDQASISCRCSQSIVKSNGHTYLEWYLQKPG...,0,723,DVKLVESGGGLVKPGGSLRLSCAASGFTFRNYGMSWVRQTPEKRLE...
1120,4ffy,QVQLLQPGAELVKPGASMKLSCKASGYTFTNWWMHWVRLRPGRGLE...,NIVLTQSPASLAVSLGQRATISCRASESVDHYGNSFIYWYQQKPGQ...,0,478,QVQLLQPGAELVKPGASMKLSCKASGYTFTNWWMHWVRLRPGRGLE...
851,3l5x,EVTLKESGPVLVKPTETLTLTCTVSGFSLSTYGMGVGWIRQPPGKA...,EIVLTQSPATLSLSPGERATLSCRASKSISKYLAWYQQKPGQAPRL...,0,433,EVTLKESGPVLVKPTETLTLTCTVSGFSLSTYGMGVGWIRQPPGKA...
...,...,...,...,...,...,...
1664,5f9w,QVQLVQSGAEVKKPGASVTVSCQASGYTFTNYYVHWVRQAPGQGLQ...,EIVLTQSPATLSVSPGERATLSCRASQSVRSNLAWYQQRPGQAPRL...,0,271,QVQLVQSGAEVKKPGASVTVSCQASGYTFTNYYVHWVRQAPGQGLQ...
2017,5x5x,QVKLQQSGAEFVKAGASVKLSCKTSGYTFNNYWIHWVKQSPGQGLE...,DIELTQSPLSLPVSLGDQASISCTSSQSLLHSNGDTYLHWYLQKPG...,0,861,QVKLQQSGAEFVKAGASVKLSCKTSGYTFNNYWIHWVKQSPGQGLE...
1400,4qww,EVQLVESGGGLVQPKGSLKLSCAASGFTFNTYAMHWVRQAPGKGLE...,QIVLTQSPAIMSASPGEKVTMTCSASSSVSYMYWYHQKPGSSPKPW...,0,436,EVQLVESGGGLVQPKGSLKLSCAASGFTFNTYAMHWVRQAPGKGLE...
59,1cgs,RVQLLESGAELMKPGASVQISCKATGYTFSEYWIEWVKERPGHGLE...,ELVMTQSPLSLPVSLGDQASISCRPSQSLVHSNGNTYLHWYLQKPG...,0,103,RVQLLESGAELMKPGASVQISCKATGYTFSEYWIEWVKERPGHGLE...


# CV on all data

In [26]:
chen_data = pd.read_csv(path.join(DATA_DIR, "chen/deduplicated/chen_data_w_clusters.csv"), index_col=0)
chen_data.head()

Unnamed: 0,Antibody_ID,heavy,light,Y,cluster
0,12e8,EVQLQQSGAEVVRSGASVKLSCTASGFNIKDYYIHWVKQRPEKGLE...,DIVMTQSQKFMSTSVGDRVSITCKASQNVGTAVAWYQQKPGQSPKL...,0,677
1,15c8,EVQLQQSGAELVKPGASVKLSCTASGFNIKDTYMHWVKQKPEQGLE...,DIVLTQSPAIMSASLGERVTMTCTASSSVSSSNLHWYQQKPGSSPK...,0,685
2,1a0q,EVQLQESDAELVKPGASVKISCKASGYTFTDHVIHWVKQKPEQGLE...,DIELTQSPSSLSASLGGKVTITCKASQDIKKYIGWYQHKPGKQPRL...,1,102
3,1a14,QVQLQQSGAELVKPGASVRMSCKASGYTFTNYNMYWVKQSPGQGLE...,DIELTQTTSSLSASLGDRVTISCRASQDISNYLNWYQQNPDGTVKL...,0,442
4,1a2y,QVQLQESGPGLVAPSQSLSITCTVSGFSLTGYGVNWVRQPPGKGLE...,DIVLTQSPASLSASVGETVTITCRASGNIHNYLAWYQQKQGKSPQL...,0,59


In [43]:
chen_data["cluster"].value_counts()

18     59
24     35
28     28
8      25
7      21
       ..
588     1
562     1
786     1
722     1
329     1
Name: cluster, Length: 932, dtype: int64

In [44]:
def split_into_k_sets(k, data):
    total = len(data)
    size = total // k + 1
    clusters_by_size = data["cluster"].value_counts().index
    cluster_sizes = data["cluster"].value_counts()
    groups = { i: [] for i in range(k) }
    group = 0
    for clust in clusters_by_size:
        start_group = group
        if len(groups[group]) + cluster_sizes[clust] > size:
            group += 1
            group = group % k
        while len(groups[group]) + cluster_sizes[clust] > size and group != start_group:
            group += 1
            group = group % k
        if len(groups[group]) < size:
            groups[group] += list(data[data["cluster"] == clust].index)
        else:
            print("error")
    return groups

In [45]:
g = split_into_k_sets(10, chen_data)
for key, gr in g.items():
    print(len(gr))

152
154
155
154
156
156
156
156
156
156


In [65]:
def merge_clusters(df_in):
    df = df_in.copy()
    df["cluster_merged"] = df["cluster"]
    df["cluster_merged"][df["cluster"] < 300] = df["cluster"][df["cluster"] < 300] // 30
    df["cluster_merged"][df["cluster"] >= 300] = df["cluster"][df["cluster"] >= 300] // 100
    print(f'Unique clusters after merge: {df["cluster_merged"].nunique()}')
    return df

In [76]:
merged = merge_clusters(chen_data[~chen_data["cluster"].isin([18, 24, 28])])

Unique clusters after merge: 10


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [77]:
merged["cluster_merged"].value_counts()

3    214
0    198
4    164
7    161
6    159
5    158
8    135
1    104
2     71
9     65
Name: cluster_merged, dtype: int64

In [78]:
rest = chen_data[chen_data["cluster"].isin([18, 24, 28])].copy()
rest

Unnamed: 0,Antibody_ID,heavy,light,Y,cluster
82,1dqj,EVQLQESGPSLVKPSQTLSLTCSVTGDSVTSDYWSWIRKFPGNKLE...,DIVLTQSPATLSVTPGDSVSLSCRASQSISNNLHWYQQKSHESPRL...,1,28
123,1fvd,EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDVNTAVAWYQQKPGKAPKL...,1,24
124,1fve,EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDVNTAVAWYQQKPGKAPKL...,1,24
139,1gpo,EVKLQESGPSLVKPSQTLSLTCSVTGDSITSDFWSWIRQFPGNRLE...,DIELTQSPATLSVTPGNSVSISCRASQSLVNEDGNTYLFWYQQKSH...,1,28
163,1ic4,DVQLQESGPSLVKPSQTLSLTCSVTGDSITSAYWSWIRKFPGNRLE...,DIVLTQSPATLSVTPGNSVSLSCRASQSIGNNLHWYQQKSHESPRL...,0,28
...,...,...,...,...,...
2347,6my5,EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTWIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDIPRRISGYVAWYQQKPGK...,0,18
2368,6o39,EVQLVESGGGLVQPGGSLRLSCAASGFNIHSSSIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQSVSSAVAWYQQKPGKAPKL...,0,24
2369,6o3a,EVQLVESGGGLVQPGGSLRLSCAASGFNFSSSSIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQSVSSAVAWYQQKPGKAPKL...,0,24
2370,6o3b,EVQLVESGGGLVQPGGSLRLSCAASGFNIYYYSMHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQSVSSAVAWYQQKPGKAPKL...,0,24


In [69]:
assignments = {
    18: 9,
    24: 2,
    28: 1
}

def assign_rest(cluster):
    return assignments[cluster]

In [79]:
rest["cluster_merged"] = rest["cluster"].apply(assign_rest)
rest.head()

Unnamed: 0,Antibody_ID,heavy,light,Y,cluster,cluster_merged
82,1dqj,EVQLQESGPSLVKPSQTLSLTCSVTGDSVTSDYWSWIRKFPGNKLE...,DIVLTQSPATLSVTPGDSVSLSCRASQSISNNLHWYQQKSHESPRL...,1,28,1
123,1fvd,EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDVNTAVAWYQQKPGKAPKL...,1,24,2
124,1fve,EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTYIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDVNTAVAWYQQKPGKAPKL...,1,24,2
139,1gpo,EVKLQESGPSLVKPSQTLSLTCSVTGDSITSDFWSWIRQFPGNRLE...,DIELTQSPATLSVTPGNSVSISCRASQSLVNEDGNTYLFWYQQKSH...,1,28,1
163,1ic4,DVQLQESGPSLVKPSQTLSLTCSVTGDSITSAYWSWIRKFPGNRLE...,DIVLTQSPATLSVTPGNSVSLSCRASQSIGNNLHWYQQKSHESPRL...,0,28,1


In [80]:
merged = pd.concat([merged, rest])
merged["cluster_merged"].value_counts()

3    214
0    198
4    164
7    161
6    159
5    158
8    135
1    132
9    124
2    106
Name: cluster_merged, dtype: int64

In [82]:
merged["seq"] = merged["heavy"] + merged["light"]

In [20]:
def train_and_save_named_model(train_data, valid_data, test_data, name, project_name):
    wandb.init(project=project_name, entity="kvetab")
    model_generator = FinetuningModelGenerator(pretrained_model_generator, OUTPUT_SPEC, pretraining_model_manipulation_function = \
            get_model_with_hidden_layers_as_outputs, dropout_rate = 0.5)

    training_callbacks = [
        keras.callbacks.ReduceLROnPlateau(patience = patience[1], factor = 0.25, min_lr = 1e-07, verbose = 1),
        keras.callbacks.EarlyStopping(patience = patience[0], restore_best_weights = True),
        WandbCallback()
    ]

    epoch_num = 100
    batch_size = 128
    #learning_rate = 1e-5
    wandb.config = {
          "learning_rate": learning_rate,
          "epochs": epoch_num * 2,
          "batch_size": batch_size
        }
    finetune(model_generator, input_encoder, OUTPUT_SPEC, train_data["seq"], train_data["Y"], valid_data['seq'], valid_data["Y"], \
            seq_len = 512, batch_size = batch_size, max_epochs_per_stage = epoch_num, lr = learning_rate, begin_with_frozen_pretrained_layers = True, \
            lr_with_frozen_pretrained_layers = 1e-02, n_final_epochs = 1, final_seq_len = 1024, final_lr = learning_rate / 10, callbacks = training_callbacks)
    mod = model_generator.create_model(seq_len = 512)
    mod.save(path.join(DATA_DIR, f"protein_bert/{name}"))

    results, confusion_matrix = evaluate_by_len(model_generator, input_encoder, OUTPUT_SPEC, test_data['seq'], test_data['Y'], \
            start_seq_len = 512, start_batch_size = 32)
    fn_fp = confusion_matrix.loc["0"][1] + confusion_matrix.loc["1"][0]
    f1 = confusion_matrix.loc["1"][1] / (confusion_matrix.loc["1"][1] + 0.5 * fn_fp)
    return confusion_matrix, f1

In [21]:
cms = {}
f1s = {}

In [87]:
for i in range(10):
    train = merged[merged["cluster_merged"] != i]
    test = merged[merged["cluster_merged"] == i]
    train, valid = train_test_split(train, test_size=0.2, random_state=333)
    cm, f1 = train_and_save_named_model(train, valid, test, f"10-fold-cv/2022_04_22_split_{i}", "10_fold_cv")
    cms[i] = cm
    f1s[i] = f1

[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-15:58:00] Training set: Filtered out 0 of 1082 (0.0%) records of lengths exceeding 510.
[2022_04_22-15:58:00] Validation set: Filtered out 0 of 271 (0.0%) records of lengths exceeding 510.
[2022_04_22-15:58:00] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100

Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 29/100
Epoch 30/100
[2022_04_22-15:58:46] Training the entire fine-tuned model...
[2022_04_22-15:58:54] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 00013: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 14/100
Epoch 15/100
[2022_04_22-15:59:39] Training on final epochs of sequence length 1024...
[2022_04_22-15:59:39] Training set: Filtered out 0 of 1082 (0.0%) records of lengths exceeding 1022.
[2022_04_22-15:59:41] Validation set: Filtered out 0 of 271 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_0/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_0/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███▁▁▂▂▂▂▃▃▃▄▄▄▄
loss,█▆▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁
lr,█████████████████████████▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▇▃▃▂▃▂▂▂▂▂▂▂▃▂▃▂▁▁▁▁▂▁▁▁▂▁▂▂▂▁▁▁▁▁▁▂▁▁▁

0,1
best_epoch,8.0
best_val_loss,0.38365
epoch,0.0
loss,0.32567
lr,1e-05
val_loss,0.39026


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:00:38] Training set: Filtered out 0 of 1135 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:00:38] Validation set: Filtered out 0 of 284 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:00:38] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 37/100
Epoch 38/100
[2022_04_22-16:01:34] Training the entire fine-tuned model...
[2022_04_22-16:02:17] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

Epoch 00007: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 00013: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 14/100
Epoch 15/100
[2022_04_22-16:03:03] Training on final epochs of sequence length 1024...
[2022_04_22-16:03:03] Training set: Filtered out 0 of 1135 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:03:04] Validation set: Filtered out 0 of 284 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_1/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_1/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██▁▁▁▂▂▂▃▃▃▃▄▁
loss,█▅▄▃▃▃▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁
lr,██████████████▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▃▂▂▂▂▂▂▃▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁

0,1
best_epoch,0.0
best_val_loss,0.40079
epoch,0.0
loss,0.32257
lr,1e-05
val_loss,0.40079


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:04:02] Training set: Filtered out 0 of 1156 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:04:02] Validation set: Filtered out 0 of 289 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:04:02] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100

Epoch 00031: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 32/100
Epoch 33/100
[2022_04_22-16:04:52] Training the entire fine-tuned model...
[2022_04_22-16:05:00] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100

Epoch 00007: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 8/100
Epoch 9/100
[2022_04_22-16:05:32] Training on final epochs of sequence length 1024...
[2022_04_22-16:05:32] Training set: Filtered out 0 of 1156 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:05:47] Validation set: Filtered out 0 of 289 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_2/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_2/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇███▁▁▁▂▂▂▂▃▁
loss,█▆▄▅▃▂▂▂▃▄▃▂▂▁▁▁▁▁▂▁▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
lr,██████████▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▄▂▂▁▂▃▃▄▁▂▁▁▁▁▁▁▁▂▁▁▁▁▂▁▁▂▁▁▁▁▁▁▁▂▂▁▁▁

0,1
best_epoch,2.0
best_val_loss,0.43135
epoch,0.0
loss,0.37825
lr,1e-05
val_loss,0.43428


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:06:45] Training set: Filtered out 0 of 1069 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:06:46] Validation set: Filtered out 0 of 268 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:06:46] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100

Epoch 00040: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100

Epoch 00044: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100

Epoch 00052: ReduceLROnPlateau reducing learning rate to 3.9062499126885086e-05.
Epoch 53/100
Epoch 



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

Epoch 00012: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 13/100
Epoch 14/100
[2022_04_22-16:08:48] Training on final epochs of sequence length 1024...
[2022_04_22-16:08:48] Training set: Filtered out 0 of 1069 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:08:53] Validation set: Filtered out 0 of 268 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_3/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_3/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇██▁▁▁▂▂▂▂▃▁
loss,█▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁
lr,███████████████▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▄▃▃▃▂▂▂▂▂▃▁▂▂▃▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▁▁▂▁▁▂▂▁

0,1
best_epoch,0.0
best_val_loss,0.38409
epoch,0.0
loss,0.32773
lr,1e-05
val_loss,0.38409


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:09:48] Training set: Filtered out 0 of 1109 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:09:48] Validation set: Filtered out 0 of 278 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:09:48] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100

Epoch 00017: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100

Epoch 00023: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100

Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 30/100
Epoch 31/100
[2022_04_22-16:10:36] Training the entire fine-tuned model...
[2022_04_22-16:10:46] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 00013: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 14/100
Epoch 15/100
[2022_04_22-16:11:32] Training on final epochs of sequence length 1024...
[2022_04_22-16:11:32] Training set: Filtered out 0 of 1109 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:11:33] Validation set: Filtered out 0 of 278 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_4/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_4/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇███▁▁▂▂▂▂▃▃▃▄▄▄▁
loss,█▄▄▃▃▃▃▃▃▂▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▂
lr,███████████████▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▃▃▂▂▂▂▃▂▂▂▄▃▁▁▂▂▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▁▂▁▁

0,1
best_epoch,8.0
best_val_loss,0.33011
epoch,0.0
loss,0.34131
lr,1e-05
val_loss,0.33201


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:12:32] Training set: Filtered out 0 of 1114 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:12:32] Validation set: Filtered out 0 of 279 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:12:32] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100

Epoch 00027: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 28/100
Epoch 29/100
[2022_04_22-16:13:18] Training the entire fine-tuned model...
[2022_04_22-16:13:30] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100

Epoch 00008: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 9/100
Epoch 10/100
[2022_04_22-16:14:04] Training on final epochs of sequence length 1024...
[2022_04_22-16:14:04] Training set: Filtered out 0 of 1114 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:14:17] Validation set: Filtered out 0 of 279 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_5/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_5/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██▁▁▁▂▂▂▃▃▃▃▁
loss,█▅▄▃▃▃▃▃▃▃▃▃▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▂
lr,███████████████████████████▃▃▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▃▃▃▄▃▄▂▄▂▂▂▂▂▂▃▂▃▁▂▁▂▁▁▂▁▃▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
best_epoch,3.0
best_val_loss,0.38524
epoch,0.0
loss,0.34379
lr,1e-05
val_loss,0.38604


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:15:12] Training set: Filtered out 0 of 1113 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:15:12] Validation set: Filtered out 0 of 279 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:15:12] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100

Epoch 00030: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100

Epoch 00038: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 39/100
Epoch 40/100
[2022_04_22-16:16:10] Training the entire fine-tuned model...
[2022_04_22-16:16:51] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100

Epoch 00006: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100

Epoch 00013: ReduceLROnPlateau reducing learning rate to 6.24999984211172e-06.
Epoch 14/100
Epoch 15/100
[2022_04_22-16:17:38] Training on final epochs of sequence length 1024...
[2022_04_22-16:17:38] Training set: Filtered out 0 of 1113 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:17:38] Validation set: Filtered out 0 of 279 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_6/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_6/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇██▁▁▂▂▂▂▃▃▃▃▁
loss,█▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
lr,██████████████████████▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▇▅▃▃▃▂▂▃▂▂▂▂▂▂▂▁▂▃▁▁▁▁▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁

0,1
best_epoch,8.0
best_val_loss,0.35197
epoch,0.0
loss,0.32524
lr,1e-05
val_loss,0.35409


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:18:34] Training set: Filtered out 0 of 1112 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:18:34] Validation set: Filtered out 0 of 278 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:18:34] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 23/100
Epoch 24/100
[2022_04_22-16:19:12] Training the entire fine-tuned model...
[2022_04_22-16:19:21] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100

Epoch 00018: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 19/100
Epoch 20/100
[2022_04_22-16:20:20] Training on final epochs of sequence length 1024...
[2022_04_22-16:20:20] Training set: Filtered out 0 of 1112 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:20:49] Validation set: Filtered out 0 of 278 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_7/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_7/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▆▁
loss,█▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▂
lr,████████████████████▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▃▃▄▃▃▃▂▂▂▂▂▃▂▂▂▂▄▃▂▂▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁

0,1
best_epoch,13.0
best_val_loss,0.31945
epoch,0.0
loss,0.30867
lr,1e-05
val_loss,0.32061


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:21:44] Training set: Filtered out 0 of 1132 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:21:44] Validation set: Filtered out 0 of 284 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:21:44] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100

Epoch 00030: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100

Epoch 00043: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100

Epoch 00049: ReduceLROnPlateau reducing learning rate to 3.9062499126885086e-05.
Epoch 50/100
Epoch 51/100
[2022_04_22-16:22:57] Training t



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100

Epoch 00009: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 10/100
Epoch 11/100
[2022_04_22-16:23:59] Training on final epochs of sequence length 1024...
[2022_04_22-16:23:59] Training set: Filtered out 0 of 1132 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:23:59] Validation set: Filtered out 0 of 284 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_8/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_8/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇███▁▁▂▂▂▂▂
loss,█▅▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁
lr,█████████████▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▃▂▂▃▁▃▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
best_epoch,44.0
best_val_loss,0.38264
epoch,0.0
loss,0.31755
lr,1e-05
val_loss,0.38513


[34m[1mwandb[0m: wandb version 0.12.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2022_04_22-16:24:54] Training set: Filtered out 0 of 1141 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:24:55] Validation set: Filtered out 0 of 286 (0.0%) records of lengths exceeding 510.
[2022_04_22-16:24:55] Training with frozen pretrained layers...


  "The `lr` argument is deprecated, use `learning_rate` instead.")


Epoch 1/100


[34m[1mwandb[0m: [32m[41mERROR[0m Can't save model, h5py returned error: Layer GlobalAttention has arguments in `__init__` and therefore must override `get_config`.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100

Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100

Epoch 00029: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 30/100
Epoch 31/100
Epoch 32/100
[2022_04_22-16:25:43] Training the entire fine-tuned model...
[2022_04_22-16:25:52] Incompatible number of optimizer weights - will not initialize them.
Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100

Epoch 00014: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.
Epoch 15/100
Epoch 16/100
[2022_04_22-16:26:42] Training on final epochs of sequence length 1024...
[2022_04_22-16:26:42] Training set: Filtered out 0 of 1141 (0.0%) records of lengths exceeding 1022.
[2022_04_22-16:27:21] Validation set: Filtered out 0 of 286 (0.0%) records of lengths exceeding 1022.








INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_9/assets


INFO:tensorflow:Assets written to: ../../data/protein_bert/10-fold-cv/2022_04_22_split_9/assets
  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [88]:
f1s

{0: 0.3870967741935484,
 1: 0.4375,
 2: 0.4,
 3: 0.5789473684210527,
 4: 0.19230769230769232,
 5: 0.2978723404255319,
 6: 0.4827586206896552,
 7: 0.5245901639344263,
 8: 0.5277777777777778,
 9: 0.5957446808510638}

In [23]:
from statistics import mean

In [24]:
f1_dict = {0: 0.3870967741935484,
 1: 0.4375,
 2: 0.4,
 3: 0.5789473684210527,
 4: 0.19230769230769232,
 5: 0.2978723404255319,
 6: 0.4827586206896552,
 7: 0.5245901639344263,
 8: 0.5277777777777778,
 9: 0.5957446808510638}

f1_scores = [value for key, value in f1_dict.items()]

In [25]:
mean(f1_scores)

0.44245954186007486