# Finetuning

Trained with Google TPU v2

#Set Up

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# @title Importing

#Installs
!pip install pyarrow fastparquet

# Packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import tensorflow as tf
import os
from IPython.display import clear_output

# Sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

# Keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
#from keras.layers.embeddings import Embedding
from keras.metrics import AUC

# Tf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import random

Collecting fastparquet
  Downloading fastparquet-2024.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Downloading fastparquet-2024.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cramjam-2.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cramjam, fastparquet
Successfully installed cramjam-2.8.3 fastparquet-2024.5.0


In [None]:
#@title Random Seeds
import random
## SEEDS

# Hard Code Random Seeds.
r1 = 0
r2 = 1

# Set Random Seed
random.seed(r1)
tf.random.set_seed(r2)

In [None]:
#@title Connect to TPU
print("TensorFlow version:", tf.__version__)

# Connect to the TPU cluster or fall back to CPU/GPU
try:
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver()  # Tries to connect to the TPU
  tf.config.experimental_connect_to_cluster(resolver)
  tf.tpu.experimental.initialize_tpu_system(resolver)
  strategy = tf.distribute.TPUStrategy(resolver)
  devices = tf.config.list_logical_devices('TPU')
  print('TPU devices:', devices)
except ValueError:
  print("Could not connect to TPU; using CPU/GPU strategy instead.")
  strategy = tf.distribute.get_strategy()

# Example computation using the strategy
with strategy.scope():
  a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
  b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

  @tf.function
  def matmul_fn(x, y):
    return tf.matmul(x, y)

  z = strategy.run(matmul_fn, args=(a, b))

print(z)

TensorFlow version: 2.15.0
TPU devices: [LogicalDevice(name='/device:TPU:0', device_type='TPU'), LogicalDevice(name='/device:TPU:1', device_type='TPU'), LogicalDevice(name='/device:TPU:2', device_type='TPU'), LogicalDevice(name='/device:TPU:3', device_type='TPU'), LogicalDevice(name='/device:TPU:4', device_type='TPU'), LogicalDevice(name='/device:TPU:5', device_type='TPU'), LogicalDevice(name='/device:TPU:6', device_type='TPU'), LogicalDevice(name='/device:TPU:7', device_type='TPU')]
PerReplica:{
  0: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  1: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  2: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  3: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  4: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  5: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  6: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  7: tf.

# Hyperparameters & Settings

In [None]:
# write where you want to save all your files and retrieve encoder
root = "/content/drive/MyDrive/Extra Curricular /ActigraphyTransformer/A-NEW/ALBERT Experiments /ALBERT Finetuning/Models"
encoder_root = "/content/drive/MyDrive/Extra Curricular /ActigraphyTransformer/A-NEW/ALBERT Experiments /ALBERT Pretraining/Encoders"
results_root = "/content/drive/MyDrive/Extra Curricular /ActigraphyTransformer/A-NEW/ALBERT Experiments /ALBERT Finetuning/Results"

In [None]:
"""
Please Fill out Parameters Below
"""
## Model size
# eg. ["small", "medium", "large", "huge"]
size = "medium"

## Mask ratio
# eg. [.25, .50, .75]
mask_ratio = 0.90

## Smoothing
# eg. [True, False]
smoothing = False

## Loss Function
# eg. [True, False], meaning MSE on only the masked portion or everything in the reconstruction
mse_only_masked = False

In [None]:
"""
Finetuning Specific Settings
"""

## Finetuning Styles
# Add more if needed

finetuning_styles = ["full", "linear_probe"]

In [None]:
# Encoder naming
mask_name = int(mask_ratio*100)

encoder_name = f"/encoder_{size}_{mask_name}"

if smoothing == True:
  encoder_name = f"{encoder_name}_smoothed"
else:
  encoder_name = f"{encoder_name}_unsmoothed"

if mse_only_masked == True:
  encoder_name = f"{encoder_name}_mse_only_masked.h5"
else:
  encoder_name = f"{encoder_name}_mse_all.h5"

print(encoder_name)

/encoder_medium_90_unsmoothed_mse_all.h5


In [None]:
# Start of finetuning name
ft_name = f"/AcT_{size}_{mask_name}"

if smoothing == True:
  ft_name = f"{ft_name}_smoothed"
else:
  ft_name = f"{ft_name}_unsmoothed"

if mse_only_masked == True:
  ft_name = f"{ft_name}_mse_only_masked"
else:
  ft_name = f"{ft_name}_mse_all"

print(ft_name)

/AcT_medium_90_unsmoothed_mse_all


In [None]:
encoder_path = encoder_root + encoder_name

# Hyperparameter Additional Info

In [None]:
"""
Model Size
"""
## Model Size
if size == "small":

  patch_size = 18
  embed_dim = 96
  # encoder
  encoder_num_heads = 6
  encoder_ff_dim = 256
  encoder_num_layers = 1
  encoder_rate = 0.1
  # decoder
  decoder_num_heads = 6
  decoder_ff_dim = 256
  decoder_num_layers = 1
  decoder_rate = 0.1

if size == "medium":

  patch_size = 18
  embed_dim = 96
  # encoder
  encoder_num_heads = 12
  encoder_ff_dim = 256
  encoder_num_layers = 2
  encoder_rate = 0.1
  # decoder
  decoder_num_heads = 12
  decoder_ff_dim = 256
  decoder_num_layers = 1
  decoder_rate = 0.1

if size == "large":

  patch_size = 9
  embed_dim = 96
  # encoder
  encoder_num_heads = 12
  encoder_ff_dim = 256
  encoder_num_layers = 4
  encoder_rate = 0.1
  # decoder
  decoder_num_heads = 12
  decoder_ff_dim = 256
  decoder_num_layers = 1
  decoder_rate = 0.1

if size == "huge":

  patch_size = 5
  embed_dim = 96
  # encoder
  encoder_num_heads = 12
  encoder_ff_dim = 256
  encoder_num_layers = 8
  encoder_rate = 0.1
  # decoder
  decoder_num_heads = 12
  decoder_ff_dim = 256
  decoder_num_layers = 1
  decoder_rate = 0.1

In [None]:
"""
For Finetuning
"""

## Model Size
if size == "small":

  learning_rate = 0.00001
  early_stopping_patience = 250

  reduce_lr_patience = 75
  min_lr = 1e-6

if size == "medium":

  learning_rate = 0.00001
  early_stopping_patience = 250

  reduce_lr_patience = 75
  min_lr = 1e-6

if size == "large":

  learning_rate = 0.000001
  early_stopping_patience = 250

  reduce_lr_patience = 75
  min_lr = 1e-7


if size == "huge":

  learning_rate = 0.0000005
  early_stopping_patience = 250

  reduce_lr_patience = 100
  min_lr = 1e-8

In [None]:
"""
Smoothing
"""
if smoothing == True:
  data_folder_location = "/content/drive/MyDrive/Extra Curricular /ActigraphyTransformer/A-NEW/Baseline Tests/Data_2013/All_Meds/Smooth/TestSize2000_set1"

else:
  data_folder_location = "/content/drive/MyDrive/Extra Curricular /ActigraphyTransformer/A-NEW/Baseline Tests/Data_2013/All_Meds/Raw/TestSize2000_set1"

# Process Data

In [None]:
# Which sizes to look at
train_sizes = [100, 250, 500, 1000, 2500, 5769] # for ALBERT hyperparameter tuning, we can test on less datasets
test_size = 2000 # fixed

In [None]:
# first save the test sets
X_test = np.load(os.path.join(data_folder_location, f'X_test_{test_size}.npy'))
y_test = np.load(os.path.join(data_folder_location, f'y_test_{test_size}.npy'))


# Scale the test set
scaler = StandardScaler()
scaler.fit(X_test)
X_test = scaler.transform(X_test)


In [None]:
print(X_test.shape)
print(y_test.shape)

(2000, 10080)
(2000, 1)


In [None]:
train_sets = {}
val_sets = {}

In [None]:
for size in train_sizes:
  X_train = np.load(os.path.join(data_folder_location, f'X_train_{size}.npy'))
  y_train = np.load(os.path.join(data_folder_location, f'y_train_{size}.npy'))
  train_sets[size] = (X_train, y_train)

  X_val = np.load(os.path.join(data_folder_location, f'X_val_{size}.npy'))
  y_val = np.load(os.path.join(data_folder_location, f'y_val_{size}.npy'))
  val_sets[size] = (X_val, y_val)

print("Data loaded successfully.")
print(f"Train set size: {len(train_sets)}")
print(f"Val set size: {len(val_sets)}")

Data loaded successfully.
Train set size: 6
Val set size: 6


In [None]:
for key, value in train_sets.items():
  print(f"For train size {key}: ")

  # print the shapes of X train and y train
  print(f"X train shape: {value[0].shape}")
  print(f"y train shape: {value[1].shape}")

  # also print the shapes of X val and y val
  print(f"X val shape: {val_sets[key][0].shape}")
  print(f"y val shape: {val_sets[key][1].shape}")

  print("================================")

For train size 100: 
X train shape: (80, 10080)
y train shape: (80, 1)
X val shape: (20, 10080)
y val shape: (20, 1)
For train size 250: 
X train shape: (200, 10080)
y train shape: (200, 1)
X val shape: (50, 10080)
y val shape: (50, 1)
For train size 500: 
X train shape: (400, 10080)
y train shape: (400, 1)
X val shape: (100, 10080)
y val shape: (100, 1)
For train size 1000: 
X train shape: (800, 10080)
y train shape: (800, 1)
X val shape: (200, 10080)
y val shape: (200, 1)
For train size 2500: 
X train shape: (2000, 10080)
y train shape: (2000, 1)
X val shape: (500, 10080)
y val shape: (500, 1)
For train size 5769: 
X train shape: (4615, 10080)
y train shape: (4615, 1)
X val shape: (1154, 10080)
y val shape: (1154, 1)


# Wait for later

# LOAD ALBERT


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, Model

# Modified Transformer Block to output attention weights with explicit layer names (otherwise the same as the )
def TransformerBlock(embed_dim, num_heads, ff_dim, rate=0.1, name_prefix="encoder"):
    input_layer = layers.Input(shape=(None, embed_dim), name=f"{name_prefix}_input")
    attention_layer = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, name=f"{name_prefix}_attention")
    attention_output, attention_weights = attention_layer(input_layer, input_layer, return_attention_scores=True)
    attention_output = layers.Dropout(rate, name=f"{name_prefix}_dropout")(attention_output)
    out1 = layers.LayerNormalization(epsilon=1e-6, name=f"{name_prefix}_norm1")(input_layer + attention_output)
    ff_output = layers.Dense(ff_dim, activation="relu", name=f"{name_prefix}_ff1")(out1)
    ff_output = layers.Dense(embed_dim, name=f"{name_prefix}_ff2")(ff_output)
    ff_output = layers.Dropout(rate, name=f"{name_prefix}_dropout2")(ff_output)
    final_output = layers.LayerNormalization(epsilon=1e-6, name=f"{name_prefix}_norm2")(out1 + ff_output)
    return models.Model(inputs=input_layer, outputs=[final_output, attention_weights], name=f"{name_prefix}_transformer")

# Sine/Cosine positional embeddings
def get_positional_embeddings(num_patches, embed_dim):
    position = tf.range(num_patches, dtype=tf.float32)[:, tf.newaxis]
    div_term = tf.exp(tf.range(0, embed_dim, 2, dtype=tf.float32) * (-tf.math.log(10000.0) / embed_dim))
    pos_embeddings = tf.concat([tf.sin(position * div_term), tf.cos(position * div_term)], axis=-1)
    return pos_embeddings


In [None]:
# Function to load the encoder and build the fine-tuning model with consistent patching and positional embedding
def create_finetuning_model(encoder_path=encoder_path, input_size=10080, patch_size=patch_size, embed_dim=embed_dim, return_attention=False):

    # Load the saved encoder model
    encoder_model = tf.keras.models.load_model(encoder_path, custom_objects={'TransformerBlock': TransformerBlock, 'get_positional_embeddings': get_positional_embeddings})

    # Define new inputs for the fine-tuning model
    inputs = layers.Input(shape=(input_size,), name="finetuning_inputs")

    # Get encoder outputs
    encoder_outputs = encoder_model(inputs)
    encoder_outputs, attention_weights = encoder_outputs[0], encoder_outputs[1:]

    # Pass through a GlobalAveragePooling layer
    x = layers.GlobalAveragePooling1D(name="global_avg_pool")(encoder_outputs)
    x = layers.Dropout(0.1, name="dropout")(x)
    x = layers.Dense(128, activation='relu', name="dense_128")(x)
    outputs = layers.Dense(1, activation="sigmoid", name="output")(x)

    # Include attention weights in the final model outputs if requested
    if return_attention:
        outputs = [outputs] + attention_weights

    # Create and return the fine-tuning model
    finetuning_model = models.Model(inputs=inputs, outputs=outputs, name="finetuning_model")
    return finetuning_model


## Compiling

In [None]:
# Compile the model -----
with strategy.scope():
  train_model = create_finetuning_model(return_attention=False)
  train_model.compile(
    # Metrics
    loss= tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics= tf.keras.metrics.AUC(name='auc'),
    # Optimizer
    optimizer= tf.keras.optimizers.Adam(
      learning_rate=learning_rate,
      beta_1=0.9,
      beta_2=0.999,
      epsilon=1e-07,
      amsgrad=False
))

# Save the original model weights
train_model.save_weights('original_model_weights.h5')

train_model.summary()



Model: "finetuning_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 finetuning_inputs (InputLa  [(None, 10080)]           0         
 yer)                                                            
                                                                 
 encoder_model (Functional)  [(None, 560, 96),         993440    
                              (None, 12, 560, 560),              
                              (None, 12, 560, 560)]              
                                                                 
 global_avg_pool (GlobalAve  (None, 96)                0         
 ragePooling1D)                                                  
                                                                 
 dropout (Dropout)           (None, 96)                0         
                                                                 
 dense_128 (Dense)           (None, 128)          

# Training

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# # reduce learning rate (Don't use this it's buggy)
# reduce_lr = ReduceLROnPlateau(
#     monitor='val_loss',    # Monitor validation loss
#     factor=0.5,            # Reduce rate by a factor of 0.5
#     patience=75,           # Number of epochs with no improvement after which learning rate will be reduced
#     min_lr=1e-6,           # Minimum learning rate that the reduction can reach
#     verbose=1              # Print messages when reducing the learning rate
# )

# earlyStopping callback
early_stopper = EarlyStopping(
    monitor='val_auc',  # monitor validation AUC
    mode='max',  # maximize AUC
    patience=early_stopping_patience,  # number of epochs with no improvement after which training will be stopped
    verbose=1,  # display messages when early stopping is triggered
    restore_best_weights=True  # restore model weights from the epoch with the best value of the monitored quantity
)

In [None]:
# Training Loop

scores = {}
scores["test"] = {}
scores["val"] = {}

for finetuning_style in finetuning_styles:
  print(f"\nFinetuning Style: {finetuning_style}")

  scores["test"][finetuning_style] = {}
  scores["val"][finetuning_style] = {}

  for size in train_sizes:

    print(f"\nSIZE:{size}")


    # Load X_train and fit
    X_train, y_train = train_sets[size]
    train_scalar = StandardScaler()
    train_scalar.fit(X_train)
    X_train = train_scalar.transform(X_train)

    # Load X_val and fit
    X_val, y_val = val_sets[size]
    val_scalar = StandardScaler()
    val_scalar.fit(X_val)
    X_val = val_scalar.transform(X_val)

    # Set Class Weights = Balance
    class1 = sum(y_train)
    total = len(y_train)
    class0 = total-class1

    class_weights = {0: (class1/total),
                  1: ((class0/total))}

    # Reset model weights
    train_model.load_weights('original_model_weights.h5')

    if finetuning_style == "linear_probe":
      for layer in train_model.layers:
        if layer.name == "encoder_model":
            layer.trainable = False

    if finetuning_style == "full":
      for layer in train_model.layers:
        layer.trainable = True

    print(" ")
    # Verify by printing each layer's name and trainable status
    for layer in train_model.layers:
        print(layer.name, layer.trainable)
    print(" ")

    # Train model
    history = train_model.fit(
        X_train, y_train,
        epochs= 10000, # Edit
        batch_size= 64,
        validation_data = (X_val, y_val),
        shuffle=False,
        class_weight=class_weights,
        callbacks = [early_stopper],
        verbose = 2)

    # Save model
    current_model_name = f"{ft_name}_n{size}_{finetuning_style}.h5"
    print(current_model_name)
    train_model.save(root+current_model_name)

    # Test model
    test_scores = train_model.evaluate(X_test, y_test, batch_size=64) # Test Set
    scores["test"][finetuning_style][size] = test_scores[1]
    print("Test AUC:", test_scores[1])

    val_scores = train_model.evaluate(X_val, y_val, batch_size=64) # Val Set

    scores["val"][finetuning_style][size] = val_scores[1]
    print("Val AUC:", val_scores[1])

# Save all results in a .txt
print("\n\n")
print(scores)

results_path = f"{results_root}{ft_name}_RESULTS.txt"

try:
    file_to_write = open(results_path, 'wt')
    file_to_write.write(str(scores))
    file_to_write.close()

except:
    print("Unable to write to file")


Finetuning Style: full

SIZE:100
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
2/2 - 22s - loss: 0.0334 - auc: 0.5224 - val_loss: 0.7328 - val_auc: 0.6579 - 22s/epoch - 11s/step
Epoch 2/10000
2/2 - 1s - loss: 0.0335 - auc: 0.6410 - val_loss: 0.7312 - val_auc: 0.6842 - 665ms/epoch - 332ms/step
Epoch 3/10000
2/2 - 1s - loss: 0.0333 - auc: 0.6058 - val_loss: 0.7294 - val_auc: 0.7105 - 695ms/epoch - 348ms/step
Epoch 4/10000
2/2 - 1s - loss: 0.0355 - auc: 0.0833 - val_loss: 0.7276 - val_auc: 0.7368 - 661ms/epoch - 330ms/step
Epoch 5/10000
2/2 - 1s - loss: 0.0336 - auc: 0.5449 - val_loss: 0.7256 - val_auc: 0.6316 - 627ms/epoch - 314ms/step
Epoch 6/10000
2/2 - 1s - loss: 0.0349 - auc: 0.2019 - val_loss: 0.7239 - val_auc: 0.6579 - 630ms/epoch - 315ms/step
Epoch 7/10000
2/2 - 1s - loss: 0.0346 - auc: 0.2821 - val_loss: 0.7221 - val_auc: 0.6579 - 632ms/epoch - 316ms/step
Epoch 8/10000
2/2 - 1s - loss: 0.0341 - auc: 0.490

  saving_api.save_model(


Test AUC: 0.4326503276824951
Val AUC: 0.736842155456543

SIZE:250
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
4/4 - 3s - loss: 0.0407 - auc: 0.4351 - val_loss: 0.7334 - val_auc: 0.0408 - 3s/epoch - 674ms/step
Epoch 2/10000
4/4 - 1s - loss: 0.0420 - auc: 0.2676 - val_loss: 0.7316 - val_auc: 0.0714 - 746ms/epoch - 187ms/step
Epoch 3/10000
4/4 - 1s - loss: 0.0406 - auc: 0.4747 - val_loss: 0.7289 - val_auc: 0.0714 - 661ms/epoch - 165ms/step
Epoch 4/10000
4/4 - 1s - loss: 0.0415 - auc: 0.3608 - val_loss: 0.7261 - val_auc: 0.0714 - 670ms/epoch - 167ms/step
Epoch 5/10000
4/4 - 1s - loss: 0.0413 - auc: 0.3458 - val_loss: 0.7230 - val_auc: 0.0714 - 670ms/epoch - 168ms/step
Epoch 6/10000
4/4 - 1s - loss: 0.0411 - auc: 0.3969 - val_loss: 0.7193 - val_auc: 0.0714 - 681ms/epoch - 170ms/step
Epoch 7/10000
4/4 - 1s - loss: 0.0412 - auc: 0.4549 - val_loss: 0.7162 - val_auc: 0.0816 - 724ms/epoch - 181ms/step
Epoch 8/10000
4/4 

  saving_api.save_model(


Test AUC: 0.6835736036300659
Val AUC: 1.0

SIZE:500
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
7/7 - 3s - loss: 0.0406 - auc: 0.4536 - val_loss: 0.7326 - val_auc: 0.5584 - 3s/epoch - 396ms/step
Epoch 2/10000
7/7 - 1s - loss: 0.0415 - auc: 0.2855 - val_loss: 0.7340 - val_auc: 0.5825 - 811ms/epoch - 116ms/step
Epoch 3/10000
7/7 - 1s - loss: 0.0417 - auc: 0.3491 - val_loss: 0.7366 - val_auc: 0.5876 - 809ms/epoch - 116ms/step
Epoch 4/10000
7/7 - 1s - loss: 0.0409 - auc: 0.4461 - val_loss: 0.7383 - val_auc: 0.6271 - 808ms/epoch - 115ms/step
Epoch 5/10000
7/7 - 1s - loss: 0.0407 - auc: 0.4883 - val_loss: 0.7399 - val_auc: 0.6271 - 765ms/epoch - 109ms/step
Epoch 6/10000
7/7 - 1s - loss: 0.0405 - auc: 0.5101 - val_loss: 0.7407 - val_auc: 0.6667 - 813ms/epoch - 116ms/step
Epoch 7/10000
7/7 - 1s - loss: 0.0404 - auc: 0.5087 - val_loss: 0.7417 - val_auc: 0.6890 - 829ms/epoch - 118ms/step
Epoch 8/10000
7/7 - 1s - loss: 0

  saving_api.save_model(


Test AUC: 0.7426977157592773
Val AUC: 0.8453608751296997

SIZE:1000
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
13/13 - 5s - loss: 0.0388 - auc: 0.5098 - val_loss: 0.7354 - val_auc: 0.2831 - 5s/epoch - 411ms/step
Epoch 2/10000
13/13 - 1s - loss: 0.0386 - auc: 0.5605 - val_loss: 0.7315 - val_auc: 0.3625 - 1s/epoch - 79ms/step
Epoch 3/10000
13/13 - 1s - loss: 0.0384 - auc: 0.5828 - val_loss: 0.7269 - val_auc: 0.4338 - 1s/epoch - 78ms/step
Epoch 4/10000
13/13 - 1s - loss: 0.0383 - auc: 0.5983 - val_loss: 0.7228 - val_auc: 0.4991 - 1s/epoch - 78ms/step
Epoch 5/10000
13/13 - 1s - loss: 0.0382 - auc: 0.6441 - val_loss: 0.7192 - val_auc: 0.5447 - 1s/epoch - 78ms/step
Epoch 6/10000
13/13 - 1s - loss: 0.0384 - auc: 0.5841 - val_loss: 0.7158 - val_auc: 0.5825 - 1s/epoch - 78ms/step
Epoch 7/10000
13/13 - 1s - loss: 0.0377 - auc: 0.7092 - val_loss: 0.7133 - val_auc: 0.6207 - 1s/epoch - 77ms/step
Epoch 8/10000
13/13 - 1s -

  saving_api.save_model(


Test AUC: 0.7451436519622803
Val AUC: 0.8217353820800781

SIZE:2500
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
32/32 - 2s - loss: 0.0392 - auc: 0.4990 - val_loss: 0.7287 - val_auc: 0.4620 - 2s/epoch - 55ms/step
Epoch 2/10000
32/32 - 2s - loss: 0.0387 - auc: 0.5825 - val_loss: 0.7227 - val_auc: 0.5204 - 2s/epoch - 49ms/step
Epoch 3/10000
32/32 - 2s - loss: 0.0390 - auc: 0.5360 - val_loss: 0.7182 - val_auc: 0.5805 - 2s/epoch - 49ms/step
Epoch 4/10000
32/32 - 2s - loss: 0.0386 - auc: 0.6045 - val_loss: 0.7141 - val_auc: 0.6344 - 2s/epoch - 50ms/step
Epoch 5/10000
32/32 - 2s - loss: 0.0386 - auc: 0.6041 - val_loss: 0.7105 - val_auc: 0.6585 - 2s/epoch - 49ms/step
Epoch 6/10000
32/32 - 2s - loss: 0.0383 - auc: 0.6415 - val_loss: 0.7069 - val_auc: 0.6911 - 2s/epoch - 49ms/step
Epoch 7/10000
32/32 - 2s - loss: 0.0383 - auc: 0.6471 - val_loss: 0.7018 - val_auc: 0.6971 - 2s/epoch - 49ms/step
Epoch 8/10000
32/32 - 2s - 

  saving_api.save_model(


Test AUC: 0.7418853640556335
Val AUC: 0.7638487815856934

SIZE:5769
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
73/73 - 3s - loss: 0.0393 - auc: 0.4806 - val_loss: 0.7183 - val_auc: 0.4828 - 3s/epoch - 42ms/step
Epoch 2/10000
73/73 - 3s - loss: 0.0391 - auc: 0.5431 - val_loss: 0.7124 - val_auc: 0.5736 - 3s/epoch - 39ms/step
Epoch 3/10000
73/73 - 3s - loss: 0.0387 - auc: 0.5946 - val_loss: 0.7076 - val_auc: 0.6307 - 3s/epoch - 39ms/step
Epoch 4/10000
73/73 - 3s - loss: 0.0387 - auc: 0.5887 - val_loss: 0.7009 - val_auc: 0.6579 - 3s/epoch - 39ms/step
Epoch 5/10000
73/73 - 3s - loss: 0.0386 - auc: 0.6088 - val_loss: 0.6976 - val_auc: 0.6834 - 3s/epoch - 39ms/step
Epoch 6/10000
73/73 - 3s - loss: 0.0382 - auc: 0.6410 - val_loss: 0.6950 - val_auc: 0.6985 - 3s/epoch - 39ms/step
Epoch 7/10000
73/73 - 3s - loss: 0.0383 - auc: 0.6291 - val_loss: 0.6912 - val_auc: 0.7033 - 3s/epoch - 38ms/step
Epoch 8/10000
73/73 - 3s - 

  saving_api.save_model(


Test AUC: 0.7533736824989319
Val AUC: 0.7875000238418579

Finetuning Style: linear_probe

SIZE:100
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
2/2 - 1s - loss: 0.0341 - auc: 0.4808 - val_loss: 0.7336 - val_auc: 0.6316 - 773ms/epoch - 387ms/step
Epoch 2/10000
2/2 - 1s - loss: 0.0334 - auc: 0.6763 - val_loss: 0.7331 - val_auc: 0.6316 - 606ms/epoch - 303ms/step
Epoch 3/10000
2/2 - 1s - loss: 0.0364 - auc: 0.0929 - val_loss: 0.7325 - val_auc: 0.6316 - 605ms/epoch - 302ms/step
Epoch 4/10000
2/2 - 1s - loss: 0.0342 - auc: 0.4038 - val_loss: 0.7314 - val_auc: 0.6842 - 657ms/epoch - 329ms/step
Epoch 5/10000
2/2 - 1s - loss: 0.0349 - auc: 0.4776 - val_loss: 0.7302 - val_auc: 0.7105 - 650ms/epoch - 325ms/step
Epoch 6/10000
2/2 - 1s - loss: 0.0328 - auc: 0.8077 - val_loss: 0.7288 - val_auc: 0.7368 - 668ms/epoch - 334ms/step
Epoch 7/10000
2/2 - 1s - loss: 0.0349 - auc: 0.4006 - val_loss: 0.7270 - val_auc: 0.7895 - 692ms/

  saving_api.save_model(


Test AUC: 0.5167486667633057
Val AUC: 0.8157894611358643

SIZE:250
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
4/4 - 1s - loss: 0.0415 - auc: 0.2977 - val_loss: 0.7335 - val_auc: 0.0408 - 873ms/epoch - 218ms/step
Epoch 2/10000
4/4 - 1s - loss: 0.0409 - auc: 0.4394 - val_loss: 0.7318 - val_auc: 0.0714 - 745ms/epoch - 186ms/step
Epoch 3/10000
4/4 - 1s - loss: 0.0407 - auc: 0.6009 - val_loss: 0.7301 - val_auc: 0.0714 - 695ms/epoch - 174ms/step
Epoch 4/10000
4/4 - 1s - loss: 0.0408 - auc: 0.4966 - val_loss: 0.7278 - val_auc: 0.0714 - 701ms/epoch - 175ms/step
Epoch 5/10000
4/4 - 1s - loss: 0.0402 - auc: 0.6478 - val_loss: 0.7253 - val_auc: 0.0510 - 698ms/epoch - 174ms/step
Epoch 6/10000
4/4 - 1s - loss: 0.0415 - auc: 0.2891 - val_loss: 0.7228 - val_auc: 0.0612 - 682ms/epoch - 170ms/step
Epoch 7/10000
4/4 - 1s - loss: 0.0407 - auc: 0.4433 - val_loss: 0.7202 - val_auc: 0.0714 - 686ms/epoch - 171ms/step
Epoch 8/10000

  saving_api.save_model(


Test AUC: 0.6920478343963623
Val AUC: 1.0

SIZE:500
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
7/7 - 1s - loss: 0.0413 - auc: 0.3809 - val_loss: 0.7319 - val_auc: 0.5670 - 931ms/epoch - 133ms/step
Epoch 2/10000
7/7 - 1s - loss: 0.0411 - auc: 0.3982 - val_loss: 0.7325 - val_auc: 0.5550 - 775ms/epoch - 111ms/step
Epoch 3/10000
7/7 - 1s - loss: 0.0409 - auc: 0.4367 - val_loss: 0.7336 - val_auc: 0.6254 - 808ms/epoch - 115ms/step
Epoch 4/10000
7/7 - 1s - loss: 0.0408 - auc: 0.4148 - val_loss: 0.7347 - val_auc: 0.6151 - 767ms/epoch - 110ms/step
Epoch 5/10000
7/7 - 1s - loss: 0.0413 - auc: 0.3179 - val_loss: 0.7361 - val_auc: 0.6581 - 810ms/epoch - 116ms/step
Epoch 6/10000
7/7 - 1s - loss: 0.0406 - auc: 0.5010 - val_loss: 0.7373 - val_auc: 0.6443 - 766ms/epoch - 109ms/step
Epoch 7/10000
7/7 - 1s - loss: 0.0414 - auc: 0.3027 - val_loss: 0.7385 - val_auc: 0.6357 - 764ms/epoch - 109ms/step
Epoch 8/10000
7/7 - 1s - los

  saving_api.save_model(


Test AUC: 0.7342502474784851
Val AUC: 0.8522337079048157

SIZE:1000
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
13/13 - 1s - loss: 0.0394 - auc: 0.3892 - val_loss: 0.7356 - val_auc: 0.3003 - 1s/epoch - 86ms/step
Epoch 2/10000
13/13 - 1s - loss: 0.0390 - auc: 0.5012 - val_loss: 0.7334 - val_auc: 0.3810 - 1s/epoch - 77ms/step
Epoch 3/10000
13/13 - 1s - loss: 0.0387 - auc: 0.5168 - val_loss: 0.7291 - val_auc: 0.4566 - 1s/epoch - 78ms/step
Epoch 4/10000
13/13 - 1s - loss: 0.0384 - auc: 0.5906 - val_loss: 0.7246 - val_auc: 0.5095 - 1s/epoch - 78ms/step
Epoch 5/10000
13/13 - 1s - loss: 0.0379 - auc: 0.6832 - val_loss: 0.7206 - val_auc: 0.5619 - 1s/epoch - 77ms/step
Epoch 6/10000
13/13 - 1s - loss: 0.0376 - auc: 0.7390 - val_loss: 0.7158 - val_auc: 0.5906 - 1s/epoch - 78ms/step
Epoch 7/10000
13/13 - 1s - loss: 0.0376 - auc: 0.7390 - val_loss: 0.7119 - val_auc: 0.6280 - 1s/epoch - 79ms/step
Epoch 8/10000
13/13 - 1s -

  saving_api.save_model(


Test AUC: 0.7458006143569946
Val AUC: 0.8187285661697388

SIZE:2500
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
32/32 - 2s - loss: 0.0393 - auc: 0.4925 - val_loss: 0.7273 - val_auc: 0.4504 - 2s/epoch - 54ms/step
Epoch 2/10000
32/32 - 2s - loss: 0.0391 - auc: 0.5136 - val_loss: 0.7228 - val_auc: 0.5304 - 2s/epoch - 50ms/step
Epoch 3/10000
32/32 - 2s - loss: 0.0391 - auc: 0.4915 - val_loss: 0.7196 - val_auc: 0.5926 - 2s/epoch - 50ms/step
Epoch 4/10000
32/32 - 2s - loss: 0.0387 - auc: 0.5775 - val_loss: 0.7158 - val_auc: 0.6255 - 2s/epoch - 49ms/step
Epoch 5/10000
32/32 - 2s - loss: 0.0383 - auc: 0.6579 - val_loss: 0.7114 - val_auc: 0.6536 - 2s/epoch - 66ms/step
Epoch 6/10000
32/32 - 2s - loss: 0.0383 - auc: 0.6518 - val_loss: 0.7087 - val_auc: 0.6877 - 2s/epoch - 51ms/step
Epoch 7/10000
32/32 - 2s - loss: 0.0382 - auc: 0.6541 - val_loss: 0.7052 - val_auc: 0.6929 - 2s/epoch - 52ms/step
Epoch 8/10000
32/32 - 2s -

  saving_api.save_model(


Test AUC: 0.7419341802597046
Val AUC: 0.7653608322143555

SIZE:5769
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
73/73 - 3s - loss: 0.0393 - auc: 0.4920 - val_loss: 0.7188 - val_auc: 0.4792 - 3s/epoch - 42ms/step
Epoch 2/10000
73/73 - 3s - loss: 0.0391 - auc: 0.5221 - val_loss: 0.7133 - val_auc: 0.5658 - 3s/epoch - 39ms/step
Epoch 3/10000
73/73 - 3s - loss: 0.0390 - auc: 0.5572 - val_loss: 0.7094 - val_auc: 0.6188 - 3s/epoch - 39ms/step
Epoch 4/10000
73/73 - 3s - loss: 0.0389 - auc: 0.5589 - val_loss: 0.7055 - val_auc: 0.6586 - 3s/epoch - 39ms/step
Epoch 5/10000
73/73 - 3s - loss: 0.0386 - auc: 0.6087 - val_loss: 0.7013 - val_auc: 0.6834 - 3s/epoch - 39ms/step
Epoch 6/10000
73/73 - 3s - loss: 0.0385 - auc: 0.6206 - val_loss: 0.6957 - val_auc: 0.6985 - 3s/epoch - 38ms/step
Epoch 7/10000
73/73 - 3s - loss: 0.0384 - auc: 0.6147 - val_loss: 0.6921 - val_auc: 0.7031 - 3s/epoch - 39ms/step
Epoch 8/10000
73/73 - 3s -

  saving_api.save_model(


Test AUC: 0.7564898729324341
Val AUC: 0.7883272171020508



{'test': {'full': {100: 0.4326503276824951, 250: 0.6835736036300659, 500: 0.7426977157592773, 1000: 0.7451436519622803, 2500: 0.7418853640556335, 5769: 0.7533736824989319}, 'linear_probe': {100: 0.5167486667633057, 250: 0.6920478343963623, 500: 0.7342502474784851, 1000: 0.7458006143569946, 2500: 0.7419341802597046, 5769: 0.7564898729324341}}, 'val': {'full': {100: 0.736842155456543, 250: 1.0, 500: 0.8453608751296997, 1000: 0.8217353820800781, 2500: 0.7638487815856934, 5769: 0.7875000238418579}, 'linear_probe': {100: 0.8157894611358643, 250: 1.0, 500: 0.8522337079048157, 1000: 0.8187285661697388, 2500: 0.7653608322143555, 5769: 0.7883272171020508}}}
