# Finetuning

Trained with Google TPU v2

# Set Up

In [None]:
# Colab only: mount Google Drive at /content/drive. The data, encoder and
# output paths configured later in this notebook all live under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# @title Importing

#Installs
!pip install pyarrow fastparquet

# Packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import tensorflow as tf
import os
from IPython.display import clear_output

# Sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

# Keras
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
#from keras.layers.embeddings import Embedding
from keras.metrics import AUC

# Tf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import random

Collecting fastparquet
  Downloading fastparquet-2024.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Downloading fastparquet-2024.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cramjam-2.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m875.2 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cramjam, fastparquet
Successfully installed cramjam-2.8.3 fastparquet-2024.5.0


In [None]:
#@title Random Seeds
import random
## SEEDS

# Hard-coded seeds so that repeated runs start from the same RNG state.
r1 = 0  # seed for Python's `random` module and for NumPy
r2 = 1  # seed for TensorFlow

# Seed every random number generator the notebook has in scope.
# NumPy's global RNG was previously left unseeded.
random.seed(r1)
np.random.seed(r1)
tf.random.set_seed(r2)

In [None]:
#@title Connect to TPU
# Sets the global `strategy` that the model-building and training cells use.
print("TensorFlow version:", tf.__version__)

# Connect to the TPU cluster or fall back to CPU/GPU
try:
  resolver = tf.distribute.cluster_resolver.TPUClusterResolver()  # Tries to connect to the TPU
  tf.config.experimental_connect_to_cluster(resolver)
  tf.tpu.experimental.initialize_tpu_system(resolver)
  strategy = tf.distribute.TPUStrategy(resolver)
  devices = tf.config.list_logical_devices('TPU')
  print('TPU devices:', devices)
except ValueError:
  # Only ValueError is treated as "no TPU available"; any other exception
  # raised during TPU setup propagates and stops the cell.
  print("Could not connect to TPU; using CPU/GPU strategy instead.")
  strategy = tf.distribute.get_strategy()

# Example computation using the strategy
# (a smoke test: a 2x3 by 3x2 matmul dispatched through strategy.run)
with strategy.scope():
  a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
  b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

  # Wrapped in tf.function so it can be handed to strategy.run below.
  @tf.function
  def matmul_fn(x, y):
    return tf.matmul(x, y)

  z = strategy.run(matmul_fn, args=(a, b))

# On the recorded TPU run this prints a PerReplica value, one result per replica.
print(z)

TensorFlow version: 2.15.0
TPU devices: [LogicalDevice(name='/device:TPU:0', device_type='TPU'), LogicalDevice(name='/device:TPU:1', device_type='TPU'), LogicalDevice(name='/device:TPU:2', device_type='TPU'), LogicalDevice(name='/device:TPU:3', device_type='TPU'), LogicalDevice(name='/device:TPU:4', device_type='TPU'), LogicalDevice(name='/device:TPU:5', device_type='TPU'), LogicalDevice(name='/device:TPU:6', device_type='TPU'), LogicalDevice(name='/device:TPU:7', device_type='TPU')]
PerReplica:{
  0: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  1: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  2: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  3: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  4: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  5: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  6: tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32),
  7: tf.

# Hyperparameters & Settings

In [None]:
# Google Drive locations: where fine-tuned models are saved (`root`), where the
# pretrained encoders are read from (`encoder_root`) and where the result
# files are written (`results_root`).
# The experiments folder name on Drive ends with a space; it is kept verbatim.
_experiments_dir = "/content/drive/MyDrive/ActigraphyTransformer/A-NEW/ALBERT Experiments "
_finetuning_dir = f"{_experiments_dir}/ALBERT Conv Finetuning"

root = f"{_finetuning_dir}/Models"
encoder_root = f"{_experiments_dir}/ALBERT Conv Pretraining/Encoders"
results_root = f"{_finetuning_dir}/Results"

In [None]:
"""
Please Fill out Parameters Below
"""
## Model size
# One of "small", "medium", "large", "huge"; selects the architecture and
# fine-tuning hyperparameters and is embedded in the encoder / model file names.
size = "medium"

## Mask ratio
# e.g. .25, .50, .75 (the value set here is 0.90). It is only used to build
# the encoder and model file names, as a whole-number percentage.
mask_ratio = 0.90

## Smoothing
# True or False: selects the "Smooth" or "Raw" data folder and the
# "_smoothed" / "_unsmoothed" part of the file names.
smoothing = False

## Loss Function
# True or False: whether the MSE was computed on only the masked portion (True)
# or on everything in the reconstruction (False). Here it only affects the
# file names.
mse_only_masked = False

In [None]:
"""
Finetuning Specific Settings
"""

## Finetuning Styles
# Add more if needed
# The training loop handles two names: "full" marks every layer trainable,
# "linear_probe" marks the layer named "encoder_model" as not trainable.

finetuning_styles = ["full", "linear_probe"]

In [None]:
# Encoder naming
# Build the file name of the pretrained encoder from the experiment settings:
#   /conv_encoder_<size>_<mask %>_<smoothed|unsmoothed>_<mse_only_masked|mse_all>.h5

# Round before converting: mask_ratio*100 can land just below the intended
# integer (0.29*100 == 28.999999999999996), which int() alone truncates to 28.
mask_name = int(round(mask_ratio * 100))

encoder_name = f"/conv_encoder_{size}_{mask_name}"

if smoothing == True:
  encoder_name = f"{encoder_name}_smoothed"
else:
  encoder_name = f"{encoder_name}_unsmoothed"

if mse_only_masked == True:
  encoder_name = f"{encoder_name}_mse_only_masked.h5"
else:
  encoder_name = f"{encoder_name}_mse_all.h5"

print(encoder_name)

/conv_encoder_medium_90_unsmoothed_mse_all.h5


In [None]:
# Prefix shared by every fine-tuned model file and by the results file:
#   /conv_AcT_<size>_<mask %>_<smoothed|unsmoothed>_<mse_only_masked|mse_all>
_smoothing_tag = "smoothed" if smoothing == True else "unsmoothed"
_loss_tag = "mse_only_masked" if mse_only_masked == True else "mse_all"

ft_name = f"/conv_AcT_{size}_{mask_name}_{_smoothing_tag}_{_loss_tag}"

print(ft_name)

/conv_AcT_medium_90_unsmoothed_mse_all


In [None]:
# Full path of the pretrained encoder file (encoder_name already starts with "/").
encoder_path = f"{encoder_root}{encoder_name}"

# Hyperparameter Additional Info

In [None]:
"""
Model Size
"""
## Model Size
# Architecture hyperparameters for every supported model size, kept in one
# lookup table. Values shared by all sizes are listed once.
_COMMON_ARCHITECTURE = {
  "embed_dim": 96,
  # encoder
  "encoder_ff_dim": 256,
  "encoder_rate": 0.1,
  # decoder
  "decoder_ff_dim": 256,
  "decoder_num_layers": 1,
  "decoder_rate": 0.1,
}

MODEL_SIZE_CONFIGS = {
  "small": {**_COMMON_ARCHITECTURE, "patch_size": 18,
            "encoder_num_heads": 6, "encoder_num_layers": 1, "decoder_num_heads": 6},
  "medium": {**_COMMON_ARCHITECTURE, "patch_size": 18,
             "encoder_num_heads": 12, "encoder_num_layers": 2, "decoder_num_heads": 12},
  "large": {**_COMMON_ARCHITECTURE, "patch_size": 9,
            "encoder_num_heads": 12, "encoder_num_layers": 4, "decoder_num_heads": 12},
  "huge": {**_COMMON_ARCHITECTURE, "patch_size": 5,
           "encoder_num_heads": 12, "encoder_num_layers": 8, "decoder_num_heads": 12},
}

# Fail loudly on an unknown size instead of silently leaving the variables
# below undefined (or holding values from an earlier run of this cell).
if size not in MODEL_SIZE_CONFIGS:
  raise ValueError(
      f"Unknown model size {size!r}; expected one of {sorted(MODEL_SIZE_CONFIGS)}")

_model_config = MODEL_SIZE_CONFIGS[size]

patch_size = _model_config["patch_size"]
embed_dim = _model_config["embed_dim"]
# encoder
encoder_num_heads = _model_config["encoder_num_heads"]
encoder_ff_dim = _model_config["encoder_ff_dim"]
encoder_num_layers = _model_config["encoder_num_layers"]
encoder_rate = _model_config["encoder_rate"]
# decoder
decoder_num_heads = _model_config["decoder_num_heads"]
decoder_ff_dim = _model_config["decoder_ff_dim"]
decoder_num_layers = _model_config["decoder_num_layers"]
decoder_rate = _model_config["decoder_rate"]

In [None]:
"""
For Finetuning
"""

## Model Size
# Fine-tuning hyperparameters per model size.
# reduce_lr_patience and min_lr are only referenced by the commented-out
# ReduceLROnPlateau callback in the training section.
FINETUNING_CONFIGS = {
  "small": {"learning_rate": 0.00001, "early_stopping_patience": 250,
            "reduce_lr_patience": 75, "min_lr": 1e-6},
  "medium": {"learning_rate": 0.00001, "early_stopping_patience": 250,
             "reduce_lr_patience": 75, "min_lr": 1e-6},
  "large": {"learning_rate": 0.000001, "early_stopping_patience": 250,
            "reduce_lr_patience": 75, "min_lr": 1e-7},
  "huge": {"learning_rate": 0.0000005, "early_stopping_patience": 250,
           "reduce_lr_patience": 100, "min_lr": 1e-8},
}

# Fail loudly on an unknown size instead of silently leaving the variables
# below undefined (or holding values from an earlier run of this cell).
if size not in FINETUNING_CONFIGS:
  raise ValueError(
      f"Unknown model size {size!r}; expected one of {sorted(FINETUNING_CONFIGS)}")

_finetuning_config = FINETUNING_CONFIGS[size]

learning_rate = _finetuning_config["learning_rate"]
early_stopping_patience = _finetuning_config["early_stopping_patience"]

reduce_lr_patience = _finetuning_config["reduce_lr_patience"]
min_lr = _finetuning_config["min_lr"]

In [None]:
"""
Smoothing
"""
# The smoothed and raw datasets live in sibling folders; pick one from the
# `smoothing` setting.
_data_variant = "Smooth" if smoothing == True else "Raw"
data_folder_location = f"/content/drive/MyDrive/ActigraphyTransformer/A-NEW/Baseline Tests/Data_2013/All_Meds/{_data_variant}/TestSize2000_set1"

# Process Data

In [None]:
# Which sizes to look at
# Each entry names a pre-split dataset on disk: X_train_<n>.npy / X_val_<n>.npy
# (and the matching y files). For ALBERT hyperparameter tuning, fewer of these
# datasets can be tested.
train_sizes = [100, 250, 500, 1000, 2500, 5769] # for ALBERT hyperparameter tuning, we can test on less datasets
# Number used in the test file names X_test_<n>.npy / y_test_<n>.npy.
test_size = 2000 # fixed

In [None]:
# Load the held-out test features and labels (X_test_<n>.npy / y_test_<n>.npy).
X_test, y_test = (
    np.load(os.path.join(data_folder_location, f'{_prefix}_test_{test_size}.npy'))
    for _prefix in ("X", "y")
)

# Standardize the test features column by column.
# NOTE(review): the scaler is fitted on the test set itself, not on training
# data. Confirm this matches how the encoder's inputs were scaled.
scaler = StandardScaler()
X_test = scaler.fit_transform(X_test)

In [None]:
# Sanity check: shapes of the test features and labels, one per line.
print(X_test.shape, y_test.shape, sep="\n")

(2000, 10080)
(2000, 1)


In [None]:
# Training-set size -> (X, y) arrays; filled in by the loading loop.
train_sets, val_sets = {}, {}

In [None]:
# Load the pre-split train/validation arrays for every training-set size.
# The loop variable is `train_size`, not `size`, so that the model-size
# setting `size` (e.g. "medium") is not overwritten with an integer. That
# would break the hyperparameter cells if they were re-run afterwards.
for train_size in train_sizes:
  X_train = np.load(os.path.join(data_folder_location, f'X_train_{train_size}.npy'))
  y_train = np.load(os.path.join(data_folder_location, f'y_train_{train_size}.npy'))
  train_sets[train_size] = (X_train, y_train)

  X_val = np.load(os.path.join(data_folder_location, f'X_val_{train_size}.npy'))
  y_val = np.load(os.path.join(data_folder_location, f'y_val_{train_size}.npy'))
  val_sets[train_size] = (X_val, y_val)

print("Data loaded successfully.")
# These report how many datasets were loaded, not how many samples they hold.
print(f"Train set size: {len(train_sets)}")
print(f"Val set size: {len(val_sets)}")

Data loaded successfully.
Train set size: 6
Val set size: 6


In [None]:
# Report the array shapes of every train split and its matching validation split.
for n_samples, (X_tr, y_tr) in train_sets.items():
  X_va, y_va = val_sets[n_samples]

  print(f"For train size {n_samples}: ")

  # train split
  print(f"X train shape: {X_tr.shape}")
  print(f"y train shape: {y_tr.shape}")

  # validation split stored under the same key
  print(f"X val shape: {X_va.shape}")
  print(f"y val shape: {y_va.shape}")

  print("================================")

For train size 100: 
X train shape: (80, 10080)
y train shape: (80, 1)
X val shape: (20, 10080)
y val shape: (20, 1)
For train size 250: 
X train shape: (200, 10080)
y train shape: (200, 1)
X val shape: (50, 10080)
y val shape: (50, 1)
For train size 500: 
X train shape: (400, 10080)
y train shape: (400, 1)
X val shape: (100, 10080)
y val shape: (100, 1)
For train size 1000: 
X train shape: (800, 10080)
y train shape: (800, 1)
X val shape: (200, 10080)
y val shape: (200, 1)
For train size 2500: 
X train shape: (2000, 10080)
y train shape: (2000, 1)
X val shape: (500, 10080)
y val shape: (500, 1)
For train size 5769: 
X train shape: (4615, 10080)
y train shape: (4615, 1)
X val shape: (1154, 10080)
y val shape: (1154, 1)


# Wait for later

# LOAD ALBERT


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, Model

# Transformer block modified to also output its attention weights, with explicit layer names
def TransformerBlock(embed_dim, num_heads, ff_dim, rate=0.1, name_prefix="encoder"):
    """Build one transformer block as a Keras functional model.

    The block applies self-attention and a two-layer feed-forward network,
    each followed by dropout, a residual addition and layer normalization.

    Args:
        embed_dim: size of the last input axis; also used as the attention
            `key_dim` and as the output width of the feed-forward network.
        num_heads: number of attention heads.
        ff_dim: width of the hidden feed-forward layer.
        rate: dropout rate used after attention and after the feed-forward network.
        name_prefix: prefix for the name of every layer and of the model.

    Returns:
        A model named "<name_prefix>_transformer" whose outputs are
        [block output, attention weights].
    """
    block_input = layers.Input(shape=(None, embed_dim), name=f"{name_prefix}_input")

    # Self-attention: query and value are both the block input.
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=embed_dim, name=f"{name_prefix}_attention"
    )
    attended, attention_scores = self_attention(
        block_input, block_input, return_attention_scores=True
    )
    attended = layers.Dropout(rate, name=f"{name_prefix}_dropout")(attended)
    attention_block = layers.LayerNormalization(
        epsilon=1e-6, name=f"{name_prefix}_norm1"
    )(block_input + attended)

    # Position-wise feed-forward network with its own residual connection.
    hidden = layers.Dense(ff_dim, activation="relu", name=f"{name_prefix}_ff1")(attention_block)
    projected = layers.Dense(embed_dim, name=f"{name_prefix}_ff2")(hidden)
    projected = layers.Dropout(rate, name=f"{name_prefix}_dropout2")(projected)
    block_output = layers.LayerNormalization(
        epsilon=1e-6, name=f"{name_prefix}_norm2"
    )(attention_block + projected)

    return models.Model(
        inputs=block_input,
        outputs=[block_output, attention_scores],
        name=f"{name_prefix}_transformer",
    )

# Sine/Cosine positional embeddings
def get_positional_embeddings(num_patches, embed_dim):
    """Return fixed sinusoidal positional embeddings, one row per patch.

    All sine terms come first and all cosine terms are concatenated after
    them along the last axis; they are not interleaved.

    Args:
        num_patches: number of positions to encode.
        embed_dim: embedding width; one frequency is generated for every
            second index in range(0, embed_dim).

    Returns:
        A float32 tensor with `num_patches` rows.
    """
    positions = tf.range(num_patches, dtype=tf.float32)[:, tf.newaxis]
    even_indices = tf.range(0, embed_dim, 2, dtype=tf.float32)
    inverse_frequencies = tf.exp(even_indices * (-tf.math.log(10000.0) / embed_dim))
    sine_part = tf.sin(positions * inverse_frequencies)
    cosine_part = tf.cos(positions * inverse_frequencies)
    return tf.concat([sine_part, cosine_part], axis=-1)


In [None]:
# Function to load the encoder and build the fine-tuning model with consistent patching and positional embedding
def create_finetuning_model(encoder_path=encoder_path, input_size=10080, patch_size=patch_size, embed_dim=embed_dim, return_attention=False):
    """Load the pretrained encoder and stack a binary-classification head on it.

    The head is: global average pooling over the encoder's sequence output,
    dropout (0.1), Dense(128, relu) and a single sigmoid unit.

    Args:
        encoder_path: path of the saved encoder model. The default is the value
            of the global `encoder_path` at the time this function was defined.
        input_size: length of one flat input series.
        patch_size: currently unused; kept so existing calls keep working.
        embed_dim: currently unused; kept so existing calls keep working.
        return_attention: if True, the model also outputs the encoder's
            attention tensors after the prediction.

    Returns:
        An uncompiled Keras model named "finetuning_model". Its output is the
        sigmoid prediction, or [prediction, *attention tensors] when
        `return_attention` is True.
    """
    # Load the saved encoder model
    encoder_model = tf.keras.models.load_model(
        encoder_path,
        custom_objects={'TransformerBlock': TransformerBlock, 'get_positional_embeddings': get_positional_embeddings},
    )

    # Define new inputs for the fine-tuning model
    inputs = layers.Input(shape=(input_size,), name="finetuning_inputs")

    # The encoder is expected to return the sequence output first, followed by
    # one attention tensor per block. Accept a list or a tuple (a tuple used to
    # break the list concatenation below), and a bare tensor from an encoder
    # that exposes no attention outputs.
    encoder_outputs = encoder_model(inputs)
    if isinstance(encoder_outputs, (list, tuple)):
        sequence_output = encoder_outputs[0]
        attention_weights = list(encoder_outputs[1:])
    else:
        sequence_output = encoder_outputs
        attention_weights = []

    # Classification head: pool over the patch axis, then two dense layers.
    x = layers.GlobalAveragePooling1D(name="global_avg_pool")(sequence_output)
    x = layers.Dropout(0.1, name="dropout")(x)
    x = layers.Dense(128, activation='relu', name="dense_128")(x)
    outputs = layers.Dense(1, activation="sigmoid", name="output")(x)

    # Include attention weights in the final model outputs if requested
    if return_attention:
        outputs = [outputs] + attention_weights

    # Create and return the fine-tuning model
    finetuning_model = models.Model(inputs=inputs, outputs=outputs, name="finetuning_model")
    return finetuning_model


## Compiling

In [None]:
# Build and compile the fine-tuning model under the distribution strategy.
with strategy.scope():
  train_model = create_finetuning_model(return_attention=False)

  # Loss, metric and optimizer are created inside the scope, in this order.
  bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
  auc_metric = tf.keras.metrics.AUC(name='auc')
  adam_optimizer = tf.keras.optimizers.Adam(
      learning_rate=learning_rate,
      beta_1=0.9,
      beta_2=0.999,
      epsilon=1e-07,
      amsgrad=False,
  )

  train_model.compile(loss=bce_loss, metrics=auc_metric, optimizer=adam_optimizer)

# Snapshot the starting weights; the training loop reloads this file before
# every run.
train_model.save_weights('original_model_weights.h5')

train_model.summary()



Model: "finetuning_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 finetuning_inputs (InputLa  [(None, 10080)]           0         
 yer)                                                            
                                                                 
 encoder_model (Functional)  [(None, 560, 96),         992000    
                              (None, 12, 560, 560),              
                              (None, 12, 560, 560)]              
                                                                 
 global_avg_pool (GlobalAve  (None, 96)                0         
 ragePooling1D)                                                  
                                                                 
 dropout (Dropout)           (None, 96)                0         
                                                                 
 dense_128 (Dense)           (None, 128)          

# Training

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# # reduce learning rate (Don't use this it's buggy)
# reduce_lr = ReduceLROnPlateau(
#     monitor='val_loss',    # Monitor validation loss
#     factor=0.5,            # Reduce rate by a factor of 0.5
#     patience=75,           # Number of epochs with no improvement after which learning rate will be reduced
#     min_lr=1e-6,           # Minimum learning rate that the reduction can reach
#     verbose=1              # Print messages when reducing the learning rate
# )

# Early stopping on validation AUC: stop after `early_stopping_patience`
# epochs without a new maximum, then roll back to the best epoch's weights.
early_stopper = EarlyStopping(
    monitor='val_auc',
    mode='max',
    patience=early_stopping_patience,
    restore_best_weights=True,
    verbose=1,  # print a message when early stopping triggers
)

In [None]:
# Training Loop
#
# For every finetuning style and every training-set size: reset the model to
# the saved starting weights, train with early stopping, save the model and
# record the test and validation AUC.

def _compile_for_finetuning(model):
  """Compile `model` with a fresh Adam optimizer, binary cross-entropy loss and an AUC metric named 'auc'.

  Uses the same settings as the initial compile cell. Call it inside
  `strategy.scope()`.
  """
  model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=[tf.keras.metrics.AUC(name='auc')],
    optimizer=tf.keras.optimizers.Adam(
      learning_rate=learning_rate,
      beta_1=0.9,
      beta_2=0.999,
      epsilon=1e-07,
      amsgrad=False))


scores = {"test": {}, "val": {}}

for finetuning_style in finetuning_styles:
  print(f"\nFinetuning Style: {finetuning_style}")

  # Fail before any training rather than silently training an unknown style
  # as if it were a full fine-tune.
  if finetuning_style not in ("full", "linear_probe"):
    raise ValueError(f"Unknown finetuning style: {finetuning_style!r}")

  scores["test"][finetuning_style] = {}
  scores["val"][finetuning_style] = {}

  # `train_size`, not `size`, so the model-size setting `size` is not overwritten.
  for train_size in train_sizes:

    print(f"\nSIZE:{train_size}")

    # Standardize the training features with their own statistics.
    X_train, y_train = train_sets[train_size]
    train_scalar = StandardScaler()
    train_scalar.fit(X_train)
    X_train = train_scalar.transform(X_train)

    # NOTE(review): the validation set (and the test set, earlier in the
    # notebook) is standardized with its own statistics instead of the
    # training scaler. Kept as is; confirm that this is intended.
    X_val, y_val = val_sets[train_size]
    val_scalar = StandardScaler()
    val_scalar.fit(X_val)
    X_val = val_scalar.transform(X_val)

    # Class weights that balance the classes: each class is weighted by the
    # frequency of the other one. Assumes the labels are 0/1 (TODO confirm).
    # np.sum yields a scalar, so the weights are plain floats rather than
    # shape-(1,) arrays.
    total = len(y_train)
    class1 = float(np.sum(y_train))
    class0 = total - class1

    class_weights = {0: class1 / total,
                     1: class0 / total}

    # Start every run from the same state: all layers trainable and the
    # saved starting weights restored.
    for layer in train_model.layers:
      layer.trainable = True
    train_model.load_weights('original_model_weights.h5')

    # Linear probe: freeze the pretrained encoder and train only the head.
    if finetuning_style == "linear_probe":
      for layer in train_model.layers:
        if layer.name == "encoder_model":
          layer.trainable = False

    # Recompile after changing `trainable`: Keras fixes the set of trainable
    # weights at compile time, so without this the freeze is not reliably
    # applied. It also gives each run a fresh optimizer state instead of the
    # Adam moments left over from the previous run.
    with strategy.scope():
      _compile_for_finetuning(train_model)

    print(" ")
    # Verify by printing each layer's name and trainable status
    for layer in train_model.layers:
      print(layer.name, layer.trainable)
    print(" ")

    # Train model
    history = train_model.fit(
        X_train, y_train,
        epochs=10000,  # upper bound; early stopping normally ends training sooner
        batch_size=64,
        validation_data=(X_val, y_val),
        shuffle=False,
        class_weight=class_weights,
        callbacks=[early_stopper],
        verbose=2)

    # Save model
    current_model_name = f"{ft_name}_n{train_size}_{finetuning_style}.h5"
    print(current_model_name)
    train_model.save(root + current_model_name)

    # Test model: evaluate() returns [loss, auc]
    test_scores = train_model.evaluate(X_test, y_test, batch_size=64)  # Test Set
    scores["test"][finetuning_style][train_size] = test_scores[1]
    print("Test AUC:", test_scores[1])

    val_scores = train_model.evaluate(X_val, y_val, batch_size=64)  # Val Set
    scores["val"][finetuning_style][train_size] = val_scores[1]
    print("Val AUC:", val_scores[1])

# Save all results in a .txt
print("\n\n")
print(scores)

results_path = f"{results_root}{ft_name}_RESULTS.txt"

# Best effort: a failed write must not lose the scores printed above. Only
# I/O errors are caught, and the file is always closed.
try:
  with open(results_path, 'wt') as file_to_write:
    file_to_write.write(str(scores))
except OSError as err:
  print(f"Unable to write to file {results_path}: {err}")


Finetuning Style: full

SIZE:100
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
2/2 - 22s - loss: 0.0347 - auc: 0.5417 - val_loss: 0.5127 - val_auc: 0.2368 - 22s/epoch - 11s/step
Epoch 2/10000
2/2 - 1s - loss: 0.0365 - auc: 0.2147 - val_loss: 0.5125 - val_auc: 0.2368 - 627ms/epoch - 314ms/step
Epoch 3/10000
2/2 - 1s - loss: 0.0389 - auc: 0.0064 - val_loss: 0.5125 - val_auc: 0.2368 - 658ms/epoch - 329ms/step
Epoch 4/10000
2/2 - 1s - loss: 0.0352 - auc: 0.4295 - val_loss: 0.5125 - val_auc: 0.2368 - 617ms/epoch - 308ms/step
Epoch 5/10000
2/2 - 1s - loss: 0.0371 - auc: 0.1314 - val_loss: 0.5123 - val_auc: 0.2368 - 612ms/epoch - 306ms/step
Epoch 6/10000
2/2 - 1s - loss: 0.0370 - auc: 0.1699 - val_loss: 0.5123 - val_auc: 0.2368 - 634ms/epoch - 317ms/step
Epoch 7/10000
2/2 - 1s - loss: 0.0379 - auc: 0.0545 - val_loss: 0.5122 - val_auc: 0.2368 - 614ms/epoch - 307ms/step
Epoch 8/10000
2/2 - 1s - loss: 0.0337 - auc: 0.657

  saving_api.save_model(


Test AUC: 0.3701525330543518
Val AUC: 0.5526315569877625

SIZE:250
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
4/4 - 3s - loss: 0.0435 - auc: 0.2320 - val_loss: 0.5037 - val_auc: 0.0204 - 3s/epoch - 815ms/step
Epoch 2/10000
4/4 - 1s - loss: 0.0437 - auc: 0.2642 - val_loss: 0.5035 - val_auc: 0.0102 - 682ms/epoch - 171ms/step
Epoch 3/10000
4/4 - 1s - loss: 0.0437 - auc: 0.2221 - val_loss: 0.5037 - val_auc: 0.0102 - 666ms/epoch - 167ms/step
Epoch 4/10000
4/4 - 1s - loss: 0.0439 - auc: 0.2002 - val_loss: 0.5041 - val_auc: 0.0102 - 673ms/epoch - 168ms/step
Epoch 5/10000
4/4 - 1s - loss: 0.0439 - auc: 0.1881 - val_loss: 0.5046 - val_auc: 0.0408 - 724ms/epoch - 181ms/step
Epoch 6/10000
4/4 - 1s - loss: 0.0434 - auc: 0.2646 - val_loss: 0.5054 - val_auc: 0.0102 - 687ms/epoch - 172ms/step
Epoch 7/10000
4/4 - 1s - loss: 0.0439 - auc: 0.2178 - val_loss: 0.5062 - val_auc: 0.0102 - 685ms/epoch - 171ms/step
Epoch 8/10000
4/4

  saving_api.save_model(


Test AUC: 0.7137060761451721
Val AUC: 0.9795919060707092

SIZE:500
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
7/7 - 3s - loss: 0.0433 - auc: 0.2908 - val_loss: 0.5149 - val_auc: 0.1220 - 3s/epoch - 483ms/step
Epoch 2/10000
7/7 - 1s - loss: 0.0431 - auc: 0.3039 - val_loss: 0.5191 - val_auc: 0.1168 - 797ms/epoch - 114ms/step
Epoch 3/10000
7/7 - 1s - loss: 0.0430 - auc: 0.2797 - val_loss: 0.5248 - val_auc: 0.1254 - 834ms/epoch - 119ms/step
Epoch 4/10000
7/7 - 1s - loss: 0.0428 - auc: 0.3154 - val_loss: 0.5308 - val_auc: 0.1289 - 844ms/epoch - 121ms/step
Epoch 5/10000
7/7 - 1s - loss: 0.0427 - auc: 0.3406 - val_loss: 0.5365 - val_auc: 0.1323 - 849ms/epoch - 121ms/step
Epoch 6/10000
7/7 - 1s - loss: 0.0420 - auc: 0.3894 - val_loss: 0.5422 - val_auc: 0.1546 - 834ms/epoch - 119ms/step
Epoch 7/10000
7/7 - 1s - loss: 0.0417 - auc: 0.4283 - val_loss: 0.5476 - val_auc: 0.1460 - 801ms/epoch - 114ms/step
Epoch 8/10000
7/7

  saving_api.save_model(


Test AUC: 0.7531517148017883
Val AUC: 0.8281787633895874

SIZE:1000
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
13/13 - 6s - loss: 0.0413 - auc: 0.3031 - val_loss: 0.5182 - val_auc: 0.4055 - 6s/epoch - 459ms/step
Epoch 2/10000
13/13 - 1s - loss: 0.0404 - auc: 0.4388 - val_loss: 0.5242 - val_auc: 0.4231 - 1s/epoch - 79ms/step
Epoch 3/10000
13/13 - 1s - loss: 0.0412 - auc: 0.2970 - val_loss: 0.5315 - val_auc: 0.4570 - 1s/epoch - 79ms/step
Epoch 4/10000
13/13 - 1s - loss: 0.0411 - auc: 0.2880 - val_loss: 0.5392 - val_auc: 0.4738 - 1s/epoch - 80ms/step
Epoch 5/10000
13/13 - 1s - loss: 0.0402 - auc: 0.4083 - val_loss: 0.5456 - val_auc: 0.5052 - 1s/epoch - 80ms/step
Epoch 6/10000
13/13 - 1s - loss: 0.0407 - auc: 0.3226 - val_loss: 0.5515 - val_auc: 0.5511 - 1s/epoch - 78ms/step
Epoch 7/10000
13/13 - 1s - loss: 0.0398 - auc: 0.4461 - val_loss: 0.5571 - val_auc: 0.5631 - 991ms/epoch - 76ms/step
Epoch 8/10000
13/13 - 1

  saving_api.save_model(


Test AUC: 0.756232500076294
Val AUC: 0.9179553985595703

SIZE:2500
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
32/32 - 2s - loss: 0.0406 - auc: 0.4433 - val_loss: 0.5256 - val_auc: 0.3195 - 2s/epoch - 55ms/step
Epoch 2/10000
32/32 - 2s - loss: 0.0400 - auc: 0.4638 - val_loss: 0.5430 - val_auc: 0.3460 - 2s/epoch - 50ms/step
Epoch 3/10000
32/32 - 2s - loss: 0.0402 - auc: 0.4467 - val_loss: 0.5583 - val_auc: 0.3795 - 2s/epoch - 51ms/step
Epoch 4/10000
32/32 - 2s - loss: 0.0396 - auc: 0.4967 - val_loss: 0.5722 - val_auc: 0.4140 - 2s/epoch - 50ms/step
Epoch 5/10000
32/32 - 2s - loss: 0.0395 - auc: 0.4903 - val_loss: 0.5831 - val_auc: 0.4496 - 2s/epoch - 50ms/step
Epoch 6/10000
32/32 - 2s - loss: 0.0395 - auc: 0.4895 - val_loss: 0.5940 - val_auc: 0.4869 - 2s/epoch - 50ms/step
Epoch 7/10000
32/32 - 2s - loss: 0.0388 - auc: 0.5869 - val_loss: 0.6021 - val_auc: 0.5083 - 2s/epoch - 52ms/step
Epoch 8/10000
32/32 - 2s - l

  saving_api.save_model(


Test AUC: 0.7598503232002258
Val AUC: 0.8002749085426331

SIZE:5769
 
finetuning_inputs True
encoder_model True
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
73/73 - 3s - loss: 0.0406 - auc: 0.4117 - val_loss: 0.5609 - val_auc: 0.3660 - 3s/epoch - 42ms/step
Epoch 2/10000
73/73 - 3s - loss: 0.0400 - auc: 0.4240 - val_loss: 0.5937 - val_auc: 0.4465 - 3s/epoch - 40ms/step
Epoch 3/10000
73/73 - 3s - loss: 0.0392 - auc: 0.5254 - val_loss: 0.6141 - val_auc: 0.5035 - 3s/epoch - 40ms/step
Epoch 4/10000
73/73 - 3s - loss: 0.0392 - auc: 0.5254 - val_loss: 0.6313 - val_auc: 0.5693 - 3s/epoch - 40ms/step
Epoch 5/10000
73/73 - 3s - loss: 0.0389 - auc: 0.5482 - val_loss: 0.6430 - val_auc: 0.6146 - 3s/epoch - 40ms/step
Epoch 6/10000
73/73 - 3s - loss: 0.0387 - auc: 0.5909 - val_loss: 0.6514 - val_auc: 0.6487 - 3s/epoch - 39ms/step
Epoch 7/10000
73/73 - 3s - loss: 0.0386 - auc: 0.6122 - val_loss: 0.6582 - val_auc: 0.6725 - 3s/epoch - 40ms/step
Epoch 8/10000
73/73 - 3s - 

  saving_api.save_model(


Test AUC: 0.7734871506690979
Val AUC: 0.8171349763870239

Finetuning Style: linear_probe

SIZE:100
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
2/2 - 1s - loss: 0.0353 - auc: 0.5000 - val_loss: 0.5124 - val_auc: 0.2368 - 775ms/epoch - 388ms/step
Epoch 2/10000
2/2 - 1s - loss: 0.0354 - auc: 0.3974 - val_loss: 0.5126 - val_auc: 0.2368 - 616ms/epoch - 308ms/step
Epoch 3/10000
2/2 - 1s - loss: 0.0378 - auc: 0.0609 - val_loss: 0.5128 - val_auc: 0.2368 - 634ms/epoch - 317ms/step
Epoch 4/10000
2/2 - 1s - loss: 0.0370 - auc: 0.1635 - val_loss: 0.5130 - val_auc: 0.2368 - 636ms/epoch - 318ms/step
Epoch 5/10000
2/2 - 1s - loss: 0.0376 - auc: 0.1859 - val_loss: 0.5130 - val_auc: 0.2368 - 631ms/epoch - 315ms/step
Epoch 6/10000
2/2 - 1s - loss: 0.0363 - auc: 0.3237 - val_loss: 0.5131 - val_auc: 0.2368 - 637ms/epoch - 318ms/step
Epoch 7/10000
2/2 - 1s - loss: 0.0377 - auc: 0.1186 - val_loss: 0.5132 - val_auc: 0.2368 - 637ms/

  saving_api.save_model(


Test AUC: 0.7269301414489746
Val AUC: 0.6315789818763733

SIZE:250
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
4/4 - 1s - loss: 0.0444 - auc: 0.0924 - val_loss: 0.5039 - val_auc: 0.0204 - 834ms/epoch - 209ms/step
Epoch 2/10000
4/4 - 1s - loss: 0.0433 - auc: 0.2921 - val_loss: 0.5041 - val_auc: 0.0102 - 687ms/epoch - 172ms/step
Epoch 3/10000
4/4 - 1s - loss: 0.0437 - auc: 0.2423 - val_loss: 0.5044 - val_auc: 0.0102 - 670ms/epoch - 167ms/step
Epoch 4/10000
4/4 - 1s - loss: 0.0454 - auc: 0.1181 - val_loss: 0.5051 - val_auc: 0.0102 - 674ms/epoch - 168ms/step
Epoch 5/10000
4/4 - 1s - loss: 0.0443 - auc: 0.1430 - val_loss: 0.5057 - val_auc: 0.0102 - 684ms/epoch - 171ms/step
Epoch 6/10000
4/4 - 1s - loss: 0.0431 - auc: 0.3067 - val_loss: 0.5065 - val_auc: 0.0204 - 669ms/epoch - 167ms/step
Epoch 7/10000
4/4 - 1s - loss: 0.0421 - auc: 0.3956 - val_loss: 0.5073 - val_auc: 0.0102 - 671ms/epoch - 168ms/step
Epoch 8/10000

  saving_api.save_model(


Test AUC: 0.7068299651145935
Val AUC: 1.0

SIZE:500
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
7/7 - 1s - loss: 0.0423 - auc: 0.4185 - val_loss: 0.5145 - val_auc: 0.1220 - 991ms/epoch - 142ms/step
Epoch 2/10000
7/7 - 1s - loss: 0.0425 - auc: 0.3870 - val_loss: 0.5181 - val_auc: 0.1254 - 840ms/epoch - 120ms/step
Epoch 3/10000
7/7 - 1s - loss: 0.0426 - auc: 0.3651 - val_loss: 0.5229 - val_auc: 0.1134 - 803ms/epoch - 115ms/step
Epoch 4/10000
7/7 - 1s - loss: 0.0426 - auc: 0.3032 - val_loss: 0.5286 - val_auc: 0.1186 - 799ms/epoch - 114ms/step
Epoch 5/10000
7/7 - 1s - loss: 0.0425 - auc: 0.3271 - val_loss: 0.5339 - val_auc: 0.1237 - 843ms/epoch - 120ms/step
Epoch 6/10000
7/7 - 1s - loss: 0.0433 - auc: 0.2348 - val_loss: 0.5389 - val_auc: 0.1306 - 840ms/epoch - 120ms/step
Epoch 7/10000
7/7 - 1s - loss: 0.0414 - auc: 0.4463 - val_loss: 0.5440 - val_auc: 0.1323 - 842ms/epoch - 120ms/step
Epoch 8/10000
7/7 - 1s - los

  saving_api.save_model(


Test AUC: 0.7526146173477173
Val AUC: 0.8350515365600586

SIZE:1000
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
13/13 - 1s - loss: 0.0416 - auc: 0.3298 - val_loss: 0.5181 - val_auc: 0.3995 - 1s/epoch - 87ms/step
Epoch 2/10000
13/13 - 1s - loss: 0.0412 - auc: 0.3049 - val_loss: 0.5248 - val_auc: 0.4334 - 1s/epoch - 78ms/step
Epoch 3/10000
13/13 - 1s - loss: 0.0409 - auc: 0.3462 - val_loss: 0.5325 - val_auc: 0.4497 - 1s/epoch - 77ms/step
Epoch 4/10000
13/13 - 1s - loss: 0.0406 - auc: 0.3710 - val_loss: 0.5392 - val_auc: 0.4747 - 1s/epoch - 79ms/step
Epoch 5/10000
13/13 - 1s - loss: 0.0402 - auc: 0.4172 - val_loss: 0.5454 - val_auc: 0.5142 - 1s/epoch - 84ms/step
Epoch 6/10000
13/13 - 1s - loss: 0.0394 - auc: 0.5105 - val_loss: 0.5509 - val_auc: 0.5326 - 1s/epoch - 80ms/step
Epoch 7/10000
13/13 - 1s - loss: 0.0402 - auc: 0.4079 - val_loss: 0.5558 - val_auc: 0.5554 - 1s/epoch - 80ms/step
Epoch 8/10000
13/13 - 1s -

  saving_api.save_model(


Test AUC: 0.7532938122749329
Val AUC: 0.9170962572097778

SIZE:2500
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
32/32 - 2s - loss: 0.0411 - auc: 0.3751 - val_loss: 0.5267 - val_auc: 0.3089 - 2s/epoch - 56ms/step
Epoch 2/10000
32/32 - 2s - loss: 0.0406 - auc: 0.4202 - val_loss: 0.5452 - val_auc: 0.3515 - 2s/epoch - 50ms/step
Epoch 3/10000
32/32 - 2s - loss: 0.0396 - auc: 0.4945 - val_loss: 0.5600 - val_auc: 0.3882 - 2s/epoch - 49ms/step
Epoch 4/10000
32/32 - 2s - loss: 0.0399 - auc: 0.4497 - val_loss: 0.5732 - val_auc: 0.4286 - 2s/epoch - 51ms/step
Epoch 5/10000
32/32 - 2s - loss: 0.0397 - auc: 0.4806 - val_loss: 0.5854 - val_auc: 0.4600 - 2s/epoch - 50ms/step
Epoch 6/10000
32/32 - 2s - loss: 0.0392 - auc: 0.5331 - val_loss: 0.5956 - val_auc: 0.4834 - 2s/epoch - 51ms/step
Epoch 7/10000
32/32 - 2s - loss: 0.0390 - auc: 0.5388 - val_loss: 0.6047 - val_auc: 0.5184 - 2s/epoch - 52ms/step
Epoch 8/10000
32/32 - 2s -

  saving_api.save_model(


Test AUC: 0.7588603496551514
Val AUC: 0.7995189428329468

SIZE:5769
 
finetuning_inputs True
encoder_model False
global_avg_pool True
dropout True
dense_128 True
output True
 
Epoch 1/10000
73/73 - 3s - loss: 0.0404 - auc: 0.4214 - val_loss: 0.5634 - val_auc: 0.3586 - 3s/epoch - 43ms/step
Epoch 2/10000
73/73 - 3s - loss: 0.0396 - auc: 0.4823 - val_loss: 0.5957 - val_auc: 0.4507 - 3s/epoch - 40ms/step
Epoch 3/10000
73/73 - 3s - loss: 0.0392 - auc: 0.5039 - val_loss: 0.6177 - val_auc: 0.5189 - 3s/epoch - 40ms/step
Epoch 4/10000
73/73 - 3s - loss: 0.0390 - auc: 0.5638 - val_loss: 0.6313 - val_auc: 0.5757 - 3s/epoch - 40ms/step
Epoch 5/10000
73/73 - 3s - loss: 0.0387 - auc: 0.5923 - val_loss: 0.6431 - val_auc: 0.6247 - 3s/epoch - 40ms/step
Epoch 6/10000
73/73 - 3s - loss: 0.0387 - auc: 0.5885 - val_loss: 0.6527 - val_auc: 0.6541 - 3s/epoch - 39ms/step
Epoch 7/10000
73/73 - 3s - loss: 0.0385 - auc: 0.6253 - val_loss: 0.6591 - val_auc: 0.6727 - 3s/epoch - 39ms/step
Epoch 8/10000
73/73 - 3s -

  saving_api.save_model(


Test AUC: 0.773167610168457
Val AUC: 0.8180803656578064



{'test': {'full': {100: 0.3701525330543518, 250: 0.7137060761451721, 500: 0.7531517148017883, 1000: 0.756232500076294, 2500: 0.7598503232002258, 5769: 0.7734871506690979}, 'linear_probe': {100: 0.7269301414489746, 250: 0.7068299651145935, 500: 0.7526146173477173, 1000: 0.7532938122749329, 2500: 0.7588603496551514, 5769: 0.773167610168457}}, 'val': {'full': {100: 0.5526315569877625, 250: 0.9795919060707092, 500: 0.8281787633895874, 1000: 0.9179553985595703, 2500: 0.8002749085426331, 5769: 0.8171349763870239}, 'linear_probe': {100: 0.6315789818763733, 250: 1.0, 500: 0.8350515365600586, 1000: 0.9170962572097778, 2500: 0.7995189428329468, 5769: 0.8180803656578064}}}
