# Diagnostics 1: Uncertainty Calibration and Selection Bias

See [here](notes/250902_next_steps.md) for more details. Essentially, I aim to run two diagnostic tests to see what's going wrong with the active learning campaign. Those are: Uncertainty calibration and selection bias. For uncertainty calibration, I'll be checking for correlation between the variances and the residuals. For selection bias, I'll be comparing the distribution of labels and predictions of actively selected versus randomly selected samples.

In [1]:
from scripts.data_utils import train_val_test_split
from scripts.config import (
    DATA_PATH, 
    SEQUENCE_COL, 
    SCORE_COL, 
    TOK_MODEL, 
    VAL_SPLIT,
    TEST_SPLIT,
    BATCH_SIZE,
    RANDOM_SEED,
)

training_pool, val_dataloader, test_dataloader = train_val_test_split(
    DATA_PATH,
    SEQUENCE_COL,
    SCORE_COL,
    TOK_MODEL,
    VAL_SPLIT,
    TEST_SPLIT,
    BATCH_SIZE,
    RANDOM_SEED
)

In [2]:
import torch
from torch.utils.data import Subset, DataLoader
from scripts.training import initialize_and_train_new_model

def run_standard_finetuning(
        n_samples,
        approach, 
        model_name,
        batch_size, 
        learning_rate, 
        weight_decay, 
        epochs, 
        training_pool, 
        val_dataloader,
        patience
        ):

    # get dataloader of random train data
    random_indices = torch.randperm(len(training_pool))[:n_samples].tolist()
    train_subset = Subset(training_pool, random_indices)
    train_dataloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)

    # train model
    model, history = initialize_and_train_new_model(approach, model_name, learning_rate, weight_decay, epochs, train_dataloader, val_dataloader, patience, return_history=True)
    return model, train_dataloader, history

In [3]:
from pathlib import Path
import pandas as pd
import numpy as np

from scripts.acquisition import acquire_new_batch, train_bootstrapped_ensemble, get_variances
from scripts.training import test_model
from scripts.diagnostics import get_residuals, get_chosen_labels_and_preds


def get_bootstrapped_ensemble_learning_curves(
        n_samples,
        initial_n_samples,
        n_samples_per_batch,
        model_name, 
        approach,
        learning_rate, 
        weight_decay, 
        epochs, 
        training_pool, 
        train_dataloader_batch_size,
        pool_dataloader_batch_size,
        val_dataloader, 
        test_dataloader,
        patience=5,
        n_models=5,
        results_path="active_vs_standard_learning_curves.csv"
):
    results_path = Path(results_path)
    results_dir = results_path.parent
    results_dir.mkdir(parents=True, exist_ok=True)

    # Load existing results if the file exists, otherwise start with a fresh DataFrame.
    if results_path.exists():
        all_results_df = pd.read_csv(results_path)
    else:
        all_results_df = pd.DataFrame()
    
    total_pool_size = len(training_pool)
    unlabeled_indices = np.arange(total_pool_size)
    labeled_indices = np.array([], dtype=np.int64)

    ensemble_predictions = None
    current_cycle = 1
    total_cycles = int(np.ceil((n_samples-initial_n_samples)/n_samples_per_batch)) + 1
    
    while len(labeled_indices) < n_samples and len(unlabeled_indices) > 0:
        print(f"\nCycle {current_cycle}/{total_cycles}\n-------------------------------------------------")

        # on the first cycle, choose random samples of initial_n_samples size
        if ensemble_predictions is None:
            print(f"Choosing initial {initial_n_samples} samples randomly...")
            train_dataloader, pool_dataloader, labeled_indices, unlabeled_indices = acquire_new_batch(
                training_pool, train_dataloader_batch_size, pool_dataloader_batch_size, initial_n_samples, n_samples_per_batch, labeled_indices, unlabeled_indices, acquisition_scores=None
            )
        # each other time, use the n_samples_per_batch with acquisition scores to select
        else:
            scores = get_variances(ensemble_predictions, f"{str(results_dir)}/variances/variances{current_cycle}.csv")
            print(f"Selecting new data points...")
            train_dataloader, pool_dataloader, labeled_indices, unlabeled_indices = acquire_new_batch(
                training_pool, train_dataloader_batch_size, pool_dataloader_batch_size, initial_n_samples, n_samples_per_batch, labeled_indices, unlabeled_indices, acquisition_scores=scores
            )
        
        # give message when loop ends
        if len(unlabeled_indices) == 0:
            print("Unlabeled pool is empty. Proceeding to final model training.")
            break
        
        # evaluate active vs standard
        final_results = []

        # active
        print(f"\nTraining and evaluating model using {len(labeled_indices)} actively selected samples...")
        model_active = initialize_and_train_new_model(approach, model_name, learning_rate, weight_decay, epochs, train_dataloader, val_dataloader, patience, return_history=False)
        results_active = test_model(model_active, test_dataloader, return_results=True)
        results_active = {
            'changing_var': 'n_samples',
            'local_exp_idx': current_cycle-1,
            'value': len(labeled_indices),
            'training_method': 'active',
            **results_active
        }
        final_results.append(results_active)

        # run diagnostics
        get_residuals(model_active, pool_dataloader, f"{str(results_dir)}/residuals/residuals{current_cycle}.csv")
        get_chosen_labels_and_preds(model_active, train_dataloader, f"{str(results_dir)}/labels_and_preds/active_labels_and_preds{current_cycle}.csv")

        # standard
        print(f"\nTraining and evaluating model using {len(labeled_indices)} randomly selected samples...")
        model_standard, standard_train_dataloader, _ = run_standard_finetuning(len(labeled_indices), approach, model_name, train_dataloader_batch_size, learning_rate, weight_decay, epochs, training_pool, val_dataloader, patience)
        results_standard = test_model(model_standard, test_dataloader, return_results=True)
        results_standard = {
            'changing_var': 'n_samples',
            'local_exp_idx': current_cycle-1,
            'value': len(labeled_indices),
            'training_method': 'standard',
            **results_standard
        }
        final_results.append(results_standard)

        get_chosen_labels_and_preds(model_standard, standard_train_dataloader, f"{str(results_dir)}/labels_and_preds/standard_labels_and_preds{current_cycle}.csv")
        
        # save to disk each time to save progress
        results_df = pd.DataFrame(final_results)
        all_results_df = pd.concat([all_results_df, results_df], ignore_index=True)
        all_results_df.to_csv(results_path, index=False)
        print(f"Progress for experiment {current_cycle-1} appended to {results_path}")

        # if it's the last cycle, skip ensemble predictions
        if (current_cycle == total_cycles):
            print("Experiments complete.")
            break

        print("Starting ensemble training and pool evaluation...")
        ensemble_predictions = train_bootstrapped_ensemble(n_models, model_name, approach, learning_rate, weight_decay, epochs, labeled_indices, train_dataloader_batch_size, training_pool, pool_dataloader, val_dataloader, patience)
    
        current_cycle += 1
    return all_results_df

In [5]:
from scripts.config import (
    MODEL_NAME,
    APPROACH,
    LEARNING_RATE,
    WEIGHT_DECAY,
    EPOCHS,
    POOL_BATCH_SIZE,
    PATIENCE,
    N_MODELS,
)

get_bootstrapped_ensemble_learning_curves(
    n_samples=256,
    initial_n_samples=16,
    n_samples_per_batch=16,
    model_name=MODEL_NAME,
    approach=APPROACH,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    epochs=EPOCHS,
    training_pool=training_pool,
    train_dataloader_batch_size=BATCH_SIZE,
    pool_dataloader_batch_size=POOL_BATCH_SIZE,
    val_dataloader=val_dataloader,
    test_dataloader=test_dataloader,
    patience=PATIENCE,
    n_models=N_MODELS,
    results_path='results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv'
)




Cycle 1/16
-------------------------------------------------
Choosing initial 16 samples randomly...

Training and evaluating model using 16 actively selected samples...


[Training]:  64%|██████▍   | 32/50 [00:09<00:05,  3.55it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0134 | Val Loss: 0.2605 | SpearmanR: 0.2480


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 131.35it/s]
[Getting Residuals]: 100%|██████████| 25/25 [00:01<00:00, 16.88it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals1.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 1/1 [00:00<00:00, 100.66it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds1.csv...

Training and evaluating model using 16 randomly selected samples...


[Training]: 100%|██████████| 50/50 [00:14<00:00,  3.44it/s]


Train Loss: 0.0021 | Val Loss: 0.2431 | SpearmanR: 0.2572


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.76it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 1/1 [00:00<00:00, 123.21it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds1.csv...
Progress for experiment 0 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  54%|█████▍    | 27/50 [00:07<00:06,  3.70it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.1688 | Val Loss: 0.2561 | SpearmanR: 0.0190


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.84it/s]



Training Model 2...


[Training]:  64%|██████▍   | 32/50 [00:08<00:04,  3.70it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0131 | Val Loss: 0.4167 | SpearmanR: 0.0428


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.75it/s]



Training Model 3...


[Training]: 100%|██████████| 50/50 [00:13<00:00,  3.80it/s]


Train Loss: 0.0062 | Val Loss: 0.3480 | SpearmanR: 0.3327


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.89it/s]



Training Model 4...


[Training]:  20%|██        | 10/50 [00:02<00:09,  4.17it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.3056 | Val Loss: 0.2037 | SpearmanR: 0.0665


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.85it/s]



Training Model 5...


[Training]: 100%|██████████| 50/50 [00:13<00:00,  3.73it/s]


Train Loss: 0.0023 | Val Loss: 0.2755 | SpearmanR: 0.2665


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.78it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 2/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances2.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 32 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:13<00:00,  3.61it/s]


Train Loss: 0.0060 | Val Loss: 0.2612 | SpearmanR: 0.3460


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.87it/s]
[Getting Residuals]: 100%|██████████| 25/25 [00:01<00:00, 16.83it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals2.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 2/2 [00:00<00:00, 95.85it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds2.csv...

Training and evaluating model using 32 randomly selected samples...


[Training]:  76%|███████▌  | 38/50 [00:12<00:04,  2.98it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0175 | Val Loss: 0.1699 | SpearmanR: 0.4390


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 130.58it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 2/2 [00:00<00:00, 125.57it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds2.csv...
Progress for experiment 1 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]: 100%|██████████| 50/50 [00:13<00:00,  3.66it/s]


Train Loss: 0.0117 | Val Loss: 0.2735 | SpearmanR: 0.1898


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.73it/s]



Training Model 2...


[Training]: 100%|██████████| 50/50 [00:16<00:00,  3.06it/s]


Train Loss: 0.0124 | Val Loss: 0.2066 | SpearmanR: 0.3367


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.91it/s]



Training Model 3...


[Training]: 100%|██████████| 50/50 [00:12<00:00,  4.06it/s]


Train Loss: 0.0033 | Val Loss: 0.1855 | SpearmanR: 0.4183


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.82it/s]



Training Model 4...


[Training]:  66%|██████▌   | 33/50 [00:08<00:04,  4.07it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0241 | Val Loss: 0.1890 | SpearmanR: 0.1846


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.94it/s]



Training Model 5...


[Training]: 100%|██████████| 50/50 [00:12<00:00,  3.95it/s]


Train Loss: 0.0115 | Val Loss: 0.2327 | SpearmanR: 0.3117


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.65it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 3/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances3.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 48 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:14<00:00,  3.46it/s]


Train Loss: 0.0132 | Val Loss: 0.1903 | SpearmanR: 0.4163


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.27it/s]
[Getting Residuals]: 100%|██████████| 25/25 [00:01<00:00, 16.80it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals3.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 3/3 [00:00<00:00, 99.20it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds3.csv...

Training and evaluating model using 48 randomly selected samples...


[Training]: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Train Loss: 0.0264 | Val Loss: 0.1599 | SpearmanR: 0.5228


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.99it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 3/3 [00:00<00:00, 127.63it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds3.csv...
Progress for experiment 2 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  74%|███████▍  | 37/50 [00:11<00:03,  3.34it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0248 | Val Loss: 0.2239 | SpearmanR: 0.2771


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.97it/s]



Training Model 2...


[Training]: 100%|██████████| 50/50 [00:15<00:00,  3.21it/s]


Train Loss: 0.0050 | Val Loss: 0.2396 | SpearmanR: 0.2210


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.86it/s]



Training Model 3...


[Training]:  88%|████████▊ | 44/50 [00:12<00:01,  3.45it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0102 | Val Loss: 0.3152 | SpearmanR: 0.2127


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.78it/s]



Training Model 4...


[Training]: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s]


Train Loss: 0.0063 | Val Loss: 0.1690 | SpearmanR: 0.4338


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.94it/s]



Training Model 5...


[Training]:  60%|██████    | 30/50 [00:10<00:06,  2.86it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0213 | Val Loss: 0.1982 | SpearmanR: 0.3321


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.92it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 4/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances4.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 64 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:16<00:00,  3.04it/s]


Train Loss: 0.0302 | Val Loss: 0.2889 | SpearmanR: 0.3930


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.10it/s]
[Getting Residuals]: 100%|██████████| 25/25 [00:01<00:00, 16.69it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals4.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 4/4 [00:00<00:00, 102.96it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds4.csv...

Training and evaluating model using 64 randomly selected samples...


[Training]:  86%|████████▌ | 43/50 [00:13<00:02,  3.17it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0096 | Val Loss: 0.1502 | SpearmanR: 0.5104


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.62it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 4/4 [00:00<00:00, 127.27it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds4.csv...
Progress for experiment 3 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]: 100%|██████████| 50/50 [00:17<00:00,  2.78it/s]


Train Loss: 0.0060 | Val Loss: 0.2493 | SpearmanR: 0.3642


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 17.15it/s]



Training Model 2...


[Training]:  50%|█████     | 25/50 [00:08<00:08,  3.01it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0394 | Val Loss: 0.2998 | SpearmanR: 0.2299


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 17.00it/s]



Training Model 3...


[Training]:  54%|█████▍    | 27/50 [00:08<00:07,  3.24it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0525 | Val Loss: 0.3500 | SpearmanR: 0.1937


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.88it/s]



Training Model 4...


[Training]:  28%|██▊       | 14/50 [00:04<00:11,  3.22it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.1014 | Val Loss: 0.2367 | SpearmanR: 0.2789


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.94it/s]



Training Model 5...


[Training]: 100%|██████████| 50/50 [00:17<00:00,  2.82it/s]


Train Loss: 0.0177 | Val Loss: 0.2239 | SpearmanR: 0.4181


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.98it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 5/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances5.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 80 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:16<00:00,  2.94it/s]


Train Loss: 0.0199 | Val Loss: 0.1779 | SpearmanR: 0.4154


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 130.03it/s]
[Getting Residuals]: 100%|██████████| 25/25 [00:01<00:00, 17.10it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals5.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 5/5 [00:00<00:00, 102.13it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds5.csv...

Training and evaluating model using 80 randomly selected samples...


[Training]:  88%|████████▊ | 44/50 [00:14<00:02,  2.97it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0221 | Val Loss: 0.1501 | SpearmanR: 0.5548


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.23it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 5/5 [00:00<00:00, 123.91it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds5.csv...
Progress for experiment 4 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  64%|██████▍   | 32/50 [00:12<00:07,  2.55it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0188 | Val Loss: 0.2439 | SpearmanR: 0.2892


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.83it/s]



Training Model 2...


[Training]:  30%|███       | 15/50 [00:05<00:11,  2.99it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0487 | Val Loss: 0.2415 | SpearmanR: 0.1791


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 17.01it/s]



Training Model 3...


[Training]:  70%|███████   | 35/50 [00:12<00:05,  2.74it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0225 | Val Loss: 0.1804 | SpearmanR: 0.3853


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.99it/s]



Training Model 4...


[Training]:  64%|██████▍   | 32/50 [00:11<00:06,  2.83it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0148 | Val Loss: 0.1867 | SpearmanR: 0.3551


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 17.08it/s]



Training Model 5...


[Training]:  92%|█████████▏| 46/50 [00:15<00:01,  2.89it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0184 | Val Loss: 0.2698 | SpearmanR: 0.3824


[Surveying]: 100%|██████████| 25/25 [00:01<00:00, 16.94it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 6/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances6.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 96 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:17<00:00,  2.86it/s]


Train Loss: 0.0220 | Val Loss: 0.1993 | SpearmanR: 0.3725


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 130.50it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 16.24it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals6.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 6/6 [00:00<00:00, 114.10it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds6.csv...

Training and evaluating model using 96 randomly selected samples...


[Training]: 100%|██████████| 50/50 [00:17<00:00,  2.80it/s]


Train Loss: 0.0225 | Val Loss: 0.1416 | SpearmanR: 0.6027


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 128.73it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 6/6 [00:00<00:00, 125.15it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds6.csv...
Progress for experiment 5 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  52%|█████▏    | 26/50 [00:09<00:08,  2.72it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0252 | Val Loss: 0.2659 | SpearmanR: 0.1944


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.32it/s]



Training Model 2...


[Training]:  64%|██████▍   | 32/50 [00:11<00:06,  2.71it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0347 | Val Loss: 0.1837 | SpearmanR: 0.3646


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.33it/s]



Training Model 3...


[Training]:  54%|█████▍    | 27/50 [00:10<00:08,  2.61it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0399 | Val Loss: 0.2771 | SpearmanR: 0.2971


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.35it/s]



Training Model 4...


[Training]:  26%|██▌       | 13/50 [00:05<00:15,  2.40it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0629 | Val Loss: 0.2362 | SpearmanR: 0.2842


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.37it/s]



Training Model 5...


[Training]:  28%|██▊       | 14/50 [00:04<00:12,  2.82it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0802 | Val Loss: 0.2430 | SpearmanR: 0.2274


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.30it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 7/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances7.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 112 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:18<00:00,  2.74it/s]


Train Loss: 0.0251 | Val Loss: 0.1711 | SpearmanR: 0.4504


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.92it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 16.47it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals7.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 7/7 [00:00<00:00, 97.28it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds7.csv...

Training and evaluating model using 112 randomly selected samples...


[Training]: 100%|██████████| 50/50 [00:18<00:00,  2.68it/s]


Train Loss: 0.0328 | Val Loss: 0.1666 | SpearmanR: 0.5592


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 126.62it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 7/7 [00:00<00:00, 127.66it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds7.csv...
Progress for experiment 6 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  76%|███████▌  | 38/50 [00:14<00:04,  2.65it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0317 | Val Loss: 0.2289 | SpearmanR: 0.4011


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.46it/s]



Training Model 2...


[Training]:  56%|█████▌    | 28/50 [00:12<00:09,  2.25it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0620 | Val Loss: 0.2106 | SpearmanR: 0.4464


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.64it/s]



Training Model 3...


[Training]:  78%|███████▊  | 39/50 [00:14<00:04,  2.70it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0200 | Val Loss: 0.1790 | SpearmanR: 0.4720


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.46it/s]



Training Model 4...


[Training]:  76%|███████▌  | 38/50 [00:15<00:04,  2.44it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0118 | Val Loss: 0.1775 | SpearmanR: 0.4674


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.59it/s]



Training Model 5...


[Training]:  62%|██████▏   | 31/50 [00:11<00:06,  2.78it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0244 | Val Loss: 0.1580 | SpearmanR: 0.4683


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.54it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 8/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances8.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 128 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:23<00:00,  2.14it/s]


Train Loss: 0.0241 | Val Loss: 0.1406 | SpearmanR: 0.5326


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 128.64it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 16.40it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals8.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 8/8 [00:00<00:00, 111.62it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds8.csv...

Training and evaluating model using 128 randomly selected samples...


[Training]:  80%|████████  | 40/50 [00:16<00:04,  2.45it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0218 | Val Loss: 0.1286 | SpearmanR: 0.6445


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 128.22it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 8/8 [00:00<00:00, 127.32it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds8.csv...
Progress for experiment 7 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  50%|█████     | 25/50 [00:10<00:10,  2.39it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0342 | Val Loss: 0.1960 | SpearmanR: 0.4327


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.55it/s]



Training Model 2...


[Training]:  96%|█████████▌| 48/50 [00:20<00:00,  2.32it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0113 | Val Loss: 0.1502 | SpearmanR: 0.4732


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.56it/s]



Training Model 3...


[Training]: 100%|██████████| 50/50 [00:20<00:00,  2.44it/s]


Train Loss: 0.0172 | Val Loss: 0.1841 | SpearmanR: 0.4530


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.73it/s]



Training Model 4...


[Training]: 100%|██████████| 50/50 [00:19<00:00,  2.51it/s]


Train Loss: 0.0183 | Val Loss: 0.1659 | SpearmanR: 0.4587


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.57it/s]



Training Model 5...


[Training]: 100%|██████████| 50/50 [00:20<00:00,  2.48it/s]


Train Loss: 0.0196 | Val Loss: 0.1713 | SpearmanR: 0.4407


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.58it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 9/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances9.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 144 actively selected samples...


[Training]:  68%|██████▊   | 34/50 [00:16<00:07,  2.10it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0357 | Val Loss: 0.1702 | SpearmanR: 0.4714


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.05it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 16.63it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals9.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 9/9 [00:00<00:00, 106.45it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds9.csv...

Training and evaluating model using 144 randomly selected samples...


[Training]:  98%|█████████▊| 49/50 [00:21<00:00,  2.28it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0219 | Val Loss: 0.1097 | SpearmanR: 0.6833


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 128.96it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 9/9 [00:00<00:00, 125.84it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds9.csv...
Progress for experiment 8 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  68%|██████▊   | 34/50 [00:14<00:06,  2.39it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0481 | Val Loss: 0.1848 | SpearmanR: 0.4223


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.44it/s]



Training Model 2...


[Training]: 100%|██████████| 50/50 [00:19<00:00,  2.54it/s]


Train Loss: 0.0166 | Val Loss: 0.1511 | SpearmanR: 0.5385


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.64it/s]



Training Model 3...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.36it/s]


Train Loss: 0.0215 | Val Loss: 0.1474 | SpearmanR: 0.5133


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.86it/s]



Training Model 4...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.35it/s]


Train Loss: 0.0329 | Val Loss: 0.1738 | SpearmanR: 0.4881


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.76it/s]



Training Model 5...


[Training]:  70%|███████   | 35/50 [00:14<00:06,  2.40it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0341 | Val Loss: 0.1447 | SpearmanR: 0.4923


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.69it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 10/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances10.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 160 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.35it/s]


Train Loss: 0.0398 | Val Loss: 0.1720 | SpearmanR: 0.5002


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.23it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 16.86it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals10.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 10/10 [00:00<00:00, 115.78it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds10.csv...

Training and evaluating model using 160 randomly selected samples...


[Training]:  92%|█████████▏| 46/50 [00:22<00:01,  2.03it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0321 | Val Loss: 0.1108 | SpearmanR: 0.6636


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.40it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 10/10 [00:00<00:00, 129.58it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds10.csv...
Progress for experiment 9 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.35it/s]


Train Loss: 0.0042 | Val Loss: 0.1537 | SpearmanR: 0.5794


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.76it/s]



Training Model 2...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.32it/s]


Train Loss: 0.0057 | Val Loss: 0.1701 | SpearmanR: 0.5060


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.77it/s]



Training Model 3...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.31it/s]


Train Loss: 0.0030 | Val Loss: 0.1622 | SpearmanR: 0.4604


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.91it/s]



Training Model 4...


[Training]:  60%|██████    | 30/50 [00:12<00:08,  2.42it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0275 | Val Loss: 0.2262 | SpearmanR: 0.4061


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.88it/s]



Training Model 5...


[Training]: 100%|██████████| 50/50 [00:22<00:00,  2.25it/s]


Train Loss: 0.0137 | Val Loss: 0.1571 | SpearmanR: 0.5258


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.75it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 11/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances11.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 176 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.31it/s]


Train Loss: 0.0247 | Val Loss: 0.1487 | SpearmanR: 0.5697


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.74it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 15.39it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals11.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 11/11 [00:00<00:00, 113.72it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds11.csv...

Training and evaluating model using 176 randomly selected samples...


[Training]:  94%|█████████▍| 47/50 [00:23<00:01,  1.96it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0139 | Val Loss: 0.1203 | SpearmanR: 0.6566


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 130.37it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 11/11 [00:00<00:00, 128.71it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds11.csv...
Progress for experiment 10 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  86%|████████▌ | 43/50 [00:19<00:03,  2.21it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0142 | Val Loss: 0.1821 | SpearmanR: 0.4178


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.81it/s]



Training Model 2...


[Training]:  42%|████▏     | 21/50 [00:09<00:13,  2.20it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0506 | Val Loss: 0.1485 | SpearmanR: 0.5054


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.94it/s]



Training Model 3...


[Training]:  70%|███████   | 35/50 [00:14<00:06,  2.36it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0196 | Val Loss: 0.1523 | SpearmanR: 0.5380


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.77it/s]



Training Model 4...


[Training]: 100%|██████████| 50/50 [00:21<00:00,  2.31it/s]


Train Loss: 0.0078 | Val Loss: 0.1630 | SpearmanR: 0.5245


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.91it/s]



Training Model 5...


[Training]:  68%|██████▊   | 34/50 [00:14<00:06,  2.29it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0156 | Val Loss: 0.1537 | SpearmanR: 0.4857


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.76it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 12/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances12.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 192 actively selected samples...


[Training]:  98%|█████████▊| 49/50 [00:24<00:00,  2.02it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0237 | Val Loss: 0.1292 | SpearmanR: 0.6088


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.99it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 16.77it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals12.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 12/12 [00:00<00:00, 115.73it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds12.csv...

Training and evaluating model using 192 randomly selected samples...


[Training]:  70%|███████   | 35/50 [00:17<00:07,  1.99it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0172 | Val Loss: 0.1052 | SpearmanR: 0.7076


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.41it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 12/12 [00:00<00:00, 129.34it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds12.csv...
Progress for experiment 11 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  82%|████████▏ | 41/50 [00:19<00:04,  2.08it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0082 | Val Loss: 0.1868 | SpearmanR: 0.5684


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.87it/s]



Training Model 2...


[Training]:  56%|█████▌    | 28/50 [00:12<00:10,  2.19it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0310 | Val Loss: 0.1630 | SpearmanR: 0.5172


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.95it/s]



Training Model 3...


[Training]: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s]


Train Loss: 0.0076 | Val Loss: 0.1456 | SpearmanR: 0.5307


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.90it/s]



Training Model 4...


[Training]:  82%|████████▏ | 41/50 [00:18<00:04,  2.17it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0279 | Val Loss: 0.1762 | SpearmanR: 0.5241


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.83it/s]



Training Model 5...


[Training]:  92%|█████████▏| 46/50 [00:21<00:01,  2.13it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0085 | Val Loss: 0.1495 | SpearmanR: 0.5551


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.89it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 13/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances13.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 208 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:26<00:00,  1.90it/s]


Train Loss: 0.0124 | Val Loss: 0.1295 | SpearmanR: 0.6187


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 128.36it/s]
[Getting Residuals]: 100%|██████████| 24/24 [00:01<00:00, 17.00it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals13.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 13/13 [00:00<00:00, 111.37it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds13.csv...

Training and evaluating model using 208 randomly selected samples...


[Training]:  88%|████████▊ | 44/50 [00:22<00:03,  1.92it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0221 | Val Loss: 0.1356 | SpearmanR: 0.6676


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.09it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 13/13 [00:00<00:00, 125.83it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds13.csv...
Progress for experiment 12 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  60%|██████    | 30/50 [00:16<00:10,  1.87it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0179 | Val Loss: 0.1513 | SpearmanR: 0.5930


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.96it/s]



Training Model 2...


[Training]:  92%|█████████▏| 46/50 [00:21<00:01,  2.11it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0177 | Val Loss: 0.1329 | SpearmanR: 0.5901


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.95it/s]



Training Model 3...


[Training]:  96%|█████████▌| 48/50 [00:23<00:00,  2.02it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0108 | Val Loss: 0.1513 | SpearmanR: 0.5719


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 16.86it/s]



Training Model 4...


[Training]:  88%|████████▊ | 44/50 [00:22<00:03,  2.00it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0168 | Val Loss: 0.1543 | SpearmanR: 0.5944


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 17.00it/s]



Training Model 5...


[Training]: 100%|██████████| 50/50 [00:24<00:00,  2.03it/s]


Train Loss: 0.0110 | Val Loss: 0.1347 | SpearmanR: 0.5690


[Surveying]: 100%|██████████| 24/24 [00:01<00:00, 17.12it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 14/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances14.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 224 actively selected samples...


[Training]:  66%|██████▌   | 33/50 [00:18<00:09,  1.75it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0345 | Val Loss: 0.1364 | SpearmanR: 0.5794


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 127.03it/s]
[Getting Residuals]: 100%|██████████| 23/23 [00:01<00:00, 16.38it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals14.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 14/14 [00:00<00:00, 116.68it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds14.csv...

Training and evaluating model using 224 randomly selected samples...


[Training]:  88%|████████▊ | 44/50 [00:23<00:03,  1.87it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0387 | Val Loss: 0.1212 | SpearmanR: 0.6411


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 130.52it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 14/14 [00:00<00:00, 127.61it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds14.csv...
Progress for experiment 13 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]: 100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


Train Loss: 0.0106 | Val Loss: 0.1377 | SpearmanR: 0.5962


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.61it/s]



Training Model 2...


[Training]:  50%|█████     | 25/50 [00:11<00:11,  2.10it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0234 | Val Loss: 0.1519 | SpearmanR: 0.5488


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.25it/s]



Training Model 3...


[Training]:  84%|████████▍ | 42/50 [00:22<00:04,  1.91it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0231 | Val Loss: 0.1734 | SpearmanR: 0.4674


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.58it/s]



Training Model 4...


[Training]:  48%|████▊     | 24/50 [00:12<00:13,  2.00it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0219 | Val Loss: 0.1470 | SpearmanR: 0.5414


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.46it/s]



Training Model 5...


[Training]:  62%|██████▏   | 31/50 [00:15<00:09,  1.99it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0177 | Val Loss: 0.1228 | SpearmanR: 0.6070


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.55it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 15/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances15.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 240 actively selected samples...


[Training]: 100%|██████████| 50/50 [00:27<00:00,  1.84it/s]


Train Loss: 0.0227 | Val Loss: 0.1295 | SpearmanR: 0.6310


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.86it/s]
[Getting Residuals]: 100%|██████████| 23/23 [00:01<00:00, 16.23it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals15.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 15/15 [00:00<00:00, 114.43it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds15.csv...

Training and evaluating model using 240 randomly selected samples...


[Training]: 100%|██████████| 50/50 [00:28<00:00,  1.77it/s]


Train Loss: 0.0377 | Val Loss: 0.1202 | SpearmanR: 0.7026


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.35it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 15/15 [00:00<00:00, 126.86it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds15.csv...
Progress for experiment 14 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Starting ensemble training and pool evaluation...

Training Model 1...


[Training]:  90%|█████████ | 45/50 [00:24<00:02,  1.85it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0096 | Val Loss: 0.1521 | SpearmanR: 0.5797


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.70it/s]



Training Model 2...


[Training]: 100%|██████████| 50/50 [00:25<00:00,  1.95it/s]


Train Loss: 0.0229 | Val Loss: 0.1505 | SpearmanR: 0.6052


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.69it/s]



Training Model 3...


[Training]: 100%|██████████| 50/50 [00:26<00:00,  1.90it/s]


Train Loss: 0.0173 | Val Loss: 0.1903 | SpearmanR: 0.5048


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.81it/s]



Training Model 4...


[Training]: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s]


Train Loss: 0.0145 | Val Loss: 0.1484 | SpearmanR: 0.5784


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.51it/s]



Training Model 5...


[Training]:  98%|█████████▊| 49/50 [00:25<00:00,  1.92it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0097 | Val Loss: 0.1237 | SpearmanR: 0.6241


[Surveying]: 100%|██████████| 23/23 [00:01<00:00, 16.69it/s]


Ensemble training complete, submitting predictions for next cycle.

Cycle 16/16
-------------------------------------------------
Saving variance distribution to results/06_uncertainty_calibration/variances/variances16.csv...
Save complete.
Selecting new data points...

Training and evaluating model using 256 actively selected samples...


[Training]:  92%|█████████▏| 46/50 [00:33<00:02,  1.38it/s]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0239 | Val Loss: 0.1205 | SpearmanR: 0.6296


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 129.79it/s]
[Getting Residuals]: 100%|██████████| 23/23 [00:01<00:00, 16.91it/s]


Saving residuals to results/06_uncertainty_calibration/residuals/residuals16.csv...


[Getting Chosen Labels and Preds]: 100%|██████████| 16/16 [00:00<00:00, 123.30it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/active_labels_and_preds16.csv...

Training and evaluating model using 256 randomly selected samples...


[Training]:  96%|█████████▌| 48/50 [00:48<00:02,  1.01s/it]


Early stopping triggered after 10 epochs with no improvement.
Train Loss: 0.0229 | Val Loss: 0.1083 | SpearmanR: 0.7281


[Testing]: 100%|██████████| 25/25 [00:00<00:00, 131.37it/s]
[Getting Chosen Labels and Preds]: 100%|██████████| 16/16 [00:00<00:00, 126.77it/s]


Saving residuals to results/06_uncertainty_calibration/labels_and_preds/standard_labels_and_preds16.csv...
Progress for experiment 15 appended to results/06_uncertainty_calibration/active_vs_standard_learning_curve.csv
Experiments complete.


Unnamed: 0,changing_var,local_exp_idx,value,training_method,avg_test_loss,spearmanr,pearsonr,final_mse
0,n_samples,0,16,active,0.198404,0.089504,0.138942,0.19838
1,n_samples,0,16,standard,0.222621,0.027158,0.016171,0.222519
2,n_samples,0,16,active,0.20059,0.076409,0.094881,0.200553
3,n_samples,0,16,standard,0.205489,-0.160188,-0.126538,0.205435
4,n_samples,0,16,active,0.199817,0.141446,0.154365,0.199759
5,n_samples,0,16,standard,0.197825,0.121573,0.128695,0.1978
6,n_samples,1,32,active,0.196137,0.101768,0.121738,0.196127
7,n_samples,1,32,standard,0.197525,-0.203042,-0.148789,0.197506
8,n_samples,0,16,active,0.275546,0.258759,0.384468,0.275646
9,n_samples,0,16,standard,0.239904,0.289643,0.289137,0.23986


## Analysis

In [59]:
import pandas as pd
import glob
from pathlib import Path

def assemble_df(attribute):
    files = glob.glob(f"results/06_uncertainty_calibration/{attribute}/*.csv")
    files.sort()

    all_dfs = []

    for filepath in files:
        temp_df = pd.read_csv(filepath)
        if attribute == 'labels_and_preds':
            filename = Path(filepath).stem
            training_type = 'active' if filename.startswith("active") else 'standard'
            num = filename[-1] if filename[-2].startswith("s") else filename[-2:]
            temp_df = temp_df.rename(columns={'labels': f'{training_type}_labels{num}',
                                              'preds': f'{training_type}_preds{num}'})
        elif attribute == 'variances':
            column_name = Path(filepath).stem
            temp_df = temp_df.rename(columns={'variance': column_name})
        else:
            column_name = Path(filepath).stem
            temp_df = temp_df.rename(columns={attribute: column_name})
        all_dfs.append(temp_df)

    final_df = pd.concat(all_dfs, axis=1)

    return final_df

In [64]:
import plotly.express as px

def plot_distributions(df, column_names, plotting_labels=True):
    melted_df = df[column_names].melt(
    var_name='Training Method',     
    value_name='Labels' if plotting_labels else 'Preds' 
)

    fig = px.histogram(
        data_frame=melted_df,
        x='Labels' if plotting_labels else 'Preds' ,                                  
        color='Training Method',                                 
        barmode='overlay',                             
        opacity=0.65,                                  
        histnorm='probability density',                
        title='Distribution of Variances Across Active Learning Cycles'
    )

    fig.show()

### Uncertainty

In [None]:
variances_df = assemble_df('variances')
residuals_df = assemble_df('residuals')
merged_df = pd.concat([variances_df, residuals_df], axis=1)
merged_df

Unnamed: 0,variances10,variances11,variances12,variances13,variances14,variances15,variances16,variances2,variances3,variances4,...,residuals15,residuals16,residuals2,residuals3,residuals4,residuals5,residuals6,residuals7,residuals8,residuals9
0,0.010486,0.025583,0.048223,0.016065,0.009742,0.031411,0.028075,0.168271,0.105175,0.033408,...,0.117926,0.234689,0.240201,0.344249,0.547413,0.165809,0.425429,0.141126,0.070147,0.069537
1,0.042847,0.027055,0.021365,0.030832,0.056617,0.040480,0.102804,0.115111,0.039509,0.016360,...,0.442486,0.401448,0.850812,0.811963,0.345282,1.061635,0.918316,1.289529,1.085083,1.125114
2,0.013225,0.036619,0.067221,0.025856,0.025835,0.021028,0.004860,0.084911,0.010705,0.016902,...,0.138658,0.047270,0.069874,0.115371,0.252515,0.040035,0.290288,0.062190,0.097773,0.089653
3,0.018188,0.028687,0.001117,0.015263,0.025918,0.024833,0.046549,0.131170,0.134421,0.019372,...,0.373498,0.504986,0.278149,0.454235,0.188546,0.439912,0.319117,0.398082,0.627397,0.554399
4,0.011649,0.035836,0.035112,0.008232,0.022019,0.069180,0.013510,0.126485,0.033159,0.035262,...,0.278331,0.300524,0.412412,0.518250,0.471094,0.253817,0.529046,0.401245,0.322690,0.335143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3147,,,,,,,,0.136989,,,...,,,,,,,,,,
3148,,,,,,,,0.077908,,,...,,,,,,,,,,
3149,,,,,,,,0.085897,,,...,,,,,,,,,,
3150,,,,,,,,0.126600,,,...,,,,,,,,,,


In [51]:
import plotly.express as px

pairs_to_plot = [('variances2', 'residuals2'),('variances6', 'residuals6'),('variances10', 'residuals10'),('variances14', 'residuals14')]
colors = ['blue', 'red', 'orange', 'purple']

for i, pair in enumerate(pairs_to_plot):
    fig = px.scatter(
        data_frame=merged_df,
        x=pair[0],
        y=pair[1],
        trendline='ols',
    )
    fig.update_traces(
    marker=dict(color=colors[i])
    )
    fig.show()

### Selection bias

In [60]:
labels_and_preds_df = assemble_df("labels_and_preds")
labels_and_preds_df

Unnamed: 0,active_labels1,active_preds1,active_labels10,active_preds10,active_labels11,active_preds11,active_labels12,active_preds12,active_labels13,active_preds13,...,standard_labels5,standard_preds5,standard_labels6,standard_preds6,standard_labels7,standard_preds7,standard_labels8,standard_preds8,standard_labels9,standard_preds9
0,-0.036354,-0.107666,0.088585,0.432001,1.167766,0.945142,0.284879,0.282786,0.641427,0.726994,...,-0.263598,-0.120962,-0.911415,-0.776029,0.158221,0.465098,-0.484527,-0.394888,0.067395,0.134904
1,-0.180261,-0.283052,0.258336,0.546001,0.607098,0.415099,0.462165,0.212660,0.062256,0.095716,...,0.146577,0.248761,1.243742,1.030968,-0.309355,-0.335464,-0.020560,0.134665,1.042827,0.901584
2,0.302464,0.344470,0.828947,0.858783,0.161135,0.109368,-0.756513,-0.685431,0.597890,0.630525,...,-0.446529,-0.356209,-0.122269,-0.208577,0.593735,0.614576,0.282405,0.260362,0.040280,0.084740
3,-0.990597,-0.407582,-0.180261,-0.581259,0.815583,0.607320,0.819739,0.664179,0.398955,0.398922,...,-0.049718,0.320475,-0.814505,-0.592489,-0.244630,-0.164468,0.424377,0.436567,-0.212445,0.006337
4,-0.475687,-0.379855,0.787555,0.873218,0.361586,0.191383,0.088585,0.016937,0.596740,0.521443,...,0.593735,0.634870,0.828947,0.849784,-0.756513,-0.394833,-0.580422,-0.448594,-0.350748,-0.463443
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,,,,,,,,,,,...,,,,,,,,,,
252,,,,,,,,,,,...,,,,,,,,,,
253,,,,,,,,,,,...,,,,,,,,,,
254,,,,,,,,,,,...,,,,,,,,,,


In [72]:
pairs_to_plot = [('active_labels2', 'standard_labels2'),('active_labels6', 'standard_labels6'),('active_labels10', 'standard_labels10'),('active_labels16', 'standard_labels16'),]

for pair in pairs_to_plot:
    plot_distributions(labels_and_preds_df, list(pair), plotting_labels=True)

In [73]:
import pandas as pd
import numpy as np
from scipy import stats

def check_normality(data: pd.Series):
    """
    Calculates skewness, kurtosis, and performs Shapiro-Wilk and Jarque-Bera
    normality tests on a pandas Series.

    Args:
        data (pd.Series): The column of data to test.
    """
    # --- 1. Clean data by removing NaN values ---
    data = data.dropna()
    
    if len(data) < 3:
        print(f"Column '{data.name}' has fewer than 3 non-NaN values. Cannot perform tests.")
        return

    print(f"--- Normality Report for column: '{data.name}' ---")

    # --- 2. Skewness and Kurtosis ---
    skewness = data.skew()
    kurtosis = data.kurt() # This is excess kurtosis (Fisher's definition)
    
    print(f"Skewness: {skewness:.3f}")
    print(f"Kurtosis: {kurtosis:.3f}\n")
    
    # Interpretation helpers
    if -0.5 < skewness < 0.5:
        print("Skewness indicates the distribution is fairly symmetrical.")
    elif -1 < skewness < -0.5 or 0.5 < skewness < 1:
        print("Skewness indicates the distribution is moderately skewed.")
    else:
        print("Skewness indicates the distribution is highly skewed.")

    if kurtosis > 1:
        print("Kurtosis indicates heavy tails (leptokurtic), suggesting outliers are more likely.")
    elif kurtosis < -1:
         print("Kurtosis indicates light tails (platykurtic), suggesting outliers are less likely.")
    else:
        print("Kurtosis is within a normal range.")

    # --- 3. Shapiro-Wilk Test ---
    # More powerful for smaller sample sizes (<5000)
    shapiro_stat, shapiro_p = stats.shapiro(data)
    print(f"\nShapiro-Wilk Test:")
    print(f"  - Statistic: {shapiro_stat:.3f}")
    print(f"  - p-value: {shapiro_p:.3f}")
    
    alpha = 0.05
    if shapiro_p > alpha:
        print("  - Conclusion: Sample looks Gaussian (fail to reject H0)")
    else:
        print("  - Conclusion: Sample does not look Gaussian (reject H0)")

    # --- 4. Jarque-Bera Test ---
    # Good for larger sample sizes, based on skew and kurtosis
    jb_stat, jb_p = stats.jarque_bera(data)
    print(f"\nJarque-Bera Test:")
    print(f"  - Statistic: {jb_stat:.3f}")
    print(f"  - p-value: {jb_p:.3f}")
    
    if jb_p > alpha:
        print("  - Conclusion: Sample looks Gaussian (fail to reject H0)")
    else:
        print("  - Conclusion: Sample does not look Gaussian (reject H0)")
    
    print("-" * 45 + "\n")

In [86]:
import pandas as pd
import numpy as np
from scipy import stats
import re

def get_normality_stats(data: pd.Series) -> dict:
    """
    Calculates normality statistics for a pandas Series and returns them as a dictionary.

    Args:
        data (pd.Series): The column of data to test.

    Returns:
        dict: A dictionary containing skewness, kurtosis, and test results.
    """
    # Clean data by removing NaN values
    data = data.dropna()
    
    # If not enough data points, return a dictionary of NaNs
    if len(data) < 3:
        return {
            'skewness': np.nan,
            'kurtosis': np.nan,
            'shapiro_stat': np.nan,
            'shapiro_p': np.nan,
            'jb_stat': np.nan,
            'jb_p': np.nan
        }

    # Calculate Skewness and Kurtosis
    skewness = data.skew()
    kurtosis = data.kurt()  # This is excess kurtosis (Fisher's definition)
    
    # Perform Shapiro-Wilk Test
    shapiro_stat, shapiro_p = stats.shapiro(data)

    # Perform Jarque-Bera Test
    jb_stat, jb_p = stats.jarque_bera(data)
    
    # Compile results into a dictionary
    results = {
        'skewness': skewness,
        'kurtosis': kurtosis,
        'shapiro_stat': shapiro_stat,
        'shapiro_p': shapiro_p,
        'jb_stat': jb_stat,
        'jb_p': jb_p
    }
    
    return results

def analyze_distributions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Analyzes each column of a DataFrame for normality, parsing column names
    for metadata, and returns a results DataFrame.

    The expected column name pattern is '{training_type}_{labels_or_preds}{cycle_number}',
    e.g., 'active_preds10' or 'standard_labels2'.

    Args:
        df (pd.DataFrame): The DataFrame with distributions to analyze.

    Returns:
        pd.DataFrame: A new DataFrame containing the analysis results.
    """
    all_results = []
    
    # Regex to separate the text part from the number part (e.g., 'preds' from '10')
    name_pattern = re.compile(r'([a-zA-Z]+)(\d+)')

    for col_name in df.columns:
        # --- 1. Parse column name for metadata ---
        try:
            parts = col_name.split('_')
            training_type = parts[0]
            
            match = name_pattern.match(parts[1])
            if not match:
                print(f"Warning: Skipping column '{col_name}' as it doesn't match the expected pattern.")
                continue
            
            labels_or_preds, cycle_number = match.groups()
            cycle_number = int(cycle_number)

        except (IndexError, AttributeError):
            print(f"Warning: Skipping column '{col_name}' due to parsing error.")
            continue
            
        # --- 2. Get normality statistics ---
        stats_dict = get_normality_stats(df[col_name])
        
        # --- 3. Combine metadata and stats ---
        row = {
            'training_type': training_type,
            'labels_or_preds': labels_or_preds,
            'cycle_number': cycle_number,
            **stats_dict  # Unpack the stats dictionary into the row
        }
        all_results.append(row)
        
    # --- 4. Create the final results DataFrame ---
    results_df = pd.DataFrame(all_results)
    return results_df

In [89]:
normality_df = analyze_distributions(labels_and_preds_df)
normality_df

Unnamed: 0,training_type,labels_or_preds,cycle_number,skewness,kurtosis,shapiro_stat,shapiro_p,jb_stat,jb_p
0,active,labels,1,-0.060358,-0.803503,0.967776,8.014238e-01,0.580105,0.748224
1,active,preds,1,0.462770,-1.738309,0.800923,2.801483e-03,2.159460,0.339687
2,active,labels,10,-0.266843,-0.823128,0.973667,3.758096e-03,6.510293,0.038575
3,active,preds,10,-0.398019,-1.477378,0.861344,5.529655e-11,18.529505,0.000095
4,active,labels,11,-0.251245,-0.821583,0.975012,2.940630e-03,6.900424,0.031739
...,...,...,...,...,...,...,...,...,...
59,standard,preds,7,0.154458,-0.785730,0.980333,9.783797e-02,3.451447,0.178044
60,standard,labels,8,0.330137,-0.296798,0.985384,1.868077e-01,2.858080,0.239539
61,standard,preds,8,0.281074,-0.880159,0.965748,2.521545e-03,5.895844,0.052449
62,standard,labels,9,-0.099319,-0.606375,0.983723,8.601145e-02,2.589865,0.273916


In [91]:
normality_df.to_csv('results/06_uncertainty_calibration/normality_results.csv', index=False)

In [92]:
def plot_normality_trends(results_df: pd.DataFrame):
    """
    Generates several plots to visualize normality metrics over training cycles.

    Args:
        results_df (pd.DataFrame): The DataFrame containing the normality analysis results.
    """
    # Sort the dataframe to ensure lines are plotted correctly
    results_df = results_df.sort_values(by=['training_type', 'labels_or_preds', 'cycle_number'])

    # --- Plot 1: Skewness over Cycles ---
    # This plot helps visualize if distributions become more or less symmetrical.
    # A value near 0 is symmetrical.
    fig_skew = px.line(
        results_df,
        x='cycle_number',
        y='skewness',
        color='training_type',
        facet_row='labels_or_preds',
        title='Skewness of Distributions Over Training Cycles',
        labels={'cycle_number': 'Training Cycle', 'skewness': 'Skewness'},
        markers=True
    )
    # Add a reference line for perfect symmetry
    fig_skew.add_hline(y=0, line_dash="dot", line_color="black", annotation_text="Symmetrical")
    fig_skew.show()

    # --- Plot 2: Kurtosis over Cycles ---
    # This plot shows the "tailedness" of the distributions.
    # A value near 0 is similar to a normal distribution.
    # Positive values mean heavier tails (more outliers).
    fig_kurt = px.line(
        results_df,
        x='cycle_number',
        y='kurtosis',
        color='training_type',
        facet_row='labels_or_preds',
        title='Kurtosis of Distributions Over Training Cycles',
        labels={'cycle_number': 'Training Cycle', 'kurtosis': 'Excess Kurtosis'},
        markers=True
    )
    # Add a reference line for normal kurtosis
    fig_kurt.add_hline(y=0, line_dash="dot", line_color="black", annotation_text="Normal Tails")
    fig_kurt.show()

    # --- Plot 3: Shapiro-Wilk P-value over Cycles ---
    # This is the most direct test for normality.
    # If p-value < 0.05, the distribution is likely not normal.
    fig_pval = px.line(
        results_df,
        x='cycle_number',
        y='shapiro_p',
        color='training_type',
        facet_row='labels_or_preds',
        title='Shapiro-Wilk P-value Over Training Cycles',
        labels={'cycle_number': 'Training Cycle', 'shapiro_p': 'P-value'},
        markers=True
    )
    # Add the significance level alpha=0.05 as a reference line
    fig_pval.add_hline(y=0.05, line_dash="dash", line_color="red", annotation_text="α = 0.05")
    fig_pval.show()

In [93]:
plot_normality_trends(normality_df)

In [94]:
all_df = pd.read_csv('avrpikC_full.csv')
all_df

Unnamed: 0,aa_sequence,enrichment_score
0,GLKQKIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRDKIEV...,1.468796
1,GLKRIIVIKVAREGNNCRSKAMALVASTGGVDSVALVGDLRGKIEV...,1.415944
2,GLKRIIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRGKIEV...,1.389615
3,GLKQKIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRDKIEV...,1.359651
4,GLKQKIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRGKIEV...,1.343857
...,...,...
3955,GLKQKIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRDKIEV...,-1.041749
3956,GLKQKIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRDKIEA...,-1.041749
3957,GLKQKIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRDKIEV...,-1.057543
3958,GLKQKIVIKVAMEGNNCRSKAMALVASTGGVDSVALVGDLRDKTEV...,-1.057543


In [96]:
fig = px.histogram(
        data_frame=all_df,
        x='enrichment_score',                                                                                         
        histnorm='probability density',                
        title='Enrichment scores'
    )

fig.show()

In [97]:
all_df.enrichment_score.mean()

np.float64(0.09718174617570516)