In [1]:
from pathlib import Path
from malid import config, logger
from malid.train import train_metamodel
from malid.datamodels import (
    GeneLocus,
    TargetObsColumnEnum,
)
import pandas as pd
from IPython.display import display, Markdown
from typing import List

In [2]:
# Fold names used to locate metamodel results. Both are embedded in the results
# path built in choose() ("<base>_applied_to_<metamodel>_model"), and
# base_model_train_fold_name is also passed to get_metamodel_flavors().
base_model_train_fold_name = "train_smaller"
metamodel_fold_label_train = "validation"

In [3]:
# Model names to keep in each results table: choose() restricts the rows of
# every loaded performance TSV to the names listed here.
models_of_interest = [
    "rf_multiclass",
    "xgboost",
    "lasso_multiclass",
    "lasso_cv",
    "ridge_cv",
    "elasticnet_cv",
    "linearsvm_ovr",
]
# Echo the list as the cell output.
models_of_interest

['rf_multiclass',
 'xgboost',
 'lasso_multiclass',
 'lasso_cv',
 'ridge_cv',
 'elasticnet_cv',
 'linearsvm_ovr']

In [4]:
def choose(gene_locus: GeneLocus, classification_targets: List[TargetObsColumnEnum]):
    """Display metamodel test-set performance tables for one gene locus.

    For each classification target, the available metamodel flavors are looked
    up via ``train_metamodel.get_metamodel_flavors``. For each flavor, the
    ``compare_model_scores.test_set_performance.tsv`` file under
    ``config.paths.second_stage_blending_metamodel_output_dir`` is loaded,
    restricted to the rows named in the module-level ``models_of_interest``,
    sorted by "ROC-AUC (weighted OvO) per fold" (descending), reduced to a fixed
    set of summary columns, and displayed beneath a Markdown heading.

    The function is best-effort: a target whose flavors cannot be generated, or
    a flavor whose results cannot be loaded or summarized, is logged as a
    warning and skipped rather than raising.

    Relies on the module-level names ``base_model_train_fold_name``,
    ``metamodel_fold_label_train`` and ``models_of_interest``.

    Args:
        gene_locus: Gene locus whose results should be shown.
        classification_targets: Target columns to iterate over.

    Returns:
        None. Output is produced through ``display`` and ``logger.warning``.
    """
    # Columns shown in every table. The accuracy/MCC pair in the middle depends
    # on whether the file reports the "with abstention" variants.
    leading_columns = [
        "ROC-AUC (weighted OvO) per fold",
        "au-PRC (weighted OvO) per fold",
    ]
    trailing_columns = [
        "abstention_rate",
        "sample_size including abstentions",
        "n_abstentions",
        "missing_classes",
    ]

    for target_obs_column in classification_targets:
        try:
            # Flavors are looked up with the first fold ID only.
            # NOTE(review): presumably the set of flavors does not vary by fold - confirm.
            flavors = train_metamodel.get_metamodel_flavors(
                gene_locus=gene_locus,
                target_obs_column=target_obs_column,
                fold_id=config.all_fold_ids[0],
                base_model_train_fold_name=base_model_train_fold_name,
            )
        except Exception as err:
            logger.warning(
                f"Failed to generate metamodel flavors for {gene_locus}, {target_obs_column}: {err}"
            )
            continue

        for metamodel_flavor, _metamodel_config in flavors.items():
            _output_suffix = (
                Path(gene_locus.name)
                / target_obs_column.name
                / metamodel_flavor
                / f"{base_model_train_fold_name}_applied_to_{metamodel_fold_label_train}_model"
            )
            results_output_prefix = (
                config.paths.second_stage_blending_metamodel_output_dir / _output_suffix
            )

            # The heading is shown even if the table below cannot be loaded.
            display(
                Markdown(
                    f"# {gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor}"
                )
            )

            try:
                ## All results in a table
                scores = pd.read_csv(
                    f"{results_output_prefix}.compare_model_scores.test_set_performance.tsv",
                    sep="\t",
                    index_col=0,
                )
                scores["missing_classes"] = scores["missing_classes"].map(
                    {False: "✅ no", True: "❗ Missing classes!"}
                )
                # Keep only the models of interest, best ROC-AUC first.
                scores = scores.loc[
                    scores.index.intersection(models_of_interest)
                ].sort_values("ROC-AUC (weighted OvO) per fold", ascending=False)

                if "Accuracy global with abstention" in scores.columns:
                    metric_columns = [
                        "Accuracy global with abstention",
                        "MCC global with abstention",
                    ]
                else:
                    metric_columns = ["Accuracy global", "MCC global"]

                display(scores[leading_columns + metric_columns + trailing_columns])
            except FileNotFoundError as err:
                # A missing results file means this flavor has not been run yet.
                logger.warning(
                    f"{gene_locus}, {target_obs_column} flavor '{metamodel_flavor}': not yet run: {err}"
                )
            except Exception as err:
                # Any other failure (malformed file, missing column, ...) is a real
                # problem with existing results; report it as such instead of
                # mislabeling it as "not yet run". Still skip so the loop continues.
                logger.warning(
                    f"{gene_locus}, {target_obs_column} flavor '{metamodel_flavor}': "
                    f"failed to load or summarize results: {err}"
                )

In [5]:
# Show the performance tables for every gene locus in config.gene_loci_used,
# across all of config.classification_targets.
for single_gene_locus in config.gene_loci_used:
    choose(single_gene_locus, config.classification_targets)

# GeneLocus.BCR, TargetObsColumnEnum.disease, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.964 +/- 0.005 (in 3 folds),0.963 +/- 0.006 (in 3 folds),0.835,0.762,0.022917,480,11,✅ no
lasso_multiclass,0.960 +/- 0.006 (in 3 folds),0.959 +/- 0.008 (in 3 folds),0.827,0.753,0.022917,480,11,✅ no
rf_multiclass,0.959 +/- 0.009 (in 3 folds),0.954 +/- 0.014 (in 3 folds),0.831,0.755,0.022917,480,11,✅ no
elasticnet_cv,0.957 +/- 0.008 (in 3 folds),0.958 +/- 0.009 (in 3 folds),0.802,0.713,0.022917,480,11,✅ no
xgboost,0.953 +/- 0.005 (in 3 folds),0.951 +/- 0.009 (in 3 folds),0.812,0.728,0.022917,480,11,✅ no
lasso_cv,0.949 +/- 0.005 (in 3 folds),0.954 +/- 0.007 (in 3 folds),0.8,0.709,0.022917,480,11,✅ no
ridge_cv,0.948 +/- 0.005 (in 3 folds),0.951 +/- 0.006 (in 3 folds),0.802,0.713,0.022917,480,11,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease, metamodel flavor isotype_counts_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.684 +/- 0.020 (in 3 folds),0.685 +/- 0.026 (in 3 folds),0.494,0.181,0.0,480,0,❗ Missing classes!
lasso_cv,0.676 +/- 0.023 (in 3 folds),0.681 +/- 0.032 (in 3 folds),0.508,0.215,0.0,480,0,✅ no
ridge_cv,0.675 +/- 0.022 (in 3 folds),0.672 +/- 0.018 (in 3 folds),0.494,0.183,0.0,480,0,❗ Missing classes!
linearsvm_ovr,0.674 +/- 0.020 (in 3 folds),0.666 +/- 0.020 (in 3 folds),0.477,0.194,0.0,480,0,✅ no
rf_multiclass,0.673 +/- 0.030 (in 3 folds),0.648 +/- 0.029 (in 3 folds),0.515,0.259,0.0,480,0,✅ no
lasso_multiclass,0.668 +/- 0.020 (in 3 folds),0.655 +/- 0.014 (in 3 folds),0.471,0.223,0.0,480,0,✅ no
xgboost,0.644 +/- 0.020 (in 3 folds),0.629 +/- 0.019 (in 3 folds),0.483,0.218,0.0,480,0,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.959 +/- 0.009 (in 3 folds),0.957 +/- 0.010 (in 3 folds),0.821,0.744,0.014286,420,6,✅ no
lasso_multiclass,0.959 +/- 0.008 (in 3 folds),0.957 +/- 0.011 (in 3 folds),0.793,0.707,0.014286,420,6,✅ no
lasso_cv,0.956 +/- 0.009 (in 3 folds),0.954 +/- 0.013 (in 3 folds),0.833,0.756,0.014286,420,6,✅ no
elasticnet_cv,0.955 +/- 0.012 (in 3 folds),0.956 +/- 0.012 (in 3 folds),0.831,0.753,0.014286,420,6,✅ no
rf_multiclass,0.953 +/- 0.010 (in 3 folds),0.950 +/- 0.013 (in 3 folds),0.824,0.743,0.014286,420,6,✅ no
ridge_cv,0.949 +/- 0.011 (in 3 folds),0.951 +/- 0.013 (in 3 folds),0.833,0.758,0.014286,420,6,✅ no
xgboost,0.949 +/- 0.007 (in 3 folds),0.948 +/- 0.014 (in 3 folds),0.817,0.733,0.014286,420,6,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.965 +/- 0.005 (in 3 folds),0.963 +/- 0.009 (in 3 folds),0.798,0.704,0.014286,420,6,✅ no
elasticnet_cv,0.965 +/- 0.004 (in 3 folds),0.966 +/- 0.007 (in 3 folds),0.814,0.731,0.014286,420,6,✅ no
xgboost,0.959 +/- 0.013 (in 3 folds),0.957 +/- 0.016 (in 3 folds),0.824,0.744,0.014286,420,6,✅ no
lasso_cv,0.956 +/- 0.006 (in 3 folds),0.958 +/- 0.010 (in 3 folds),0.821,0.74,0.014286,420,6,✅ no
ridge_cv,0.955 +/- 0.006 (in 3 folds),0.956 +/- 0.007 (in 3 folds),0.819,0.737,0.014286,420,6,✅ no
lasso_multiclass,0.953 +/- 0.013 (in 3 folds),0.955 +/- 0.010 (in 3 folds),0.807,0.732,0.014286,420,6,✅ no
linearsvm_ovr,0.912 +/- 0.007 (in 3 folds),0.925 +/- 0.011 (in 3 folds),0.783,0.688,0.014286,420,6,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.929 +/- 0.014 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.79,0.693,0.014286,420,6,✅ no
xgboost,0.920 +/- 0.017 (in 3 folds),0.918 +/- 0.024 (in 3 folds),0.762,0.652,0.014286,420,6,✅ no
lasso_multiclass,0.879 +/- 0.036 (in 3 folds),0.893 +/- 0.034 (in 3 folds),0.719,0.612,0.014286,420,6,✅ no
linearsvm_ovr,0.876 +/- 0.035 (in 3 folds),0.892 +/- 0.035 (in 3 folds),0.738,0.626,0.014286,420,6,✅ no
lasso_cv,0.864 +/- 0.032 (in 3 folds),0.894 +/- 0.028 (in 3 folds),0.762,0.649,0.014286,420,6,✅ no
elasticnet_cv,0.862 +/- 0.033 (in 3 folds),0.895 +/- 0.026 (in 3 folds),0.76,0.646,0.014286,420,6,✅ no
ridge_cv,0.861 +/- 0.037 (in 3 folds),0.893 +/- 0.025 (in 3 folds),0.738,0.614,0.014286,420,6,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.814 +/- 0.033 (in 3 folds),0.806 +/- 0.029 (in 3 folds),0.593,0.404,0.0,420,0,✅ no
ridge_cv,0.812 +/- 0.030 (in 3 folds),0.798 +/- 0.028 (in 3 folds),0.569,0.349,0.0,420,0,✅ no
lasso_multiclass,0.811 +/- 0.024 (in 3 folds),0.798 +/- 0.018 (in 3 folds),0.579,0.451,0.0,420,0,✅ no
elasticnet_cv,0.809 +/- 0.035 (in 3 folds),0.797 +/- 0.032 (in 3 folds),0.571,0.362,0.0,420,0,✅ no
linearsvm_ovr,0.809 +/- 0.028 (in 3 folds),0.798 +/- 0.023 (in 3 folds),0.583,0.438,0.0,420,0,✅ no
xgboost,0.803 +/- 0.042 (in 3 folds),0.806 +/- 0.033 (in 3 folds),0.574,0.37,0.0,420,0,✅ no
lasso_cv,0.797 +/- 0.041 (in 3 folds),0.784 +/- 0.029 (in 3 folds),0.557,0.342,0.0,420,0,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.670 +/- 0.025 (in 3 folds),0.660 +/- 0.022 (in 3 folds),0.419,0.187,0.0,420,0,✅ no
xgboost,0.667 +/- 0.015 (in 3 folds),0.654 +/- 0.006 (in 3 folds),0.429,0.14,0.0,420,0,✅ no
lasso_cv,0.638 +/- 0.020 (in 3 folds),0.649 +/- 0.028 (in 3 folds),0.476,0.127,0.0,420,0,❗ Missing classes!
elasticnet_cv,0.628 +/- 0.020 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.469,0.101,0.0,420,0,❗ Missing classes!
linearsvm_ovr,0.628 +/- 0.020 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.407,0.112,0.0,420,0,❗ Missing classes!
ridge_cv,0.628 +/- 0.020 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.46,0.084,0.0,420,0,❗ Missing classes!
lasso_multiclass,0.627 +/- 0.029 (in 3 folds),0.636 +/- 0.031 (in 3 folds),0.283,0.102,0.0,420,0,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.574 +/- 0.022 (in 3 folds),0.550 +/- 0.011 (in 3 folds),0.357,0.152,0.0,420,0,❗ Missing classes!
lasso_multiclass,0.574 +/- 0.022 (in 3 folds),0.550 +/- 0.011 (in 3 folds),0.357,0.152,0.0,420,0,❗ Missing classes!
linearsvm_ovr,0.563 +/- 0.025 (in 3 folds),0.541 +/- 0.016 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
xgboost,0.563 +/- 0.025 (in 3 folds),0.541 +/- 0.016 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
lasso_cv,0.543 +/- 0.040 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
elasticnet_cv,0.543 +/- 0.040 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
ridge_cv,0.543 +/- 0.040 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.751 +/- 0.014 (in 3 folds),0.724 +/- 0.015 (in 3 folds),0.526,0.36,0.0,420,0,✅ no
rf_multiclass,0.750 +/- 0.015 (in 3 folds),0.724 +/- 0.014 (in 3 folds),0.526,0.36,0.0,420,0,✅ no
xgboost,0.748 +/- 0.022 (in 3 folds),0.725 +/- 0.017 (in 3 folds),0.595,0.421,0.0,420,0,✅ no
ridge_cv,0.748 +/- 0.017 (in 3 folds),0.724 +/- 0.014 (in 3 folds),0.557,0.334,0.0,420,0,✅ no
elasticnet_cv,0.747 +/- 0.022 (in 3 folds),0.724 +/- 0.017 (in 3 folds),0.557,0.334,0.0,420,0,✅ no
linearsvm_ovr,0.741 +/- 0.027 (in 3 folds),0.721 +/- 0.018 (in 3 folds),0.595,0.441,0.0,420,0,❗ Missing classes!
lasso_cv,0.739 +/- 0.029 (in 3 folds),0.722 +/- 0.026 (in 3 folds),0.552,0.327,0.0,420,0,❗ Missing classes!


# GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.746 +/- 0.059 (in 3 folds),0.761 +/- 0.030 (in 3 folds),0.602,0.311,0.04712,191,9,❗ Missing classes!
ridge_cv,0.736 +/- 0.043 (in 3 folds),0.745 +/- 0.015 (in 3 folds),0.686,0.436,0.04712,191,9,❗ Missing classes!
linearsvm_ovr,0.721 +/- 0.043 (in 3 folds),0.722 +/- 0.031 (in 3 folds),0.513,0.301,0.04712,191,9,✅ no
rf_multiclass,0.716 +/- 0.081 (in 3 folds),0.734 +/- 0.076 (in 3 folds),0.67,0.446,0.04712,191,9,✅ no
xgboost,0.695 +/- 0.070 (in 3 folds),0.721 +/- 0.056 (in 3 folds),0.607,0.369,0.04712,191,9,✅ no
lasso_cv,0.690 +/- 0.022 (in 3 folds),0.715 +/- 0.020 (in 3 folds),0.681,0.435,0.04712,191,9,❗ Missing classes!
lasso_multiclass,0.686 +/- 0.086 (in 3 folds),0.711 +/- 0.030 (in 3 folds),0.487,0.331,0.04712,191,9,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.664 +/- 0.015 (in 3 folds),0.697 +/- 0.016 (in 3 folds),0.293,0.181,0.172775,191,33,✅ no
lasso_multiclass,0.635 +/- 0.039 (in 3 folds),0.678 +/- 0.015 (in 3 folds),0.236,0.125,0.172775,191,33,✅ no
lasso_cv,0.631 +/- 0.031 (in 3 folds),0.663 +/- 0.015 (in 3 folds),0.257,0.131,0.172775,191,33,❗ Missing classes!
elasticnet_cv,0.630 +/- 0.039 (in 3 folds),0.667 +/- 0.020 (in 3 folds),0.246,0.114,0.172775,191,33,❗ Missing classes!
linearsvm_ovr,0.615 +/- 0.029 (in 3 folds),0.674 +/- 0.013 (in 3 folds),0.246,0.136,0.172775,191,33,✅ no
ridge_cv,0.607 +/- 0.095 (in 3 folds),0.622 +/- 0.109 (in 3 folds),0.246,0.119,0.172775,191,33,❗ Missing classes!
xgboost,0.600 +/- 0.035 (in 3 folds),0.664 +/- 0.035 (in 3 folds),0.23,0.113,0.172775,191,33,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.657 +/- 0.051 (in 3 folds),0.770 +/- 0.084 (in 3 folds),0.534,0.175,0.13089,191,25,✅ no
lasso_multiclass,0.654 +/- 0.044 (in 3 folds),0.765 +/- 0.083 (in 3 folds),0.545,0.194,0.13089,191,25,✅ no
rf_multiclass,0.616 +/- 0.117 (in 3 folds),0.722 +/- 0.141 (in 3 folds),0.55,0.121,0.13089,191,25,✅ no
xgboost,0.538 +/- 0.128 (in 3 folds),0.632 +/- 0.094 (in 3 folds),0.529,0.103,0.13089,191,25,✅ no
elasticnet_cv,0.514 +/- 0.024 (in 3 folds),0.640 +/- 0.088 (in 3 folds),0.534,-0.051,0.13089,191,25,❗ Missing classes!
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.534,-0.051,0.13089,191,25,❗ Missing classes!
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.534,-0.051,0.13089,191,25,❗ Missing classes!


# GeneLocus.BCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.951 +/- 0.063 (in 2 folds),0.906 +/- 0.100 (in 2 folds),0.672,0.41,0.184,125,23,✅ no
lasso_multiclass,0.948 +/- 0.061 (in 2 folds),0.891 +/- 0.081 (in 2 folds),0.68,0.419,0.184,125,23,✅ no
rf_multiclass,0.926 +/- 0.102 (in 2 folds),0.905 +/- 0.101 (in 2 folds),0.768,0.475,0.184,125,23,✅ no
xgboost,0.922 +/- 0.095 (in 2 folds),0.883 +/- 0.068 (in 2 folds),0.768,0.493,0.184,125,23,✅ no
lasso_cv,0.745 +/- 0.347 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.704,0.216,0.184,125,23,✅ no
elasticnet_cv,0.745 +/- 0.347 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.664,-0.006,0.184,125,23,❗ Missing classes!
ridge_cv,0.500 +/- 0.000 (in 2 folds),0.180 +/- 0.057 (in 2 folds),0.664,-0.006,0.184,125,23,❗ Missing classes!


# GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.560 +/- 0.101 (in 3 folds),0.565 +/- 0.138 (in 3 folds),0.482,-0.013,0.026178,191,5,✅ no
linearsvm_ovr,0.514 +/- 0.049 (in 3 folds),0.525 +/- 0.120 (in 3 folds),0.492,0.012,0.026178,191,5,✅ no
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.476,-0.031,0.026178,191,5,✅ no
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.476,-0.031,0.026178,191,5,✅ no
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.476,-0.031,0.026178,191,5,✅ no
lasso_multiclass,0.496 +/- 0.059 (in 3 folds),0.514 +/- 0.133 (in 3 folds),0.492,0.011,0.026178,191,5,✅ no
xgboost,0.494 +/- 0.107 (in 3 folds),0.519 +/- 0.153 (in 3 folds),0.466,-0.044,0.026178,191,5,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.covid_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.996 +/- 0.006 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.94,0.824,0.014085,284,4,✅ no
lasso_cv,0.996 +/- 0.005 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.944,0.835,0.014085,284,4,✅ no
linearsvm_ovr,0.995 +/- 0.006 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.951,0.866,0.014085,284,4,✅ no
lasso_multiclass,0.994 +/- 0.008 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.951,0.866,0.014085,284,4,✅ no
ridge_cv,0.993 +/- 0.009 (in 3 folds),0.998 +/- 0.003 (in 3 folds),0.94,0.824,0.014085,284,4,✅ no
xgboost,0.992 +/- 0.007 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.944,0.837,0.014085,284,4,✅ no
rf_multiclass,0.991 +/- 0.009 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.944,0.836,0.014085,284,4,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.hiv_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.987 +/- 0.007 (in 3 folds),0.994 +/- 0.003 (in 3 folds),0.928,0.832,0.018809,319,6,✅ no
linearsvm_ovr,0.987 +/- 0.005 (in 3 folds),0.994 +/- 0.002 (in 3 folds),0.934,0.853,0.018809,319,6,✅ no
ridge_cv,0.985 +/- 0.009 (in 3 folds),0.993 +/- 0.004 (in 3 folds),0.931,0.84,0.018809,319,6,✅ no
lasso_multiclass,0.985 +/- 0.006 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.931,0.847,0.018809,319,6,✅ no
lasso_cv,0.983 +/- 0.004 (in 3 folds),0.992 +/- 0.002 (in 3 folds),0.934,0.847,0.018809,319,6,✅ no
rf_multiclass,0.982 +/- 0.010 (in 3 folds),0.992 +/- 0.005 (in 3 folds),0.925,0.83,0.018809,319,6,✅ no
xgboost,0.969 +/- 0.016 (in 3 folds),0.978 +/- 0.015 (in 3 folds),0.912,0.804,0.018809,319,6,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.lupus_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.928 +/- 0.005 (in 3 folds),0.867 +/- 0.023 (in 3 folds),0.846,0.64,0.021944,319,7,✅ no
lasso_cv,0.925 +/- 0.017 (in 3 folds),0.876 +/- 0.009 (in 3 folds),0.859,0.666,0.021944,319,7,✅ no
linearsvm_ovr,0.925 +/- 0.017 (in 3 folds),0.877 +/- 0.009 (in 3 folds),0.837,0.64,0.021944,319,7,✅ no
elasticnet_cv,0.924 +/- 0.023 (in 3 folds),0.877 +/- 0.009 (in 3 folds),0.856,0.657,0.021944,319,7,✅ no
lasso_multiclass,0.923 +/- 0.017 (in 3 folds),0.873 +/- 0.011 (in 3 folds),0.831,0.626,0.021944,319,7,✅ no
xgboost,0.916 +/- 0.012 (in 3 folds),0.841 +/- 0.045 (in 3 folds),0.837,0.622,0.021944,319,7,✅ no
ridge_cv,0.910 +/- 0.023 (in 3 folds),0.866 +/- 0.007 (in 3 folds),0.843,0.625,0.021944,319,7,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.956 +/- 0.001 (in 3 folds),0.935 +/- 0.002 (in 3 folds),0.783,0.677,0.002415,414,1,✅ no
elasticnet_cv,0.952 +/- 0.001 (in 3 folds),0.936 +/- 0.003 (in 3 folds),0.795,0.697,0.002415,414,1,✅ no
lasso_multiclass,0.949 +/- 0.008 (in 3 folds),0.942 +/- 0.009 (in 3 folds),0.826,0.755,0.002415,414,1,✅ no
lasso_cv,0.947 +/- 0.008 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.771,0.659,0.002415,414,1,✅ no
rf_multiclass,0.947 +/- 0.006 (in 3 folds),0.939 +/- 0.007 (in 3 folds),0.773,0.665,0.002415,414,1,✅ no
xgboost,0.944 +/- 0.009 (in 3 folds),0.940 +/- 0.010 (in 3 folds),0.773,0.667,0.002415,414,1,✅ no
linearsvm_ovr,0.944 +/- 0.001 (in 3 folds),0.941 +/- 0.005 (in 3 folds),0.816,0.736,0.002415,414,1,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.956 +/- 0.003 (in 3 folds),0.945 +/- 0.006 (in 3 folds),0.804,0.713,0.005587,358,2,✅ no
elasticnet_cv,0.955 +/- 0.003 (in 3 folds),0.939 +/- 0.004 (in 3 folds),0.799,0.703,0.005587,358,2,✅ no
ridge_cv,0.954 +/- 0.005 (in 3 folds),0.937 +/- 0.005 (in 3 folds),0.799,0.704,0.005587,358,2,✅ no
lasso_multiclass,0.953 +/- 0.003 (in 3 folds),0.945 +/- 0.004 (in 3 folds),0.799,0.717,0.005587,358,2,✅ no
linearsvm_ovr,0.948 +/- 0.004 (in 3 folds),0.947 +/- 0.007 (in 3 folds),0.782,0.687,0.005587,358,2,✅ no
rf_multiclass,0.945 +/- 0.003 (in 3 folds),0.942 +/- 0.004 (in 3 folds),0.799,0.706,0.005587,358,2,✅ no
xgboost,0.942 +/- 0.002 (in 3 folds),0.940 +/- 0.001 (in 3 folds),0.774,0.67,0.005587,358,2,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.958 +/- 0.002 (in 3 folds),0.943 +/- 0.002 (in 3 folds),0.818,0.737,0.005587,358,2,✅ no
ridge_cv,0.957 +/- 0.007 (in 3 folds),0.943 +/- 0.008 (in 3 folds),0.802,0.71,0.005587,358,2,✅ no
lasso_cv,0.955 +/- 0.004 (in 3 folds),0.941 +/- 0.007 (in 3 folds),0.793,0.697,0.005587,358,2,✅ no
rf_multiclass,0.953 +/- 0.009 (in 3 folds),0.946 +/- 0.009 (in 3 folds),0.802,0.71,0.005587,358,2,✅ no
xgboost,0.944 +/- 0.002 (in 3 folds),0.944 +/- 0.007 (in 3 folds),0.774,0.667,0.005587,358,2,✅ no
lasso_multiclass,0.930 +/- 0.024 (in 3 folds),0.921 +/- 0.024 (in 3 folds),0.804,0.727,0.005587,358,2,✅ no
linearsvm_ovr,0.888 +/- 0.031 (in 3 folds),0.890 +/- 0.018 (in 3 folds),0.751,0.638,0.005587,358,2,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.922 +/- 0.017 (in 3 folds),0.916 +/- 0.026 (in 3 folds),0.726,0.593,0.005587,358,2,✅ no
xgboost,0.902 +/- 0.003 (in 3 folds),0.902 +/- 0.006 (in 3 folds),0.721,0.584,0.005587,358,2,✅ no
lasso_multiclass,0.870 +/- 0.034 (in 3 folds),0.863 +/- 0.044 (in 3 folds),0.684,0.551,0.005587,358,2,✅ no
linearsvm_ovr,0.859 +/- 0.024 (in 3 folds),0.860 +/- 0.029 (in 3 folds),0.687,0.552,0.005587,358,2,✅ no
ridge_cv,0.835 +/- 0.014 (in 3 folds),0.840 +/- 0.032 (in 3 folds),0.634,0.459,0.005587,358,2,✅ no
lasso_cv,0.832 +/- 0.010 (in 3 folds),0.844 +/- 0.033 (in 3 folds),0.687,0.541,0.005587,358,2,✅ no
elasticnet_cv,0.823 +/- 0.026 (in 3 folds),0.826 +/- 0.060 (in 3 folds),0.69,0.543,0.005587,358,2,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.858 +/- 0.031 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.626,0.429,0.0,358,0,✅ no
elasticnet_cv,0.856 +/- 0.033 (in 3 folds),0.851 +/- 0.023 (in 3 folds),0.693,0.552,0.0,358,0,✅ no
lasso_multiclass,0.856 +/- 0.026 (in 3 folds),0.850 +/- 0.012 (in 3 folds),0.651,0.525,0.0,358,0,✅ no
rf_multiclass,0.853 +/- 0.035 (in 3 folds),0.843 +/- 0.037 (in 3 folds),0.67,0.508,0.0,358,0,✅ no
linearsvm_ovr,0.853 +/- 0.023 (in 3 folds),0.848 +/- 0.012 (in 3 folds),0.656,0.522,0.0,358,0,✅ no
lasso_cv,0.845 +/- 0.032 (in 3 folds),0.843 +/- 0.022 (in 3 folds),0.654,0.485,0.0,358,0,✅ no
xgboost,0.843 +/- 0.049 (in 3 folds),0.848 +/- 0.041 (in 3 folds),0.662,0.496,0.0,358,0,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.699 +/- 0.040 (in 3 folds),0.682 +/- 0.040 (in 3 folds),0.464,0.251,0.0,358,0,✅ no
xgboost,0.697 +/- 0.032 (in 3 folds),0.691 +/- 0.030 (in 3 folds),0.466,0.199,0.0,358,0,✅ no
lasso_multiclass,0.682 +/- 0.067 (in 3 folds),0.687 +/- 0.059 (in 3 folds),0.338,0.193,0.0,358,0,✅ no
linearsvm_ovr,0.663 +/- 0.028 (in 3 folds),0.678 +/- 0.025 (in 3 folds),0.441,0.144,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.659 +/- 0.007 (in 3 folds),0.679 +/- 0.021 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
lasso_cv,0.647 +/- 0.044 (in 3 folds),0.671 +/- 0.049 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
ridge_cv,0.640 +/- 0.039 (in 3 folds),0.659 +/- 0.043 (in 3 folds),0.48,0.133,0.0,358,0,❗ Missing classes!


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.579 +/- 0.023 (in 3 folds),0.547 +/- 0.013 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
linearsvm_ovr,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.397,0.089,0.0,358,0,❗ Missing classes!
xgboost,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.561 +/- 0.030 (in 3 folds),0.540 +/- 0.016 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
ridge_cv,0.530 +/- 0.052 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.792 +/- 0.029 (in 3 folds),0.770 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
xgboost,0.790 +/- 0.032 (in 3 folds),0.769 +/- 0.021 (in 3 folds),0.665,0.504,0.0,358,0,✅ no
rf_multiclass,0.785 +/- 0.017 (in 3 folds),0.766 +/- 0.014 (in 3 folds),0.561,0.414,0.0,358,0,✅ no
elasticnet_cv,0.780 +/- 0.025 (in 3 folds),0.767 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
linearsvm_ovr,0.775 +/- 0.023 (in 3 folds),0.761 +/- 0.014 (in 3 folds),0.679,0.533,0.0,358,0,❗ Missing classes!
lasso_cv,0.771 +/- 0.055 (in 3 folds),0.750 +/- 0.053 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.759 +/- 0.023 (in 3 folds),0.749 +/- 0.016 (in 3 folds),0.556,0.406,0.0,358,0,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.715 +/- 0.041 (in 3 folds),0.734 +/- 0.017 (in 3 folds),0.745,0.562,0.006061,165,1,❗ Missing classes!
elasticnet_cv,0.710 +/- 0.014 (in 3 folds),0.739 +/- 0.033 (in 3 folds),0.739,0.545,0.006061,165,1,❗ Missing classes!
lasso_cv,0.699 +/- 0.017 (in 3 folds),0.727 +/- 0.041 (in 3 folds),0.727,0.513,0.006061,165,1,❗ Missing classes!
lasso_multiclass,0.694 +/- 0.033 (in 3 folds),0.728 +/- 0.017 (in 3 folds),0.461,0.276,0.006061,165,1,✅ no
linearsvm_ovr,0.687 +/- 0.013 (in 3 folds),0.726 +/- 0.012 (in 3 folds),0.503,0.253,0.006061,165,1,✅ no
rf_multiclass,0.668 +/- 0.013 (in 3 folds),0.700 +/- 0.014 (in 3 folds),0.655,0.39,0.006061,165,1,❗ Missing classes!
xgboost,0.630 +/- 0.034 (in 3 folds),0.696 +/- 0.015 (in 3 folds),0.527,0.254,0.006061,165,1,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.704 +/- 0.060 (in 3 folds),0.736 +/- 0.043 (in 3 folds),0.491,0.408,0.0,165,0,❗ Missing classes!
xgboost,0.696 +/- 0.074 (in 3 folds),0.721 +/- 0.064 (in 3 folds),0.412,0.289,0.0,165,0,❗ Missing classes!
elasticnet_cv,0.694 +/- 0.043 (in 3 folds),0.731 +/- 0.033 (in 3 folds),0.442,0.335,0.0,165,0,❗ Missing classes!
lasso_multiclass,0.689 +/- 0.049 (in 3 folds),0.719 +/- 0.027 (in 3 folds),0.436,0.324,0.0,165,0,✅ no
ridge_cv,0.673 +/- 0.036 (in 3 folds),0.714 +/- 0.025 (in 3 folds),0.448,0.337,0.0,165,0,❗ Missing classes!
rf_multiclass,0.659 +/- 0.030 (in 3 folds),0.704 +/- 0.025 (in 3 folds),0.424,0.305,0.0,165,0,❗ Missing classes!
linearsvm_ovr,0.654 +/- 0.030 (in 3 folds),0.689 +/- 0.017 (in 3 folds),0.37,0.237,0.0,165,0,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.777 +/- 0.049 (in 3 folds),0.896 +/- 0.009 (in 3 folds),0.685,0.403,0.018182,165,3,✅ no
linearsvm_ovr,0.755 +/- 0.055 (in 3 folds),0.886 +/- 0.009 (in 3 folds),0.673,0.371,0.018182,165,3,✅ no
xgboost,0.727 +/- 0.052 (in 3 folds),0.851 +/- 0.039 (in 3 folds),0.63,0.139,0.018182,165,3,✅ no
rf_multiclass,0.725 +/- 0.036 (in 3 folds),0.863 +/- 0.017 (in 3 folds),0.648,0.203,0.018182,165,3,✅ no
elasticnet_cv,0.687 +/- 0.168 (in 3 folds),0.817 +/- 0.130 (in 3 folds),0.685,0.268,0.018182,165,3,✅ no
lasso_cv,0.678 +/- 0.162 (in 3 folds),0.810 +/- 0.125 (in 3 folds),0.679,0.238,0.018182,165,3,✅ no
ridge_cv,0.678 +/- 0.157 (in 3 folds),0.813 +/- 0.126 (in 3 folds),0.648,0.16,0.018182,165,3,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.994 +/- 0.011 (in 3 folds),0.988 +/- 0.020 (in 3 folds),0.952,0.849,0.018182,165,3,✅ no
rf_multiclass,0.986 +/- 0.023 (in 3 folds),0.979 +/- 0.037 (in 3 folds),0.964,0.883,0.018182,165,3,✅ no
lasso_cv,0.984 +/- 0.028 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.952,0.84,0.018182,165,3,✅ no
lasso_multiclass,0.984 +/- 0.027 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.964,0.888,0.018182,165,3,✅ no
xgboost,0.982 +/- 0.029 (in 3 folds),0.977 +/- 0.031 (in 3 folds),0.964,0.883,0.018182,165,3,✅ no
elasticnet_cv,0.978 +/- 0.037 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.952,0.84,0.018182,165,3,✅ no
ridge_cv,0.978 +/- 0.037 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.958,0.862,0.018182,165,3,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.562 +/- 0.037 (in 3 folds),0.646 +/- 0.128 (in 3 folds),0.515,0.076,0.054545,165,9,✅ no
xgboost,0.516 +/- 0.075 (in 3 folds),0.603 +/- 0.129 (in 3 folds),0.497,0.037,0.054545,165,9,✅ no
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.424,-0.073,0.054545,165,9,✅ no
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.424,-0.073,0.054545,165,9,✅ no
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.424,-0.073,0.054545,165,9,✅ no
lasso_multiclass,0.487 +/- 0.053 (in 3 folds),0.545 +/- 0.063 (in 3 folds),0.479,0.002,0.054545,165,9,✅ no
linearsvm_ovr,0.482 +/- 0.043 (in 3 folds),0.553 +/- 0.050 (in 3 folds),0.491,0.026,0.054545,165,9,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.covid_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.992 +/- 0.006 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.944,0.851,0.0,252,0,✅ no
lasso_multiclass,0.992 +/- 0.005 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.944,0.851,0.0,252,0,✅ no
ridge_cv,0.992 +/- 0.004 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.913,0.742,0.0,252,0,✅ no
elasticnet_cv,0.992 +/- 0.003 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.933,0.804,0.0,252,0,✅ no
lasso_cv,0.988 +/- 0.009 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.937,0.816,0.0,252,0,✅ no
rf_multiclass,0.986 +/- 0.009 (in 3 folds),0.996 +/- 0.002 (in 3 folds),0.944,0.843,0.0,252,0,✅ no
xgboost,0.981 +/- 0.012 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.933,0.811,0.0,252,0,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.hiv_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.937 +/- 0.006 (in 3 folds),0.973 +/- 0.004 (in 3 folds),0.866,0.723,0.017123,292,5,✅ no
lasso_cv,0.933 +/- 0.011 (in 3 folds),0.972 +/- 0.006 (in 3 folds),0.808,0.563,0.017123,292,5,✅ no
lasso_multiclass,0.933 +/- 0.010 (in 3 folds),0.973 +/- 0.005 (in 3 folds),0.853,0.693,0.017123,292,5,✅ no
elasticnet_cv,0.929 +/- 0.017 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.798,0.54,0.017123,292,5,✅ no
rf_multiclass,0.925 +/- 0.031 (in 3 folds),0.966 +/- 0.016 (in 3 folds),0.818,0.605,0.017123,292,5,✅ no
ridge_cv,0.924 +/- 0.025 (in 3 folds),0.970 +/- 0.010 (in 3 folds),0.805,0.558,0.017123,292,5,✅ no
xgboost,0.922 +/- 0.032 (in 3 folds),0.957 +/- 0.029 (in 3 folds),0.836,0.645,0.017123,292,5,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.lupus_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.973 +/- 0.012 (in 3 folds),0.940 +/- 0.017 (in 3 folds),0.888,0.693,0.01938,258,5,✅ no
lasso_multiclass,0.972 +/- 0.011 (in 3 folds),0.939 +/- 0.016 (in 3 folds),0.888,0.731,0.01938,258,5,✅ no
lasso_cv,0.971 +/- 0.016 (in 3 folds),0.933 +/- 0.030 (in 3 folds),0.884,0.681,0.01938,258,5,✅ no
ridge_cv,0.971 +/- 0.014 (in 3 folds),0.932 +/- 0.030 (in 3 folds),0.872,0.643,0.01938,258,5,✅ no
linearsvm_ovr,0.971 +/- 0.012 (in 3 folds),0.940 +/- 0.015 (in 3 folds),0.88,0.712,0.01938,258,5,✅ no
rf_multiclass,0.959 +/- 0.023 (in 3 folds),0.916 +/- 0.035 (in 3 folds),0.895,0.723,0.01938,258,5,✅ no
xgboost,0.948 +/- 0.034 (in 3 folds),0.912 +/- 0.040 (in 3 folds),0.899,0.73,0.01938,258,5,✅ no


In [6]:
# Report every classification target for the combined gene loci (passed to
# `choose` as one composite value), but only when more than one locus is
# configured.
if len(config.gene_loci_used) > 1:
    choose(
        gene_locus=config.gene_loci_used,
        classification_targets=config.classification_targets,
    )

# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.983 +/- 0.005 (in 3 folds),0.980 +/- 0.006 (in 3 folds),0.879,0.826,0.016908,414,7,✅ no
elasticnet_cv,0.982 +/- 0.005 (in 3 folds),0.979 +/- 0.006 (in 3 folds),0.884,0.83,0.016908,414,7,✅ no
ridge_cv,0.982 +/- 0.005 (in 3 folds),0.976 +/- 0.008 (in 3 folds),0.877,0.819,0.016908,414,7,✅ no
rf_multiclass,0.981 +/- 0.013 (in 3 folds),0.976 +/- 0.016 (in 3 folds),0.886,0.833,0.016908,414,7,✅ no
linearsvm_ovr,0.980 +/- 0.003 (in 3 folds),0.977 +/- 0.005 (in 3 folds),0.884,0.832,0.016908,414,7,✅ no
lasso_cv,0.976 +/- 0.010 (in 3 folds),0.975 +/- 0.007 (in 3 folds),0.882,0.826,0.016908,414,7,✅ no
xgboost,0.973 +/- 0.008 (in 3 folds),0.971 +/- 0.008 (in 3 folds),0.874,0.817,0.016908,414,7,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease, metamodel flavor isotype_counts_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.707 +/- 0.008 (in 3 folds),0.713 +/- 0.017 (in 3 folds),0.543,0.261,0.0,414,0,❗ Missing classes!
lasso_multiclass,0.704 +/- 0.023 (in 3 folds),0.697 +/- 0.014 (in 3 folds),0.5,0.268,0.0,414,0,✅ no
rf_multiclass,0.702 +/- 0.019 (in 3 folds),0.684 +/- 0.017 (in 3 folds),0.548,0.302,0.0,414,0,✅ no
elasticnet_cv,0.702 +/- 0.003 (in 3 folds),0.703 +/- 0.017 (in 3 folds),0.539,0.252,0.0,414,0,❗ Missing classes!
linearsvm_ovr,0.695 +/- 0.020 (in 3 folds),0.686 +/- 0.015 (in 3 folds),0.502,0.234,0.0,414,0,✅ no
ridge_cv,0.691 +/- 0.022 (in 3 folds),0.683 +/- 0.023 (in 3 folds),0.522,0.217,0.0,414,0,❗ Missing classes!
xgboost,0.667 +/- 0.013 (in 3 folds),0.662 +/- 0.017 (in 3 folds),0.512,0.256,0.0,414,0,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.983 +/- 0.009 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.874,0.817,0.01676,358,6,✅ no
elasticnet_cv,0.982 +/- 0.005 (in 3 folds),0.981 +/- 0.004 (in 3 folds),0.891,0.841,0.01676,358,6,✅ no
lasso_multiclass,0.981 +/- 0.007 (in 3 folds),0.981 +/- 0.006 (in 3 folds),0.874,0.823,0.01676,358,6,✅ no
lasso_cv,0.980 +/- 0.006 (in 3 folds),0.980 +/- 0.007 (in 3 folds),0.858,0.791,0.01676,358,6,✅ no
ridge_cv,0.979 +/- 0.004 (in 3 folds),0.976 +/- 0.005 (in 3 folds),0.902,0.858,0.01676,358,6,✅ no
xgboost,0.973 +/- 0.008 (in 3 folds),0.973 +/- 0.009 (in 3 folds),0.874,0.817,0.01676,358,6,✅ no
linearsvm_ovr,0.966 +/- 0.011 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.863,0.802,0.01676,358,6,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.984 +/- 0.004 (in 3 folds),0.983 +/- 0.005 (in 3 folds),0.88,0.824,0.01676,358,6,✅ no
rf_multiclass,0.980 +/- 0.007 (in 3 folds),0.978 +/- 0.006 (in 3 folds),0.866,0.807,0.01676,358,6,✅ no
lasso_cv,0.978 +/- 0.007 (in 3 folds),0.974 +/- 0.011 (in 3 folds),0.835,0.759,0.01676,358,6,✅ no
xgboost,0.977 +/- 0.008 (in 3 folds),0.975 +/- 0.008 (in 3 folds),0.877,0.821,0.01676,358,6,✅ no
lasso_multiclass,0.975 +/- 0.008 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.866,0.809,0.01676,358,6,✅ no
ridge_cv,0.975 +/- 0.006 (in 3 folds),0.970 +/- 0.008 (in 3 folds),0.855,0.788,0.01676,358,6,✅ no
linearsvm_ovr,0.941 +/- 0.012 (in 3 folds),0.945 +/- 0.011 (in 3 folds),0.821,0.74,0.01676,358,6,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.969 +/- 0.016 (in 3 folds),0.963 +/- 0.020 (in 3 folds),0.841,0.766,0.01676,358,6,✅ no
xgboost,0.951 +/- 0.024 (in 3 folds),0.946 +/- 0.029 (in 3 folds),0.788,0.689,0.01676,358,6,✅ no
lasso_multiclass,0.912 +/- 0.035 (in 3 folds),0.915 +/- 0.042 (in 3 folds),0.735,0.627,0.01676,358,6,✅ no
lasso_cv,0.894 +/- 0.020 (in 3 folds),0.911 +/- 0.024 (in 3 folds),0.735,0.606,0.01676,358,6,✅ no
linearsvm_ovr,0.893 +/- 0.058 (in 3 folds),0.894 +/- 0.065 (in 3 folds),0.732,0.615,0.01676,358,6,✅ no
elasticnet_cv,0.893 +/- 0.028 (in 3 folds),0.913 +/- 0.029 (in 3 folds),0.749,0.628,0.01676,358,6,✅ no
ridge_cv,0.882 +/- 0.025 (in 3 folds),0.895 +/- 0.027 (in 3 folds),0.743,0.622,0.01676,358,6,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.858 +/- 0.031 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.626,0.429,0.0,358,0,✅ no
elasticnet_cv,0.856 +/- 0.033 (in 3 folds),0.851 +/- 0.023 (in 3 folds),0.693,0.552,0.0,358,0,✅ no
rf_multiclass,0.856 +/- 0.031 (in 3 folds),0.845 +/- 0.033 (in 3 folds),0.665,0.5,0.0,358,0,✅ no
linearsvm_ovr,0.853 +/- 0.023 (in 3 folds),0.848 +/- 0.012 (in 3 folds),0.656,0.522,0.0,358,0,✅ no
lasso_multiclass,0.851 +/- 0.031 (in 3 folds),0.845 +/- 0.021 (in 3 folds),0.642,0.521,0.0,358,0,✅ no
lasso_cv,0.845 +/- 0.032 (in 3 folds),0.843 +/- 0.022 (in 3 folds),0.654,0.485,0.0,358,0,✅ no
xgboost,0.843 +/- 0.049 (in 3 folds),0.848 +/- 0.041 (in 3 folds),0.662,0.496,0.0,358,0,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.704 +/- 0.036 (in 3 folds),0.686 +/- 0.036 (in 3 folds),0.466,0.25,0.0,358,0,✅ no
xgboost,0.697 +/- 0.032 (in 3 folds),0.691 +/- 0.030 (in 3 folds),0.466,0.199,0.0,358,0,✅ no
lasso_multiclass,0.681 +/- 0.067 (in 3 folds),0.687 +/- 0.059 (in 3 folds),0.338,0.193,0.0,358,0,✅ no
linearsvm_ovr,0.663 +/- 0.028 (in 3 folds),0.678 +/- 0.025 (in 3 folds),0.441,0.144,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.659 +/- 0.007 (in 3 folds),0.679 +/- 0.021 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
lasso_cv,0.647 +/- 0.044 (in 3 folds),0.671 +/- 0.049 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
ridge_cv,0.640 +/- 0.039 (in 3 folds),0.659 +/- 0.043 (in 3 folds),0.48,0.133,0.0,358,0,❗ Missing classes!


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.579 +/- 0.023 (in 3 folds),0.547 +/- 0.013 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
linearsvm_ovr,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.397,0.089,0.0,358,0,❗ Missing classes!
xgboost,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.561 +/- 0.030 (in 3 folds),0.540 +/- 0.016 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
ridge_cv,0.530 +/- 0.052 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.792 +/- 0.029 (in 3 folds),0.770 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
xgboost,0.790 +/- 0.032 (in 3 folds),0.769 +/- 0.021 (in 3 folds),0.665,0.504,0.0,358,0,✅ no
rf_multiclass,0.785 +/- 0.017 (in 3 folds),0.766 +/- 0.014 (in 3 folds),0.564,0.42,0.0,358,0,✅ no
elasticnet_cv,0.780 +/- 0.025 (in 3 folds),0.767 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
linearsvm_ovr,0.775 +/- 0.023 (in 3 folds),0.761 +/- 0.014 (in 3 folds),0.679,0.533,0.0,358,0,❗ Missing classes!
lasso_cv,0.771 +/- 0.055 (in 3 folds),0.750 +/- 0.053 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.759 +/- 0.023 (in 3 folds),0.749 +/- 0.016 (in 3 folds),0.64,0.483,0.0,358,0,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.752 +/- 0.028 (in 3 folds),0.745 +/- 0.029 (in 3 folds),0.515,0.305,0.042424,165,7,✅ no
ridge_cv,0.734 +/- 0.055 (in 3 folds),0.743 +/- 0.016 (in 3 folds),0.618,0.309,0.042424,165,7,❗ Missing classes!
rf_multiclass,0.732 +/- 0.064 (in 3 folds),0.754 +/- 0.057 (in 3 folds),0.661,0.405,0.042424,165,7,❗ Missing classes!
elasticnet_cv,0.727 +/- 0.080 (in 3 folds),0.738 +/- 0.030 (in 3 folds),0.6,0.311,0.042424,165,7,✅ no
lasso_multiclass,0.721 +/- 0.080 (in 3 folds),0.731 +/- 0.028 (in 3 folds),0.558,0.364,0.042424,165,7,✅ no
xgboost,0.712 +/- 0.062 (in 3 folds),0.735 +/- 0.068 (in 3 folds),0.582,0.35,0.042424,165,7,✅ no
lasso_cv,0.691 +/- 0.088 (in 3 folds),0.712 +/- 0.041 (in 3 folds),0.63,0.305,0.042424,165,7,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.696 +/- 0.026 (in 3 folds),0.734 +/- 0.036 (in 3 folds),0.37,0.269,0.175758,165,29,❗ Missing classes!
lasso_cv,0.687 +/- 0.041 (in 3 folds),0.736 +/- 0.024 (in 3 folds),0.321,0.212,0.175758,165,29,❗ Missing classes!
ridge_cv,0.683 +/- 0.039 (in 3 folds),0.734 +/- 0.026 (in 3 folds),0.309,0.187,0.175758,165,29,❗ Missing classes!
xgboost,0.681 +/- 0.052 (in 3 folds),0.728 +/- 0.046 (in 3 folds),0.358,0.258,0.175758,165,29,✅ no
elasticnet_cv,0.666 +/- 0.058 (in 3 folds),0.727 +/- 0.028 (in 3 folds),0.285,0.157,0.175758,165,29,❗ Missing classes!
linearsvm_ovr,0.662 +/- 0.020 (in 3 folds),0.707 +/- 0.012 (in 3 folds),0.321,0.214,0.175758,165,29,❗ Missing classes!
lasso_multiclass,0.659 +/- 0.016 (in 3 folds),0.702 +/- 0.015 (in 3 folds),0.279,0.173,0.175758,165,29,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.748 +/- 0.071 (in 3 folds),0.859 +/- 0.041 (in 3 folds),0.588,0.279,0.133333,165,22,✅ no
rf_multiclass,0.719 +/- 0.106 (in 3 folds),0.822 +/- 0.088 (in 3 folds),0.582,0.186,0.133333,165,22,✅ no
linearsvm_ovr,0.700 +/- 0.053 (in 3 folds),0.833 +/- 0.018 (in 3 folds),0.545,0.182,0.133333,165,22,✅ no
xgboost,0.663 +/- 0.106 (in 3 folds),0.790 +/- 0.080 (in 3 folds),0.533,0.075,0.133333,165,22,✅ no
lasso_cv,0.661 +/- 0.156 (in 3 folds),0.778 +/- 0.132 (in 3 folds),0.576,0.136,0.133333,165,22,✅ no
elasticnet_cv,0.612 +/- 0.195 (in 3 folds),0.721 +/- 0.154 (in 3 folds),0.564,0.083,0.133333,165,22,✅ no
ridge_cv,0.593 +/- 0.162 (in 3 folds),0.714 +/- 0.143 (in 3 folds),0.558,0.058,0.133333,165,22,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.990 +/- 0.015 (in 2 folds),0.972 +/- 0.040 (in 2 folds),0.716,0.283,0.174312,109,19,✅ no
lasso_multiclass,0.989 +/- 0.015 (in 2 folds),0.976 +/- 0.034 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no
xgboost,0.978 +/- 0.031 (in 2 folds),0.971 +/- 0.041 (in 2 folds),0.807,0.605,0.174312,109,19,✅ no
linearsvm_ovr,0.975 +/- 0.035 (in 2 folds),0.975 +/- 0.036 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no
rf_multiclass,0.969 +/- 0.025 (in 2 folds),0.940 +/- 0.006 (in 2 folds),0.798,0.59,0.174312,109,19,✅ no
ridge_cv,0.967 +/- 0.046 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no
elasticnet_cv,0.966 +/- 0.047 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.546 +/- 0.086 (in 3 folds),0.586 +/- 0.118 (in 3 folds),0.473,0.009,0.072727,165,12,✅ no
xgboost,0.542 +/- 0.090 (in 3 folds),0.595 +/- 0.115 (in 3 folds),0.473,0.009,0.072727,165,12,✅ no
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.418,-0.067,0.072727,165,12,✅ no
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.418,-0.067,0.072727,165,12,✅ no
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.418,-0.067,0.072727,165,12,✅ no
linearsvm_ovr,0.471 +/- 0.041 (in 3 folds),0.564 +/- 0.061 (in 3 folds),0.473,0.014,0.072727,165,12,✅ no
lasso_multiclass,0.457 +/- 0.055 (in 3 folds),0.545 +/- 0.083 (in 3 folds),0.436,-0.06,0.072727,165,12,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.covid_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.999 +/- 0.001 (in 3 folds),1.000 +/- 0.000 (in 3 folds),0.948,0.854,0.015873,252,4,✅ no
ridge_cv,0.999 +/- 0.001 (in 3 folds),1.000 +/- 0.000 (in 3 folds),0.944,0.842,0.015873,252,4,✅ no
lasso_cv,0.998 +/- 0.002 (in 3 folds),1.000 +/- 0.000 (in 3 folds),0.956,0.878,0.015873,252,4,✅ no
lasso_multiclass,0.996 +/- 0.005 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.956,0.884,0.015873,252,4,✅ no
linearsvm_ovr,0.995 +/- 0.008 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.956,0.882,0.015873,252,4,✅ no
rf_multiclass,0.995 +/- 0.007 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.952,0.867,0.015873,252,4,✅ no
xgboost,0.990 +/- 0.008 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.94,0.834,0.015873,252,4,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.hiv_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.989 +/- 0.005 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.925,0.842,0.034247,292,10,✅ no
elasticnet_cv,0.989 +/- 0.005 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.904,0.792,0.034247,292,10,✅ no
ridge_cv,0.987 +/- 0.007 (in 3 folds),0.994 +/- 0.002 (in 3 folds),0.928,0.848,0.034247,292,10,✅ no
linearsvm_ovr,0.987 +/- 0.004 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.918,0.828,0.034247,292,10,✅ no
lasso_cv,0.986 +/- 0.005 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.908,0.8,0.034247,292,10,✅ no
rf_multiclass,0.982 +/- 0.009 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.925,0.842,0.034247,292,10,✅ no
xgboost,0.978 +/- 0.006 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.911,0.817,0.034247,292,10,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.lupus_vs_healthy, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.982 +/- 0.015 (in 3 folds),0.958 +/- 0.033 (in 3 folds),0.891,0.71,0.031008,258,8,✅ no
lasso_multiclass,0.980 +/- 0.015 (in 3 folds),0.953 +/- 0.033 (in 3 folds),0.888,0.721,0.031008,258,8,✅ no
ridge_cv,0.979 +/- 0.017 (in 3 folds),0.953 +/- 0.035 (in 3 folds),0.891,0.706,0.031008,258,8,✅ no
lasso_cv,0.975 +/- 0.020 (in 3 folds),0.946 +/- 0.042 (in 3 folds),0.903,0.749,0.031008,258,8,✅ no
rf_multiclass,0.974 +/- 0.026 (in 3 folds),0.950 +/- 0.039 (in 3 folds),0.903,0.741,0.031008,258,8,✅ no
linearsvm_ovr,0.974 +/- 0.020 (in 3 folds),0.941 +/- 0.042 (in 3 folds),0.884,0.717,0.031008,258,8,✅ no
xgboost,0.969 +/- 0.032 (in 3 folds),0.937 +/- 0.053 (in 3 folds),0.903,0.746,0.031008,258,8,✅ no


In [7]:
# Default
# Report metamodel performance for the plain `disease` target: first for each
# configured gene locus on its own, then for all configured loci combined.
for target in [
    TargetObsColumnEnum.disease,
]:
    for gene_locus in config.gene_loci_used:
        choose(gene_locus, [target])
    # Only add the combined-loci report when there is more than one locus.
    # With a single locus the combined value is presumably that same locus, so
    # this call would just repeat the tables printed by the loop above.
    if len(config.gene_loci_used) > 1:
        choose(config.gene_loci_used, [target])

# GeneLocus.BCR, TargetObsColumnEnum.disease, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.964 +/- 0.005 (in 3 folds),0.963 +/- 0.006 (in 3 folds),0.835,0.762,0.022917,480,11,✅ no
lasso_multiclass,0.960 +/- 0.006 (in 3 folds),0.959 +/- 0.008 (in 3 folds),0.827,0.753,0.022917,480,11,✅ no
rf_multiclass,0.959 +/- 0.009 (in 3 folds),0.954 +/- 0.014 (in 3 folds),0.831,0.755,0.022917,480,11,✅ no
elasticnet_cv,0.957 +/- 0.008 (in 3 folds),0.958 +/- 0.009 (in 3 folds),0.802,0.713,0.022917,480,11,✅ no
xgboost,0.953 +/- 0.005 (in 3 folds),0.951 +/- 0.009 (in 3 folds),0.812,0.728,0.022917,480,11,✅ no
lasso_cv,0.949 +/- 0.005 (in 3 folds),0.954 +/- 0.007 (in 3 folds),0.8,0.709,0.022917,480,11,✅ no
ridge_cv,0.948 +/- 0.005 (in 3 folds),0.951 +/- 0.006 (in 3 folds),0.802,0.713,0.022917,480,11,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease, metamodel flavor isotype_counts_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.684 +/- 0.020 (in 3 folds),0.685 +/- 0.026 (in 3 folds),0.494,0.181,0.0,480,0,❗ Missing classes!
lasso_cv,0.676 +/- 0.023 (in 3 folds),0.681 +/- 0.032 (in 3 folds),0.508,0.215,0.0,480,0,✅ no
ridge_cv,0.675 +/- 0.022 (in 3 folds),0.672 +/- 0.018 (in 3 folds),0.494,0.183,0.0,480,0,❗ Missing classes!
linearsvm_ovr,0.674 +/- 0.020 (in 3 folds),0.666 +/- 0.020 (in 3 folds),0.477,0.194,0.0,480,0,✅ no
rf_multiclass,0.673 +/- 0.030 (in 3 folds),0.648 +/- 0.029 (in 3 folds),0.515,0.259,0.0,480,0,✅ no
lasso_multiclass,0.668 +/- 0.020 (in 3 folds),0.655 +/- 0.014 (in 3 folds),0.471,0.223,0.0,480,0,✅ no
xgboost,0.644 +/- 0.020 (in 3 folds),0.629 +/- 0.019 (in 3 folds),0.483,0.218,0.0,480,0,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.956 +/- 0.001 (in 3 folds),0.935 +/- 0.002 (in 3 folds),0.783,0.677,0.002415,414,1,✅ no
elasticnet_cv,0.952 +/- 0.001 (in 3 folds),0.936 +/- 0.003 (in 3 folds),0.795,0.697,0.002415,414,1,✅ no
lasso_multiclass,0.949 +/- 0.008 (in 3 folds),0.942 +/- 0.009 (in 3 folds),0.826,0.755,0.002415,414,1,✅ no
lasso_cv,0.947 +/- 0.008 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.771,0.659,0.002415,414,1,✅ no
rf_multiclass,0.947 +/- 0.006 (in 3 folds),0.939 +/- 0.007 (in 3 folds),0.773,0.665,0.002415,414,1,✅ no
xgboost,0.944 +/- 0.009 (in 3 folds),0.940 +/- 0.010 (in 3 folds),0.773,0.667,0.002415,414,1,✅ no
linearsvm_ovr,0.944 +/- 0.001 (in 3 folds),0.941 +/- 0.005 (in 3 folds),0.816,0.736,0.002415,414,1,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.983 +/- 0.005 (in 3 folds),0.980 +/- 0.006 (in 3 folds),0.879,0.826,0.016908,414,7,✅ no
elasticnet_cv,0.982 +/- 0.005 (in 3 folds),0.979 +/- 0.006 (in 3 folds),0.884,0.83,0.016908,414,7,✅ no
ridge_cv,0.982 +/- 0.005 (in 3 folds),0.976 +/- 0.008 (in 3 folds),0.877,0.819,0.016908,414,7,✅ no
rf_multiclass,0.981 +/- 0.013 (in 3 folds),0.976 +/- 0.016 (in 3 folds),0.886,0.833,0.016908,414,7,✅ no
linearsvm_ovr,0.980 +/- 0.003 (in 3 folds),0.977 +/- 0.005 (in 3 folds),0.884,0.832,0.016908,414,7,✅ no
lasso_cv,0.976 +/- 0.010 (in 3 folds),0.975 +/- 0.007 (in 3 folds),0.882,0.826,0.016908,414,7,✅ no
xgboost,0.973 +/- 0.008 (in 3 folds),0.971 +/- 0.008 (in 3 folds),0.874,0.817,0.016908,414,7,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease, metamodel flavor isotype_counts_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.707 +/- 0.008 (in 3 folds),0.713 +/- 0.017 (in 3 folds),0.543,0.261,0.0,414,0,❗ Missing classes!
lasso_multiclass,0.704 +/- 0.023 (in 3 folds),0.697 +/- 0.014 (in 3 folds),0.5,0.268,0.0,414,0,✅ no
rf_multiclass,0.702 +/- 0.019 (in 3 folds),0.684 +/- 0.017 (in 3 folds),0.548,0.302,0.0,414,0,✅ no
elasticnet_cv,0.702 +/- 0.003 (in 3 folds),0.703 +/- 0.017 (in 3 folds),0.539,0.252,0.0,414,0,❗ Missing classes!
linearsvm_ovr,0.695 +/- 0.020 (in 3 folds),0.686 +/- 0.015 (in 3 folds),0.502,0.234,0.0,414,0,✅ no
ridge_cv,0.691 +/- 0.022 (in 3 folds),0.683 +/- 0.023 (in 3 folds),0.522,0.217,0.0,414,0,❗ Missing classes!
xgboost,0.667 +/- 0.013 (in 3 folds),0.662 +/- 0.017 (in 3 folds),0.512,0.256,0.0,414,0,✅ no


In [8]:
# Demographic controlled
# Report metamodel performance for the `disease_all_demographics_present`
# target: first for each configured gene locus on its own, then for all
# configured loci combined.
for target in [
    TargetObsColumnEnum.disease_all_demographics_present,
]:
    for gene_locus in config.gene_loci_used:
        choose(gene_locus, [target])
    # Only add the combined-loci report when there is more than one locus.
    # With a single locus the combined value is presumably that same locus, so
    # this call would just repeat the tables printed by the loop above.
    if len(config.gene_loci_used) > 1:
        choose(config.gene_loci_used, [target])

# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.959 +/- 0.009 (in 3 folds),0.957 +/- 0.010 (in 3 folds),0.821,0.744,0.014286,420,6,✅ no
lasso_multiclass,0.959 +/- 0.008 (in 3 folds),0.957 +/- 0.011 (in 3 folds),0.793,0.707,0.014286,420,6,✅ no
lasso_cv,0.956 +/- 0.009 (in 3 folds),0.954 +/- 0.013 (in 3 folds),0.833,0.756,0.014286,420,6,✅ no
elasticnet_cv,0.955 +/- 0.012 (in 3 folds),0.956 +/- 0.012 (in 3 folds),0.831,0.753,0.014286,420,6,✅ no
rf_multiclass,0.953 +/- 0.010 (in 3 folds),0.950 +/- 0.013 (in 3 folds),0.824,0.743,0.014286,420,6,✅ no
ridge_cv,0.949 +/- 0.011 (in 3 folds),0.951 +/- 0.013 (in 3 folds),0.833,0.758,0.014286,420,6,✅ no
xgboost,0.949 +/- 0.007 (in 3 folds),0.948 +/- 0.014 (in 3 folds),0.817,0.733,0.014286,420,6,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.965 +/- 0.005 (in 3 folds),0.963 +/- 0.009 (in 3 folds),0.798,0.704,0.014286,420,6,✅ no
elasticnet_cv,0.965 +/- 0.004 (in 3 folds),0.966 +/- 0.007 (in 3 folds),0.814,0.731,0.014286,420,6,✅ no
xgboost,0.959 +/- 0.013 (in 3 folds),0.957 +/- 0.016 (in 3 folds),0.824,0.744,0.014286,420,6,✅ no
lasso_cv,0.956 +/- 0.006 (in 3 folds),0.958 +/- 0.010 (in 3 folds),0.821,0.74,0.014286,420,6,✅ no
ridge_cv,0.955 +/- 0.006 (in 3 folds),0.956 +/- 0.007 (in 3 folds),0.819,0.737,0.014286,420,6,✅ no
lasso_multiclass,0.953 +/- 0.013 (in 3 folds),0.955 +/- 0.010 (in 3 folds),0.807,0.732,0.014286,420,6,✅ no
linearsvm_ovr,0.912 +/- 0.007 (in 3 folds),0.925 +/- 0.011 (in 3 folds),0.783,0.688,0.014286,420,6,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.929 +/- 0.014 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.79,0.693,0.014286,420,6,✅ no
xgboost,0.920 +/- 0.017 (in 3 folds),0.918 +/- 0.024 (in 3 folds),0.762,0.652,0.014286,420,6,✅ no
lasso_multiclass,0.879 +/- 0.036 (in 3 folds),0.893 +/- 0.034 (in 3 folds),0.719,0.612,0.014286,420,6,✅ no
linearsvm_ovr,0.876 +/- 0.035 (in 3 folds),0.892 +/- 0.035 (in 3 folds),0.738,0.626,0.014286,420,6,✅ no
lasso_cv,0.864 +/- 0.032 (in 3 folds),0.894 +/- 0.028 (in 3 folds),0.762,0.649,0.014286,420,6,✅ no
elasticnet_cv,0.862 +/- 0.033 (in 3 folds),0.895 +/- 0.026 (in 3 folds),0.76,0.646,0.014286,420,6,✅ no
ridge_cv,0.861 +/- 0.037 (in 3 folds),0.893 +/- 0.025 (in 3 folds),0.738,0.614,0.014286,420,6,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.814 +/- 0.033 (in 3 folds),0.806 +/- 0.029 (in 3 folds),0.593,0.404,0.0,420,0,✅ no
ridge_cv,0.812 +/- 0.030 (in 3 folds),0.798 +/- 0.028 (in 3 folds),0.569,0.349,0.0,420,0,✅ no
lasso_multiclass,0.811 +/- 0.024 (in 3 folds),0.798 +/- 0.018 (in 3 folds),0.579,0.451,0.0,420,0,✅ no
elasticnet_cv,0.809 +/- 0.035 (in 3 folds),0.797 +/- 0.032 (in 3 folds),0.571,0.362,0.0,420,0,✅ no
linearsvm_ovr,0.809 +/- 0.028 (in 3 folds),0.798 +/- 0.023 (in 3 folds),0.583,0.438,0.0,420,0,✅ no
xgboost,0.803 +/- 0.042 (in 3 folds),0.806 +/- 0.033 (in 3 folds),0.574,0.37,0.0,420,0,✅ no
lasso_cv,0.797 +/- 0.041 (in 3 folds),0.784 +/- 0.029 (in 3 folds),0.557,0.342,0.0,420,0,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.670 +/- 0.025 (in 3 folds),0.660 +/- 0.022 (in 3 folds),0.419,0.187,0.0,420,0,✅ no
xgboost,0.667 +/- 0.015 (in 3 folds),0.654 +/- 0.006 (in 3 folds),0.429,0.14,0.0,420,0,✅ no
lasso_cv,0.638 +/- 0.020 (in 3 folds),0.649 +/- 0.028 (in 3 folds),0.476,0.127,0.0,420,0,❗ Missing classes!
elasticnet_cv,0.628 +/- 0.020 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.469,0.101,0.0,420,0,❗ Missing classes!
linearsvm_ovr,0.628 +/- 0.020 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.407,0.112,0.0,420,0,❗ Missing classes!
ridge_cv,0.628 +/- 0.020 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.46,0.084,0.0,420,0,❗ Missing classes!
lasso_multiclass,0.627 +/- 0.029 (in 3 folds),0.636 +/- 0.031 (in 3 folds),0.283,0.102,0.0,420,0,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.574 +/- 0.022 (in 3 folds),0.550 +/- 0.011 (in 3 folds),0.357,0.152,0.0,420,0,❗ Missing classes!
lasso_multiclass,0.574 +/- 0.022 (in 3 folds),0.550 +/- 0.011 (in 3 folds),0.357,0.152,0.0,420,0,❗ Missing classes!
linearsvm_ovr,0.563 +/- 0.025 (in 3 folds),0.541 +/- 0.016 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
xgboost,0.563 +/- 0.025 (in 3 folds),0.541 +/- 0.016 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
lasso_cv,0.543 +/- 0.040 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
elasticnet_cv,0.543 +/- 0.040 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!
ridge_cv,0.543 +/- 0.040 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.407,0.088,0.0,420,0,❗ Missing classes!


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.751 +/- 0.014 (in 3 folds),0.724 +/- 0.015 (in 3 folds),0.526,0.36,0.0,420,0,✅ no
rf_multiclass,0.750 +/- 0.015 (in 3 folds),0.724 +/- 0.014 (in 3 folds),0.526,0.36,0.0,420,0,✅ no
xgboost,0.748 +/- 0.022 (in 3 folds),0.725 +/- 0.017 (in 3 folds),0.595,0.421,0.0,420,0,✅ no
ridge_cv,0.748 +/- 0.017 (in 3 folds),0.724 +/- 0.014 (in 3 folds),0.557,0.334,0.0,420,0,✅ no
elasticnet_cv,0.747 +/- 0.022 (in 3 folds),0.724 +/- 0.017 (in 3 folds),0.557,0.334,0.0,420,0,✅ no
linearsvm_ovr,0.741 +/- 0.027 (in 3 folds),0.721 +/- 0.018 (in 3 folds),0.595,0.441,0.0,420,0,❗ Missing classes!
lasso_cv,0.739 +/- 0.029 (in 3 folds),0.722 +/- 0.026 (in 3 folds),0.552,0.327,0.0,420,0,❗ Missing classes!


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.956 +/- 0.003 (in 3 folds),0.945 +/- 0.006 (in 3 folds),0.804,0.713,0.005587,358,2,✅ no
elasticnet_cv,0.955 +/- 0.003 (in 3 folds),0.939 +/- 0.004 (in 3 folds),0.799,0.703,0.005587,358,2,✅ no
ridge_cv,0.954 +/- 0.005 (in 3 folds),0.937 +/- 0.005 (in 3 folds),0.799,0.704,0.005587,358,2,✅ no
lasso_multiclass,0.953 +/- 0.003 (in 3 folds),0.945 +/- 0.004 (in 3 folds),0.799,0.717,0.005587,358,2,✅ no
linearsvm_ovr,0.948 +/- 0.004 (in 3 folds),0.947 +/- 0.007 (in 3 folds),0.782,0.687,0.005587,358,2,✅ no
rf_multiclass,0.945 +/- 0.003 (in 3 folds),0.942 +/- 0.004 (in 3 folds),0.799,0.706,0.005587,358,2,✅ no
xgboost,0.942 +/- 0.002 (in 3 folds),0.940 +/- 0.001 (in 3 folds),0.774,0.67,0.005587,358,2,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.958 +/- 0.002 (in 3 folds),0.943 +/- 0.002 (in 3 folds),0.818,0.737,0.005587,358,2,✅ no
ridge_cv,0.957 +/- 0.007 (in 3 folds),0.943 +/- 0.008 (in 3 folds),0.802,0.71,0.005587,358,2,✅ no
lasso_cv,0.955 +/- 0.004 (in 3 folds),0.941 +/- 0.007 (in 3 folds),0.793,0.697,0.005587,358,2,✅ no
rf_multiclass,0.953 +/- 0.009 (in 3 folds),0.946 +/- 0.009 (in 3 folds),0.802,0.71,0.005587,358,2,✅ no
xgboost,0.944 +/- 0.002 (in 3 folds),0.944 +/- 0.007 (in 3 folds),0.774,0.667,0.005587,358,2,✅ no
lasso_multiclass,0.930 +/- 0.024 (in 3 folds),0.921 +/- 0.024 (in 3 folds),0.804,0.727,0.005587,358,2,✅ no
linearsvm_ovr,0.888 +/- 0.031 (in 3 folds),0.890 +/- 0.018 (in 3 folds),0.751,0.638,0.005587,358,2,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.922 +/- 0.017 (in 3 folds),0.916 +/- 0.026 (in 3 folds),0.726,0.593,0.005587,358,2,✅ no
xgboost,0.902 +/- 0.003 (in 3 folds),0.902 +/- 0.006 (in 3 folds),0.721,0.584,0.005587,358,2,✅ no
lasso_multiclass,0.870 +/- 0.034 (in 3 folds),0.863 +/- 0.044 (in 3 folds),0.684,0.551,0.005587,358,2,✅ no
linearsvm_ovr,0.859 +/- 0.024 (in 3 folds),0.860 +/- 0.029 (in 3 folds),0.687,0.552,0.005587,358,2,✅ no
ridge_cv,0.835 +/- 0.014 (in 3 folds),0.840 +/- 0.032 (in 3 folds),0.634,0.459,0.005587,358,2,✅ no
lasso_cv,0.832 +/- 0.010 (in 3 folds),0.844 +/- 0.033 (in 3 folds),0.687,0.541,0.005587,358,2,✅ no
elasticnet_cv,0.823 +/- 0.026 (in 3 folds),0.826 +/- 0.060 (in 3 folds),0.69,0.543,0.005587,358,2,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.858 +/- 0.031 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.626,0.429,0.0,358,0,✅ no
elasticnet_cv,0.856 +/- 0.033 (in 3 folds),0.851 +/- 0.023 (in 3 folds),0.693,0.552,0.0,358,0,✅ no
lasso_multiclass,0.856 +/- 0.026 (in 3 folds),0.850 +/- 0.012 (in 3 folds),0.651,0.525,0.0,358,0,✅ no
rf_multiclass,0.853 +/- 0.035 (in 3 folds),0.843 +/- 0.037 (in 3 folds),0.67,0.508,0.0,358,0,✅ no
linearsvm_ovr,0.853 +/- 0.023 (in 3 folds),0.848 +/- 0.012 (in 3 folds),0.656,0.522,0.0,358,0,✅ no
lasso_cv,0.845 +/- 0.032 (in 3 folds),0.843 +/- 0.022 (in 3 folds),0.654,0.485,0.0,358,0,✅ no
xgboost,0.843 +/- 0.049 (in 3 folds),0.848 +/- 0.041 (in 3 folds),0.662,0.496,0.0,358,0,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.699 +/- 0.040 (in 3 folds),0.682 +/- 0.040 (in 3 folds),0.464,0.251,0.0,358,0,✅ no
xgboost,0.697 +/- 0.032 (in 3 folds),0.691 +/- 0.030 (in 3 folds),0.466,0.199,0.0,358,0,✅ no
lasso_multiclass,0.682 +/- 0.067 (in 3 folds),0.687 +/- 0.059 (in 3 folds),0.338,0.193,0.0,358,0,✅ no
linearsvm_ovr,0.663 +/- 0.028 (in 3 folds),0.678 +/- 0.025 (in 3 folds),0.441,0.144,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.659 +/- 0.007 (in 3 folds),0.679 +/- 0.021 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
lasso_cv,0.647 +/- 0.044 (in 3 folds),0.671 +/- 0.049 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
ridge_cv,0.640 +/- 0.039 (in 3 folds),0.659 +/- 0.043 (in 3 folds),0.48,0.133,0.0,358,0,❗ Missing classes!


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.579 +/- 0.023 (in 3 folds),0.547 +/- 0.013 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
linearsvm_ovr,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.397,0.089,0.0,358,0,❗ Missing classes!
xgboost,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.561 +/- 0.030 (in 3 folds),0.540 +/- 0.016 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
ridge_cv,0.530 +/- 0.052 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.792 +/- 0.029 (in 3 folds),0.770 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
xgboost,0.790 +/- 0.032 (in 3 folds),0.769 +/- 0.021 (in 3 folds),0.665,0.504,0.0,358,0,✅ no
rf_multiclass,0.785 +/- 0.017 (in 3 folds),0.766 +/- 0.014 (in 3 folds),0.561,0.414,0.0,358,0,✅ no
elasticnet_cv,0.780 +/- 0.025 (in 3 folds),0.767 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
linearsvm_ovr,0.775 +/- 0.023 (in 3 folds),0.761 +/- 0.014 (in 3 folds),0.679,0.533,0.0,358,0,❗ Missing classes!
lasso_cv,0.771 +/- 0.055 (in 3 folds),0.750 +/- 0.053 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.759 +/- 0.023 (in 3 folds),0.749 +/- 0.016 (in 3 folds),0.556,0.406,0.0,358,0,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.983 +/- 0.009 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.874,0.817,0.01676,358,6,✅ no
elasticnet_cv,0.982 +/- 0.005 (in 3 folds),0.981 +/- 0.004 (in 3 folds),0.891,0.841,0.01676,358,6,✅ no
lasso_multiclass,0.981 +/- 0.007 (in 3 folds),0.981 +/- 0.006 (in 3 folds),0.874,0.823,0.01676,358,6,✅ no
lasso_cv,0.980 +/- 0.006 (in 3 folds),0.980 +/- 0.007 (in 3 folds),0.858,0.791,0.01676,358,6,✅ no
ridge_cv,0.979 +/- 0.004 (in 3 folds),0.976 +/- 0.005 (in 3 folds),0.902,0.858,0.01676,358,6,✅ no
xgboost,0.973 +/- 0.008 (in 3 folds),0.973 +/- 0.009 (in 3 folds),0.874,0.817,0.01676,358,6,✅ no
linearsvm_ovr,0.966 +/- 0.011 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.863,0.802,0.01676,358,6,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.984 +/- 0.004 (in 3 folds),0.983 +/- 0.005 (in 3 folds),0.88,0.824,0.01676,358,6,✅ no
rf_multiclass,0.980 +/- 0.007 (in 3 folds),0.978 +/- 0.006 (in 3 folds),0.866,0.807,0.01676,358,6,✅ no
lasso_cv,0.978 +/- 0.007 (in 3 folds),0.974 +/- 0.011 (in 3 folds),0.835,0.759,0.01676,358,6,✅ no
xgboost,0.977 +/- 0.008 (in 3 folds),0.975 +/- 0.008 (in 3 folds),0.877,0.821,0.01676,358,6,✅ no
lasso_multiclass,0.975 +/- 0.008 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.866,0.809,0.01676,358,6,✅ no
ridge_cv,0.975 +/- 0.006 (in 3 folds),0.970 +/- 0.008 (in 3 folds),0.855,0.788,0.01676,358,6,✅ no
linearsvm_ovr,0.941 +/- 0.012 (in 3 folds),0.945 +/- 0.011 (in 3 folds),0.821,0.74,0.01676,358,6,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.969 +/- 0.016 (in 3 folds),0.963 +/- 0.020 (in 3 folds),0.841,0.766,0.01676,358,6,✅ no
xgboost,0.951 +/- 0.024 (in 3 folds),0.946 +/- 0.029 (in 3 folds),0.788,0.689,0.01676,358,6,✅ no
lasso_multiclass,0.912 +/- 0.035 (in 3 folds),0.915 +/- 0.042 (in 3 folds),0.735,0.627,0.01676,358,6,✅ no
lasso_cv,0.894 +/- 0.020 (in 3 folds),0.911 +/- 0.024 (in 3 folds),0.735,0.606,0.01676,358,6,✅ no
linearsvm_ovr,0.893 +/- 0.058 (in 3 folds),0.894 +/- 0.065 (in 3 folds),0.732,0.615,0.01676,358,6,✅ no
elasticnet_cv,0.893 +/- 0.028 (in 3 folds),0.913 +/- 0.029 (in 3 folds),0.749,0.628,0.01676,358,6,✅ no
ridge_cv,0.882 +/- 0.025 (in 3 folds),0.895 +/- 0.027 (in 3 folds),0.743,0.622,0.01676,358,6,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.858 +/- 0.031 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.626,0.429,0.0,358,0,✅ no
elasticnet_cv,0.856 +/- 0.033 (in 3 folds),0.851 +/- 0.023 (in 3 folds),0.693,0.552,0.0,358,0,✅ no
rf_multiclass,0.856 +/- 0.031 (in 3 folds),0.845 +/- 0.033 (in 3 folds),0.665,0.5,0.0,358,0,✅ no
linearsvm_ovr,0.853 +/- 0.023 (in 3 folds),0.848 +/- 0.012 (in 3 folds),0.656,0.522,0.0,358,0,✅ no
lasso_multiclass,0.851 +/- 0.031 (in 3 folds),0.845 +/- 0.021 (in 3 folds),0.642,0.521,0.0,358,0,✅ no
lasso_cv,0.845 +/- 0.032 (in 3 folds),0.843 +/- 0.022 (in 3 folds),0.654,0.485,0.0,358,0,✅ no
xgboost,0.843 +/- 0.049 (in 3 folds),0.848 +/- 0.041 (in 3 folds),0.662,0.496,0.0,358,0,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.704 +/- 0.036 (in 3 folds),0.686 +/- 0.036 (in 3 folds),0.466,0.25,0.0,358,0,✅ no
xgboost,0.697 +/- 0.032 (in 3 folds),0.691 +/- 0.030 (in 3 folds),0.466,0.199,0.0,358,0,✅ no
lasso_multiclass,0.681 +/- 0.067 (in 3 folds),0.687 +/- 0.059 (in 3 folds),0.338,0.193,0.0,358,0,✅ no
linearsvm_ovr,0.663 +/- 0.028 (in 3 folds),0.678 +/- 0.025 (in 3 folds),0.441,0.144,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.659 +/- 0.007 (in 3 folds),0.679 +/- 0.021 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
lasso_cv,0.647 +/- 0.044 (in 3 folds),0.671 +/- 0.049 (in 3 folds),0.472,0.11,0.0,358,0,❗ Missing classes!
ridge_cv,0.640 +/- 0.039 (in 3 folds),0.659 +/- 0.043 (in 3 folds),0.48,0.133,0.0,358,0,❗ Missing classes!


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.579 +/- 0.023 (in 3 folds),0.547 +/- 0.013 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
linearsvm_ovr,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.397,0.089,0.0,358,0,❗ Missing classes!
xgboost,0.573 +/- 0.019 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.561 +/- 0.030 (in 3 folds),0.540 +/- 0.016 (in 3 folds),0.332,0.11,0.0,358,0,✅ no
ridge_cv,0.530 +/- 0.052 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
lasso_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!
elasticnet_cv,0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.461,0.0,0.0,358,0,❗ Missing classes!


# GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.792 +/- 0.029 (in 3 folds),0.770 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
xgboost,0.790 +/- 0.032 (in 3 folds),0.769 +/- 0.021 (in 3 folds),0.665,0.504,0.0,358,0,✅ no
rf_multiclass,0.785 +/- 0.017 (in 3 folds),0.766 +/- 0.014 (in 3 folds),0.564,0.42,0.0,358,0,✅ no
elasticnet_cv,0.780 +/- 0.025 (in 3 folds),0.767 +/- 0.021 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
linearsvm_ovr,0.775 +/- 0.023 (in 3 folds),0.761 +/- 0.014 (in 3 folds),0.679,0.533,0.0,358,0,❗ Missing classes!
lasso_cv,0.771 +/- 0.055 (in 3 folds),0.750 +/- 0.053 (in 3 folds),0.659,0.495,0.0,358,0,❗ Missing classes!
lasso_multiclass,0.759 +/- 0.023 (in 3 folds),0.749 +/- 0.016 (in 3 folds),0.64,0.483,0.0,358,0,✅ no


In [9]:
# Demographic targets restricted to healthy individuals ("*_healthy_only").
healthy_only_demographic_targets = [
    TargetObsColumnEnum.ethnicity_condensed_healthy_only,
    TargetObsColumnEnum.age_group_healthy_only,
    TargetObsColumnEnum.age_group_binary_healthy_only,
    TargetObsColumnEnum.age_group_pediatric_healthy_only,
    TargetObsColumnEnum.sex_healthy_only,
]

for target in healthy_only_demographic_targets:
    # First report each gene locus on its own ...
    for gene_locus in config.gene_loci_used:
        choose(gene_locus, [target])

    # ... then report the full set of loci passed together as one value.
    choose(config.gene_loci_used, [target])

# GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
elasticnet_cv,0.746 +/- 0.059 (in 3 folds),0.761 +/- 0.030 (in 3 folds),0.602,0.311,0.04712,191,9,❗ Missing classes!
ridge_cv,0.736 +/- 0.043 (in 3 folds),0.745 +/- 0.015 (in 3 folds),0.686,0.436,0.04712,191,9,❗ Missing classes!
linearsvm_ovr,0.721 +/- 0.043 (in 3 folds),0.722 +/- 0.031 (in 3 folds),0.513,0.301,0.04712,191,9,✅ no
rf_multiclass,0.716 +/- 0.081 (in 3 folds),0.734 +/- 0.076 (in 3 folds),0.67,0.446,0.04712,191,9,✅ no
xgboost,0.695 +/- 0.070 (in 3 folds),0.721 +/- 0.056 (in 3 folds),0.607,0.369,0.04712,191,9,✅ no
lasso_cv,0.690 +/- 0.022 (in 3 folds),0.715 +/- 0.020 (in 3 folds),0.681,0.435,0.04712,191,9,❗ Missing classes!
lasso_multiclass,0.686 +/- 0.086 (in 3 folds),0.711 +/- 0.030 (in 3 folds),0.487,0.331,0.04712,191,9,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
ridge_cv,0.715 +/- 0.041 (in 3 folds),0.734 +/- 0.017 (in 3 folds),0.745,0.562,0.006061,165,1,❗ Missing classes!
elasticnet_cv,0.710 +/- 0.014 (in 3 folds),0.739 +/- 0.033 (in 3 folds),0.739,0.545,0.006061,165,1,❗ Missing classes!
lasso_cv,0.699 +/- 0.017 (in 3 folds),0.727 +/- 0.041 (in 3 folds),0.727,0.513,0.006061,165,1,❗ Missing classes!
lasso_multiclass,0.694 +/- 0.033 (in 3 folds),0.728 +/- 0.017 (in 3 folds),0.461,0.276,0.006061,165,1,✅ no
linearsvm_ovr,0.687 +/- 0.013 (in 3 folds),0.726 +/- 0.012 (in 3 folds),0.503,0.253,0.006061,165,1,✅ no
rf_multiclass,0.668 +/- 0.013 (in 3 folds),0.700 +/- 0.014 (in 3 folds),0.655,0.39,0.006061,165,1,❗ Missing classes!
xgboost,0.630 +/- 0.034 (in 3 folds),0.696 +/- 0.015 (in 3 folds),0.527,0.254,0.006061,165,1,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.752 +/- 0.028 (in 3 folds),0.745 +/- 0.029 (in 3 folds),0.515,0.305,0.042424,165,7,✅ no
ridge_cv,0.734 +/- 0.055 (in 3 folds),0.743 +/- 0.016 (in 3 folds),0.618,0.309,0.042424,165,7,❗ Missing classes!
rf_multiclass,0.732 +/- 0.064 (in 3 folds),0.754 +/- 0.057 (in 3 folds),0.661,0.405,0.042424,165,7,❗ Missing classes!
elasticnet_cv,0.727 +/- 0.080 (in 3 folds),0.738 +/- 0.030 (in 3 folds),0.6,0.311,0.042424,165,7,✅ no
lasso_multiclass,0.721 +/- 0.080 (in 3 folds),0.731 +/- 0.028 (in 3 folds),0.558,0.364,0.042424,165,7,✅ no
xgboost,0.712 +/- 0.062 (in 3 folds),0.735 +/- 0.068 (in 3 folds),0.582,0.35,0.042424,165,7,✅ no
lasso_cv,0.691 +/- 0.088 (in 3 folds),0.712 +/- 0.041 (in 3 folds),0.63,0.305,0.042424,165,7,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.664 +/- 0.015 (in 3 folds),0.697 +/- 0.016 (in 3 folds),0.293,0.181,0.172775,191,33,✅ no
lasso_multiclass,0.635 +/- 0.039 (in 3 folds),0.678 +/- 0.015 (in 3 folds),0.236,0.125,0.172775,191,33,✅ no
lasso_cv,0.631 +/- 0.031 (in 3 folds),0.663 +/- 0.015 (in 3 folds),0.257,0.131,0.172775,191,33,❗ Missing classes!
elasticnet_cv,0.630 +/- 0.039 (in 3 folds),0.667 +/- 0.020 (in 3 folds),0.246,0.114,0.172775,191,33,❗ Missing classes!
linearsvm_ovr,0.615 +/- 0.029 (in 3 folds),0.674 +/- 0.013 (in 3 folds),0.246,0.136,0.172775,191,33,✅ no
ridge_cv,0.607 +/- 0.095 (in 3 folds),0.622 +/- 0.109 (in 3 folds),0.246,0.119,0.172775,191,33,❗ Missing classes!
xgboost,0.600 +/- 0.035 (in 3 folds),0.664 +/- 0.035 (in 3 folds),0.23,0.113,0.172775,191,33,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global,MCC global,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.704 +/- 0.060 (in 3 folds),0.736 +/- 0.043 (in 3 folds),0.491,0.408,0.0,165,0,❗ Missing classes!
xgboost,0.696 +/- 0.074 (in 3 folds),0.721 +/- 0.064 (in 3 folds),0.412,0.289,0.0,165,0,❗ Missing classes!
elasticnet_cv,0.694 +/- 0.043 (in 3 folds),0.731 +/- 0.033 (in 3 folds),0.442,0.335,0.0,165,0,❗ Missing classes!
lasso_multiclass,0.689 +/- 0.049 (in 3 folds),0.719 +/- 0.027 (in 3 folds),0.436,0.324,0.0,165,0,✅ no
ridge_cv,0.673 +/- 0.036 (in 3 folds),0.714 +/- 0.025 (in 3 folds),0.448,0.337,0.0,165,0,❗ Missing classes!
rf_multiclass,0.659 +/- 0.030 (in 3 folds),0.704 +/- 0.025 (in 3 folds),0.424,0.305,0.0,165,0,❗ Missing classes!
linearsvm_ovr,0.654 +/- 0.030 (in 3 folds),0.689 +/- 0.017 (in 3 folds),0.37,0.237,0.0,165,0,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.696 +/- 0.026 (in 3 folds),0.734 +/- 0.036 (in 3 folds),0.37,0.269,0.175758,165,29,❗ Missing classes!
lasso_cv,0.687 +/- 0.041 (in 3 folds),0.736 +/- 0.024 (in 3 folds),0.321,0.212,0.175758,165,29,❗ Missing classes!
ridge_cv,0.683 +/- 0.039 (in 3 folds),0.734 +/- 0.026 (in 3 folds),0.309,0.187,0.175758,165,29,❗ Missing classes!
xgboost,0.681 +/- 0.052 (in 3 folds),0.728 +/- 0.046 (in 3 folds),0.358,0.258,0.175758,165,29,✅ no
elasticnet_cv,0.666 +/- 0.058 (in 3 folds),0.727 +/- 0.028 (in 3 folds),0.285,0.157,0.175758,165,29,❗ Missing classes!
linearsvm_ovr,0.662 +/- 0.020 (in 3 folds),0.707 +/- 0.012 (in 3 folds),0.321,0.214,0.175758,165,29,❗ Missing classes!
lasso_multiclass,0.659 +/- 0.016 (in 3 folds),0.702 +/- 0.015 (in 3 folds),0.279,0.173,0.175758,165,29,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.657 +/- 0.051 (in 3 folds),0.770 +/- 0.084 (in 3 folds),0.534,0.175,0.13089,191,25,✅ no
lasso_multiclass,0.654 +/- 0.044 (in 3 folds),0.765 +/- 0.083 (in 3 folds),0.545,0.194,0.13089,191,25,✅ no
rf_multiclass,0.616 +/- 0.117 (in 3 folds),0.722 +/- 0.141 (in 3 folds),0.55,0.121,0.13089,191,25,✅ no
xgboost,0.538 +/- 0.128 (in 3 folds),0.632 +/- 0.094 (in 3 folds),0.529,0.103,0.13089,191,25,✅ no
elasticnet_cv,0.514 +/- 0.024 (in 3 folds),0.640 +/- 0.088 (in 3 folds),0.534,-0.051,0.13089,191,25,❗ Missing classes!
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.534,-0.051,0.13089,191,25,❗ Missing classes!
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.534,-0.051,0.13089,191,25,❗ Missing classes!


# GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.777 +/- 0.049 (in 3 folds),0.896 +/- 0.009 (in 3 folds),0.685,0.403,0.018182,165,3,✅ no
linearsvm_ovr,0.755 +/- 0.055 (in 3 folds),0.886 +/- 0.009 (in 3 folds),0.673,0.371,0.018182,165,3,✅ no
xgboost,0.727 +/- 0.052 (in 3 folds),0.851 +/- 0.039 (in 3 folds),0.63,0.139,0.018182,165,3,✅ no
rf_multiclass,0.725 +/- 0.036 (in 3 folds),0.863 +/- 0.017 (in 3 folds),0.648,0.203,0.018182,165,3,✅ no
elasticnet_cv,0.687 +/- 0.168 (in 3 folds),0.817 +/- 0.130 (in 3 folds),0.685,0.268,0.018182,165,3,✅ no
lasso_cv,0.678 +/- 0.162 (in 3 folds),0.810 +/- 0.125 (in 3 folds),0.679,0.238,0.018182,165,3,✅ no
ridge_cv,0.678 +/- 0.157 (in 3 folds),0.813 +/- 0.126 (in 3 folds),0.648,0.16,0.018182,165,3,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_multiclass,0.748 +/- 0.071 (in 3 folds),0.859 +/- 0.041 (in 3 folds),0.588,0.279,0.133333,165,22,✅ no
rf_multiclass,0.719 +/- 0.106 (in 3 folds),0.822 +/- 0.088 (in 3 folds),0.582,0.186,0.133333,165,22,✅ no
linearsvm_ovr,0.700 +/- 0.053 (in 3 folds),0.833 +/- 0.018 (in 3 folds),0.545,0.182,0.133333,165,22,✅ no
xgboost,0.663 +/- 0.106 (in 3 folds),0.790 +/- 0.080 (in 3 folds),0.533,0.075,0.133333,165,22,✅ no
lasso_cv,0.661 +/- 0.156 (in 3 folds),0.778 +/- 0.132 (in 3 folds),0.576,0.136,0.133333,165,22,✅ no
elasticnet_cv,0.612 +/- 0.195 (in 3 folds),0.721 +/- 0.154 (in 3 folds),0.564,0.083,0.133333,165,22,✅ no
ridge_cv,0.593 +/- 0.162 (in 3 folds),0.714 +/- 0.143 (in 3 folds),0.558,0.058,0.133333,165,22,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.951 +/- 0.063 (in 2 folds),0.906 +/- 0.100 (in 2 folds),0.672,0.41,0.184,125,23,✅ no
lasso_multiclass,0.948 +/- 0.061 (in 2 folds),0.891 +/- 0.081 (in 2 folds),0.68,0.419,0.184,125,23,✅ no
rf_multiclass,0.926 +/- 0.102 (in 2 folds),0.905 +/- 0.101 (in 2 folds),0.768,0.475,0.184,125,23,✅ no
xgboost,0.922 +/- 0.095 (in 2 folds),0.883 +/- 0.068 (in 2 folds),0.768,0.493,0.184,125,23,✅ no
lasso_cv,0.745 +/- 0.347 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.704,0.216,0.184,125,23,✅ no
elasticnet_cv,0.745 +/- 0.347 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.664,-0.006,0.184,125,23,❗ Missing classes!
ridge_cv,0.500 +/- 0.000 (in 2 folds),0.180 +/- 0.057 (in 2 folds),0.664,-0.006,0.184,125,23,❗ Missing classes!


# GeneLocus.TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
linearsvm_ovr,0.994 +/- 0.011 (in 3 folds),0.988 +/- 0.020 (in 3 folds),0.952,0.849,0.018182,165,3,✅ no
rf_multiclass,0.986 +/- 0.023 (in 3 folds),0.979 +/- 0.037 (in 3 folds),0.964,0.883,0.018182,165,3,✅ no
lasso_cv,0.984 +/- 0.028 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.952,0.84,0.018182,165,3,✅ no
lasso_multiclass,0.984 +/- 0.027 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.964,0.888,0.018182,165,3,✅ no
xgboost,0.982 +/- 0.029 (in 3 folds),0.977 +/- 0.031 (in 3 folds),0.964,0.883,0.018182,165,3,✅ no
elasticnet_cv,0.978 +/- 0.037 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.952,0.84,0.018182,165,3,✅ no
ridge_cv,0.978 +/- 0.037 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.958,0.862,0.018182,165,3,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
lasso_cv,0.990 +/- 0.015 (in 2 folds),0.972 +/- 0.040 (in 2 folds),0.716,0.283,0.174312,109,19,✅ no
lasso_multiclass,0.989 +/- 0.015 (in 2 folds),0.976 +/- 0.034 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no
xgboost,0.978 +/- 0.031 (in 2 folds),0.971 +/- 0.041 (in 2 folds),0.807,0.605,0.174312,109,19,✅ no
linearsvm_ovr,0.975 +/- 0.035 (in 2 folds),0.975 +/- 0.036 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no
rf_multiclass,0.969 +/- 0.025 (in 2 folds),0.940 +/- 0.006 (in 2 folds),0.798,0.59,0.174312,109,19,✅ no
ridge_cv,0.967 +/- 0.046 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no
elasticnet_cv,0.966 +/- 0.047 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.78,0.514,0.174312,109,19,✅ no


# GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.560 +/- 0.101 (in 3 folds),0.565 +/- 0.138 (in 3 folds),0.482,-0.013,0.026178,191,5,✅ no
linearsvm_ovr,0.514 +/- 0.049 (in 3 folds),0.525 +/- 0.120 (in 3 folds),0.492,0.012,0.026178,191,5,✅ no
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.476,-0.031,0.026178,191,5,✅ no
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.476,-0.031,0.026178,191,5,✅ no
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.476,-0.031,0.026178,191,5,✅ no
lasso_multiclass,0.496 +/- 0.059 (in 3 folds),0.514 +/- 0.133 (in 3 folds),0.492,0.011,0.026178,191,5,✅ no
xgboost,0.494 +/- 0.107 (in 3 folds),0.519 +/- 0.153 (in 3 folds),0.466,-0.044,0.026178,191,5,✅ no


# GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.562 +/- 0.037 (in 3 folds),0.646 +/- 0.128 (in 3 folds),0.515,0.076,0.054545,165,9,✅ no
xgboost,0.516 +/- 0.075 (in 3 folds),0.603 +/- 0.129 (in 3 folds),0.497,0.037,0.054545,165,9,✅ no
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.424,-0.073,0.054545,165,9,✅ no
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.424,-0.073,0.054545,165,9,✅ no
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.424,-0.073,0.054545,165,9,✅ no
lasso_multiclass,0.487 +/- 0.053 (in 3 folds),0.545 +/- 0.063 (in 3 folds),0.479,0.002,0.054545,165,9,✅ no
linearsvm_ovr,0.482 +/- 0.043 (in 3 folds),0.553 +/- 0.050 (in 3 folds),0.491,0.026,0.054545,165,9,✅ no


# GeneLocus.BCR|TCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default

Unnamed: 0,ROC-AUC (weighted OvO) per fold,au-PRC (weighted OvO) per fold,Accuracy global with abstention,MCC global with abstention,abstention_rate,sample_size including abstentions,n_abstentions,missing_classes
rf_multiclass,0.546 +/- 0.086 (in 3 folds),0.586 +/- 0.118 (in 3 folds),0.473,0.009,0.072727,165,12,✅ no
xgboost,0.542 +/- 0.090 (in 3 folds),0.595 +/- 0.115 (in 3 folds),0.473,0.009,0.072727,165,12,✅ no
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.418,-0.067,0.072727,165,12,✅ no
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.418,-0.067,0.072727,165,12,✅ no
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.418,-0.067,0.072727,165,12,✅ no
linearsvm_ovr,0.471 +/- 0.041 (in 3 folds),0.564 +/- 0.061 (in 3 folds),0.473,0.014,0.072727,165,12,✅ no
lasso_multiclass,0.457 +/- 0.055 (in 3 folds),0.545 +/- 0.083 (in 3 folds),0.436,-0.06,0.072727,165,12,✅ no
