In [1]:
from summarynb import show, indexed_csv, table, chunks
from malid.external.summarynb_extras import plaintext, empty
from malid import config, logger
from malid.datamodels import (
    TargetObsColumnEnum,
    combine_classification_option_names,
)
import pandas as pd
from IPython.display import display, Markdown

In [2]:
# Labels of the training and validation fold splits, as reported by the project
# config. They are interpolated into the section headings rendered by the
# display loop further down in this notebook.
fold_label_train, fold_label_validation = config.get_fold_split_labels()

# Summary statistics of the repertoire --> classifiers

## Features for each specimen's repertoire, per isotype:

Derived from the same sampled versions of the repertoires that are used for the other models:

* Top 15 PCs from V-J gene use counts
* Median sequence SHM rate
* Proportion of sequences at least 1% mutated

All features are standardized to zero mean and unit variance.

_Some models will not have feature importances._

In [3]:
# Render, for every gene locus and every classification target, the results of
# the repertoire-stats classifiers: first the validation-set section, then the
# test-set section. Both sections show a model-comparison table followed by
# per-model classification reports and figures, four models per row group.
#
# The two sections differ only in their heading and in the naming pattern of
# the files they read, so they are described as data here and rendered by one
# shared loop below. In the templates, "{prefix}" is the per-target output path
# prefix and "{model_name}" is a row label of that section's scores table.
result_sections = [
    {
        "heading": f"## Specimen predictions on {fold_label_validation} set",
        "scores": "{prefix}.compare_model_scores.tsv",
        "report": "{prefix}.classification_report.{model_name}.txt",
        "images": [
            # confusion matrix
            "{prefix}.confusion_matrix.{model_name}.png",
            # mistakes
            "{prefix}.confusion_matrix.{model_name}.binary_vs_ground_truth.png",
            # feature importances
            "{prefix}.feature_importances.{model_name}.png",
        ],
    },
    {
        "heading": "## Apply train-smaller model -- Test set performance - With and without tuning on validation set",
        "scores": "{prefix}.compare_model_scores.test_set_performance.tsv",
        "report": "{prefix}.test_set_performance.{model_name}.classification_report.txt",
        "images": [
            # confusion matrix
            "{prefix}.test_set_performance.{model_name}.confusion_matrix.png",
            # mistakes
            "{prefix}.test_set_performance.{model_name}.confusion_matrix.binary_vs_ground_truth.png",
        ],
    },
]

for gene_locus in config.gene_loci_used:
    # map target_obs_column to results_output_prefix
    targets = {}
    for target in config.classification_targets:
        targets[target] = (
            config.paths.repertoire_stats_classifier_output_dir
            / gene_locus.name
            / combine_classification_option_names(target)
            / "train_smaller_model"
        )

    # The mapping is intentionally not printed: its repr is a wall of text and
    # embeds absolute filesystem paths in the saved notebook output.

    for target_obs_column, results_output_prefix in targets.items():
        display(
            Markdown(
                f"# {gene_locus}, {target_obs_column} trained on {fold_label_train} set"
            )
        )

        for section in result_sections:
            display(Markdown(section["heading"]))
            try:
                ## All results in a table
                all_results = pd.read_csv(
                    section["scores"].format(prefix=results_output_prefix),
                    sep="\t",
                    index_col=0,
                )
                show(table(all_results), headers=["All results, sorted"])

                # Every row label of the scores table is a model to display.
                models_of_interest = all_results.index

                ## Reports and figures, four models side by side
                for model_names in chunks(models_of_interest, 4):
                    # First row: classification reports rendered as plain text.
                    report_row = [
                        plaintext(
                            section["report"].format(
                                prefix=results_output_prefix, model_name=model_name
                            )
                        )
                        for model_name in model_names
                    ]
                    # Remaining rows: one row of image paths per figure type.
                    image_rows = [
                        [
                            image_template.format(
                                prefix=results_output_prefix, model_name=model_name
                            )
                            for model_name in model_names
                        ]
                        for image_template in section["images"]
                    ]
                    show(
                        [report_row, *image_rows],
                        headers=model_names,
                        max_width=500,
                    )
            except FileNotFoundError as err:
                # Best effort: a section whose files are missing is reported
                # and skipped, so the remaining sections still render.
                logger.warning(f"Not run: {err}")

        display(Markdown("---"))

{<TargetObsColumnEnum.disease: TargetObsColumn(obs_column_name='disease', is_target_binary_for_repertoire_composition_classifier=False, limited_to_disease=None, require_metadata_columns_present=None, confusion_matrix_expanded_column_name='disease.separate_past_exposures', blended_evaluation_column_name='disease.rollup')>: PosixPath('/users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/disease/train_smaller_model'), <TargetObsColumnEnum.disease_all_demographics_present: TargetObsColumn(obs_column_name='disease', is_target_binary_for_repertoire_composition_classifier=False, limited_to_disease=None, require_metadata_columns_present=['age', 'sex', 'ethnicity_condensed', 'age_group'], confusion_matrix_expanded_column_name='disease.separate_past_exposures', blended_evaluation_column_name='disease.rollup')>: PosixPath('/users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/BCR/disease_all_demographics_present/train_smaller_model'), <TargetOb

# GeneLocus.BCR, TargetObsColumnEnum.disease trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.014 (in 3 folds),0.940 +/- 0.016 (in 3 folds),0.937 +/- 0.017 (in 3 folds),0.941 +/- 0.018 (in 3 folds),0.805 +/- 0.023 (in 3 folds),0.719 +/- 0.031 (in 3 folds),0.805,0.718,323.0,0.0,323.0,0.0,False
rf_multiclass,0.922 +/- 0.008 (in 3 folds),0.923 +/- 0.009 (in 3 folds),0.929 +/- 0.008 (in 3 folds),0.932 +/- 0.010 (in 3 folds),0.768 +/- 0.051 (in 3 folds),0.659 +/- 0.068 (in 3 folds),0.768,0.658,323.0,0.0,323.0,0.0,False
linearsvm_ovr,0.918 +/- 0.010 (in 3 folds),0.918 +/- 0.014 (in 3 folds),0.916 +/- 0.014 (in 3 folds),0.920 +/- 0.016 (in 3 folds),0.783 +/- 0.005 (in 3 folds),0.685 +/- 0.011 (in 3 folds),0.783,0.683,323.0,0.0,323.0,0.0,False
xgboost,0.910 +/- 0.029 (in 3 folds),0.909 +/- 0.031 (in 3 folds),0.918 +/- 0.029 (in 3 folds),0.922 +/- 0.031 (in 3 folds),0.783 +/- 0.019 (in 3 folds),0.682 +/- 0.031 (in 3 folds),0.783,0.681,323.0,0.0,323.0,0.0,False
ridge_cv,0.907 +/- 0.019 (in 3 folds),0.900 +/- 0.024 (in 3 folds),0.907 +/- 0.022 (in 3 folds),0.906 +/- 0.027 (in 3 folds),0.575 +/- 0.202 (in 3 folds),0.238 +/- 0.412 (in 3 folds),0.576,0.358,323.0,0.0,323.0,0.0,False
elasticnet_cv,0.907 +/- 0.016 (in 3 folds),0.900 +/- 0.017 (in 3 folds),0.906 +/- 0.013 (in 3 folds),0.906 +/- 0.015 (in 3 folds),0.755 +/- 0.046 (in 3 folds),0.641 +/- 0.059 (in 3 folds),0.755,0.638,323.0,0.0,323.0,0.0,False
lasso_cv,0.896 +/- 0.019 (in 3 folds),0.889 +/- 0.019 (in 3 folds),0.892 +/- 0.016 (in 3 folds),0.892 +/- 0.018 (in 3 folds),0.762 +/- 0.072 (in 3 folds),0.650 +/- 0.100 (in 3 folds),0.762,0.646,323.0,0.0,323.0,0.0,False
dummy_stratified,0.509 +/- 0.030 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.512 +/- 0.015 (in 3 folds),0.515 +/- 0.016 (in 3 folds),0.341 +/- 0.052 (in 3 folds),0.015 +/- 0.067 (in 3 folds),0.341,0.014,323.0,0.0,323.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.019 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,323.0,0.0,323.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.014 (in 3 folds),0.940 +/- 0.016 (in 3 folds),0.937 +/- 0.017 (in 3 folds),0.941 +/- 0.018 (in 3 folds),0.805 +/- 0.023 (in 3 folds),0.719 +/- 0.031 (in 3 folds),0.805,0.718,323,0,323,0.0,False
rf_multiclass,0.922 +/- 0.008 (in 3 folds),0.923 +/- 0.009 (in 3 folds),0.929 +/- 0.008 (in 3 folds),0.932 +/- 0.010 (in 3 folds),0.768 +/- 0.051 (in 3 folds),0.659 +/- 0.068 (in 3 folds),0.768,0.658,323,0,323,0.0,False
linearsvm_ovr,0.918 +/- 0.010 (in 3 folds),0.918 +/- 0.014 (in 3 folds),0.916 +/- 0.014 (in 3 folds),0.920 +/- 0.016 (in 3 folds),0.783 +/- 0.005 (in 3 folds),0.685 +/- 0.011 (in 3 folds),0.783,0.683,323,0,323,0.0,False
xgboost,0.910 +/- 0.029 (in 3 folds),0.909 +/- 0.031 (in 3 folds),0.918 +/- 0.029 (in 3 folds),0.922 +/- 0.031 (in 3 folds),0.783 +/- 0.019 (in 3 folds),0.682 +/- 0.031 (in 3 folds),0.783,0.681,323,0,323,0.0,False
ridge_cv,0.907 +/- 0.019 (in 3 folds),0.900 +/- 0.024 (in 3 folds),0.907 +/- 0.022 (in 3 folds),0.906 +/- 0.027 (in 3 folds),0.575 +/- 0.202 (in 3 folds),0.238 +/- 0.412 (in 3 folds),0.576,0.358,323,0,323,0.0,False
elasticnet_cv,0.907 +/- 0.016 (in 3 folds),0.900 +/- 0.017 (in 3 folds),0.906 +/- 0.013 (in 3 folds),0.906 +/- 0.015 (in 3 folds),0.755 +/- 0.046 (in 3 folds),0.641 +/- 0.059 (in 3 folds),0.755,0.638,323,0,323,0.0,False
lasso_cv,0.896 +/- 0.019 (in 3 folds),0.889 +/- 0.019 (in 3 folds),0.892 +/- 0.016 (in 3 folds),0.892 +/- 0.018 (in 3 folds),0.762 +/- 0.072 (in 3 folds),0.650 +/- 0.100 (in 3 folds),0.762,0.646,323,0,323,0.0,False
dummy_stratified,0.509 +/- 0.030 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.512 +/- 0.015 (in 3 folds),0.515 +/- 0.016 (in 3 folds),0.341 +/- 0.052 (in 3 folds),0.015 +/- 0.067 (in 3 folds),0.341,0.014,323,0,323,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.019 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,323,0,323,0.0,True


lasso_multiclass,rf_multiclass,linearsvm_ovr,xgboost
Per-fold scores: ROC-AUC (weighted OvO): 0.938 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.940 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.937 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.941 +/- 0.018 (in 3 folds) Accuracy: 0.805 +/- 0.023 (in 3 folds) MCC: 0.719 +/- 0.031 (in 3 folds) Global scores: Accuracy: 0.805 MCC: 0.718 Global classification report:  precision recall f1-score support  Covid19 0.81 0.81 0.81 42  HIV 0.76 0.86 0.81 64 Healthy/Background 0.84 0.82 0.83 149  Lupus 0.77 0.72 0.74 68  accuracy 0.80 323  macro avg 0.80 0.80 0.80 323  weighted avg 0.81 0.80 0.80 323,Per-fold scores: ROC-AUC (weighted OvO): 0.922 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.923 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.929 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.932 +/- 0.010 (in 3 folds) Accuracy: 0.768 +/- 0.051 (in 3 folds) MCC: 0.659 +/- 0.068 (in 3 folds) Global scores: Accuracy: 0.768 MCC: 0.658 Global classification report:  precision recall f1-score support  Covid19 1.00 0.64 0.78 42  HIV 0.74 0.83 0.78 64 Healthy/Background 0.74 0.91 0.81 149  Lupus 0.80 0.49 0.61 68  accuracy 0.77 323  macro avg 0.82 0.72 0.75 323  weighted avg 0.79 0.77 0.76 323,Per-fold scores: ROC-AUC (weighted OvO): 0.918 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.916 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.920 +/- 0.016 (in 3 folds) Accuracy: 0.783 +/- 0.005 (in 3 folds) MCC: 0.685 +/- 0.011 (in 3 folds) Global scores: Accuracy: 0.783 MCC: 0.683 Global classification report:  precision recall f1-score support  Covid19 0.85 0.81 0.83 42  HIV 0.74 0.78 0.76 64 Healthy/Background 0.80 0.82 0.81 149  Lupus 0.75 0.69 0.72 68  accuracy 0.78 323  macro avg 0.78 0.78 0.78 323  weighted avg 0.78 0.78 0.78 323,Per-fold scores: ROC-AUC (weighted OvO): 0.910 +/- 0.029 (in 3 folds) ROC-AUC (macro OvO): 0.909 +/- 0.031 (in 3 folds) au-PRC (weighted OvO): 0.918 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.922 +/- 0.031 (in 
3 folds) Accuracy: 0.783 +/- 0.019 (in 3 folds) MCC: 0.682 +/- 0.031 (in 3 folds) Global scores: Accuracy: 0.783 MCC: 0.681 Global classification report:  precision recall f1-score support  Covid19 0.94 0.76 0.84 42  HIV 0.72 0.78 0.75 64 Healthy/Background 0.80 0.87 0.83 149  Lupus 0.71 0.62 0.66 68  accuracy 0.78 323  macro avg 0.79 0.76 0.77 323  weighted avg 0.79 0.78 0.78 323
,,,
,,,
,,,


ridge_cv,elasticnet_cv,lasso_cv,dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.907 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.900 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.907 +/- 0.022 (in 3 folds) au-PRC (macro OvO): 0.906 +/- 0.027 (in 3 folds) Accuracy: 0.575 +/- 0.202 (in 3 folds) MCC: 0.238 +/- 0.412 (in 3 folds) Global scores: Accuracy: 0.576 MCC: 0.358 Global classification report:  precision recall f1-score support  Covid19 1.00 0.24 0.38 42  HIV 0.78 0.22 0.34 64 Healthy/Background 0.53 0.97 0.68 149  Lupus 0.78 0.26 0.40 68  accuracy 0.58 323  macro avg 0.77 0.42 0.45 323  weighted avg 0.69 0.58 0.52 323,Per-fold scores: ROC-AUC (weighted OvO): 0.907 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.900 +/- 0.017 (in 3 folds) au-PRC (weighted OvO): 0.906 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.906 +/- 0.015 (in 3 folds) Accuracy: 0.755 +/- 0.046 (in 3 folds) MCC: 0.641 +/- 0.059 (in 3 folds) Global scores: Accuracy: 0.755 MCC: 0.638 Global classification report:  precision recall f1-score support  Covid19 1.00 0.57 0.73 42  HIV 0.78 0.73 0.76 64 Healthy/Background 0.72 0.93 0.81 149  Lupus 0.76 0.50 0.60 68  accuracy 0.76 323  macro avg 0.81 0.68 0.72 323  weighted avg 0.77 0.76 0.75 323,Per-fold scores: ROC-AUC (weighted OvO): 0.896 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.889 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.892 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.892 +/- 0.018 (in 3 folds) Accuracy: 0.762 +/- 0.072 (in 3 folds) MCC: 0.650 +/- 0.100 (in 3 folds) Global scores: Accuracy: 0.762 MCC: 0.646 Global classification report:  precision recall f1-score support  Covid19 0.92 0.57 0.71 42  HIV 0.78 0.80 0.79 64 Healthy/Background 0.74 0.92 0.82 149  Lupus 0.71 0.50 0.59 68  accuracy 0.76 323  macro avg 0.79 0.70 0.73 323  weighted avg 0.77 0.76 0.75 323,Per-fold scores: ROC-AUC (weighted OvO): 0.509 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.515 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.512 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.515 +/- 0.016 (in 
3 folds) Accuracy: 0.341 +/- 0.052 (in 3 folds) MCC: 0.015 +/- 0.067 (in 3 folds) Global scores: Accuracy: 0.341 MCC: 0.014 Global classification report:  precision recall f1-score support  Covid19 0.20 0.17 0.18 42  HIV 0.18 0.22 0.20 64 Healthy/Background 0.44 0.51 0.47 149  Lupus 0.33 0.19 0.24 68  accuracy 0.34 323  macro avg 0.29 0.27 0.27 323  weighted avg 0.34 0.34 0.33 323
,,,
,,,
,,,


dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 (in 3 folds) Accuracy: 0.461 +/- 0.019 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.461 MCC: 0.000 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 42  HIV 0.00 0.00 0.00 64 Healthy/Background 0.46 1.00 0.63 149  Lupus 0.00 0.00 0.00 68  accuracy 0.46 323  macro avg 0.12 0.25 0.16 323  weighted avg 0.21 0.46 0.29 323


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.939 +/- 0.014 (in 3 folds),0.942 +/- 0.016 (in 3 folds),0.938 +/- 0.015 (in 3 folds),0.943 +/- 0.015 (in 3 folds),0.812 +/- 0.010 (in 3 folds),0.728 +/- 0.015 (in 3 folds),0.812,0.728,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.939 +/- 0.014 (in 3 folds),0.942 +/- 0.016 (in 3 folds),0.938 +/- 0.015 (in 3 folds),0.943 +/- 0.015 (in 3 folds),0.800 +/- 0.015 (in 3 folds),0.707 +/- 0.023 (in 3 folds),0.8,0.705,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
linearsvm_ovr,0.932 +/- 0.023 (in 3 folds),0.933 +/- 0.027 (in 3 folds),0.931 +/- 0.024 (in 3 folds),0.935 +/- 0.027 (in 3 folds),0.806 +/- 0.042 (in 3 folds),0.718 +/- 0.061 (in 3 folds),0.806,0.717,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.932 +/- 0.023 (in 3 folds),0.933 +/- 0.027 (in 3 folds),0.931 +/- 0.024 (in 3 folds),0.935 +/- 0.027 (in 3 folds),0.793 +/- 0.037 (in 3 folds),0.700 +/- 0.059 (in 3 folds),0.794,0.699,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
rf_multiclass,0.926 +/- 0.015 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.933 +/- 0.013 (in 3 folds),0.777 +/- 0.014 (in 3 folds),0.675 +/- 0.025 (in 3 folds),0.777,0.673,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.926 +/- 0.015 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.933 +/- 0.013 (in 3 folds),0.773 +/- 0.054 (in 3 folds),0.680 +/- 0.065 (in 3 folds),0.773,0.667,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
xgboost,0.919 +/- 0.006 (in 3 folds),0.918 +/- 0.007 (in 3 folds),0.923 +/- 0.004 (in 3 folds),0.926 +/- 0.004 (in 3 folds),0.761 +/- 0.024 (in 3 folds),0.648 +/- 0.037 (in 3 folds),0.76,0.645,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
xgboost.decision_thresholds_tuned,0.919 +/- 0.006 (in 3 folds),0.918 +/- 0.007 (in 3 folds),0.923 +/- 0.004 (in 3 folds),0.926 +/- 0.004 (in 3 folds),0.790 +/- 0.005 (in 3 folds),0.690 +/- 0.005 (in 3 folds),0.79,0.689,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
elasticnet_cv,0.902 +/- 0.023 (in 3 folds),0.897 +/- 0.026 (in 3 folds),0.903 +/- 0.023 (in 3 folds),0.902 +/- 0.024 (in 3 folds),0.786 +/- 0.022 (in 3 folds),0.687 +/- 0.032 (in 3 folds),0.785,0.686,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.902 +/- 0.023 (in 3 folds),0.897 +/- 0.026 (in 3 folds),0.903 +/- 0.023 (in 3 folds),0.902 +/- 0.024 (in 3 folds),0.750 +/- 0.038 (in 3 folds),0.646 +/- 0.038 (in 3 folds),0.75,0.637,disease.separate_past_exposures,480.0,0.0,480.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.939 +/- 0.014 (in 3 folds),0.942 +/- 0.016 (in 3 folds),0.938 +/- 0.015 (in 3 folds),0.943 +/- 0.015 (in 3 folds),0.812 +/- 0.010 (in 3 folds),0.728 +/- 0.015 (in 3 folds),0.812,0.728,disease.separate_past_exposures,480,0,480,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.939 +/- 0.014 (in 3 folds),0.942 +/- 0.016 (in 3 folds),0.938 +/- 0.015 (in 3 folds),0.943 +/- 0.015 (in 3 folds),0.800 +/- 0.015 (in 3 folds),0.707 +/- 0.023 (in 3 folds),0.8,0.705,disease.separate_past_exposures,480,0,480,0.0,False
linearsvm_ovr,0.932 +/- 0.023 (in 3 folds),0.933 +/- 0.027 (in 3 folds),0.931 +/- 0.024 (in 3 folds),0.935 +/- 0.027 (in 3 folds),0.806 +/- 0.042 (in 3 folds),0.718 +/- 0.061 (in 3 folds),0.806,0.717,disease.separate_past_exposures,480,0,480,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.932 +/- 0.023 (in 3 folds),0.933 +/- 0.027 (in 3 folds),0.931 +/- 0.024 (in 3 folds),0.935 +/- 0.027 (in 3 folds),0.793 +/- 0.037 (in 3 folds),0.700 +/- 0.059 (in 3 folds),0.794,0.699,disease.separate_past_exposures,480,0,480,0.0,False
rf_multiclass,0.926 +/- 0.015 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.933 +/- 0.013 (in 3 folds),0.777 +/- 0.014 (in 3 folds),0.675 +/- 0.025 (in 3 folds),0.777,0.673,disease.separate_past_exposures,480,0,480,0.0,False
rf_multiclass.decision_thresholds_tuned,0.926 +/- 0.015 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.933 +/- 0.013 (in 3 folds),0.773 +/- 0.054 (in 3 folds),0.680 +/- 0.065 (in 3 folds),0.773,0.667,disease.separate_past_exposures,480,0,480,0.0,False
xgboost,0.919 +/- 0.006 (in 3 folds),0.918 +/- 0.007 (in 3 folds),0.923 +/- 0.004 (in 3 folds),0.926 +/- 0.004 (in 3 folds),0.761 +/- 0.024 (in 3 folds),0.648 +/- 0.037 (in 3 folds),0.76,0.645,disease.separate_past_exposures,480,0,480,0.0,False
xgboost.decision_thresholds_tuned,0.919 +/- 0.006 (in 3 folds),0.918 +/- 0.007 (in 3 folds),0.923 +/- 0.004 (in 3 folds),0.926 +/- 0.004 (in 3 folds),0.790 +/- 0.005 (in 3 folds),0.690 +/- 0.005 (in 3 folds),0.79,0.689,disease.separate_past_exposures,480,0,480,0.0,False
elasticnet_cv,0.902 +/- 0.023 (in 3 folds),0.897 +/- 0.026 (in 3 folds),0.903 +/- 0.023 (in 3 folds),0.902 +/- 0.024 (in 3 folds),0.786 +/- 0.022 (in 3 folds),0.687 +/- 0.032 (in 3 folds),0.785,0.686,disease.separate_past_exposures,480,0,480,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.902 +/- 0.023 (in 3 folds),0.897 +/- 0.026 (in 3 folds),0.903 +/- 0.023 (in 3 folds),0.902 +/- 0.024 (in 3 folds),0.750 +/- 0.038 (in 3 folds),0.646 +/- 0.038 (in 3 folds),0.75,0.637,disease.separate_past_exposures,480,0,480,0.0,False


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.939 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.942 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.938 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.943 +/- 0.015 (in 3 folds) Accuracy: 0.812 +/- 0.010 (in 3 folds) MCC: 0.728 +/- 0.015 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.812 MCC: 0.728 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.88 0.83 0.85 63  HIV 0.82 0.89 0.85 98 Healthy/Background 0.83 0.82 0.83 221  Lupus 0.71 0.70 0.71 98  accuracy 0.81 480  macro avg 0.81 0.81 0.81 480  weighted avg 0.81 0.81 0.81 480,Per-fold scores: ROC-AUC (weighted OvO): 0.939 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.942 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.938 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.943 +/- 0.015 (in 3 folds) Accuracy: 0.800 +/- 0.015 (in 3 folds) MCC: 0.707 +/- 0.023 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.800 MCC: 0.705 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.88 0.81 0.84 63  HIV 0.88 0.80 0.83 98 Healthy/Background 0.78 0.90 0.84 221  Lupus 0.71 0.58 0.64 98  accuracy 0.80 480  macro avg 0.81 0.77 0.79 480  weighted avg 0.80 0.80 0.80 480,Per-fold scores: ROC-AUC (weighted OvO): 0.932 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.933 +/- 0.027 (in 3 folds) au-PRC (weighted OvO): 0.931 +/- 0.024 (in 3 folds) au-PRC (macro OvO): 0.935 +/- 0.027 (in 3 folds) Accuracy: 0.806 +/- 0.042 (in 3 folds) MCC: 0.718 +/- 0.061 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.806 MCC: 0.717 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.83 0.76 0.79 63  HIV 0.82 0.87 0.84 98 Healthy/Background 0.84 0.86 0.85 221  Lupus 0.70 0.65 0.68 98  accuracy 0.81 480  macro avg 0.80 0.79 0.79 480  weighted avg 0.80 0.81 0.80 480,Per-fold scores: ROC-AUC (weighted OvO): 0.932 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.933 +/- 0.027 (in 3 folds) au-PRC (weighted OvO): 0.931 +/- 0.024 (in 3 folds) au-PRC (macro OvO): 0.935 +/- 0.027 (in 3 folds) Accuracy: 0.793 +/- 0.037 (in 3 folds) MCC: 0.700 +/- 0.059 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.794 MCC: 0.699 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.78 0.78 0.78 63  HIV 0.86 0.80 0.83 98 Healthy/Background 0.84 0.85 0.84 221  Lupus 0.65 0.67 0.66 98  accuracy 0.79 480  macro avg 0.78 0.77 0.78 480  weighted avg 0.80 0.79 0.79 480
,,,
,,,


rf_multiclass,rf_multiclass.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.926 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.926 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.930 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.933 +/- 0.013 (in 3 folds) Accuracy: 0.777 +/- 0.014 (in 3 folds) MCC: 0.675 +/- 0.025 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.777 MCC: 0.673 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 1.00 0.68 0.81 63  HIV 0.78 0.82 0.80 98 Healthy/Background 0.73 0.93 0.82 221  Lupus 0.83 0.46 0.59 98  accuracy 0.78 480  macro avg 0.84 0.72 0.75 480  weighted avg 0.80 0.78 0.77 480,Per-fold scores: ROC-AUC (weighted OvO): 0.926 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.926 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.930 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.933 +/- 0.013 (in 3 folds) Accuracy: 0.773 +/- 0.054 (in 3 folds) MCC: 0.680 +/- 0.065 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.773 MCC: 0.667 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.89 0.75 0.81 63  HIV 0.76 0.83 0.79 98 Healthy/Background 0.79 0.82 0.81 221  Lupus 0.68 0.62 0.65 98  accuracy 0.77 480  macro avg 0.78 0.75 0.76 480  weighted avg 0.77 0.77 0.77 480,Per-fold scores: ROC-AUC (weighted OvO): 0.919 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.007 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.004 (in 3 folds) Accuracy: 0.761 +/- 0.024 (in 3 folds) MCC: 0.648 +/- 0.037 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.760 MCC: 0.645 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.90 0.73 0.81 63  HIV 0.78 0.76 0.77 98 Healthy/Background 0.76 0.88 0.82 221  Lupus 0.65 0.51 0.57 98  accuracy 0.76 480  macro avg 0.77 0.72 0.74 480  weighted avg 0.76 0.76 0.75 480,Per-fold scores: ROC-AUC (weighted OvO): 0.919 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.007 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.004 (in 3 folds) Accuracy: 0.790 +/- 0.005 (in 3 folds) MCC: 0.690 +/- 0.005 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.790 MCC: 0.689 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.87 0.75 0.80 63  HIV 0.91 0.72 0.81 98 Healthy/Background 0.78 0.92 0.84 221  Lupus 0.67 0.58 0.62 98  accuracy 0.79 480  macro avg 0.81 0.74 0.77 480  weighted avg 0.79 0.79 0.79 480
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.902 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.897 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.903 +/- 0.023 (in 3 folds) au-PRC (macro OvO): 0.902 +/- 0.024 (in 3 folds) Accuracy: 0.786 +/- 0.022 (in 3 folds) MCC: 0.687 +/- 0.032 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.785 MCC: 0.686 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.96 0.68 0.80 63  HIV 0.84 0.78 0.80 98 Healthy/Background 0.74 0.95 0.83 221  Lupus 0.81 0.48 0.60 98  accuracy 0.79 480  macro avg 0.83 0.72 0.76 480  weighted avg 0.80 0.79 0.77 480,Per-fold scores: ROC-AUC (weighted OvO): 0.902 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.897 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.903 +/- 0.023 (in 3 folds) au-PRC (macro OvO): 0.902 +/- 0.024 (in 3 folds) Accuracy: 0.750 +/- 0.038 (in 3 folds) MCC: 0.646 +/- 0.038 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.750 MCC: 0.637 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.80 0.78 0.79 63  HIV 0.78 0.83 0.80 98 Healthy/Background 0.80 0.78 0.79 221  Lupus 0.58 0.58 0.58 98  accuracy 0.75 480  macro avg 0.74 0.74 0.74 480  weighted avg 0.75 0.75 0.75 480,Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.894 +/- 0.025 (in 3 folds) au-PRC (weighted OvO): 0.897 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.897 +/- 0.022 (in 3 folds) Accuracy: 0.790 +/- 0.013 (in 3 folds) MCC: 0.691 +/- 0.019 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.790 MCC: 0.690 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.94 0.71 0.81 63  HIV 0.85 0.79 0.81 98 Healthy/Background 0.76 0.94 0.84 221  Lupus 0.74 0.50 0.60 98  accuracy 0.79 480  macro avg 0.82 0.74 0.77 480  weighted avg 0.80 0.79 0.78 480,Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.894 +/- 0.025 (in 3 folds) au-PRC (weighted OvO): 0.897 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.897 +/- 0.022 (in 3 folds) Accuracy: 0.771 +/- 0.017 (in 3 folds) MCC: 0.668 +/- 0.023 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.771 MCC: 0.665 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.84 0.76 0.80 63  HIV 0.81 0.79 0.80 98 Healthy/Background 0.80 0.81 0.81 221  Lupus 0.64 0.66 0.65 98  accuracy 0.77 480  macro avg 0.77 0.76 0.76 480  weighted avg 0.77 0.77 0.77 480
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.892 +/- 0.013 (in 3 folds) au-PRC (weighted OvO): 0.902 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.900 +/- 0.017 (in 3 folds) Accuracy: 0.572 +/- 0.193 (in 3 folds) MCC: 0.233 +/- 0.403 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.569 MCC: 0.352 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.93 0.21 0.34 63  HIV 0.87 0.27 0.41 98 Healthy/Background 0.52 0.98 0.68 221  Lupus 0.82 0.18 0.30 98  accuracy 0.57 480  macro avg 0.78 0.41 0.43 480  weighted avg 0.71 0.57 0.50 480,Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.892 +/- 0.013 (in 3 folds) au-PRC (weighted OvO): 0.902 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.900 +/- 0.017 (in 3 folds) Accuracy: 0.738 +/- 0.066 (in 3 folds) MCC: 0.617 +/- 0.097 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.738 MCC: 0.611 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.77 0.54 0.64 63  HIV 0.79 0.71 0.75 98 Healthy/Background 0.78 0.86 0.82 221  Lupus 0.58 0.60 0.59 98  accuracy 0.74 480  macro avg 0.73 0.68 0.70 480  weighted avg 0.74 0.74 0.73 480
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.012 (in 3 folds),0.939 +/- 0.012 (in 3 folds),0.938 +/- 0.015 (in 3 folds),0.941 +/- 0.014 (in 3 folds),0.800 +/- 0.008 (in 3 folds),0.708 +/- 0.007 (in 3 folds),0.8,0.707,285.0,0.0,285.0,0.0,False
rf_multiclass,0.918 +/- 0.004 (in 3 folds),0.918 +/- 0.006 (in 3 folds),0.923 +/- 0.004 (in 3 folds),0.926 +/- 0.003 (in 3 folds),0.748 +/- 0.062 (in 3 folds),0.629 +/- 0.075 (in 3 folds),0.747,0.626,285.0,0.0,285.0,0.0,False
xgboost,0.913 +/- 0.018 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.925 +/- 0.015 (in 3 folds),0.927 +/- 0.014 (in 3 folds),0.754 +/- 0.032 (in 3 folds),0.640 +/- 0.038 (in 3 folds),0.754,0.635,285.0,0.0,285.0,0.0,False
linearsvm_ovr,0.912 +/- 0.010 (in 3 folds),0.910 +/- 0.010 (in 3 folds),0.908 +/- 0.007 (in 3 folds),0.910 +/- 0.010 (in 3 folds),0.772 +/- 0.048 (in 3 folds),0.668 +/- 0.064 (in 3 folds),0.772,0.664,285.0,0.0,285.0,0.0,False
ridge_cv,0.910 +/- 0.015 (in 3 folds),0.903 +/- 0.022 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.910 +/- 0.024 (in 3 folds),0.654 +/- 0.159 (in 3 folds),0.421 +/- 0.375 (in 3 folds),0.656,0.498,285.0,0.0,285.0,0.0,False
elasticnet_cv,0.904 +/- 0.009 (in 3 folds),0.898 +/- 0.008 (in 3 folds),0.908 +/- 0.013 (in 3 folds),0.909 +/- 0.011 (in 3 folds),0.780 +/- 0.076 (in 3 folds),0.677 +/- 0.101 (in 3 folds),0.779,0.671,285.0,0.0,285.0,0.0,False
lasso_cv,0.891 +/- 0.021 (in 3 folds),0.885 +/- 0.021 (in 3 folds),0.896 +/- 0.017 (in 3 folds),0.896 +/- 0.017 (in 3 folds),0.759 +/- 0.090 (in 3 folds),0.644 +/- 0.123 (in 3 folds),0.758,0.639,285.0,0.0,285.0,0.0,False
dummy_stratified,0.530 +/- 0.012 (in 3 folds),0.536 +/- 0.011 (in 3 folds),0.524 +/- 0.005 (in 3 folds),0.529 +/- 0.005 (in 3 folds),0.379 +/- 0.006 (in 3 folds),0.059 +/- 0.030 (in 3 folds),0.379,0.059,285.0,0.0,285.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.463,0.0,285.0,0.0,285.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.012 (in 3 folds),0.939 +/- 0.012 (in 3 folds),0.938 +/- 0.015 (in 3 folds),0.941 +/- 0.014 (in 3 folds),0.800 +/- 0.008 (in 3 folds),0.708 +/- 0.007 (in 3 folds),0.8,0.707,285,0,285,0.0,False
rf_multiclass,0.918 +/- 0.004 (in 3 folds),0.918 +/- 0.006 (in 3 folds),0.923 +/- 0.004 (in 3 folds),0.926 +/- 0.003 (in 3 folds),0.748 +/- 0.062 (in 3 folds),0.629 +/- 0.075 (in 3 folds),0.747,0.626,285,0,285,0.0,False
xgboost,0.913 +/- 0.018 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.925 +/- 0.015 (in 3 folds),0.927 +/- 0.014 (in 3 folds),0.754 +/- 0.032 (in 3 folds),0.640 +/- 0.038 (in 3 folds),0.754,0.635,285,0,285,0.0,False
linearsvm_ovr,0.912 +/- 0.010 (in 3 folds),0.910 +/- 0.010 (in 3 folds),0.908 +/- 0.007 (in 3 folds),0.910 +/- 0.010 (in 3 folds),0.772 +/- 0.048 (in 3 folds),0.668 +/- 0.064 (in 3 folds),0.772,0.664,285,0,285,0.0,False
ridge_cv,0.910 +/- 0.015 (in 3 folds),0.903 +/- 0.022 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.910 +/- 0.024 (in 3 folds),0.654 +/- 0.159 (in 3 folds),0.421 +/- 0.375 (in 3 folds),0.656,0.498,285,0,285,0.0,False
elasticnet_cv,0.904 +/- 0.009 (in 3 folds),0.898 +/- 0.008 (in 3 folds),0.908 +/- 0.013 (in 3 folds),0.909 +/- 0.011 (in 3 folds),0.780 +/- 0.076 (in 3 folds),0.677 +/- 0.101 (in 3 folds),0.779,0.671,285,0,285,0.0,False
lasso_cv,0.891 +/- 0.021 (in 3 folds),0.885 +/- 0.021 (in 3 folds),0.896 +/- 0.017 (in 3 folds),0.896 +/- 0.017 (in 3 folds),0.759 +/- 0.090 (in 3 folds),0.644 +/- 0.123 (in 3 folds),0.758,0.639,285,0,285,0.0,False
dummy_stratified,0.530 +/- 0.012 (in 3 folds),0.536 +/- 0.011 (in 3 folds),0.524 +/- 0.005 (in 3 folds),0.529 +/- 0.005 (in 3 folds),0.379 +/- 0.006 (in 3 folds),0.059 +/- 0.030 (in 3 folds),0.379,0.059,285,0,285,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.463,0.0,285,0,285,0.0,True


lasso_multiclass,rf_multiclass,xgboost,linearsvm_ovr
Per-fold scores: ROC-AUC (weighted OvO): 0.938 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.939 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.938 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.941 +/- 0.014 (in 3 folds) Accuracy: 0.800 +/- 0.008 (in 3 folds) MCC: 0.708 +/- 0.007 (in 3 folds) Global scores: Accuracy: 0.800 MCC: 0.707 Global classification report:  precision recall f1-score support  Covid19 0.76 0.76 0.76 33  HIV 0.80 0.85 0.82 55 Healthy/Background 0.83 0.84 0.83 132  Lupus 0.76 0.69 0.73 65  accuracy 0.80 285  macro avg 0.79 0.79 0.79 285  weighted avg 0.80 0.80 0.80 285,Per-fold scores: ROC-AUC (weighted OvO): 0.918 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.003 (in 3 folds) Accuracy: 0.748 +/- 0.062 (in 3 folds) MCC: 0.629 +/- 0.075 (in 3 folds) Global scores: Accuracy: 0.747 MCC: 0.626 Global classification report:  precision recall f1-score support  Covid19 0.94 0.52 0.67 33  HIV 0.74 0.84 0.79 55 Healthy/Background 0.72 0.92 0.80 132  Lupus 0.81 0.45 0.57 65  accuracy 0.75 285  macro avg 0.80 0.68 0.71 285  weighted avg 0.77 0.75 0.73 285,Per-fold scores: ROC-AUC (weighted OvO): 0.913 +/- 0.018 (in 3 folds) ROC-AUC (macro OvO): 0.911 +/- 0.018 (in 3 folds) au-PRC (weighted OvO): 0.925 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.927 +/- 0.014 (in 3 folds) Accuracy: 0.754 +/- 0.032 (in 3 folds) MCC: 0.640 +/- 0.038 (in 3 folds) Global scores: Accuracy: 0.754 MCC: 0.635 Global classification report:  precision recall f1-score support  Covid19 0.83 0.61 0.70 33  HIV 0.71 0.80 0.75 55 Healthy/Background 0.78 0.86 0.82 132  Lupus 0.71 0.57 0.63 65  accuracy 0.75 285  macro avg 0.76 0.71 0.73 285  weighted avg 0.75 0.75 0.75 285,Per-fold scores: ROC-AUC (weighted OvO): 0.912 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.910 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.908 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.910 +/- 0.010 (in 
3 folds) Accuracy: 0.772 +/- 0.048 (in 3 folds) MCC: 0.668 +/- 0.064 (in 3 folds) Global scores: Accuracy: 0.772 MCC: 0.664 Global classification report:  precision recall f1-score support  Covid19 0.72 0.70 0.71 33  HIV 0.76 0.80 0.78 55 Healthy/Background 0.81 0.84 0.83 132  Lupus 0.72 0.65 0.68 65  accuracy 0.77 285  macro avg 0.75 0.75 0.75 285  weighted avg 0.77 0.77 0.77 285
,,,
,,,
,,,


ridge_cv,elasticnet_cv,lasso_cv,dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.910 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.903 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.911 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.910 +/- 0.024 (in 3 folds) Accuracy: 0.654 +/- 0.159 (in 3 folds) MCC: 0.421 +/- 0.375 (in 3 folds) Global scores: Accuracy: 0.656 MCC: 0.498 Global classification report:  precision recall f1-score support  Covid19 1.00 0.39 0.57 33  HIV 0.88 0.42 0.57 55 Healthy/Background 0.58 0.98 0.73 132  Lupus 0.88 0.34 0.49 65  accuracy 0.66 285  macro avg 0.84 0.53 0.59 285  weighted avg 0.76 0.66 0.63 285,Per-fold scores: ROC-AUC (weighted OvO): 0.904 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.898 +/- 0.008 (in 3 folds) au-PRC (weighted OvO): 0.908 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.909 +/- 0.011 (in 3 folds) Accuracy: 0.780 +/- 0.076 (in 3 folds) MCC: 0.677 +/- 0.101 (in 3 folds) Global scores: Accuracy: 0.779 MCC: 0.671 Global classification report:  precision recall f1-score support  Covid19 0.94 0.52 0.67 33  HIV 0.80 0.82 0.81 55 Healthy/Background 0.75 0.92 0.83 132  Lupus 0.79 0.58 0.67 65  accuracy 0.78 285  macro avg 0.82 0.71 0.74 285  weighted avg 0.79 0.78 0.77 285,Per-fold scores: ROC-AUC (weighted OvO): 0.891 +/- 0.021 (in 3 folds) ROC-AUC (macro OvO): 0.885 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.896 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.896 +/- 0.017 (in 3 folds) Accuracy: 0.759 +/- 0.090 (in 3 folds) MCC: 0.644 +/- 0.123 (in 3 folds) Global scores: Accuracy: 0.758 MCC: 0.639 Global classification report:  precision recall f1-score support  Covid19 0.90 0.55 0.68 33  HIV 0.73 0.78 0.75 55 Healthy/Background 0.76 0.89 0.82 132  Lupus 0.74 0.57 0.64 65  accuracy 0.76 285  macro avg 0.78 0.70 0.72 285  weighted avg 0.76 0.76 0.75 285,Per-fold scores: ROC-AUC (weighted OvO): 0.530 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.536 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.524 +/- 0.005 (in 3 folds) au-PRC (macro OvO): 0.529 +/- 0.005 (in 
3 folds) Accuracy: 0.379 +/- 0.006 (in 3 folds) MCC: 0.059 +/- 0.030 (in 3 folds) Global scores: Accuracy: 0.379 MCC: 0.059 Global classification report:  precision recall f1-score support  Covid19 0.23 0.15 0.18 33  HIV 0.28 0.35 0.31 55 Healthy/Background 0.46 0.55 0.51 132  Lupus 0.28 0.17 0.21 65  accuracy 0.38 285  macro avg 0.31 0.30 0.30 285  weighted avg 0.36 0.38 0.36 285
,,,
,,,
,,,


dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 (in 3 folds) Accuracy: 0.463 +/- 0.034 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.463 MCC: 0.000 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 33  HIV 0.00 0.00 0.00 55 Healthy/Background 0.46 1.00 0.63 132  Lupus 0.00 0.00 0.00 65  accuracy 0.46 285  macro avg 0.12 0.25 0.16 285  weighted avg 0.21 0.46 0.29 285


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.942 +/- 0.014 (in 3 folds),0.946 +/- 0.014 (in 3 folds),0.940 +/- 0.019 (in 3 folds),0.946 +/- 0.019 (in 3 folds),0.802 +/- 0.018 (in 3 folds),0.711 +/- 0.023 (in 3 folds),0.802,0.712,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.942 +/- 0.014 (in 3 folds),0.946 +/- 0.014 (in 3 folds),0.940 +/- 0.019 (in 3 folds),0.946 +/- 0.019 (in 3 folds),0.786 +/- 0.019 (in 3 folds),0.690 +/- 0.023 (in 3 folds),0.786,0.688,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
linearsvm_ovr,0.922 +/- 0.027 (in 3 folds),0.918 +/- 0.033 (in 3 folds),0.921 +/- 0.031 (in 3 folds),0.922 +/- 0.035 (in 3 folds),0.786 +/- 0.020 (in 3 folds),0.688 +/- 0.036 (in 3 folds),0.786,0.688,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.922 +/- 0.027 (in 3 folds),0.918 +/- 0.033 (in 3 folds),0.921 +/- 0.031 (in 3 folds),0.922 +/- 0.035 (in 3 folds),0.778 +/- 0.030 (in 3 folds),0.685 +/- 0.024 (in 3 folds),0.779,0.678,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
rf_multiclass,0.915 +/- 0.012 (in 3 folds),0.915 +/- 0.012 (in 3 folds),0.923 +/- 0.012 (in 3 folds),0.926 +/- 0.012 (in 3 folds),0.783 +/- 0.019 (in 3 folds),0.684 +/- 0.018 (in 3 folds),0.783,0.684,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.915 +/- 0.012 (in 3 folds),0.915 +/- 0.012 (in 3 folds),0.923 +/- 0.012 (in 3 folds),0.926 +/- 0.012 (in 3 folds),0.745 +/- 0.063 (in 3 folds),0.650 +/- 0.059 (in 3 folds),0.745,0.631,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
xgboost,0.912 +/- 0.015 (in 3 folds),0.913 +/- 0.015 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.779 +/- 0.019 (in 3 folds),0.674 +/- 0.024 (in 3 folds),0.779,0.674,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
xgboost.decision_thresholds_tuned,0.912 +/- 0.015 (in 3 folds),0.913 +/- 0.015 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.755 +/- 0.046 (in 3 folds),0.656 +/- 0.057 (in 3 folds),0.755,0.638,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
elasticnet_cv,0.907 +/- 0.013 (in 3 folds),0.903 +/- 0.019 (in 3 folds),0.910 +/- 0.018 (in 3 folds),0.910 +/- 0.023 (in 3 folds),0.802 +/- 0.023 (in 3 folds),0.711 +/- 0.029 (in 3 folds),0.802,0.71,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.907 +/- 0.013 (in 3 folds),0.903 +/- 0.019 (in 3 folds),0.910 +/- 0.018 (in 3 folds),0.910 +/- 0.023 (in 3 folds),0.760 +/- 0.038 (in 3 folds),0.659 +/- 0.038 (in 3 folds),0.76,0.652,disease.separate_past_exposures,420.0,0.0,420.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.942 +/- 0.014 (in 3 folds),0.946 +/- 0.014 (in 3 folds),0.940 +/- 0.019 (in 3 folds),0.946 +/- 0.019 (in 3 folds),0.802 +/- 0.018 (in 3 folds),0.711 +/- 0.023 (in 3 folds),0.802,0.712,disease.separate_past_exposures,420,0,420,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.942 +/- 0.014 (in 3 folds),0.946 +/- 0.014 (in 3 folds),0.940 +/- 0.019 (in 3 folds),0.946 +/- 0.019 (in 3 folds),0.786 +/- 0.019 (in 3 folds),0.690 +/- 0.023 (in 3 folds),0.786,0.688,disease.separate_past_exposures,420,0,420,0.0,False
linearsvm_ovr,0.922 +/- 0.027 (in 3 folds),0.918 +/- 0.033 (in 3 folds),0.921 +/- 0.031 (in 3 folds),0.922 +/- 0.035 (in 3 folds),0.786 +/- 0.020 (in 3 folds),0.688 +/- 0.036 (in 3 folds),0.786,0.688,disease.separate_past_exposures,420,0,420,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.922 +/- 0.027 (in 3 folds),0.918 +/- 0.033 (in 3 folds),0.921 +/- 0.031 (in 3 folds),0.922 +/- 0.035 (in 3 folds),0.778 +/- 0.030 (in 3 folds),0.685 +/- 0.024 (in 3 folds),0.779,0.678,disease.separate_past_exposures,420,0,420,0.0,False
rf_multiclass,0.915 +/- 0.012 (in 3 folds),0.915 +/- 0.012 (in 3 folds),0.923 +/- 0.012 (in 3 folds),0.926 +/- 0.012 (in 3 folds),0.783 +/- 0.019 (in 3 folds),0.684 +/- 0.018 (in 3 folds),0.783,0.684,disease.separate_past_exposures,420,0,420,0.0,False
rf_multiclass.decision_thresholds_tuned,0.915 +/- 0.012 (in 3 folds),0.915 +/- 0.012 (in 3 folds),0.923 +/- 0.012 (in 3 folds),0.926 +/- 0.012 (in 3 folds),0.745 +/- 0.063 (in 3 folds),0.650 +/- 0.059 (in 3 folds),0.745,0.631,disease.separate_past_exposures,420,0,420,0.0,False
xgboost,0.912 +/- 0.015 (in 3 folds),0.913 +/- 0.015 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.779 +/- 0.019 (in 3 folds),0.674 +/- 0.024 (in 3 folds),0.779,0.674,disease.separate_past_exposures,420,0,420,0.0,False
xgboost.decision_thresholds_tuned,0.912 +/- 0.015 (in 3 folds),0.913 +/- 0.015 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.926 +/- 0.016 (in 3 folds),0.755 +/- 0.046 (in 3 folds),0.656 +/- 0.057 (in 3 folds),0.755,0.638,disease.separate_past_exposures,420,0,420,0.0,False
elasticnet_cv,0.907 +/- 0.013 (in 3 folds),0.903 +/- 0.019 (in 3 folds),0.910 +/- 0.018 (in 3 folds),0.910 +/- 0.023 (in 3 folds),0.802 +/- 0.023 (in 3 folds),0.711 +/- 0.029 (in 3 folds),0.802,0.71,disease.separate_past_exposures,420,0,420,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.907 +/- 0.013 (in 3 folds),0.903 +/- 0.019 (in 3 folds),0.910 +/- 0.018 (in 3 folds),0.910 +/- 0.023 (in 3 folds),0.760 +/- 0.038 (in 3 folds),0.659 +/- 0.038 (in 3 folds),0.76,0.652,disease.separate_past_exposures,420,0,420,0.0,False


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.942 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.946 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.940 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.946 +/- 0.019 (in 3 folds) Accuracy: 0.802 +/- 0.018 (in 3 folds) MCC: 0.711 +/- 0.023 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.802 MCC: 0.712 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.91 0.83 0.87 47  HIV 0.79 0.87 0.83 87 Healthy/Background 0.81 0.82 0.81 191  Lupus 0.75 0.69 0.72 95  accuracy 0.80 420  macro avg 0.81 0.80 0.81 420  weighted avg 0.80 0.80 0.80 420,Per-fold scores: ROC-AUC (weighted OvO): 0.942 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.946 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.940 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.946 +/- 0.019 (in 3 folds) Accuracy: 0.786 +/- 0.019 (in 3 folds) MCC: 0.690 +/- 0.023 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.786 MCC: 0.688 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.75 0.77 0.76 47  HIV 0.81 0.90 0.85 87 Healthy/Background 0.81 0.84 0.82 191  Lupus 0.72 0.59 0.65 95  accuracy 0.79 420  macro avg 0.77 0.77 0.77 420  weighted avg 0.78 0.79 0.78 420,Per-fold scores: ROC-AUC (weighted OvO): 0.922 +/- 0.027 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.921 +/- 0.031 (in 3 folds) au-PRC (macro OvO): 0.922 +/- 0.035 (in 3 folds) Accuracy: 0.786 +/- 0.020 (in 3 folds) MCC: 0.688 +/- 0.036 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.786 MCC: 0.688 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.75 0.70 0.73 47  HIV 0.83 0.93 0.88 87 Healthy/Background 0.82 0.82 0.82 191  Lupus 0.69 0.62 0.65 95  accuracy 0.79 420  macro avg 0.77 0.77 0.77 420  weighted avg 0.78 0.79 0.78 420,Per-fold scores: ROC-AUC (weighted OvO): 0.922 +/- 0.027 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.921 +/- 0.031 (in 3 folds) au-PRC (macro OvO): 0.922 +/- 0.035 (in 3 folds) Accuracy: 0.778 +/- 0.030 (in 3 folds) MCC: 0.685 +/- 0.024 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.779 MCC: 0.678 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.73 0.70 0.72 47  HIV 0.87 0.86 0.87 87 Healthy/Background 0.82 0.81 0.81 191  Lupus 0.65 0.67 0.66 95  accuracy 0.78 420  macro avg 0.77 0.76 0.76 420  weighted avg 0.78 0.78 0.78 420
,,,
,,,


rf_multiclass,rf_multiclass.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.915 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.915 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.012 (in 3 folds) Accuracy: 0.783 +/- 0.019 (in 3 folds) MCC: 0.684 +/- 0.018 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.783 MCC: 0.684 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 1.00 0.66 0.79 47  HIV 0.79 0.86 0.82 87 Healthy/Background 0.73 0.92 0.82 191  Lupus 0.87 0.49 0.63 95  accuracy 0.78 420  macro avg 0.85 0.73 0.77 420  weighted avg 0.81 0.78 0.77 420,Per-fold scores: ROC-AUC (weighted OvO): 0.915 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.915 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.012 (in 3 folds) Accuracy: 0.745 +/- 0.063 (in 3 folds) MCC: 0.650 +/- 0.059 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.745 MCC: 0.631 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.92 0.70 0.80 47  HIV 0.74 0.87 0.80 87 Healthy/Background 0.79 0.76 0.78 191  Lupus 0.60 0.61 0.61 95  accuracy 0.75 420  macro avg 0.76 0.74 0.74 420  weighted avg 0.75 0.75 0.75 420,Per-fold scores: ROC-AUC (weighted OvO): 0.912 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.913 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.921 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.016 (in 3 folds) Accuracy: 0.779 +/- 0.019 (in 3 folds) MCC: 0.674 +/- 0.024 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.779 MCC: 0.674 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.86 0.68 0.76 47  HIV 0.80 0.85 0.82 87 Healthy/Background 0.78 0.88 0.83 191  Lupus 0.72 0.55 0.62 95  accuracy 0.78 420  macro avg 0.79 0.74 0.76 420  weighted avg 0.78 0.78 0.77 420,Per-fold scores: ROC-AUC (weighted OvO): 0.912 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.913 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.921 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.016 (in 3 folds) Accuracy: 0.755 +/- 0.046 (in 3 folds) MCC: 0.656 +/- 0.057 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.755 MCC: 0.638 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.83 0.72 0.77 47  HIV 0.81 0.83 0.82 87 Healthy/Background 0.77 0.85 0.81 191  Lupus 0.62 0.52 0.56 95  accuracy 0.75 420  macro avg 0.76 0.73 0.74 420  weighted avg 0.75 0.75 0.75 420
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.907 +/- 0.013 (in 3 folds) ROC-AUC (macro OvO): 0.903 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.910 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.910 +/- 0.023 (in 3 folds) Accuracy: 0.802 +/- 0.023 (in 3 folds) MCC: 0.711 +/- 0.029 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.802 MCC: 0.710 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.93 0.60 0.73 47  HIV 0.89 0.86 0.88 87 Healthy/Background 0.75 0.94 0.84 191  Lupus 0.81 0.57 0.67 95  accuracy 0.80 420  macro avg 0.85 0.74 0.78 420  weighted avg 0.81 0.80 0.79 420,Per-fold scores: ROC-AUC (weighted OvO): 0.907 +/- 0.013 (in 3 folds) ROC-AUC (macro OvO): 0.903 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.910 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.910 +/- 0.023 (in 3 folds) Accuracy: 0.760 +/- 0.038 (in 3 folds) MCC: 0.659 +/- 0.038 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.760 MCC: 0.652 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.85 0.72 0.78 47  HIV 0.78 0.87 0.83 87 Healthy/Background 0.80 0.77 0.78 191  Lupus 0.63 0.65 0.64 95  accuracy 0.76 420  macro avg 0.76 0.75 0.76 420  weighted avg 0.76 0.76 0.76 420,Per-fold scores: ROC-AUC (weighted OvO): 0.898 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.893 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.899 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.900 +/- 0.022 (in 3 folds) Accuracy: 0.783 +/- 0.030 (in 3 folds) MCC: 0.680 +/- 0.036 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.783 MCC: 0.680 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.93 0.60 0.73 47  HIV 0.82 0.85 0.84 87 Healthy/Background 0.76 0.91 0.83 191  Lupus 0.74 0.57 0.64 95  accuracy 0.78 420  macro avg 0.81 0.73 0.76 420  weighted avg 0.79 0.78 0.78 420,Per-fold scores: ROC-AUC (weighted OvO): 0.898 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.893 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.899 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.900 +/- 0.022 (in 3 folds) Accuracy: 0.729 +/- 0.042 (in 3 folds) MCC: 0.620 +/- 0.041 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.729 MCC: 0.613 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.74 0.72 0.73 47  HIV 0.76 0.85 0.80 87 Healthy/Background 0.80 0.71 0.75 191  Lupus 0.58 0.65 0.62 95  accuracy 0.73 420  macro avg 0.72 0.73 0.73 420  weighted avg 0.73 0.73 0.73 420
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.898 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.891 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.902 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.901 +/- 0.018 (in 3 folds) Accuracy: 0.640 +/- 0.203 (in 3 folds) MCC: 0.418 +/- 0.373 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.640 MCC: 0.471 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.93 0.28 0.43 47  HIV 0.87 0.46 0.60 87 Healthy/Background 0.57 0.96 0.72 191  Lupus 0.80 0.34 0.47 95  accuracy 0.64 420  macro avg 0.79 0.51 0.56 420  weighted avg 0.73 0.64 0.61 420,Per-fold scores: ROC-AUC (weighted OvO): 0.898 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.891 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.902 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.901 +/- 0.018 (in 3 folds) Accuracy: 0.654 +/- 0.208 (in 3 folds) MCC: 0.448 +/- 0.390 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.655 MCC: 0.478 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.83 0.40 0.54 47  HIV 0.75 0.61 0.67 87 Healthy/Background 0.61 0.84 0.71 191  Lupus 0.69 0.44 0.54 95  accuracy 0.65 420  macro avg 0.72 0.57 0.61 420  weighted avg 0.68 0.65 0.64 420
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.746 +/- 0.009 (in 3 folds),0.739 +/- 0.009 (in 3 folds),0.771 +/- 0.012 (in 3 folds),0.769 +/- 0.006 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132.0,0.0,132.0,0.0,True
lasso_cv,0.741 +/- 0.009 (in 3 folds),0.735 +/- 0.010 (in 3 folds),0.772 +/- 0.015 (in 3 folds),0.772 +/- 0.008 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132.0,0.0,132.0,0.0,True
linearsvm_ovr,0.716 +/- 0.111 (in 3 folds),0.691 +/- 0.127 (in 3 folds),0.719 +/- 0.089 (in 3 folds),0.698 +/- 0.099 (in 3 folds),0.637 +/- 0.051 (in 3 folds),0.454 +/- 0.125 (in 3 folds),0.636,0.435,132.0,0.0,132.0,0.0,False
lasso_multiclass,0.696 +/- 0.143 (in 3 folds),0.671 +/- 0.165 (in 3 folds),0.738 +/- 0.113 (in 3 folds),0.721 +/- 0.133 (in 3 folds),0.542 +/- 0.126 (in 3 folds),0.288 +/- 0.201 (in 3 folds),0.545,0.309,132.0,0.0,132.0,0.0,False
rf_multiclass,0.670 +/- 0.036 (in 3 folds),0.616 +/- 0.022 (in 3 folds),0.692 +/- 0.008 (in 3 folds),0.653 +/- 0.006 (in 3 folds),0.658 +/- 0.094 (in 3 folds),0.517 +/- 0.069 (in 3 folds),0.659,0.513,132.0,0.0,132.0,0.0,True
xgboost,0.648 +/- 0.110 (in 3 folds),0.634 +/- 0.125 (in 3 folds),0.682 +/- 0.048 (in 3 folds),0.667 +/- 0.051 (in 3 folds),0.643 +/- 0.048 (in 3 folds),0.452 +/- 0.033 (in 3 folds),0.644,0.452,132.0,0.0,132.0,0.0,True
ridge_cv,0.602 +/- 0.089 (in 3 folds),0.564 +/- 0.057 (in 3 folds),0.625 +/- 0.109 (in 3 folds),0.605 +/- 0.092 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132.0,0.0,132.0,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132.0,0.0,132.0,0.0,True
dummy_stratified,0.457 +/- 0.015 (in 3 folds),0.467 +/- 0.014 (in 3 folds),0.493 +/- 0.003 (in 3 folds),0.495 +/- 0.002 (in 3 folds),0.318 +/- 0.073 (in 3 folds),-0.102 +/- 0.053 (in 3 folds),0.318,-0.103,132.0,0.0,132.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.746 +/- 0.009 (in 3 folds),0.739 +/- 0.009 (in 3 folds),0.771 +/- 0.012 (in 3 folds),0.769 +/- 0.006 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
lasso_cv,0.741 +/- 0.009 (in 3 folds),0.735 +/- 0.010 (in 3 folds),0.772 +/- 0.015 (in 3 folds),0.772 +/- 0.008 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
linearsvm_ovr,0.716 +/- 0.111 (in 3 folds),0.691 +/- 0.127 (in 3 folds),0.719 +/- 0.089 (in 3 folds),0.698 +/- 0.099 (in 3 folds),0.637 +/- 0.051 (in 3 folds),0.454 +/- 0.125 (in 3 folds),0.636,0.435,132,0,132,0.0,False
lasso_multiclass,0.696 +/- 0.143 (in 3 folds),0.671 +/- 0.165 (in 3 folds),0.738 +/- 0.113 (in 3 folds),0.721 +/- 0.133 (in 3 folds),0.542 +/- 0.126 (in 3 folds),0.288 +/- 0.201 (in 3 folds),0.545,0.309,132,0,132,0.0,False
rf_multiclass,0.670 +/- 0.036 (in 3 folds),0.616 +/- 0.022 (in 3 folds),0.692 +/- 0.008 (in 3 folds),0.653 +/- 0.006 (in 3 folds),0.658 +/- 0.094 (in 3 folds),0.517 +/- 0.069 (in 3 folds),0.659,0.513,132,0,132,0.0,True
xgboost,0.648 +/- 0.110 (in 3 folds),0.634 +/- 0.125 (in 3 folds),0.682 +/- 0.048 (in 3 folds),0.667 +/- 0.051 (in 3 folds),0.643 +/- 0.048 (in 3 folds),0.452 +/- 0.033 (in 3 folds),0.644,0.452,132,0,132,0.0,True
ridge_cv,0.602 +/- 0.089 (in 3 folds),0.564 +/- 0.057 (in 3 folds),0.625 +/- 0.109 (in 3 folds),0.605 +/- 0.092 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.460 +/- 0.112 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.462,0.0,132,0,132,0.0,True
dummy_stratified,0.457 +/- 0.015 (in 3 folds),0.467 +/- 0.014 (in 3 folds),0.493 +/- 0.003 (in 3 folds),0.495 +/- 0.002 (in 3 folds),0.318 +/- 0.073 (in 3 folds),-0.102 +/- 0.053 (in 3 folds),0.318,-0.103,132,0,132,0.0,False


elasticnet_cv,lasso_cv,linearsvm_ovr,lasso_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.746 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.739 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.771 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.769 +/- 0.006 (in 3 folds) Accuracy: 0.460 +/- 0.112 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.462 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 36  Asian 0.00 0.00 0.00 30  Caucasian 0.46 1.00 0.63 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.46 132  macro avg 0.12 0.25 0.16 132  weighted avg 0.21 0.46 0.29 132,Per-fold scores: ROC-AUC (weighted OvO): 0.741 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.735 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.772 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.772 +/- 0.008 (in 3 folds) Accuracy: 0.460 +/- 0.112 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.462 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 36  Asian 0.00 0.00 0.00 30  Caucasian 0.46 1.00 0.63 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.46 132  macro avg 0.12 0.25 0.16 132  weighted avg 0.21 0.46 0.29 132,Per-fold scores: ROC-AUC (weighted OvO): 0.716 +/- 0.111 (in 3 folds) ROC-AUC (macro OvO): 0.691 +/- 0.127 (in 3 folds) au-PRC (weighted OvO): 0.719 +/- 0.089 (in 3 folds) au-PRC (macro OvO): 0.698 +/- 0.099 (in 3 folds) Accuracy: 0.637 +/- 0.051 (in 3 folds) MCC: 0.454 +/- 0.125 (in 3 folds) Global scores: Accuracy: 0.636 MCC: 0.435 Global classification report:  precision recall f1-score support  African 0.94 0.83 0.88 36  Asian 0.33 0.17 0.22 30  Caucasian 0.62 0.80 0.70 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.64 132  macro avg 0.47 0.45 0.45 132  weighted avg 0.62 0.64 0.61 132,Per-fold scores: ROC-AUC (weighted OvO): 0.696 +/- 0.143 (in 3 folds) ROC-AUC (macro OvO): 0.671 +/- 0.165 (in 3 folds) au-PRC (weighted OvO): 0.738 +/- 0.113 (in 3 folds) au-PRC (macro OvO): 0.721 +/- 0.133 
(in 3 folds) Accuracy: 0.542 +/- 0.126 (in 3 folds) MCC: 0.288 +/- 0.201 (in 3 folds) Global scores: Accuracy: 0.545 MCC: 0.309 Global classification report:  precision recall f1-score support  African 0.91 0.89 0.90 36  Asian 0.20 0.20 0.20 30  Caucasian 0.53 0.54 0.54 61 Hispanic/Latino 0.20 0.20 0.20 5  accuracy 0.55 132  macro avg 0.46 0.46 0.46 132  weighted avg 0.55 0.55 0.55 132
,,,
,,,
,,,


rf_multiclass,xgboost,ridge_cv,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.670 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.616 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.692 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.653 +/- 0.006 (in 3 folds) Accuracy: 0.658 +/- 0.094 (in 3 folds) MCC: 0.517 +/- 0.069 (in 3 folds) Global scores: Accuracy: 0.659 MCC: 0.513 Global classification report:  precision recall f1-score support  African 1.00 0.72 0.84 36  Asian 0.00 0.00 0.00 30  Caucasian 0.58 1.00 0.73 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.66 132  macro avg 0.39 0.43 0.39 132  weighted avg 0.54 0.66 0.57 132,Per-fold scores: ROC-AUC (weighted OvO): 0.648 +/- 0.110 (in 3 folds) ROC-AUC (macro OvO): 0.634 +/- 0.125 (in 3 folds) au-PRC (weighted OvO): 0.682 +/- 0.048 (in 3 folds) au-PRC (macro OvO): 0.667 +/- 0.051 (in 3 folds) Accuracy: 0.643 +/- 0.048 (in 3 folds) MCC: 0.452 +/- 0.033 (in 3 folds) Global scores: Accuracy: 0.644 MCC: 0.452 Global classification report:  precision recall f1-score support  African 0.96 0.72 0.83 36  Asian 0.29 0.07 0.11 30  Caucasian 0.58 0.93 0.72 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.64 132  macro avg 0.46 0.43 0.41 132  weighted avg 0.60 0.64 0.58 132,Per-fold scores: ROC-AUC (weighted OvO): 0.602 +/- 0.089 (in 3 folds) ROC-AUC (macro OvO): 0.564 +/- 0.057 (in 3 folds) au-PRC (weighted OvO): 0.625 +/- 0.109 (in 3 folds) au-PRC (macro OvO): 0.605 +/- 0.092 (in 3 folds) Accuracy: 0.460 +/- 0.112 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.462 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 36  Asian 0.00 0.00 0.00 30  Caucasian 0.46 1.00 0.63 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.46 132  macro avg 0.12 0.25 0.16 132  weighted avg 0.21 0.46 0.29 132,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 
(in 3 folds) Accuracy: 0.460 +/- 0.112 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.462 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 36  Asian 0.00 0.00 0.00 30  Caucasian 0.46 1.00 0.63 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.46 132  macro avg 0.12 0.25 0.16 132  weighted avg 0.21 0.46 0.29 132
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.457 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.467 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.493 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.495 +/- 0.002 (in 3 folds) Accuracy: 0.318 +/- 0.073 (in 3 folds) MCC: -0.102 +/- 0.053 (in 3 folds) Global scores: Accuracy: 0.318 MCC: -0.103 Global classification report:  precision recall f1-score support  African 0.06 0.03 0.04 36  Asian 0.19 0.20 0.20 30  Caucasian 0.42 0.57 0.49 61 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.32 132  macro avg 0.17 0.20 0.18 132  weighted avg 0.25 0.32 0.28 132


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.758 +/- 0.043 (in 3 folds),0.764 +/- 0.025 (in 3 folds),0.736 +/- 0.029 (in 3 folds),0.737 +/- 0.023 (in 3 folds),0.624 +/- 0.016 (in 3 folds),0.375 +/- 0.049 (in 3 folds),0.623,0.375,191.0,0.0,191.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.758 +/- 0.043 (in 3 folds),0.764 +/- 0.025 (in 3 folds),0.736 +/- 0.029 (in 3 folds),0.737 +/- 0.023 (in 3 folds),0.700 +/- 0.030 (in 3 folds),0.457 +/- 0.024 (in 3 folds),0.702,0.463,191.0,0.0,191.0,0.0,False
elasticnet_cv,0.729 +/- 0.017 (in 3 folds),0.737 +/- 0.006 (in 3 folds),0.735 +/- 0.016 (in 3 folds),0.747 +/- 0.006 (in 3 folds),0.567 +/- 0.081 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.571,0.0,191.0,0.0,191.0,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.729 +/- 0.017 (in 3 folds),0.737 +/- 0.006 (in 3 folds),0.735 +/- 0.016 (in 3 folds),0.747 +/- 0.006 (in 3 folds),0.714 +/- 0.066 (in 3 folds),0.498 +/- 0.106 (in 3 folds),0.717,0.497,191.0,0.0,191.0,0.0,True
lasso_cv,0.723 +/- 0.028 (in 3 folds),0.725 +/- 0.021 (in 3 folds),0.730 +/- 0.030 (in 3 folds),0.738 +/- 0.022 (in 3 folds),0.567 +/- 0.081 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.571,0.0,191.0,0.0,191.0,0.0,True
lasso_cv.decision_thresholds_tuned,0.723 +/- 0.028 (in 3 folds),0.725 +/- 0.021 (in 3 folds),0.730 +/- 0.030 (in 3 folds),0.738 +/- 0.022 (in 3 folds),0.720 +/- 0.044 (in 3 folds),0.505 +/- 0.082 (in 3 folds),0.723,0.508,191.0,0.0,191.0,0.0,True
rf_multiclass,0.712 +/- 0.028 (in 3 folds),0.704 +/- 0.079 (in 3 folds),0.729 +/- 0.054 (in 3 folds),0.723 +/- 0.078 (in 3 folds),0.705 +/- 0.038 (in 3 folds),0.479 +/- 0.026 (in 3 folds),0.707,0.483,191.0,0.0,191.0,0.0,True
rf_multiclass.decision_thresholds_tuned,0.712 +/- 0.028 (in 3 folds),0.704 +/- 0.079 (in 3 folds),0.729 +/- 0.054 (in 3 folds),0.723 +/- 0.078 (in 3 folds),0.738 +/- 0.031 (in 3 folds),0.536 +/- 0.108 (in 3 folds),0.738,0.531,191.0,0.0,191.0,0.0,True
linearsvm_ovr,0.700 +/- 0.050 (in 3 folds),0.689 +/- 0.017 (in 3 folds),0.703 +/- 0.016 (in 3 folds),0.687 +/- 0.018 (in 3 folds),0.624 +/- 0.029 (in 3 folds),0.377 +/- 0.086 (in 3 folds),0.623,0.376,191.0,0.0,191.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.700 +/- 0.050 (in 3 folds),0.689 +/- 0.017 (in 3 folds),0.703 +/- 0.016 (in 3 folds),0.687 +/- 0.018 (in 3 folds),0.651 +/- 0.048 (in 3 folds),0.410 +/- 0.094 (in 3 folds),0.649,0.381,191.0,0.0,191.0,0.0,True

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.758 +/- 0.043 (in 3 folds),0.764 +/- 0.025 (in 3 folds),0.736 +/- 0.029 (in 3 folds),0.737 +/- 0.023 (in 3 folds),0.624 +/- 0.016 (in 3 folds),0.375 +/- 0.049 (in 3 folds),0.623,0.375,191,0,191,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.758 +/- 0.043 (in 3 folds),0.764 +/- 0.025 (in 3 folds),0.736 +/- 0.029 (in 3 folds),0.737 +/- 0.023 (in 3 folds),0.700 +/- 0.030 (in 3 folds),0.457 +/- 0.024 (in 3 folds),0.702,0.463,191,0,191,0.0,False
elasticnet_cv,0.729 +/- 0.017 (in 3 folds),0.737 +/- 0.006 (in 3 folds),0.735 +/- 0.016 (in 3 folds),0.747 +/- 0.006 (in 3 folds),0.567 +/- 0.081 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.571,0.0,191,0,191,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.729 +/- 0.017 (in 3 folds),0.737 +/- 0.006 (in 3 folds),0.735 +/- 0.016 (in 3 folds),0.747 +/- 0.006 (in 3 folds),0.714 +/- 0.066 (in 3 folds),0.498 +/- 0.106 (in 3 folds),0.717,0.497,191,0,191,0.0,True
lasso_cv,0.723 +/- 0.028 (in 3 folds),0.725 +/- 0.021 (in 3 folds),0.730 +/- 0.030 (in 3 folds),0.738 +/- 0.022 (in 3 folds),0.567 +/- 0.081 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.571,0.0,191,0,191,0.0,True
lasso_cv.decision_thresholds_tuned,0.723 +/- 0.028 (in 3 folds),0.725 +/- 0.021 (in 3 folds),0.730 +/- 0.030 (in 3 folds),0.738 +/- 0.022 (in 3 folds),0.720 +/- 0.044 (in 3 folds),0.505 +/- 0.082 (in 3 folds),0.723,0.508,191,0,191,0.0,True
rf_multiclass,0.712 +/- 0.028 (in 3 folds),0.704 +/- 0.079 (in 3 folds),0.729 +/- 0.054 (in 3 folds),0.723 +/- 0.078 (in 3 folds),0.705 +/- 0.038 (in 3 folds),0.479 +/- 0.026 (in 3 folds),0.707,0.483,191,0,191,0.0,True
rf_multiclass.decision_thresholds_tuned,0.712 +/- 0.028 (in 3 folds),0.704 +/- 0.079 (in 3 folds),0.729 +/- 0.054 (in 3 folds),0.723 +/- 0.078 (in 3 folds),0.738 +/- 0.031 (in 3 folds),0.536 +/- 0.108 (in 3 folds),0.738,0.531,191,0,191,0.0,True
linearsvm_ovr,0.700 +/- 0.050 (in 3 folds),0.689 +/- 0.017 (in 3 folds),0.703 +/- 0.016 (in 3 folds),0.687 +/- 0.018 (in 3 folds),0.624 +/- 0.029 (in 3 folds),0.377 +/- 0.086 (in 3 folds),0.623,0.376,191,0,191,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.700 +/- 0.050 (in 3 folds),0.689 +/- 0.017 (in 3 folds),0.703 +/- 0.016 (in 3 folds),0.687 +/- 0.018 (in 3 folds),0.651 +/- 0.048 (in 3 folds),0.410 +/- 0.094 (in 3 folds),0.649,0.381,191,0,191,0.0,True


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,elasticnet_cv,elasticnet_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.758 +/- 0.043 (in 3 folds) ROC-AUC (macro OvO): 0.764 +/- 0.025 (in 3 folds) au-PRC (weighted OvO): 0.736 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.737 +/- 0.023 (in 3 folds) Accuracy: 0.624 +/- 0.016 (in 3 folds) MCC: 0.375 +/- 0.049 (in 3 folds) Global scores: Accuracy: 0.623 MCC: 0.375 Global classification report:  precision recall f1-score support  African 0.82 0.82 0.82 44  Asian 0.21 0.22 0.22 32  Caucasian 0.72 0.70 0.71 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.62 191  macro avg 0.44 0.43 0.44 191  weighted avg 0.64 0.62 0.63 191,Per-fold scores: ROC-AUC (weighted OvO): 0.758 +/- 0.043 (in 3 folds) ROC-AUC (macro OvO): 0.764 +/- 0.025 (in 3 folds) au-PRC (weighted OvO): 0.736 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.737 +/- 0.023 (in 3 folds) Accuracy: 0.700 +/- 0.030 (in 3 folds) MCC: 0.457 +/- 0.024 (in 3 folds) Global scores: Accuracy: 0.702 MCC: 0.463 Global classification report:  precision recall f1-score support  African 0.81 0.89 0.85 44  Asian 0.00 0.00 0.00 32  Caucasian 0.70 0.87 0.78 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.70 191  macro avg 0.38 0.44 0.41 191  weighted avg 0.59 0.70 0.64 191,Per-fold scores: ROC-AUC (weighted OvO): 0.729 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.737 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.735 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.747 +/- 0.006 (in 3 folds) Accuracy: 0.567 +/- 0.081 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.571 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 44  Asian 0.00 0.00 0.00 32  Caucasian 0.57 1.00 0.73 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.57 191  macro avg 0.14 0.25 0.18 191  weighted avg 0.33 0.57 0.41 191,Per-fold scores: ROC-AUC (weighted OvO): 0.729 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.737 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.735 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.747 +/- 
0.006 (in 3 folds) Accuracy: 0.714 +/- 0.066 (in 3 folds) MCC: 0.498 +/- 0.106 (in 3 folds) Global scores: Accuracy: 0.717 MCC: 0.497 Global classification report:  precision recall f1-score support  African 0.70 0.89 0.78 44  Asian 0.00 0.00 0.00 32  Caucasian 0.73 0.90 0.80 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.72 191  macro avg 0.36 0.45 0.40 191  weighted avg 0.57 0.72 0.64 191
,,,
,,,


lasso_cv,lasso_cv.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.723 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.725 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.730 +/- 0.030 (in 3 folds) au-PRC (macro OvO): 0.738 +/- 0.022 (in 3 folds) Accuracy: 0.567 +/- 0.081 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.571 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 44  Asian 0.00 0.00 0.00 32  Caucasian 0.57 1.00 0.73 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.57 191  macro avg 0.14 0.25 0.18 191  weighted avg 0.33 0.57 0.41 191,Per-fold scores: ROC-AUC (weighted OvO): 0.723 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.725 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.730 +/- 0.030 (in 3 folds) au-PRC (macro OvO): 0.738 +/- 0.022 (in 3 folds) Accuracy: 0.720 +/- 0.044 (in 3 folds) MCC: 0.505 +/- 0.082 (in 3 folds) Global scores: Accuracy: 0.723 MCC: 0.508 Global classification report:  precision recall f1-score support  African 0.71 0.91 0.80 44  Asian 0.00 0.00 0.00 32  Caucasian 0.73 0.90 0.80 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.72 191  macro avg 0.36 0.45 0.40 191  weighted avg 0.58 0.72 0.64 191,Per-fold scores: ROC-AUC (weighted OvO): 0.712 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.704 +/- 0.079 (in 3 folds) au-PRC (weighted OvO): 0.729 +/- 0.054 (in 3 folds) au-PRC (macro OvO): 0.723 +/- 0.078 (in 3 folds) Accuracy: 0.705 +/- 0.038 (in 3 folds) MCC: 0.479 +/- 0.026 (in 3 folds) Global scores: Accuracy: 0.707 MCC: 0.483 Global classification report:  precision recall f1-score support  African 0.96 0.61 0.75 44  Asian 0.00 0.00 0.00 32  Caucasian 0.66 0.99 0.79 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.71 191  macro avg 0.41 0.40 0.39 191  weighted avg 0.60 0.71 0.63 191,Per-fold scores: ROC-AUC (weighted OvO): 0.712 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.704 +/- 0.079 (in 3 folds) au-PRC (weighted OvO): 0.729 +/- 0.054 (in 3 folds) au-PRC (macro OvO): 0.723 +/- 
0.078 (in 3 folds) Accuracy: 0.738 +/- 0.031 (in 3 folds) MCC: 0.536 +/- 0.108 (in 3 folds) Global scores: Accuracy: 0.738 MCC: 0.531 Global classification report:  precision recall f1-score support  African 0.89 0.93 0.91 44  Asian 0.14 0.03 0.05 32  Caucasian 0.72 0.91 0.80 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.74 191  macro avg 0.44 0.47 0.44 191  weighted avg 0.64 0.74 0.68 191
,,,
,,,


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.700 +/- 0.050 (in 3 folds) ROC-AUC (macro OvO): 0.689 +/- 0.017 (in 3 folds) au-PRC (weighted OvO): 0.703 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.687 +/- 0.018 (in 3 folds) Accuracy: 0.624 +/- 0.029 (in 3 folds) MCC: 0.377 +/- 0.086 (in 3 folds) Global scores: Accuracy: 0.623 MCC: 0.376 Global classification report:  precision recall f1-score support  African 0.74 0.84 0.79 44  Asian 0.22 0.19 0.20 32  Caucasian 0.73 0.70 0.71 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.62 191  macro avg 0.42 0.43 0.43 191  weighted avg 0.62 0.62 0.62 191,Per-fold scores: ROC-AUC (weighted OvO): 0.700 +/- 0.050 (in 3 folds) ROC-AUC (macro OvO): 0.689 +/- 0.017 (in 3 folds) au-PRC (weighted OvO): 0.703 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.687 +/- 0.018 (in 3 folds) Accuracy: 0.651 +/- 0.048 (in 3 folds) MCC: 0.410 +/- 0.094 (in 3 folds) Global scores: Accuracy: 0.649 MCC: 0.381 Global classification report:  precision recall f1-score support  African 0.69 0.84 0.76 44  Asian 0.18 0.09 0.12 32  Caucasian 0.70 0.77 0.73 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.65 191  macro avg 0.39 0.43 0.40 191  weighted avg 0.59 0.65 0.61 191,Per-fold scores: ROC-AUC (weighted OvO): 0.694 +/- 0.068 (in 3 folds) ROC-AUC (macro OvO): 0.690 +/- 0.048 (in 3 folds) au-PRC (weighted OvO): 0.721 +/- 0.055 (in 3 folds) au-PRC (macro OvO): 0.717 +/- 0.081 (in 3 folds) Accuracy: 0.694 +/- 0.060 (in 3 folds) MCC: 0.438 +/- 0.147 (in 3 folds) Global scores: Accuracy: 0.696 MCC: 0.442 Global classification report:  precision recall f1-score support  African 0.84 0.70 0.77 44  Asian 0.40 0.12 0.19 32  Caucasian 0.68 0.90 0.77 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.70 191  macro avg 0.48 0.43 0.43 191  weighted avg 0.65 0.70 0.65 191,Per-fold scores: ROC-AUC (weighted OvO): 0.694 +/- 0.068 (in 3 folds) ROC-AUC (macro OvO): 0.690 +/- 0.048 (in 3 folds) au-PRC (weighted OvO): 0.721 +/- 0.055 (in 3 folds) au-PRC (macro OvO): 0.717 +/- 
0.081 (in 3 folds) Accuracy: 0.692 +/- 0.080 (in 3 folds) MCC: 0.440 +/- 0.146 (in 3 folds) Global scores: Accuracy: 0.696 MCC: 0.445 Global classification report:  precision recall f1-score support  African 0.83 0.80 0.81 44  Asian 0.00 0.00 0.00 32  Caucasian 0.68 0.90 0.77 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.70 191  macro avg 0.38 0.42 0.40 191  weighted avg 0.58 0.70 0.63 191
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.685 +/- 0.160 (in 3 folds) ROC-AUC (macro OvO): 0.685 +/- 0.160 (in 3 folds) au-PRC (weighted OvO): 0.671 +/- 0.149 (in 3 folds) au-PRC (macro OvO): 0.668 +/- 0.147 (in 3 folds) Accuracy: 0.567 +/- 0.081 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.571 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 44  Asian 0.00 0.00 0.00 32  Caucasian 0.57 1.00 0.73 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.57 191  macro avg 0.14 0.25 0.18 191  weighted avg 0.33 0.57 0.41 191,Per-fold scores: ROC-AUC (weighted OvO): 0.685 +/- 0.160 (in 3 folds) ROC-AUC (macro OvO): 0.685 +/- 0.160 (in 3 folds) au-PRC (weighted OvO): 0.671 +/- 0.149 (in 3 folds) au-PRC (macro OvO): 0.668 +/- 0.147 (in 3 folds) Accuracy: 0.625 +/- 0.069 (in 3 folds) MCC: 0.153 +/- 0.265 (in 3 folds) Global scores: Accuracy: 0.623 MCC: 0.264 Global classification report:  precision recall f1-score support  African 0.73 0.36 0.48 44  Asian 0.00 0.00 0.00 32  Caucasian 0.61 0.94 0.74 109 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.62 191  macro avg 0.33 0.33 0.31 191  weighted avg 0.52 0.62 0.53 191
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.647 +/- 0.043 (in 3 folds),0.644 +/- 0.043 (in 3 folds),0.690 +/- 0.040 (in 3 folds),0.688 +/- 0.041 (in 3 folds),0.191 +/- 0.047 (in 3 folds),0.148 +/- 0.070 (in 3 folds),0.189,0.019,132.0,0.0,132.0,0.0,True
lasso_cv,0.642 +/- 0.036 (in 3 folds),0.638 +/- 0.038 (in 3 folds),0.688 +/- 0.035 (in 3 folds),0.685 +/- 0.038 (in 3 folds),0.214 +/- 0.058 (in 3 folds),0.166 +/- 0.083 (in 3 folds),0.212,0.051,132.0,0.0,132.0,0.0,True
ridge_cv,0.633 +/- 0.065 (in 3 folds),0.628 +/- 0.072 (in 3 folds),0.680 +/- 0.047 (in 3 folds),0.675 +/- 0.051 (in 3 folds),0.152 +/- 0.020 (in 3 folds),-0.003 +/- 0.006 (in 3 folds),0.152,-0.045,132.0,0.0,132.0,0.0,True
lasso_multiclass,0.624 +/- 0.062 (in 3 folds),0.616 +/- 0.076 (in 3 folds),0.677 +/- 0.059 (in 3 folds),0.669 +/- 0.070 (in 3 folds),0.281 +/- 0.059 (in 3 folds),0.148 +/- 0.063 (in 3 folds),0.28,0.144,132.0,0.0,132.0,0.0,False
rf_multiclass,0.610 +/- 0.038 (in 3 folds),0.604 +/- 0.037 (in 3 folds),0.669 +/- 0.035 (in 3 folds),0.665 +/- 0.038 (in 3 folds),0.260 +/- 0.061 (in 3 folds),0.133 +/- 0.069 (in 3 folds),0.258,0.106,132.0,0.0,132.0,0.0,True
linearsvm_ovr,0.605 +/- 0.071 (in 3 folds),0.601 +/- 0.082 (in 3 folds),0.651 +/- 0.066 (in 3 folds),0.649 +/- 0.077 (in 3 folds),0.235 +/- 0.036 (in 3 folds),0.094 +/- 0.035 (in 3 folds),0.235,0.092,132.0,0.0,132.0,0.0,False
xgboost,0.552 +/- 0.031 (in 3 folds),0.550 +/- 0.040 (in 3 folds),0.631 +/- 0.025 (in 3 folds),0.633 +/- 0.035 (in 3 folds),0.205 +/- 0.013 (in 3 folds),0.058 +/- 0.007 (in 3 folds),0.205,0.05,132.0,0.0,132.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.167 +/- 0.010 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.167,-0.046,132.0,0.0,132.0,0.0,True
dummy_stratified,0.479 +/- 0.043 (in 3 folds),0.479 +/- 0.041 (in 3 folds),0.519 +/- 0.014 (in 3 folds),0.519 +/- 0.014 (in 3 folds),0.126 +/- 0.068 (in 3 folds),-0.044 +/- 0.099 (in 3 folds),0.129,-0.047,132.0,0.0,132.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.647 +/- 0.043 (in 3 folds),0.644 +/- 0.043 (in 3 folds),0.690 +/- 0.040 (in 3 folds),0.688 +/- 0.041 (in 3 folds),0.191 +/- 0.047 (in 3 folds),0.148 +/- 0.070 (in 3 folds),0.189,0.019,132,0,132,0.0,True
lasso_cv,0.642 +/- 0.036 (in 3 folds),0.638 +/- 0.038 (in 3 folds),0.688 +/- 0.035 (in 3 folds),0.685 +/- 0.038 (in 3 folds),0.214 +/- 0.058 (in 3 folds),0.166 +/- 0.083 (in 3 folds),0.212,0.051,132,0,132,0.0,True
ridge_cv,0.633 +/- 0.065 (in 3 folds),0.628 +/- 0.072 (in 3 folds),0.680 +/- 0.047 (in 3 folds),0.675 +/- 0.051 (in 3 folds),0.152 +/- 0.020 (in 3 folds),-0.003 +/- 0.006 (in 3 folds),0.152,-0.045,132,0,132,0.0,True
lasso_multiclass,0.624 +/- 0.062 (in 3 folds),0.616 +/- 0.076 (in 3 folds),0.677 +/- 0.059 (in 3 folds),0.669 +/- 0.070 (in 3 folds),0.281 +/- 0.059 (in 3 folds),0.148 +/- 0.063 (in 3 folds),0.28,0.144,132,0,132,0.0,False
rf_multiclass,0.610 +/- 0.038 (in 3 folds),0.604 +/- 0.037 (in 3 folds),0.669 +/- 0.035 (in 3 folds),0.665 +/- 0.038 (in 3 folds),0.260 +/- 0.061 (in 3 folds),0.133 +/- 0.069 (in 3 folds),0.258,0.106,132,0,132,0.0,True
linearsvm_ovr,0.605 +/- 0.071 (in 3 folds),0.601 +/- 0.082 (in 3 folds),0.651 +/- 0.066 (in 3 folds),0.649 +/- 0.077 (in 3 folds),0.235 +/- 0.036 (in 3 folds),0.094 +/- 0.035 (in 3 folds),0.235,0.092,132,0,132,0.0,False
xgboost,0.552 +/- 0.031 (in 3 folds),0.550 +/- 0.040 (in 3 folds),0.631 +/- 0.025 (in 3 folds),0.633 +/- 0.035 (in 3 folds),0.205 +/- 0.013 (in 3 folds),0.058 +/- 0.007 (in 3 folds),0.205,0.05,132,0,132,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.167 +/- 0.010 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.167,-0.046,132,0,132,0.0,True
dummy_stratified,0.479 +/- 0.043 (in 3 folds),0.479 +/- 0.041 (in 3 folds),0.519 +/- 0.014 (in 3 folds),0.519 +/- 0.014 (in 3 folds),0.126 +/- 0.068 (in 3 folds),-0.044 +/- 0.099 (in 3 folds),0.129,-0.047,132,0,132,0.0,True


elasticnet_cv,lasso_cv,ridge_cv,lasso_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.647 +/- 0.043 (in 3 folds) ROC-AUC (macro OvO): 0.644 +/- 0.043 (in 3 folds) au-PRC (weighted OvO): 0.690 +/- 0.040 (in 3 folds) au-PRC (macro OvO): 0.688 +/- 0.041 (in 3 folds) Accuracy: 0.191 +/- 0.047 (in 3 folds) MCC: 0.148 +/- 0.070 (in 3 folds) Global scores: Accuracy: 0.189 MCC: 0.019 Global classification report:  precision recall f1-score support  20-30 0.17 0.28 0.21 25  30-40 0.00 0.00 0.00 20  40-50 0.11 0.28 0.16 18  50-60 0.18 0.26 0.22 27  60-70 0.00 0.00 0.00 15  70-80 0.00 0.00 0.00 3  <20 1.00 0.25 0.40 24  accuracy 0.19 132  macro avg 0.21 0.15 0.14 132 weighted avg 0.27 0.19 0.18 132,Per-fold scores: ROC-AUC (weighted OvO): 0.642 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.638 +/- 0.038 (in 3 folds) au-PRC (weighted OvO): 0.688 +/- 0.035 (in 3 folds) au-PRC (macro OvO): 0.685 +/- 0.038 (in 3 folds) Accuracy: 0.214 +/- 0.058 (in 3 folds) MCC: 0.166 +/- 0.083 (in 3 folds) Global scores: Accuracy: 0.212 MCC: 0.051 Global classification report:  precision recall f1-score support  20-30 0.19 0.28 0.23 25  30-40 0.00 0.00 0.00 20  40-50 0.12 0.28 0.17 18  50-60 0.19 0.26 0.22 27  60-70 0.29 0.13 0.18 15  70-80 0.00 0.00 0.00 3  <20 0.70 0.29 0.41 24  accuracy 0.21 132  macro avg 0.21 0.18 0.17 132 weighted avg 0.25 0.21 0.21 132,Per-fold scores: ROC-AUC (weighted OvO): 0.633 +/- 0.065 (in 3 folds) ROC-AUC (macro OvO): 0.628 +/- 0.072 (in 3 folds) au-PRC (weighted OvO): 0.680 +/- 0.047 (in 3 folds) au-PRC (macro OvO): 0.675 +/- 0.051 (in 3 folds) Accuracy: 0.152 +/- 0.020 (in 3 folds) MCC: -0.003 +/- 0.006 (in 3 folds) Global scores: Accuracy: 0.152 MCC: -0.045 Global classification report:  precision recall f1-score support  20-30 0.15 0.40 0.22 25  30-40 0.00 0.00 0.00 20  40-50 0.12 0.17 0.14 18  50-60 0.17 0.26 0.21 27  60-70 0.00 0.00 0.00 15  70-80 0.00 0.00 0.00 3  <20 0.00 0.00 0.00 24  accuracy 0.15 132  macro avg 0.06 0.12 0.08 132 weighted avg 0.08 0.15 0.10 132,Per-fold scores: ROC-AUC (weighted 
OvO): 0.624 +/- 0.062 (in 3 folds) ROC-AUC (macro OvO): 0.616 +/- 0.076 (in 3 folds) au-PRC (weighted OvO): 0.677 +/- 0.059 (in 3 folds) au-PRC (macro OvO): 0.669 +/- 0.070 (in 3 folds) Accuracy: 0.281 +/- 0.059 (in 3 folds) MCC: 0.148 +/- 0.063 (in 3 folds) Global scores: Accuracy: 0.280 MCC: 0.144 Global classification report:  precision recall f1-score support  20-30 0.36 0.36 0.36 25  30-40 0.11 0.10 0.10 20  40-50 0.00 0.00 0.00 18  50-60 0.27 0.22 0.24 27  60-70 0.19 0.33 0.24 15  70-80 0.00 0.00 0.00 3  <20 0.83 0.62 0.71 24  accuracy 0.28 132  macro avg 0.25 0.23 0.24 132 weighted avg 0.31 0.28 0.29 132
,,,
,,,
,,,


rf_multiclass,linearsvm_ovr,xgboost,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.610 +/- 0.038 (in 3 folds) ROC-AUC (macro OvO): 0.604 +/- 0.037 (in 3 folds) au-PRC (weighted OvO): 0.669 +/- 0.035 (in 3 folds) au-PRC (macro OvO): 0.665 +/- 0.038 (in 3 folds) Accuracy: 0.260 +/- 0.061 (in 3 folds) MCC: 0.133 +/- 0.069 (in 3 folds) Global scores: Accuracy: 0.258 MCC: 0.106 Global classification report:  precision recall f1-score support  20-30 0.30 0.44 0.35 25  30-40 0.00 0.00 0.00 20  40-50 0.09 0.11 0.10 18  50-60 0.25 0.30 0.27 27  60-70 0.17 0.20 0.18 15  70-80 0.00 0.00 0.00 3  <20 0.62 0.42 0.50 24  accuracy 0.26 132  macro avg 0.20 0.21 0.20 132 weighted avg 0.25 0.26 0.25 132,Per-fold scores: ROC-AUC (weighted OvO): 0.605 +/- 0.071 (in 3 folds) ROC-AUC (macro OvO): 0.601 +/- 0.082 (in 3 folds) au-PRC (weighted OvO): 0.651 +/- 0.066 (in 3 folds) au-PRC (macro OvO): 0.649 +/- 0.077 (in 3 folds) Accuracy: 0.235 +/- 0.036 (in 3 folds) MCC: 0.094 +/- 0.035 (in 3 folds) Global scores: Accuracy: 0.235 MCC: 0.092 Global classification report:  precision recall f1-score support  20-30 0.26 0.28 0.27 25  30-40 0.00 0.00 0.00 20  40-50 0.00 0.00 0.00 18  50-60 0.24 0.15 0.18 27  60-70 0.19 0.40 0.26 15  70-80 0.00 0.00 0.00 3  <20 0.67 0.58 0.62 24  accuracy 0.23 132  macro avg 0.19 0.20 0.19 132 weighted avg 0.24 0.23 0.23 132,Per-fold scores: ROC-AUC (weighted OvO): 0.552 +/- 0.031 (in 3 folds) ROC-AUC (macro OvO): 0.550 +/- 0.040 (in 3 folds) au-PRC (weighted OvO): 0.631 +/- 0.025 (in 3 folds) au-PRC (macro OvO): 0.633 +/- 0.035 (in 3 folds) Accuracy: 0.205 +/- 0.013 (in 3 folds) MCC: 0.058 +/- 0.007 (in 3 folds) Global scores: Accuracy: 0.205 MCC: 0.050 Global classification report:  precision recall f1-score support  20-30 0.20 0.20 0.20 25  30-40 0.10 0.05 0.07 20  40-50 0.00 0.00 0.00 18  50-60 0.21 0.22 0.22 27  60-70 0.23 0.40 0.29 15  70-80 0.00 0.00 0.00 3  <20 0.50 0.38 0.43 24  accuracy 0.20 132  macro avg 0.18 0.18 0.17 132 weighted avg 0.21 0.20 0.20 132,Per-fold scores: ROC-AUC (weighted 
OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 (in 3 folds) Accuracy: 0.167 +/- 0.010 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.167 MCC: -0.046 Global classification report:  precision recall f1-score support  20-30 0.16 0.60 0.26 25  30-40 0.00 0.00 0.00 20  40-50 0.00 0.00 0.00 18  50-60 0.17 0.26 0.21 27  60-70 0.00 0.00 0.00 15  70-80 0.00 0.00 0.00 3  <20 0.00 0.00 0.00 24  accuracy 0.17 132  macro avg 0.05 0.12 0.07 132 weighted avg 0.07 0.17 0.09 132
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.479 +/- 0.043 (in 3 folds) ROC-AUC (macro OvO): 0.479 +/- 0.041 (in 3 folds) au-PRC (weighted OvO): 0.519 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.519 +/- 0.014 (in 3 folds) Accuracy: 0.126 +/- 0.068 (in 3 folds) MCC: -0.044 +/- 0.099 (in 3 folds) Global scores: Accuracy: 0.129 MCC: -0.047 Global classification report:  precision recall f1-score support  20-30 0.21 0.16 0.18 25  30-40 0.11 0.10 0.10 20  40-50 0.31 0.22 0.26 18  50-60 0.14 0.19 0.16 27  60-70 0.04 0.07 0.05 15  70-80 0.00 0.00 0.00 3  <20 0.05 0.04 0.04 24  accuracy 0.13 132  macro avg 0.12 0.11 0.11 132 weighted avg 0.14 0.13 0.13 132


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.665 +/- 0.050 (in 3 folds),0.661 +/- 0.045 (in 3 folds),0.690 +/- 0.016 (in 3 folds),0.680 +/- 0.005 (in 3 folds),0.160 +/- 0.060 (in 3 folds),0.033 +/- 0.057 (in 3 folds),0.157,-0.042,191.0,0.0,191.0,0.0,True
ridge_cv.decision_thresholds_tuned,0.665 +/- 0.050 (in 3 folds),0.661 +/- 0.045 (in 3 folds),0.690 +/- 0.016 (in 3 folds),0.680 +/- 0.005 (in 3 folds),0.357 +/- 0.124 (in 3 folds),0.224 +/- 0.133 (in 3 folds),0.351,0.213,191.0,0.0,191.0,0.0,True
lasso_multiclass,0.665 +/- 0.025 (in 3 folds),0.663 +/- 0.018 (in 3 folds),0.693 +/- 0.003 (in 3 folds),0.681 +/- 0.010 (in 3 folds),0.340 +/- 0.021 (in 3 folds),0.228 +/- 0.021 (in 3 folds),0.34,0.213,191.0,0.0,191.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.665 +/- 0.025 (in 3 folds),0.663 +/- 0.018 (in 3 folds),0.693 +/- 0.003 (in 3 folds),0.681 +/- 0.010 (in 3 folds),0.324 +/- 0.044 (in 3 folds),0.207 +/- 0.060 (in 3 folds),0.325,0.189,191.0,0.0,191.0,0.0,True
linearsvm_ovr,0.645 +/- 0.004 (in 3 folds),0.642 +/- 0.003 (in 3 folds),0.677 +/- 0.005 (in 3 folds),0.671 +/- 0.012 (in 3 folds),0.288 +/- 0.043 (in 3 folds),0.165 +/- 0.046 (in 3 folds),0.288,0.147,191.0,0.0,191.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.645 +/- 0.004 (in 3 folds),0.642 +/- 0.003 (in 3 folds),0.677 +/- 0.005 (in 3 folds),0.671 +/- 0.012 (in 3 folds),0.320 +/- 0.029 (in 3 folds),0.206 +/- 0.060 (in 3 folds),0.319,0.203,191.0,0.0,191.0,0.0,False
rf_multiclass,0.627 +/- 0.028 (in 3 folds),0.622 +/- 0.022 (in 3 folds),0.670 +/- 0.013 (in 3 folds),0.662 +/- 0.006 (in 3 folds),0.292 +/- 0.076 (in 3 folds),0.188 +/- 0.068 (in 3 folds),0.288,0.138,191.0,0.0,191.0,0.0,True
rf_multiclass.decision_thresholds_tuned,0.627 +/- 0.028 (in 3 folds),0.622 +/- 0.022 (in 3 folds),0.670 +/- 0.013 (in 3 folds),0.662 +/- 0.006 (in 3 folds),0.252 +/- 0.024 (in 3 folds),0.149 +/- 0.040 (in 3 folds),0.251,0.104,191.0,0.0,191.0,0.0,True
elasticnet_cv,0.610 +/- 0.011 (in 3 folds),0.609 +/- 0.019 (in 3 folds),0.662 +/- 0.033 (in 3 folds),0.657 +/- 0.037 (in 3 folds),0.177 +/- 0.020 (in 3 folds),0.145 +/- 0.047 (in 3 folds),0.178,0.001,191.0,0.0,191.0,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.610 +/- 0.011 (in 3 folds),0.609 +/- 0.019 (in 3 folds),0.662 +/- 0.033 (in 3 folds),0.657 +/- 0.037 (in 3 folds),0.244 +/- 0.048 (in 3 folds),0.099 +/- 0.047 (in 3 folds),0.246,0.091,191.0,0.0,191.0,0.0,True

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.665 +/- 0.050 (in 3 folds),0.661 +/- 0.045 (in 3 folds),0.690 +/- 0.016 (in 3 folds),0.680 +/- 0.005 (in 3 folds),0.160 +/- 0.060 (in 3 folds),0.033 +/- 0.057 (in 3 folds),0.157,-0.042,191,0,191,0.0,True
ridge_cv.decision_thresholds_tuned,0.665 +/- 0.050 (in 3 folds),0.661 +/- 0.045 (in 3 folds),0.690 +/- 0.016 (in 3 folds),0.680 +/- 0.005 (in 3 folds),0.357 +/- 0.124 (in 3 folds),0.224 +/- 0.133 (in 3 folds),0.351,0.213,191,0,191,0.0,True
lasso_multiclass,0.665 +/- 0.025 (in 3 folds),0.663 +/- 0.018 (in 3 folds),0.693 +/- 0.003 (in 3 folds),0.681 +/- 0.010 (in 3 folds),0.340 +/- 0.021 (in 3 folds),0.228 +/- 0.021 (in 3 folds),0.34,0.213,191,0,191,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.665 +/- 0.025 (in 3 folds),0.663 +/- 0.018 (in 3 folds),0.693 +/- 0.003 (in 3 folds),0.681 +/- 0.010 (in 3 folds),0.324 +/- 0.044 (in 3 folds),0.207 +/- 0.060 (in 3 folds),0.325,0.189,191,0,191,0.0,True
linearsvm_ovr,0.645 +/- 0.004 (in 3 folds),0.642 +/- 0.003 (in 3 folds),0.677 +/- 0.005 (in 3 folds),0.671 +/- 0.012 (in 3 folds),0.288 +/- 0.043 (in 3 folds),0.165 +/- 0.046 (in 3 folds),0.288,0.147,191,0,191,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.645 +/- 0.004 (in 3 folds),0.642 +/- 0.003 (in 3 folds),0.677 +/- 0.005 (in 3 folds),0.671 +/- 0.012 (in 3 folds),0.320 +/- 0.029 (in 3 folds),0.206 +/- 0.060 (in 3 folds),0.319,0.203,191,0,191,0.0,False
rf_multiclass,0.627 +/- 0.028 (in 3 folds),0.622 +/- 0.022 (in 3 folds),0.670 +/- 0.013 (in 3 folds),0.662 +/- 0.006 (in 3 folds),0.292 +/- 0.076 (in 3 folds),0.188 +/- 0.068 (in 3 folds),0.288,0.138,191,0,191,0.0,True
rf_multiclass.decision_thresholds_tuned,0.627 +/- 0.028 (in 3 folds),0.622 +/- 0.022 (in 3 folds),0.670 +/- 0.013 (in 3 folds),0.662 +/- 0.006 (in 3 folds),0.252 +/- 0.024 (in 3 folds),0.149 +/- 0.040 (in 3 folds),0.251,0.104,191,0,191,0.0,True
elasticnet_cv,0.610 +/- 0.011 (in 3 folds),0.609 +/- 0.019 (in 3 folds),0.662 +/- 0.033 (in 3 folds),0.657 +/- 0.037 (in 3 folds),0.177 +/- 0.020 (in 3 folds),0.145 +/- 0.047 (in 3 folds),0.178,0.001,191,0,191,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.610 +/- 0.011 (in 3 folds),0.609 +/- 0.019 (in 3 folds),0.662 +/- 0.033 (in 3 folds),0.657 +/- 0.037 (in 3 folds),0.244 +/- 0.048 (in 3 folds),0.099 +/- 0.047 (in 3 folds),0.246,0.091,191,0,191,0.0,True


ridge_cv,ridge_cv.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.665 +/- 0.050 (in 3 folds) ROC-AUC (macro OvO): 0.661 +/- 0.045 (in 3 folds) au-PRC (weighted OvO): 0.690 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.680 +/- 0.005 (in 3 folds) Accuracy: 0.160 +/- 0.060 (in 3 folds) MCC: 0.033 +/- 0.057 (in 3 folds) Global scores: Accuracy: 0.157 MCC: -0.042 Global classification report:  precision recall f1-score support  20-30 0.19 0.57 0.29 35  30-40 0.00 0.00 0.00 23  40-50 0.14 0.11 0.12 28  50-60 0.11 0.18 0.13 39  60-70 0.00 0.00 0.00 27  70-80 0.00 0.00 0.00 4  <20 0.00 0.00 0.00 35  accuracy 0.16 191  macro avg 0.06 0.12 0.08 191 weighted avg 0.08 0.16 0.10 191,Per-fold scores: ROC-AUC (weighted OvO): 0.665 +/- 0.050 (in 3 folds) ROC-AUC (macro OvO): 0.661 +/- 0.045 (in 3 folds) au-PRC (weighted OvO): 0.690 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.680 +/- 0.005 (in 3 folds) Accuracy: 0.357 +/- 0.124 (in 3 folds) MCC: 0.224 +/- 0.133 (in 3 folds) Global scores: Accuracy: 0.351 MCC: 0.213 Global classification report:  precision recall f1-score support  20-30 0.30 0.51 0.37 35  30-40 0.00 0.00 0.00 23  40-50 0.00 0.00 0.00 28  50-60 0.30 0.56 0.39 39  60-70 0.08 0.04 0.05 27  70-80 0.00 0.00 0.00 4  <20 0.58 0.74 0.65 35  accuracy 0.35 191  macro avg 0.18 0.27 0.21 191 weighted avg 0.23 0.35 0.28 191,Per-fold scores: ROC-AUC (weighted OvO): 0.665 +/- 0.025 (in 3 folds) ROC-AUC (macro OvO): 0.663 +/- 0.018 (in 3 folds) au-PRC (weighted OvO): 0.693 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.681 +/- 0.010 (in 3 folds) Accuracy: 0.340 +/- 0.021 (in 3 folds) MCC: 0.228 +/- 0.021 (in 3 folds) Global scores: Accuracy: 0.340 MCC: 0.213 Global classification report:  precision recall f1-score support  20-30 0.42 0.43 0.42 35  30-40 0.27 0.26 0.27 23  40-50 0.17 0.18 0.17 28  50-60 0.37 0.28 0.32 39  60-70 0.15 0.22 0.18 27  70-80 0.00 0.00 0.00 4  <20 0.71 0.63 0.67 35  accuracy 0.34 191  macro avg 0.30 0.29 0.29 191 weighted avg 0.36 0.34 0.35 191,Per-fold scores: ROC-AUC (weighted 
OvO): 0.665 +/- 0.025 (in 3 folds) ROC-AUC (macro OvO): 0.663 +/- 0.018 (in 3 folds) au-PRC (weighted OvO): 0.693 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.681 +/- 0.010 (in 3 folds) Accuracy: 0.324 +/- 0.044 (in 3 folds) MCC: 0.207 +/- 0.060 (in 3 folds) Global scores: Accuracy: 0.325 MCC: 0.189 Global classification report:  precision recall f1-score support  20-30 0.34 0.49 0.40 35  30-40 1.00 0.04 0.08 23  40-50 0.15 0.11 0.12 28  50-60 0.26 0.18 0.21 39  60-70 0.16 0.26 0.19 27  70-80 0.00 0.00 0.00 4  <20 0.56 0.77 0.65 35  accuracy 0.32 191  macro avg 0.35 0.26 0.24 191 weighted avg 0.38 0.32 0.29 191
,,,
,,,


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.645 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.642 +/- 0.003 (in 3 folds) au-PRC (weighted OvO): 0.677 +/- 0.005 (in 3 folds) au-PRC (macro OvO): 0.671 +/- 0.012 (in 3 folds) Accuracy: 0.288 +/- 0.043 (in 3 folds) MCC: 0.165 +/- 0.046 (in 3 folds) Global scores: Accuracy: 0.288 MCC: 0.147 Global classification report:  precision recall f1-score support  20-30 0.39 0.34 0.36 35  30-40 0.24 0.26 0.25 23  40-50 0.00 0.00 0.00 28  50-60 0.27 0.26 0.26 39  60-70 0.06 0.07 0.07 27  70-80 0.00 0.00 0.00 4  <20 0.61 0.71 0.66 35  accuracy 0.29 191  macro avg 0.22 0.24 0.23 191 weighted avg 0.28 0.29 0.28 191,Per-fold scores: ROC-AUC (weighted OvO): 0.645 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.642 +/- 0.003 (in 3 folds) au-PRC (weighted OvO): 0.677 +/- 0.005 (in 3 folds) au-PRC (macro OvO): 0.671 +/- 0.012 (in 3 folds) Accuracy: 0.320 +/- 0.029 (in 3 folds) MCC: 0.206 +/- 0.060 (in 3 folds) Global scores: Accuracy: 0.319 MCC: 0.203 Global classification report:  precision recall f1-score support  20-30 0.43 0.17 0.24 35  30-40 0.26 0.35 0.30 23  40-50 0.14 0.04 0.06 28  50-60 0.31 0.21 0.25 39  60-70 0.21 0.48 0.29 27  70-80 0.00 0.00 0.00 4  <20 0.58 0.71 0.64 35  accuracy 0.32 191  macro avg 0.28 0.28 0.25 191 weighted avg 0.33 0.32 0.30 191,Per-fold scores: ROC-AUC (weighted OvO): 0.627 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.622 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.670 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.662 +/- 0.006 (in 3 folds) Accuracy: 0.292 +/- 0.076 (in 3 folds) MCC: 0.188 +/- 0.068 (in 3 folds) Global scores: Accuracy: 0.288 MCC: 0.138 Global classification report:  precision recall f1-score support  20-30 0.33 0.46 0.39 35  30-40 0.33 0.09 0.14 23  40-50 0.10 0.07 0.08 28  50-60 0.19 0.23 0.21 39  60-70 0.15 0.19 0.17 27  70-80 0.00 0.00 0.00 4  <20 0.58 0.60 0.59 35  accuracy 0.29 191  macro avg 0.24 0.23 0.22 191 weighted avg 0.28 0.29 0.27 191,Per-fold scores: ROC-AUC (weighted 
OvO): 0.627 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.622 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.670 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.662 +/- 0.006 (in 3 folds) Accuracy: 0.252 +/- 0.024 (in 3 folds) MCC: 0.149 +/- 0.040 (in 3 folds) Global scores: Accuracy: 0.251 MCC: 0.104 Global classification report:  precision recall f1-score support  20-30 0.31 0.37 0.34 35  30-40 0.20 0.22 0.21 23  40-50 0.00 0.00 0.00 28  50-60 0.15 0.13 0.14 39  60-70 0.15 0.26 0.19 27  70-80 0.00 0.00 0.00 4  <20 0.58 0.51 0.55 35  accuracy 0.25 191  macro avg 0.20 0.21 0.20 191 weighted avg 0.24 0.25 0.24 191
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.610 +/- 0.011 (in 3 folds) ROC-AUC (macro OvO): 0.609 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.662 +/- 0.033 (in 3 folds) au-PRC (macro OvO): 0.657 +/- 0.037 (in 3 folds) Accuracy: 0.177 +/- 0.020 (in 3 folds) MCC: 0.145 +/- 0.047 (in 3 folds) Global scores: Accuracy: 0.178 MCC: 0.001 Global classification report:  precision recall f1-score support  20-30 0.16 0.26 0.20 35  30-40 0.00 0.00 0.00 23  40-50 0.12 0.25 0.17 28  50-60 0.12 0.18 0.14 39  60-70 0.12 0.04 0.06 27  70-80 0.00 0.00 0.00 4  <20 0.91 0.29 0.43 35  accuracy 0.18 191  macro avg 0.21 0.14 0.14 191 weighted avg 0.26 0.18 0.18 191,Per-fold scores: ROC-AUC (weighted OvO): 0.610 +/- 0.011 (in 3 folds) ROC-AUC (macro OvO): 0.609 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.662 +/- 0.033 (in 3 folds) au-PRC (macro OvO): 0.657 +/- 0.037 (in 3 folds) Accuracy: 0.244 +/- 0.048 (in 3 folds) MCC: 0.099 +/- 0.047 (in 3 folds) Global scores: Accuracy: 0.246 MCC: 0.091 Global classification report:  precision recall f1-score support  20-30 0.20 0.40 0.27 35  30-40 0.00 0.00 0.00 23  40-50 0.50 0.04 0.07 28  50-60 0.22 0.10 0.14 39  60-70 0.15 0.22 0.18 27  70-80 0.00 0.00 0.00 4  <20 0.35 0.63 0.45 35  accuracy 0.25 191  macro avg 0.20 0.20 0.16 191 weighted avg 0.24 0.25 0.20 191,Per-fold scores: ROC-AUC (weighted OvO): 0.607 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.605 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.658 +/- 0.032 (in 3 folds) au-PRC (macro OvO): 0.653 +/- 0.037 (in 3 folds) Accuracy: 0.199 +/- 0.010 (in 3 folds) MCC: 0.150 +/- 0.039 (in 3 folds) Global scores: Accuracy: 0.199 MCC: 0.029 Global classification report:  precision recall f1-score support  20-30 0.20 0.26 0.23 35  30-40 0.00 0.00 0.00 23  40-50 0.14 0.25 0.18 28  50-60 0.12 0.18 0.15 39  60-70 0.06 0.04 0.04 27  70-80 0.00 0.00 0.00 4  <20 0.70 0.40 0.51 35  accuracy 0.20 191  macro avg 0.17 0.16 0.16 191 weighted avg 0.22 0.20 0.20 191,Per-fold scores: ROC-AUC (weighted 
OvO): 0.607 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.605 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.658 +/- 0.032 (in 3 folds) au-PRC (macro OvO): 0.653 +/- 0.037 (in 3 folds) Accuracy: 0.244 +/- 0.048 (in 3 folds) MCC: 0.099 +/- 0.048 (in 3 folds) Global scores: Accuracy: 0.246 MCC: 0.092 Global classification report:  precision recall f1-score support  20-30 0.21 0.43 0.28 35  30-40 0.00 0.00 0.00 23  40-50 0.50 0.04 0.07 28  50-60 0.27 0.10 0.15 39  60-70 0.13 0.19 0.15 27  70-80 0.00 0.00 0.00 4  <20 0.34 0.63 0.44 35  accuracy 0.25 191  macro avg 0.21 0.20 0.16 191 weighted avg 0.25 0.25 0.19 191
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.588 +/- 0.061 (in 3 folds) ROC-AUC (macro OvO): 0.583 +/- 0.058 (in 3 folds) au-PRC (weighted OvO): 0.643 +/- 0.057 (in 3 folds) au-PRC (macro OvO): 0.636 +/- 0.051 (in 3 folds) Accuracy: 0.288 +/- 0.097 (in 3 folds) MCC: 0.170 +/- 0.110 (in 3 folds) Global scores: Accuracy: 0.283 MCC: 0.138 Global classification report:  precision recall f1-score support  20-30 0.30 0.49 0.37 35  30-40 0.30 0.13 0.18 23  40-50 0.07 0.07 0.07 28  50-60 0.25 0.21 0.23 39  60-70 0.17 0.22 0.19 27  70-80 0.00 0.00 0.00 4  <20 0.62 0.51 0.56 35  accuracy 0.28 191  macro avg 0.25 0.23 0.23 191 weighted avg 0.29 0.28 0.28 191,Per-fold scores: ROC-AUC (weighted OvO): 0.588 +/- 0.061 (in 3 folds) ROC-AUC (macro OvO): 0.583 +/- 0.058 (in 3 folds) au-PRC (weighted OvO): 0.643 +/- 0.057 (in 3 folds) au-PRC (macro OvO): 0.636 +/- 0.051 (in 3 folds) Accuracy: 0.258 +/- 0.123 (in 3 folds) MCC: 0.145 +/- 0.146 (in 3 folds) Global scores: Accuracy: 0.251 MCC: 0.094 Global classification report:  precision recall f1-score support  20-30 0.29 0.54 0.38 35  30-40 0.00 0.00 0.00 23  40-50 0.27 0.21 0.24 28  50-60 0.12 0.13 0.13 39  60-70 0.07 0.07 0.07 27  70-80 0.00 0.00 0.00 4  <20 0.52 0.46 0.48 35  accuracy 0.25 191  macro avg 0.18 0.20 0.19 191 weighted avg 0.22 0.25 0.23 191
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.684 +/- 0.097 (in 3 folds),0.684 +/- 0.097 (in 3 folds),0.793 +/- 0.104 (in 3 folds),0.793 +/- 0.104 (in 3 folds),0.691 +/- 0.071 (in 3 folds),0.274 +/- 0.255 (in 3 folds),0.689,0.247,132.0,0.0,132.0,0.0,False
xgboost,0.673 +/- 0.077 (in 3 folds),0.673 +/- 0.077 (in 3 folds),0.802 +/- 0.073 (in 3 folds),0.802 +/- 0.073 (in 3 folds),0.685 +/- 0.074 (in 3 folds),0.276 +/- 0.162 (in 3 folds),0.682,0.263,132.0,0.0,132.0,0.0,False
lasso_multiclass,0.621 +/- 0.062 (in 3 folds),0.621 +/- 0.062 (in 3 folds),0.765 +/- 0.100 (in 3 folds),0.765 +/- 0.100 (in 3 folds),0.576 +/- 0.097 (in 3 folds),0.137 +/- 0.193 (in 3 folds),0.576,0.115,132.0,0.0,132.0,0.0,False
linearsvm_ovr,0.593 +/- 0.059 (in 3 folds),0.593 +/- 0.059 (in 3 folds),0.768 +/- 0.090 (in 3 folds),0.768 +/- 0.090 (in 3 folds),0.547 +/- 0.061 (in 3 folds),0.023 +/- 0.123 (in 3 folds),0.545,0.01,132.0,0.0,132.0,0.0,False
elasticnet_cv,0.539 +/- 0.080 (in 3 folds),0.539 +/- 0.080 (in 3 folds),0.706 +/- 0.070 (in 3 folds),0.706 +/- 0.070 (in 3 folds),0.654 +/- 0.077 (in 3 folds),0.037 +/- 0.142 (in 3 folds),0.652,0.025,132.0,0.0,132.0,0.0,False
lasso_cv,0.532 +/- 0.084 (in 3 folds),0.532 +/- 0.084 (in 3 folds),0.700 +/- 0.075 (in 3 folds),0.700 +/- 0.075 (in 3 folds),0.632 +/- 0.099 (in 3 folds),-0.001 +/- 0.085 (in 3 folds),0.629,0.012,132.0,0.0,132.0,0.0,False
dummy_stratified,0.511 +/- 0.050 (in 3 folds),0.511 +/- 0.050 (in 3 folds),0.668 +/- 0.052 (in 3 folds),0.668 +/- 0.052 (in 3 folds),0.520 +/- 0.085 (in 3 folds),0.025 +/- 0.101 (in 3 folds),0.523,0.007,132.0,0.0,132.0,0.0,False
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.659,0.0,132.0,0.0,132.0,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.659,0.0,132.0,0.0,132.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.684 +/- 0.097 (in 3 folds),0.684 +/- 0.097 (in 3 folds),0.793 +/- 0.104 (in 3 folds),0.793 +/- 0.104 (in 3 folds),0.691 +/- 0.071 (in 3 folds),0.274 +/- 0.255 (in 3 folds),0.689,0.247,132,0,132,0.0,False
xgboost,0.673 +/- 0.077 (in 3 folds),0.673 +/- 0.077 (in 3 folds),0.802 +/- 0.073 (in 3 folds),0.802 +/- 0.073 (in 3 folds),0.685 +/- 0.074 (in 3 folds),0.276 +/- 0.162 (in 3 folds),0.682,0.263,132,0,132,0.0,False
lasso_multiclass,0.621 +/- 0.062 (in 3 folds),0.621 +/- 0.062 (in 3 folds),0.765 +/- 0.100 (in 3 folds),0.765 +/- 0.100 (in 3 folds),0.576 +/- 0.097 (in 3 folds),0.137 +/- 0.193 (in 3 folds),0.576,0.115,132,0,132,0.0,False
linearsvm_ovr,0.593 +/- 0.059 (in 3 folds),0.593 +/- 0.059 (in 3 folds),0.768 +/- 0.090 (in 3 folds),0.768 +/- 0.090 (in 3 folds),0.547 +/- 0.061 (in 3 folds),0.023 +/- 0.123 (in 3 folds),0.545,0.01,132,0,132,0.0,False
elasticnet_cv,0.539 +/- 0.080 (in 3 folds),0.539 +/- 0.080 (in 3 folds),0.706 +/- 0.070 (in 3 folds),0.706 +/- 0.070 (in 3 folds),0.654 +/- 0.077 (in 3 folds),0.037 +/- 0.142 (in 3 folds),0.652,0.025,132,0,132,0.0,False
lasso_cv,0.532 +/- 0.084 (in 3 folds),0.532 +/- 0.084 (in 3 folds),0.700 +/- 0.075 (in 3 folds),0.700 +/- 0.075 (in 3 folds),0.632 +/- 0.099 (in 3 folds),-0.001 +/- 0.085 (in 3 folds),0.629,0.012,132,0,132,0.0,False
dummy_stratified,0.511 +/- 0.050 (in 3 folds),0.511 +/- 0.050 (in 3 folds),0.668 +/- 0.052 (in 3 folds),0.668 +/- 0.052 (in 3 folds),0.520 +/- 0.085 (in 3 folds),0.025 +/- 0.101 (in 3 folds),0.523,0.007,132,0,132,0.0,False
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.659,0.0,132,0,132,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.661 +/- 0.061 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.659,0.0,132,0,132,0.0,True


rf_multiclass,xgboost,lasso_multiclass,linearsvm_ovr
Per-fold scores: ROC-AUC (weighted OvO): 0.684 +/- 0.097 (in 3 folds) ROC-AUC (macro OvO): 0.684 +/- 0.097 (in 3 folds) au-PRC (weighted OvO): 0.793 +/- 0.104 (in 3 folds) au-PRC (macro OvO): 0.793 +/- 0.104 (in 3 folds) Accuracy: 0.691 +/- 0.071 (in 3 folds) MCC: 0.274 +/- 0.255 (in 3 folds) Global scores: Accuracy: 0.689 MCC: 0.247 Global classification report:  precision recall f1-score support  50+ 0.58 0.33 0.42 45  under 50 0.72 0.87 0.79 87  accuracy 0.69 132  macro avg 0.65 0.60 0.61 132 weighted avg 0.67 0.69 0.66 132,Per-fold scores: ROC-AUC (weighted OvO): 0.673 +/- 0.077 (in 3 folds) ROC-AUC (macro OvO): 0.673 +/- 0.077 (in 3 folds) au-PRC (weighted OvO): 0.802 +/- 0.073 (in 3 folds) au-PRC (macro OvO): 0.802 +/- 0.073 (in 3 folds) Accuracy: 0.685 +/- 0.074 (in 3 folds) MCC: 0.276 +/- 0.162 (in 3 folds) Global scores: Accuracy: 0.682 MCC: 0.263 Global classification report:  precision recall f1-score support  50+ 0.54 0.44 0.49 45  under 50 0.74 0.80 0.77 87  accuracy 0.68 132  macro avg 0.64 0.62 0.63 132 weighted avg 0.67 0.68 0.67 132,Per-fold scores: ROC-AUC (weighted OvO): 0.621 +/- 0.062 (in 3 folds) ROC-AUC (macro OvO): 0.621 +/- 0.062 (in 3 folds) au-PRC (weighted OvO): 0.765 +/- 0.100 (in 3 folds) au-PRC (macro OvO): 0.765 +/- 0.100 (in 3 folds) Accuracy: 0.576 +/- 0.097 (in 3 folds) MCC: 0.137 +/- 0.193 (in 3 folds) Global scores: Accuracy: 0.576 MCC: 0.115 Global classification report:  precision recall f1-score support  50+ 0.40 0.51 0.45 45  under 50 0.71 0.61 0.65 87  accuracy 0.58 132  macro avg 0.56 0.56 0.55 132 weighted avg 0.60 0.58 0.59 132,Per-fold scores: ROC-AUC (weighted OvO): 0.593 +/- 0.059 (in 3 folds) ROC-AUC (macro OvO): 0.593 +/- 0.059 (in 3 folds) au-PRC (weighted OvO): 0.768 +/- 0.090 (in 3 folds) au-PRC (macro OvO): 0.768 +/- 0.090 (in 3 folds) Accuracy: 0.547 +/- 0.061 (in 3 folds) MCC: 0.023 +/- 0.123 (in 3 folds) Global scores: Accuracy: 0.545 MCC: 0.010 Global classification report:  precision recall f1-score support 
 50+ 0.35 0.38 0.36 45  under 50 0.66 0.63 0.65 87  accuracy 0.55 132  macro avg 0.50 0.50 0.50 132 weighted avg 0.56 0.55 0.55 132
,,,
,,,
,,,


elasticnet_cv,lasso_cv,dummy_stratified,ridge_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.539 +/- 0.080 (in 3 folds) ROC-AUC (macro OvO): 0.539 +/- 0.080 (in 3 folds) au-PRC (weighted OvO): 0.706 +/- 0.070 (in 3 folds) au-PRC (macro OvO): 0.706 +/- 0.070 (in 3 folds) Accuracy: 0.654 +/- 0.077 (in 3 folds) MCC: 0.037 +/- 0.142 (in 3 folds) Global scores: Accuracy: 0.652 MCC: 0.025 Global classification report:  precision recall f1-score support  50+ 0.40 0.04 0.08 45  under 50 0.66 0.97 0.79 87  accuracy 0.65 132  macro avg 0.53 0.50 0.43 132 weighted avg 0.57 0.65 0.54 132,Per-fold scores: ROC-AUC (weighted OvO): 0.532 +/- 0.084 (in 3 folds) ROC-AUC (macro OvO): 0.532 +/- 0.084 (in 3 folds) au-PRC (weighted OvO): 0.700 +/- 0.075 (in 3 folds) au-PRC (macro OvO): 0.700 +/- 0.075 (in 3 folds) Accuracy: 0.632 +/- 0.099 (in 3 folds) MCC: -0.001 +/- 0.085 (in 3 folds) Global scores: Accuracy: 0.629 MCC: 0.012 Global classification report:  precision recall f1-score support  50+ 0.36 0.11 0.17 45  under 50 0.66 0.90 0.76 87  accuracy 0.63 132  macro avg 0.51 0.50 0.47 132 weighted avg 0.56 0.63 0.56 132,Per-fold scores: ROC-AUC (weighted OvO): 0.511 +/- 0.050 (in 3 folds) ROC-AUC (macro OvO): 0.511 +/- 0.050 (in 3 folds) au-PRC (weighted OvO): 0.668 +/- 0.052 (in 3 folds) au-PRC (macro OvO): 0.668 +/- 0.052 (in 3 folds) Accuracy: 0.520 +/- 0.085 (in 3 folds) MCC: 0.025 +/- 0.101 (in 3 folds) Global scores: Accuracy: 0.523 MCC: 0.007 Global classification report:  precision recall f1-score support  50+ 0.34 0.44 0.39 45  under 50 0.66 0.56 0.61 87  accuracy 0.52 132  macro avg 0.50 0.50 0.50 132 weighted avg 0.55 0.52 0.53 132,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.661 +/- 0.061 (in 3 folds) au-PRC (macro OvO): 0.661 +/- 0.061 (in 3 folds) Accuracy: 0.661 +/- 0.061 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.659 MCC: 0.000 Global classification report:  precision recall f1-score 
support  50+ 0.00 0.00 0.00 45  under 50 0.66 1.00 0.79 87  accuracy 0.66 132  macro avg 0.33 0.50 0.40 132 weighted avg 0.43 0.66 0.52 132
,,,
,,,
,,,


dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.661 +/- 0.061 (in 3 folds) au-PRC (macro OvO): 0.661 +/- 0.061 (in 3 folds) Accuracy: 0.661 +/- 0.061 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.659 MCC: 0.000 Global classification report:  precision recall f1-score support  50+ 0.00 0.00 0.00 45  under 50 0.66 1.00 0.79 87  accuracy 0.66 132  macro avg 0.33 0.50 0.40 132 weighted avg 0.43 0.66 0.52 132


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.719 +/- 0.026 (in 3 folds),0.719 +/- 0.026 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.669 +/- 0.028 (in 3 folds),0.271 +/- 0.091 (in 3 folds),0.67,0.25,191.0,0.0,191.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.719 +/- 0.026 (in 3 folds),0.719 +/- 0.026 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.674 +/- 0.033 (in 3 folds),0.306 +/- 0.041 (in 3 folds),0.675,0.301,191.0,0.0,191.0,0.0,False
lasso_multiclass,0.679 +/- 0.042 (in 3 folds),0.679 +/- 0.042 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.675 +/- 0.006 (in 3 folds),0.341 +/- 0.032 (in 3 folds),0.675,0.333,191.0,0.0,191.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.679 +/- 0.042 (in 3 folds),0.679 +/- 0.042 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.571 +/- 0.064 (in 3 folds),0.165 +/- 0.042 (in 3 folds),0.571,0.178,191.0,0.0,191.0,0.0,False
xgboost,0.676 +/- 0.007 (in 3 folds),0.676 +/- 0.007 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.654 +/- 0.015 (in 3 folds),0.258 +/- 0.006 (in 3 folds),0.654,0.256,191.0,0.0,191.0,0.0,False
xgboost.decision_thresholds_tuned,0.676 +/- 0.007 (in 3 folds),0.676 +/- 0.007 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.636 +/- 0.058 (in 3 folds),0.292 +/- 0.036 (in 3 folds),0.639,0.298,191.0,0.0,191.0,0.0,False
linearsvm_ovr,0.675 +/- 0.019 (in 3 folds),0.675 +/- 0.019 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.617 +/- 0.044 (in 3 folds),0.208 +/- 0.063 (in 3 folds),0.618,0.194,191.0,0.0,191.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.675 +/- 0.019 (in 3 folds),0.675 +/- 0.019 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.562 +/- 0.087 (in 3 folds),0.235 +/- 0.100 (in 3 folds),0.56,0.175,191.0,0.0,191.0,0.0,False
elasticnet_cv,0.599 +/- 0.086 (in 3 folds),0.599 +/- 0.086 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.620 +/- 0.065 (in 3 folds),-0.036 +/- 0.080 (in 3 folds),0.623,-0.035,191.0,0.0,191.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.599 +/- 0.086 (in 3 folds),0.599 +/- 0.086 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.556 +/- 0.132 (in 3 folds),0.100 +/- 0.086 (in 3 folds),0.555,0.107,191.0,0.0,191.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.719 +/- 0.026 (in 3 folds),0.719 +/- 0.026 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.669 +/- 0.028 (in 3 folds),0.271 +/- 0.091 (in 3 folds),0.67,0.25,191,0,191,0.0,False
rf_multiclass.decision_thresholds_tuned,0.719 +/- 0.026 (in 3 folds),0.719 +/- 0.026 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.825 +/- 0.072 (in 3 folds),0.674 +/- 0.033 (in 3 folds),0.306 +/- 0.041 (in 3 folds),0.675,0.301,191,0,191,0.0,False
lasso_multiclass,0.679 +/- 0.042 (in 3 folds),0.679 +/- 0.042 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.675 +/- 0.006 (in 3 folds),0.341 +/- 0.032 (in 3 folds),0.675,0.333,191,0,191,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.679 +/- 0.042 (in 3 folds),0.679 +/- 0.042 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.810 +/- 0.074 (in 3 folds),0.571 +/- 0.064 (in 3 folds),0.165 +/- 0.042 (in 3 folds),0.571,0.178,191,0,191,0.0,False
xgboost,0.676 +/- 0.007 (in 3 folds),0.676 +/- 0.007 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.654 +/- 0.015 (in 3 folds),0.258 +/- 0.006 (in 3 folds),0.654,0.256,191,0,191,0.0,False
xgboost.decision_thresholds_tuned,0.676 +/- 0.007 (in 3 folds),0.676 +/- 0.007 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.804 +/- 0.039 (in 3 folds),0.636 +/- 0.058 (in 3 folds),0.292 +/- 0.036 (in 3 folds),0.639,0.298,191,0,191,0.0,False
linearsvm_ovr,0.675 +/- 0.019 (in 3 folds),0.675 +/- 0.019 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.617 +/- 0.044 (in 3 folds),0.208 +/- 0.063 (in 3 folds),0.618,0.194,191,0,191,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.675 +/- 0.019 (in 3 folds),0.675 +/- 0.019 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.811 +/- 0.045 (in 3 folds),0.562 +/- 0.087 (in 3 folds),0.235 +/- 0.100 (in 3 folds),0.56,0.175,191,0,191,0.0,False
elasticnet_cv,0.599 +/- 0.086 (in 3 folds),0.599 +/- 0.086 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.620 +/- 0.065 (in 3 folds),-0.036 +/- 0.080 (in 3 folds),0.623,-0.035,191,0,191,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.599 +/- 0.086 (in 3 folds),0.599 +/- 0.086 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.729 +/- 0.050 (in 3 folds),0.556 +/- 0.132 (in 3 folds),0.100 +/- 0.086 (in 3 folds),0.555,0.107,191,0,191,0.0,False


rf_multiclass,rf_multiclass.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.719 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.719 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.825 +/- 0.072 (in 3 folds) au-PRC (macro OvO): 0.825 +/- 0.072 (in 3 folds) Accuracy: 0.669 +/- 0.028 (in 3 folds) MCC: 0.271 +/- 0.091 (in 3 folds) Global scores: Accuracy: 0.670 MCC: 0.250 Global classification report:  precision recall f1-score support  50+ 0.57 0.40 0.47 70  under 50 0.70 0.83 0.76 121  accuracy 0.67 191  macro avg 0.64 0.61 0.62 191 weighted avg 0.66 0.67 0.65 191,Per-fold scores: ROC-AUC (weighted OvO): 0.719 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.719 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.825 +/- 0.072 (in 3 folds) au-PRC (macro OvO): 0.825 +/- 0.072 (in 3 folds) Accuracy: 0.674 +/- 0.033 (in 3 folds) MCC: 0.306 +/- 0.041 (in 3 folds) Global scores: Accuracy: 0.675 MCC: 0.301 Global classification report:  precision recall f1-score support  50+ 0.56 0.56 0.56 70  under 50 0.74 0.74 0.74 121  accuracy 0.68 191  macro avg 0.65 0.65 0.65 191 weighted avg 0.68 0.68 0.68 191,Per-fold scores: ROC-AUC (weighted OvO): 0.679 +/- 0.042 (in 3 folds) ROC-AUC (macro OvO): 0.679 +/- 0.042 (in 3 folds) au-PRC (weighted OvO): 0.810 +/- 0.074 (in 3 folds) au-PRC (macro OvO): 0.810 +/- 0.074 (in 3 folds) Accuracy: 0.675 +/- 0.006 (in 3 folds) MCC: 0.341 +/- 0.032 (in 3 folds) Global scores: Accuracy: 0.675 MCC: 0.333 Global classification report:  precision recall f1-score support  50+ 0.55 0.66 0.60 70  under 50 0.78 0.69 0.73 121  accuracy 0.68 191  macro avg 0.66 0.67 0.66 191 weighted avg 0.69 0.68 0.68 191,Per-fold scores: ROC-AUC (weighted OvO): 0.679 +/- 0.042 (in 3 folds) ROC-AUC (macro OvO): 0.679 +/- 0.042 (in 3 folds) au-PRC (weighted OvO): 0.810 +/- 0.074 (in 3 folds) au-PRC (macro OvO): 0.810 +/- 0.074 (in 3 folds) Accuracy: 0.571 +/- 0.064 (in 3 folds) MCC: 0.165 +/- 0.042 (in 3 folds) Global scores: Accuracy: 0.571 MCC: 0.178 Global classification report:  precision recall f1-score 
support  50+ 0.44 0.67 0.53 70  under 50 0.73 0.51 0.60 121  accuracy 0.57 191  macro avg 0.59 0.59 0.57 191 weighted avg 0.62 0.57 0.58 191
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.676 +/- 0.007 (in 3 folds) ROC-AUC (macro OvO): 0.676 +/- 0.007 (in 3 folds) au-PRC (weighted OvO): 0.804 +/- 0.039 (in 3 folds) au-PRC (macro OvO): 0.804 +/- 0.039 (in 3 folds) Accuracy: 0.654 +/- 0.015 (in 3 folds) MCC: 0.258 +/- 0.006 (in 3 folds) Global scores: Accuracy: 0.654 MCC: 0.256 Global classification report:  precision recall f1-score support  50+ 0.53 0.53 0.53 70  under 50 0.73 0.73 0.73 121  accuracy 0.65 191  macro avg 0.63 0.63 0.63 191 weighted avg 0.65 0.65 0.65 191,Per-fold scores: ROC-AUC (weighted OvO): 0.676 +/- 0.007 (in 3 folds) ROC-AUC (macro OvO): 0.676 +/- 0.007 (in 3 folds) au-PRC (weighted OvO): 0.804 +/- 0.039 (in 3 folds) au-PRC (macro OvO): 0.804 +/- 0.039 (in 3 folds) Accuracy: 0.636 +/- 0.058 (in 3 folds) MCC: 0.292 +/- 0.036 (in 3 folds) Global scores: Accuracy: 0.639 MCC: 0.298 Global classification report:  precision recall f1-score support  50+ 0.51 0.71 0.59 70  under 50 0.78 0.60 0.68 121  accuracy 0.64 191  macro avg 0.64 0.65 0.63 191 weighted avg 0.68 0.64 0.65 191,Per-fold scores: ROC-AUC (weighted OvO): 0.675 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.675 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.811 +/- 0.045 (in 3 folds) au-PRC (macro OvO): 0.811 +/- 0.045 (in 3 folds) Accuracy: 0.617 +/- 0.044 (in 3 folds) MCC: 0.208 +/- 0.063 (in 3 folds) Global scores: Accuracy: 0.618 MCC: 0.194 Global classification report:  precision recall f1-score support  50+ 0.48 0.53 0.50 70  under 50 0.71 0.67 0.69 121  accuracy 0.62 191  macro avg 0.60 0.60 0.60 191 weighted avg 0.63 0.62 0.62 191,Per-fold scores: ROC-AUC (weighted OvO): 0.675 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.675 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.811 +/- 0.045 (in 3 folds) au-PRC (macro OvO): 0.811 +/- 0.045 (in 3 folds) Accuracy: 0.562 +/- 0.087 (in 3 folds) MCC: 0.235 +/- 0.100 (in 3 folds) Global scores: Accuracy: 0.560 MCC: 0.175 Global classification report:  precision recall f1-score 
support  50+ 0.44 0.70 0.54 70  under 50 0.73 0.48 0.58 121  accuracy 0.56 191  macro avg 0.59 0.59 0.56 191 weighted avg 0.63 0.56 0.56 191
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.599 +/- 0.086 (in 3 folds) ROC-AUC (macro OvO): 0.599 +/- 0.086 (in 3 folds) au-PRC (weighted OvO): 0.729 +/- 0.050 (in 3 folds) au-PRC (macro OvO): 0.729 +/- 0.050 (in 3 folds) Accuracy: 0.620 +/- 0.065 (in 3 folds) MCC: -0.036 +/- 0.080 (in 3 folds) Global scores: Accuracy: 0.623 MCC: -0.035 Global classification report:  precision recall f1-score support  50+ 0.25 0.01 0.03 70  under 50 0.63 0.98 0.77 121  accuracy 0.62 191  macro avg 0.44 0.49 0.40 191 weighted avg 0.49 0.62 0.50 191,Per-fold scores: ROC-AUC (weighted OvO): 0.599 +/- 0.086 (in 3 folds) ROC-AUC (macro OvO): 0.599 +/- 0.086 (in 3 folds) au-PRC (weighted OvO): 0.729 +/- 0.050 (in 3 folds) au-PRC (macro OvO): 0.729 +/- 0.050 (in 3 folds) Accuracy: 0.556 +/- 0.132 (in 3 folds) MCC: 0.100 +/- 0.086 (in 3 folds) Global scores: Accuracy: 0.555 MCC: 0.107 Global classification report:  precision recall f1-score support  50+ 0.42 0.56 0.48 70  under 50 0.68 0.55 0.61 121  accuracy 0.55 191  macro avg 0.55 0.56 0.55 191 weighted avg 0.59 0.55 0.56 191,Per-fold scores: ROC-AUC (weighted OvO): 0.595 +/- 0.083 (in 3 folds) ROC-AUC (macro OvO): 0.595 +/- 0.083 (in 3 folds) au-PRC (weighted OvO): 0.729 +/- 0.048 (in 3 folds) au-PRC (macro OvO): 0.729 +/- 0.048 (in 3 folds) Accuracy: 0.604 +/- 0.083 (in 3 folds) MCC: -0.012 +/- 0.061 (in 3 folds) Global scores: Accuracy: 0.607 MCC: 0.001 Global classification report:  precision recall f1-score support  50+ 0.37 0.10 0.16 70  under 50 0.63 0.90 0.74 121  accuracy 0.61 191  macro avg 0.50 0.50 0.45 191 weighted avg 0.54 0.61 0.53 191,Per-fold scores: ROC-AUC (weighted OvO): 0.595 +/- 0.083 (in 3 folds) ROC-AUC (macro OvO): 0.595 +/- 0.083 (in 3 folds) au-PRC (weighted OvO): 0.729 +/- 0.048 (in 3 folds) au-PRC (macro OvO): 0.729 +/- 0.048 (in 3 folds) Accuracy: 0.615 +/- 0.065 (in 3 folds) MCC: 0.013 +/- 0.105 (in 3 folds) Global scores: Accuracy: 0.618 MCC: 0.100 Global classification report:  precision recall f1-score 
support  50+ 0.46 0.26 0.33 70  under 50 0.66 0.83 0.73 121  accuracy 0.62 191  macro avg 0.56 0.54 0.53 191 weighted avg 0.59 0.62 0.59 191
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.630 +/- 0.069 (in 3 folds) au-PRC (macro OvO): 0.630 +/- 0.069 (in 3 folds) Accuracy: 0.630 +/- 0.069 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.634 MCC: 0.000 Global classification report:  precision recall f1-score support  50+ 0.00 0.00 0.00 70  under 50 0.63 1.00 0.78 121  accuracy 0.63 191  macro avg 0.32 0.50 0.39 191 weighted avg 0.40 0.63 0.49 191,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.630 +/- 0.069 (in 3 folds) au-PRC (macro OvO): 0.630 +/- 0.069 (in 3 folds) Accuracy: 0.596 +/- 0.128 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.602 MCC: 0.112 Global classification report:  precision recall f1-score support  50+ 0.45 0.37 0.41 70  under 50 0.67 0.74 0.70 121  accuracy 0.60 191  macro avg 0.56 0.55 0.55 191 weighted avg 0.59 0.60 0.59 191
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.age_group_pediatric_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.942 +/- 0.060 (in 3 folds),0.942 +/- 0.060 (in 3 folds),0.815 +/- 0.164 (in 3 folds),0.815 +/- 0.164 (in 3 folds),0.856 +/- 0.053 (in 3 folds),0.142 +/- 0.246 (in 3 folds),0.856,0.207,132.0,0.0,132.0,0.0,False
lasso_multiclass,0.938 +/- 0.055 (in 3 folds),0.938 +/- 0.055 (in 3 folds),0.825 +/- 0.175 (in 3 folds),0.825 +/- 0.175 (in 3 folds),0.901 +/- 0.020 (in 3 folds),0.647 +/- 0.085 (in 3 folds),0.902,0.625,132.0,0.0,132.0,0.0,False
linearsvm_ovr,0.928 +/- 0.078 (in 3 folds),0.928 +/- 0.078 (in 3 folds),0.846 +/- 0.110 (in 3 folds),0.846 +/- 0.110 (in 3 folds),0.909 +/- 0.022 (in 3 folds),0.654 +/- 0.065 (in 3 folds),0.909,0.632,132.0,0.0,132.0,0.0,False
rf_multiclass,0.890 +/- 0.065 (in 3 folds),0.890 +/- 0.065 (in 3 folds),0.629 +/- 0.207 (in 3 folds),0.629 +/- 0.207 (in 3 folds),0.865 +/- 0.053 (in 3 folds),0.182 +/- 0.316 (in 3 folds),0.864,0.294,132.0,0.0,132.0,0.0,False
elasticnet_cv,0.869 +/- 0.113 (in 3 folds),0.869 +/- 0.113 (in 3 folds),0.693 +/- 0.171 (in 3 folds),0.693 +/- 0.171 (in 3 folds),0.871 +/- 0.077 (in 3 folds),0.252 +/- 0.436 (in 3 folds),0.871,0.361,132.0,0.0,132.0,0.0,False
xgboost,0.841 +/- 0.167 (in 3 folds),0.841 +/- 0.167 (in 3 folds),0.644 +/- 0.261 (in 3 folds),0.644 +/- 0.261 (in 3 folds),0.889 +/- 0.055 (in 3 folds),0.391 +/- 0.425 (in 3 folds),0.886,0.466,132.0,0.0,132.0,0.0,False
lasso_cv,0.816 +/- 0.100 (in 3 folds),0.816 +/- 0.100 (in 3 folds),0.568 +/- 0.217 (in 3 folds),0.568 +/- 0.217 (in 3 folds),0.849 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.848,0.0,132.0,0.0,132.0,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.151 +/- 0.042 (in 3 folds),0.151 +/- 0.042 (in 3 folds),0.849 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.848,0.0,132.0,0.0,132.0,0.0,True
dummy_stratified,0.452 +/- 0.034 (in 3 folds),0.452 +/- 0.034 (in 3 folds),0.147 +/- 0.040 (in 3 folds),0.147 +/- 0.040 (in 3 folds),0.704 +/- 0.026 (in 3 folds),-0.087 +/- 0.056 (in 3 folds),0.705,-0.083,132.0,0.0,132.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.942 +/- 0.060 (in 3 folds),0.942 +/- 0.060 (in 3 folds),0.815 +/- 0.164 (in 3 folds),0.815 +/- 0.164 (in 3 folds),0.856 +/- 0.053 (in 3 folds),0.142 +/- 0.246 (in 3 folds),0.856,0.207,132,0,132,0.0,False
lasso_multiclass,0.938 +/- 0.055 (in 3 folds),0.938 +/- 0.055 (in 3 folds),0.825 +/- 0.175 (in 3 folds),0.825 +/- 0.175 (in 3 folds),0.901 +/- 0.020 (in 3 folds),0.647 +/- 0.085 (in 3 folds),0.902,0.625,132,0,132,0.0,False
linearsvm_ovr,0.928 +/- 0.078 (in 3 folds),0.928 +/- 0.078 (in 3 folds),0.846 +/- 0.110 (in 3 folds),0.846 +/- 0.110 (in 3 folds),0.909 +/- 0.022 (in 3 folds),0.654 +/- 0.065 (in 3 folds),0.909,0.632,132,0,132,0.0,False
rf_multiclass,0.890 +/- 0.065 (in 3 folds),0.890 +/- 0.065 (in 3 folds),0.629 +/- 0.207 (in 3 folds),0.629 +/- 0.207 (in 3 folds),0.865 +/- 0.053 (in 3 folds),0.182 +/- 0.316 (in 3 folds),0.864,0.294,132,0,132,0.0,False
elasticnet_cv,0.869 +/- 0.113 (in 3 folds),0.869 +/- 0.113 (in 3 folds),0.693 +/- 0.171 (in 3 folds),0.693 +/- 0.171 (in 3 folds),0.871 +/- 0.077 (in 3 folds),0.252 +/- 0.436 (in 3 folds),0.871,0.361,132,0,132,0.0,False
xgboost,0.841 +/- 0.167 (in 3 folds),0.841 +/- 0.167 (in 3 folds),0.644 +/- 0.261 (in 3 folds),0.644 +/- 0.261 (in 3 folds),0.889 +/- 0.055 (in 3 folds),0.391 +/- 0.425 (in 3 folds),0.886,0.466,132,0,132,0.0,False
lasso_cv,0.816 +/- 0.100 (in 3 folds),0.816 +/- 0.100 (in 3 folds),0.568 +/- 0.217 (in 3 folds),0.568 +/- 0.217 (in 3 folds),0.849 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.848,0.0,132,0,132,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.151 +/- 0.042 (in 3 folds),0.151 +/- 0.042 (in 3 folds),0.849 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.848,0.0,132,0,132,0.0,True
dummy_stratified,0.452 +/- 0.034 (in 3 folds),0.452 +/- 0.034 (in 3 folds),0.147 +/- 0.040 (in 3 folds),0.147 +/- 0.040 (in 3 folds),0.704 +/- 0.026 (in 3 folds),-0.087 +/- 0.056 (in 3 folds),0.705,-0.083,132,0,132,0.0,False


ridge_cv,lasso_multiclass,linearsvm_ovr,rf_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.942 +/- 0.060 (in 3 folds) ROC-AUC (macro OvO): 0.942 +/- 0.060 (in 3 folds) au-PRC (weighted OvO): 0.815 +/- 0.164 (in 3 folds) au-PRC (macro OvO): 0.815 +/- 0.164 (in 3 folds) Accuracy: 0.856 +/- 0.053 (in 3 folds) MCC: 0.142 +/- 0.246 (in 3 folds) Global scores: Accuracy: 0.856 MCC: 0.207 Global classification report:  precision recall f1-score support  18+ 0.85 1.00 0.92 112  under 18 1.00 0.05 0.10 20  accuracy 0.86 132  macro avg 0.93 0.53 0.51 132 weighted avg 0.88 0.86 0.80 132,Per-fold scores: ROC-AUC (weighted OvO): 0.938 +/- 0.055 (in 3 folds) ROC-AUC (macro OvO): 0.938 +/- 0.055 (in 3 folds) au-PRC (weighted OvO): 0.825 +/- 0.175 (in 3 folds) au-PRC (macro OvO): 0.825 +/- 0.175 (in 3 folds) Accuracy: 0.901 +/- 0.020 (in 3 folds) MCC: 0.647 +/- 0.085 (in 3 folds) Global scores: Accuracy: 0.902 MCC: 0.625 Global classification report:  precision recall f1-score support  18+ 0.95 0.94 0.94 112  under 18 0.67 0.70 0.68 20  accuracy 0.90 132  macro avg 0.81 0.82 0.81 132 weighted avg 0.90 0.90 0.90 132,Per-fold scores: ROC-AUC (weighted OvO): 0.928 +/- 0.078 (in 3 folds) ROC-AUC (macro OvO): 0.928 +/- 0.078 (in 3 folds) au-PRC (weighted OvO): 0.846 +/- 0.110 (in 3 folds) au-PRC (macro OvO): 0.846 +/- 0.110 (in 3 folds) Accuracy: 0.909 +/- 0.022 (in 3 folds) MCC: 0.654 +/- 0.065 (in 3 folds) Global scores: Accuracy: 0.909 MCC: 0.632 Global classification report:  precision recall f1-score support  18+ 0.94 0.96 0.95 112  under 18 0.72 0.65 0.68 20  accuracy 0.91 132  macro avg 0.83 0.80 0.82 132 weighted avg 0.91 0.91 0.91 132,Per-fold scores: ROC-AUC (weighted OvO): 0.890 +/- 0.065 (in 3 folds) ROC-AUC (macro OvO): 0.890 +/- 0.065 (in 3 folds) au-PRC (weighted OvO): 0.629 +/- 0.207 (in 3 folds) au-PRC (macro OvO): 0.629 +/- 0.207 (in 3 folds) Accuracy: 0.865 +/- 0.053 (in 3 folds) MCC: 0.182 +/- 0.316 (in 3 folds) Global scores: Accuracy: 0.864 MCC: 0.294 Global classification report:  precision recall f1-score 
support  18+ 0.86 1.00 0.93 112  under 18 1.00 0.10 0.18 20  accuracy 0.86 132  macro avg 0.93 0.55 0.55 132 weighted avg 0.88 0.86 0.81 132
,,,
,,,
,,,


elasticnet_cv,xgboost,lasso_cv,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.869 +/- 0.113 (in 3 folds) ROC-AUC (macro OvO): 0.869 +/- 0.113 (in 3 folds) au-PRC (weighted OvO): 0.693 +/- 0.171 (in 3 folds) au-PRC (macro OvO): 0.693 +/- 0.171 (in 3 folds) Accuracy: 0.871 +/- 0.077 (in 3 folds) MCC: 0.252 +/- 0.436 (in 3 folds) Global scores: Accuracy: 0.871 MCC: 0.361 Global classification report:  precision recall f1-score support  18+ 0.87 1.00 0.93 112  under 18 1.00 0.15 0.26 20  accuracy 0.87 132  macro avg 0.93 0.57 0.60 132 weighted avg 0.89 0.87 0.83 132,Per-fold scores: ROC-AUC (weighted OvO): 0.841 +/- 0.167 (in 3 folds) ROC-AUC (macro OvO): 0.841 +/- 0.167 (in 3 folds) au-PRC (weighted OvO): 0.644 +/- 0.261 (in 3 folds) au-PRC (macro OvO): 0.644 +/- 0.261 (in 3 folds) Accuracy: 0.889 +/- 0.055 (in 3 folds) MCC: 0.391 +/- 0.425 (in 3 folds) Global scores: Accuracy: 0.886 MCC: 0.466 Global classification report:  precision recall f1-score support  18+ 0.89 0.99 0.94 112  under 18 0.86 0.30 0.44 20  accuracy 0.89 132  macro avg 0.87 0.65 0.69 132 weighted avg 0.88 0.89 0.86 132,Per-fold scores: ROC-AUC (weighted OvO): 0.816 +/- 0.100 (in 3 folds) ROC-AUC (macro OvO): 0.816 +/- 0.100 (in 3 folds) au-PRC (weighted OvO): 0.568 +/- 0.217 (in 3 folds) au-PRC (macro OvO): 0.568 +/- 0.217 (in 3 folds) Accuracy: 0.849 +/- 0.042 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.848 MCC: 0.000 Global classification report:  precision recall f1-score support  18+ 0.85 1.00 0.92 112  under 18 0.00 0.00 0.00 20  accuracy 0.85 132  macro avg 0.42 0.50 0.46 132 weighted avg 0.72 0.85 0.78 132,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.151 +/- 0.042 (in 3 folds) au-PRC (macro OvO): 0.151 +/- 0.042 (in 3 folds) Accuracy: 0.849 +/- 0.042 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.848 MCC: 0.000 Global classification report:  precision recall f1-score 
support  18+ 0.85 1.00 0.92 112  under 18 0.00 0.00 0.00 20  accuracy 0.85 132  macro avg 0.42 0.50 0.46 132 weighted avg 0.72 0.85 0.78 132
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.452 +/- 0.034 (in 3 folds) ROC-AUC (macro OvO): 0.452 +/- 0.034 (in 3 folds) au-PRC (weighted OvO): 0.147 +/- 0.040 (in 3 folds) au-PRC (macro OvO): 0.147 +/- 0.040 (in 3 folds) Accuracy: 0.704 +/- 0.026 (in 3 folds) MCC: -0.087 +/- 0.056 (in 3 folds) Global scores: Accuracy: 0.705 MCC: -0.083 Global classification report:  precision recall f1-score support  18+ 0.83 0.81 0.82 112  under 18 0.09 0.10 0.09 20  accuracy 0.70 132  macro avg 0.46 0.46 0.46 132 weighted avg 0.72 0.70 0.71 132


## Apply train-smaller model -- Test set performance - With and without tuning on validation set



---

# GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
dummy_stratified,0.518 +/- 0.048 (in 3 folds),0.518 +/- 0.048 (in 3 folds),0.474 +/- 0.120 (in 3 folds),0.474 +/- 0.120 (in 3 folds),0.538 +/- 0.063 (in 3 folds),0.037 +/- 0.096 (in 3 folds),0.538,0.063,132.0,0.0,132.0,0.0,False
lasso_cv,0.516 +/- 0.123 (in 3 folds),0.516 +/- 0.123 (in 3 folds),0.502 +/- 0.213 (in 3 folds),0.502 +/- 0.213 (in 3 folds),0.441 +/- 0.063 (in 3 folds),0.045 +/- 0.084 (in 3 folds),0.439,-0.101,132.0,0.0,132.0,0.0,False
xgboost,0.514 +/- 0.139 (in 3 folds),0.514 +/- 0.139 (in 3 folds),0.475 +/- 0.175 (in 3 folds),0.475 +/- 0.175 (in 3 folds),0.513 +/- 0.096 (in 3 folds),0.058 +/- 0.179 (in 3 folds),0.515,0.049,132.0,0.0,132.0,0.0,False
rf_multiclass,0.507 +/- 0.142 (in 3 folds),0.507 +/- 0.142 (in 3 folds),0.499 +/- 0.203 (in 3 folds),0.499 +/- 0.203 (in 3 folds),0.468 +/- 0.060 (in 3 folds),0.007 +/- 0.157 (in 3 folds),0.47,-0.043,132.0,0.0,132.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.137 (in 3 folds),0.463 +/- 0.137 (in 3 folds),0.397 +/- 0.069 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.394,-0.199,132.0,0.0,132.0,0.0,False
ridge_cv,0.480 +/- 0.034 (in 3 folds),0.480 +/- 0.034 (in 3 folds),0.454 +/- 0.151 (in 3 folds),0.454 +/- 0.151 (in 3 folds),0.418 +/- 0.039 (in 3 folds),-0.015 +/- 0.026 (in 3 folds),0.417,-0.157,132.0,0.0,132.0,0.0,False
lasso_multiclass,0.475 +/- 0.125 (in 3 folds),0.475 +/- 0.125 (in 3 folds),0.496 +/- 0.214 (in 3 folds),0.496 +/- 0.214 (in 3 folds),0.491 +/- 0.114 (in 3 folds),0.002 +/- 0.207 (in 3 folds),0.492,-0.001,132.0,0.0,132.0,0.0,False
elasticnet_cv,0.458 +/- 0.040 (in 3 folds),0.458 +/- 0.040 (in 3 folds),0.464 +/- 0.154 (in 3 folds),0.464 +/- 0.154 (in 3 folds),0.373 +/- 0.060 (in 3 folds),-0.145 +/- 0.128 (in 3 folds),0.371,-0.247,132.0,0.0,132.0,0.0,False
linearsvm_ovr,0.439 +/- 0.118 (in 3 folds),0.439 +/- 0.118 (in 3 folds),0.465 +/- 0.204 (in 3 folds),0.465 +/- 0.204 (in 3 folds),0.487 +/- 0.083 (in 3 folds),-0.053 +/- 0.215 (in 3 folds),0.485,-0.029,132.0,0.0,132.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
dummy_stratified,0.518 +/- 0.048 (in 3 folds),0.518 +/- 0.048 (in 3 folds),0.474 +/- 0.120 (in 3 folds),0.474 +/- 0.120 (in 3 folds),0.538 +/- 0.063 (in 3 folds),0.037 +/- 0.096 (in 3 folds),0.538,0.063,132,0,132,0.0,False
lasso_cv,0.516 +/- 0.123 (in 3 folds),0.516 +/- 0.123 (in 3 folds),0.502 +/- 0.213 (in 3 folds),0.502 +/- 0.213 (in 3 folds),0.441 +/- 0.063 (in 3 folds),0.045 +/- 0.084 (in 3 folds),0.439,-0.101,132,0,132,0.0,False
xgboost,0.514 +/- 0.139 (in 3 folds),0.514 +/- 0.139 (in 3 folds),0.475 +/- 0.175 (in 3 folds),0.475 +/- 0.175 (in 3 folds),0.513 +/- 0.096 (in 3 folds),0.058 +/- 0.179 (in 3 folds),0.515,0.049,132,0,132,0.0,False
rf_multiclass,0.507 +/- 0.142 (in 3 folds),0.507 +/- 0.142 (in 3 folds),0.499 +/- 0.203 (in 3 folds),0.499 +/- 0.203 (in 3 folds),0.468 +/- 0.060 (in 3 folds),0.007 +/- 0.157 (in 3 folds),0.47,-0.043,132,0,132,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.137 (in 3 folds),0.463 +/- 0.137 (in 3 folds),0.397 +/- 0.069 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.394,-0.199,132,0,132,0.0,False
ridge_cv,0.480 +/- 0.034 (in 3 folds),0.480 +/- 0.034 (in 3 folds),0.454 +/- 0.151 (in 3 folds),0.454 +/- 0.151 (in 3 folds),0.418 +/- 0.039 (in 3 folds),-0.015 +/- 0.026 (in 3 folds),0.417,-0.157,132,0,132,0.0,False
lasso_multiclass,0.475 +/- 0.125 (in 3 folds),0.475 +/- 0.125 (in 3 folds),0.496 +/- 0.214 (in 3 folds),0.496 +/- 0.214 (in 3 folds),0.491 +/- 0.114 (in 3 folds),0.002 +/- 0.207 (in 3 folds),0.492,-0.001,132,0,132,0.0,False
elasticnet_cv,0.458 +/- 0.040 (in 3 folds),0.458 +/- 0.040 (in 3 folds),0.464 +/- 0.154 (in 3 folds),0.464 +/- 0.154 (in 3 folds),0.373 +/- 0.060 (in 3 folds),-0.145 +/- 0.128 (in 3 folds),0.371,-0.247,132,0,132,0.0,False
linearsvm_ovr,0.439 +/- 0.118 (in 3 folds),0.439 +/- 0.118 (in 3 folds),0.465 +/- 0.204 (in 3 folds),0.465 +/- 0.204 (in 3 folds),0.487 +/- 0.083 (in 3 folds),-0.053 +/- 0.215 (in 3 folds),0.485,-0.029,132,0,132,0.0,False


dummy_stratified,lasso_cv,xgboost,rf_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.518 +/- 0.048 (in 3 folds) ROC-AUC (macro OvO): 0.518 +/- 0.048 (in 3 folds) au-PRC (weighted OvO): 0.474 +/- 0.120 (in 3 folds) au-PRC (macro OvO): 0.474 +/- 0.120 (in 3 folds) Accuracy: 0.538 +/- 0.063 (in 3 folds) MCC: 0.037 +/- 0.096 (in 3 folds) Global scores: Accuracy: 0.538 MCC: 0.063 Global classification report:  precision recall f1-score support  F 0.56 0.62 0.59 71  M 0.50 0.44 0.47 61  accuracy 0.54 132  macro avg 0.53 0.53 0.53 132 weighted avg 0.53 0.54 0.53 132,Per-fold scores: ROC-AUC (weighted OvO): 0.516 +/- 0.123 (in 3 folds) ROC-AUC (macro OvO): 0.516 +/- 0.123 (in 3 folds) au-PRC (weighted OvO): 0.502 +/- 0.213 (in 3 folds) au-PRC (macro OvO): 0.502 +/- 0.213 (in 3 folds) Accuracy: 0.441 +/- 0.063 (in 3 folds) MCC: 0.045 +/- 0.084 (in 3 folds) Global scores: Accuracy: 0.439 MCC: -0.101 Global classification report:  precision recall f1-score support  F 0.47 0.28 0.35 71  M 0.43 0.62 0.51 61  accuracy 0.44 132  macro avg 0.45 0.45 0.43 132 weighted avg 0.45 0.44 0.42 132,Per-fold scores: ROC-AUC (weighted OvO): 0.514 +/- 0.139 (in 3 folds) ROC-AUC (macro OvO): 0.514 +/- 0.139 (in 3 folds) au-PRC (weighted OvO): 0.475 +/- 0.175 (in 3 folds) au-PRC (macro OvO): 0.475 +/- 0.175 (in 3 folds) Accuracy: 0.513 +/- 0.096 (in 3 folds) MCC: 0.058 +/- 0.179 (in 3 folds) Global scores: Accuracy: 0.515 MCC: 0.049 Global classification report:  precision recall f1-score support  F 0.57 0.41 0.48 71  M 0.48 0.64 0.55 61  accuracy 0.52 132  macro avg 0.53 0.52 0.51 132 weighted avg 0.53 0.52 0.51 132,Per-fold scores: ROC-AUC (weighted OvO): 0.507 +/- 0.142 (in 3 folds) ROC-AUC (macro OvO): 0.507 +/- 0.142 (in 3 folds) au-PRC (weighted OvO): 0.499 +/- 0.203 (in 3 folds) au-PRC (macro OvO): 0.499 +/- 0.203 (in 3 folds) Accuracy: 0.468 +/- 0.060 (in 3 folds) MCC: 0.007 +/- 0.157 (in 3 folds) Global scores: Accuracy: 0.470 MCC: -0.043 Global classification report:  precision recall f1-score support  F 0.51 0.35 0.42 71  M 
0.45 0.61 0.51 61  accuracy 0.47 132  macro avg 0.48 0.48 0.47 132 weighted avg 0.48 0.47 0.46 132
,,,
,,,
,,,


dummy_most_frequent,ridge_cv,lasso_multiclass,elasticnet_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.463 +/- 0.137 (in 3 folds) au-PRC (macro OvO): 0.463 +/- 0.137 (in 3 folds) Accuracy: 0.397 +/- 0.069 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.394 MCC: -0.199 Global classification report:  precision recall f1-score support  F 0.40 0.25 0.31 71  M 0.39 0.56 0.46 61  accuracy 0.39 132  macro avg 0.40 0.41 0.38 132 weighted avg 0.40 0.39 0.38 132,Per-fold scores: ROC-AUC (weighted OvO): 0.480 +/- 0.034 (in 3 folds) ROC-AUC (macro OvO): 0.480 +/- 0.034 (in 3 folds) au-PRC (weighted OvO): 0.454 +/- 0.151 (in 3 folds) au-PRC (macro OvO): 0.454 +/- 0.151 (in 3 folds) Accuracy: 0.418 +/- 0.039 (in 3 folds) MCC: -0.015 +/- 0.026 (in 3 folds) Global scores: Accuracy: 0.417 MCC: -0.157 Global classification report:  precision recall f1-score support  F 0.45 0.35 0.39 71  M 0.39 0.49 0.44 61  accuracy 0.42 132  macro avg 0.42 0.42 0.42 132 weighted avg 0.42 0.42 0.41 132,Per-fold scores: ROC-AUC (weighted OvO): 0.475 +/- 0.125 (in 3 folds) ROC-AUC (macro OvO): 0.475 +/- 0.125 (in 3 folds) au-PRC (weighted OvO): 0.496 +/- 0.214 (in 3 folds) au-PRC (macro OvO): 0.496 +/- 0.214 (in 3 folds) Accuracy: 0.491 +/- 0.114 (in 3 folds) MCC: 0.002 +/- 0.207 (in 3 folds) Global scores: Accuracy: 0.492 MCC: -0.001 Global classification report:  precision recall f1-score support  F 0.54 0.41 0.46 71  M 0.46 0.59 0.52 61  accuracy 0.49 132  macro avg 0.50 0.50 0.49 132 weighted avg 0.50 0.49 0.49 132,Per-fold scores: ROC-AUC (weighted OvO): 0.458 +/- 0.040 (in 3 folds) ROC-AUC (macro OvO): 0.458 +/- 0.040 (in 3 folds) au-PRC (weighted OvO): 0.464 +/- 0.154 (in 3 folds) au-PRC (macro OvO): 0.464 +/- 0.154 (in 3 folds) Accuracy: 0.373 +/- 0.060 (in 3 folds) MCC: -0.145 +/- 0.128 (in 3 folds) Global scores: Accuracy: 0.371 MCC: -0.247 Global classification report:  precision recall f1-score support  F 0.38 0.27 0.31 71 
 M 0.37 0.49 0.42 61  accuracy 0.37 132  macro avg 0.37 0.38 0.37 132 weighted avg 0.37 0.37 0.36 132
,,,
,,,
,,,


linearsvm_ovr
Per-fold scores: ROC-AUC (weighted OvO): 0.439 +/- 0.118 (in 3 folds) ROC-AUC (macro OvO): 0.439 +/- 0.118 (in 3 folds) au-PRC (weighted OvO): 0.465 +/- 0.204 (in 3 folds) au-PRC (macro OvO): 0.465 +/- 0.204 (in 3 folds) Accuracy: 0.487 +/- 0.083 (in 3 folds) MCC: -0.053 +/- 0.215 (in 3 folds) Global scores: Accuracy: 0.485 MCC: -0.029 Global classification report:  precision recall f1-score support  F 0.52 0.48 0.50 71  M 0.45 0.49 0.47 61  accuracy 0.48 132  macro avg 0.49 0.49 0.48 132 weighted avg 0.49 0.48 0.49 132


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.555 +/- 0.036 (in 3 folds),0.555 +/- 0.036 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.548 +/- 0.076 (in 3 folds),0.105 +/- 0.110 (in 3 folds),0.545,0.099,191.0,0.0,191.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.555 +/- 0.036 (in 3 folds),0.555 +/- 0.036 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.541 +/- 0.042 (in 3 folds),0.040 +/- 0.047 (in 3 folds),0.539,0.08,191.0,0.0,191.0,0.0,False
lasso_multiclass,0.519 +/- 0.022 (in 3 folds),0.519 +/- 0.022 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.504 +/- 0.033 (in 3 folds),0.022 +/- 0.049 (in 3 folds),0.503,0.011,191.0,0.0,191.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.519 +/- 0.022 (in 3 folds),0.519 +/- 0.022 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.551 +/- 0.052 (in 3 folds),0.095 +/- 0.100 (in 3 folds),0.55,0.098,191.0,0.0,191.0,0.0,False
xgboost,0.517 +/- 0.067 (in 3 folds),0.517 +/- 0.067 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.525 +/- 0.041 (in 3 folds),0.058 +/- 0.098 (in 3 folds),0.524,0.056,191.0,0.0,191.0,0.0,False
xgboost.decision_thresholds_tuned,0.517 +/- 0.067 (in 3 folds),0.517 +/- 0.067 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.480 +/- 0.063 (in 3 folds),-0.015 +/- 0.093 (in 3 folds),0.476,-0.032,191.0,0.0,191.0,0.0,False
linearsvm_ovr,0.515 +/- 0.030 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.488 +/- 0.036 (in 3 folds),-0.017 +/- 0.073 (in 3 folds),0.487,-0.024,191.0,0.0,191.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.515 +/- 0.030 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.495 +/- 0.053 (in 3 folds),-0.028 +/- 0.083 (in 3 folds),0.492,-0.01,191.0,0.0,191.0,0.0,False
ridge_cv,0.512 +/- 0.021 (in 3 folds),0.512 +/- 0.021 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.498 +/- 0.052 (in 3 folds),0.001 +/- 0.001 (in 3 folds),0.497,-0.002,191.0,0.0,191.0,0.0,False
ridge_cv.decision_thresholds_tuned,0.512 +/- 0.021 (in 3 folds),0.512 +/- 0.021 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.490 +/- 0.069 (in 3 folds),0.049 +/- 0.085 (in 3 folds),0.487,0.07,191.0,0.0,191.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.555 +/- 0.036 (in 3 folds),0.555 +/- 0.036 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.548 +/- 0.076 (in 3 folds),0.105 +/- 0.110 (in 3 folds),0.545,0.099,191,0,191,0.0,False
rf_multiclass.decision_thresholds_tuned,0.555 +/- 0.036 (in 3 folds),0.555 +/- 0.036 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.554 +/- 0.085 (in 3 folds),0.541 +/- 0.042 (in 3 folds),0.040 +/- 0.047 (in 3 folds),0.539,0.08,191,0,191,0.0,False
lasso_multiclass,0.519 +/- 0.022 (in 3 folds),0.519 +/- 0.022 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.504 +/- 0.033 (in 3 folds),0.022 +/- 0.049 (in 3 folds),0.503,0.011,191,0,191,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.519 +/- 0.022 (in 3 folds),0.519 +/- 0.022 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.544 +/- 0.052 (in 3 folds),0.551 +/- 0.052 (in 3 folds),0.095 +/- 0.100 (in 3 folds),0.55,0.098,191,0,191,0.0,False
xgboost,0.517 +/- 0.067 (in 3 folds),0.517 +/- 0.067 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.525 +/- 0.041 (in 3 folds),0.058 +/- 0.098 (in 3 folds),0.524,0.056,191,0,191,0.0,False
xgboost.decision_thresholds_tuned,0.517 +/- 0.067 (in 3 folds),0.517 +/- 0.067 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.524 +/- 0.043 (in 3 folds),0.480 +/- 0.063 (in 3 folds),-0.015 +/- 0.093 (in 3 folds),0.476,-0.032,191,0,191,0.0,False
linearsvm_ovr,0.515 +/- 0.030 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.488 +/- 0.036 (in 3 folds),-0.017 +/- 0.073 (in 3 folds),0.487,-0.024,191,0,191,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.515 +/- 0.030 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.541 +/- 0.069 (in 3 folds),0.495 +/- 0.053 (in 3 folds),-0.028 +/- 0.083 (in 3 folds),0.492,-0.01,191,0,191,0.0,False
ridge_cv,0.512 +/- 0.021 (in 3 folds),0.512 +/- 0.021 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.498 +/- 0.052 (in 3 folds),0.001 +/- 0.001 (in 3 folds),0.497,-0.002,191,0,191,0.0,False
ridge_cv.decision_thresholds_tuned,0.512 +/- 0.021 (in 3 folds),0.512 +/- 0.021 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.498 +/- 0.082 (in 3 folds),0.490 +/- 0.069 (in 3 folds),0.049 +/- 0.085 (in 3 folds),0.487,0.07,191,0,191,0.0,False


rf_multiclass,rf_multiclass.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.555 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.555 +/- 0.036 (in 3 folds) au-PRC (weighted OvO): 0.554 +/- 0.085 (in 3 folds) au-PRC (macro OvO): 0.554 +/- 0.085 (in 3 folds) Accuracy: 0.548 +/- 0.076 (in 3 folds) MCC: 0.105 +/- 0.110 (in 3 folds) Global scores: Accuracy: 0.545 MCC: 0.099 Global classification report:  precision recall f1-score support  F 0.58 0.44 0.50 99  M 0.52 0.65 0.58 92  accuracy 0.54 191  macro avg 0.55 0.55 0.54 191 weighted avg 0.55 0.54 0.54 191,Per-fold scores: ROC-AUC (weighted OvO): 0.555 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.555 +/- 0.036 (in 3 folds) au-PRC (weighted OvO): 0.554 +/- 0.085 (in 3 folds) au-PRC (macro OvO): 0.554 +/- 0.085 (in 3 folds) Accuracy: 0.541 +/- 0.042 (in 3 folds) MCC: 0.040 +/- 0.047 (in 3 folds) Global scores: Accuracy: 0.539 MCC: 0.080 Global classification report:  precision recall f1-score support  F 0.56 0.52 0.54 99  M 0.52 0.57 0.54 92  accuracy 0.54 191  macro avg 0.54 0.54 0.54 191 weighted avg 0.54 0.54 0.54 191,Per-fold scores: ROC-AUC (weighted OvO): 0.519 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.519 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.544 +/- 0.052 (in 3 folds) au-PRC (macro OvO): 0.544 +/- 0.052 (in 3 folds) Accuracy: 0.504 +/- 0.033 (in 3 folds) MCC: 0.022 +/- 0.049 (in 3 folds) Global scores: Accuracy: 0.503 MCC: 0.011 Global classification report:  precision recall f1-score support  F 0.52 0.43 0.48 99  M 0.49 0.58 0.53 92  accuracy 0.50 191  macro avg 0.51 0.51 0.50 191 weighted avg 0.51 0.50 0.50 191,Per-fold scores: ROC-AUC (weighted OvO): 0.519 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.519 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.544 +/- 0.052 (in 3 folds) au-PRC (macro OvO): 0.544 +/- 0.052 (in 3 folds) Accuracy: 0.551 +/- 0.052 (in 3 folds) MCC: 0.095 +/- 0.100 (in 3 folds) Global scores: Accuracy: 0.550 MCC: 0.098 Global classification report:  precision recall f1-score support  F 0.57 0.57 0.57 99  M 
0.53 0.53 0.53 92  accuracy 0.55 191  macro avg 0.55 0.55 0.55 191 weighted avg 0.55 0.55 0.55 191
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.517 +/- 0.067 (in 3 folds) ROC-AUC (macro OvO): 0.517 +/- 0.067 (in 3 folds) au-PRC (weighted OvO): 0.524 +/- 0.043 (in 3 folds) au-PRC (macro OvO): 0.524 +/- 0.043 (in 3 folds) Accuracy: 0.525 +/- 0.041 (in 3 folds) MCC: 0.058 +/- 0.098 (in 3 folds) Global scores: Accuracy: 0.524 MCC: 0.056 Global classification report:  precision recall f1-score support  F 0.55 0.42 0.48 99  M 0.50 0.63 0.56 92  accuracy 0.52 191  macro avg 0.53 0.53 0.52 191 weighted avg 0.53 0.52 0.52 191,Per-fold scores: ROC-AUC (weighted OvO): 0.517 +/- 0.067 (in 3 folds) ROC-AUC (macro OvO): 0.517 +/- 0.067 (in 3 folds) au-PRC (weighted OvO): 0.524 +/- 0.043 (in 3 folds) au-PRC (macro OvO): 0.524 +/- 0.043 (in 3 folds) Accuracy: 0.480 +/- 0.063 (in 3 folds) MCC: -0.015 +/- 0.093 (in 3 folds) Global scores: Accuracy: 0.476 MCC: -0.032 Global classification report:  precision recall f1-score support  F 0.49 0.21 0.30 99  M 0.47 0.76 0.58 92  accuracy 0.48 191  macro avg 0.48 0.49 0.44 191 weighted avg 0.48 0.48 0.43 191,Per-fold scores: ROC-AUC (weighted OvO): 0.515 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.515 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.541 +/- 0.069 (in 3 folds) au-PRC (macro OvO): 0.541 +/- 0.069 (in 3 folds) Accuracy: 0.488 +/- 0.036 (in 3 folds) MCC: -0.017 +/- 0.073 (in 3 folds) Global scores: Accuracy: 0.487 MCC: -0.024 Global classification report:  precision recall f1-score support  F 0.51 0.46 0.48 99  M 0.47 0.51 0.49 92  accuracy 0.49 191  macro avg 0.49 0.49 0.49 191 weighted avg 0.49 0.49 0.49 191,Per-fold scores: ROC-AUC (weighted OvO): 0.515 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.515 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.541 +/- 0.069 (in 3 folds) au-PRC (macro OvO): 0.541 +/- 0.069 (in 3 folds) Accuracy: 0.495 +/- 0.053 (in 3 folds) MCC: -0.028 +/- 0.083 (in 3 folds) Global scores: Accuracy: 0.492 MCC: -0.010 Global classification report:  precision recall f1-score support  F 0.51 0.41 0.46 99 
 M 0.48 0.58 0.52 92  accuracy 0.49 191  macro avg 0.49 0.50 0.49 191 weighted avg 0.50 0.49 0.49 191
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.512 +/- 0.021 (in 3 folds) ROC-AUC (macro OvO): 0.512 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.498 +/- 0.082 (in 3 folds) au-PRC (macro OvO): 0.498 +/- 0.082 (in 3 folds) Accuracy: 0.498 +/- 0.052 (in 3 folds) MCC: 0.001 +/- 0.001 (in 3 folds) Global scores: Accuracy: 0.497 MCC: -0.002 Global classification report:  precision recall f1-score support  F 0.52 0.45 0.48 99  M 0.48 0.54 0.51 92  accuracy 0.50 191  macro avg 0.50 0.50 0.50 191 weighted avg 0.50 0.50 0.50 191,Per-fold scores: ROC-AUC (weighted OvO): 0.512 +/- 0.021 (in 3 folds) ROC-AUC (macro OvO): 0.512 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.498 +/- 0.082 (in 3 folds) au-PRC (macro OvO): 0.498 +/- 0.082 (in 3 folds) Accuracy: 0.490 +/- 0.069 (in 3 folds) MCC: 0.049 +/- 0.085 (in 3 folds) Global scores: Accuracy: 0.487 MCC: 0.070 Global classification report:  precision recall f1-score support  F 1.00 0.01 0.02 99  M 0.48 1.00 0.65 92  accuracy 0.49 191  macro avg 0.74 0.51 0.34 191 weighted avg 0.75 0.49 0.32 191,Per-fold scores: ROC-AUC (weighted OvO): 0.497 +/- 0.033 (in 3 folds) ROC-AUC (macro OvO): 0.497 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.511 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.511 +/- 0.029 (in 3 folds) Accuracy: 0.477 +/- 0.033 (in 3 folds) MCC: -0.058 +/- 0.061 (in 3 folds) Global scores: Accuracy: 0.476 MCC: -0.035 Global classification report:  precision recall f1-score support  F 0.49 0.27 0.35 99  M 0.47 0.70 0.56 92  accuracy 0.48 191  macro avg 0.48 0.48 0.46 191 weighted avg 0.48 0.48 0.45 191,Per-fold scores: ROC-AUC (weighted OvO): 0.497 +/- 0.033 (in 3 folds) ROC-AUC (macro OvO): 0.497 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.511 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.511 +/- 0.029 (in 3 folds) Accuracy: 0.530 +/- 0.046 (in 3 folds) MCC: 0.020 +/- 0.028 (in 3 folds) Global scores: Accuracy: 0.529 MCC: 0.072 Global classification report:  precision recall f1-score support  F 0.57 0.38 0.46 99  M 
0.51 0.68 0.58 92  accuracy 0.53 191  macro avg 0.54 0.53 0.52 191 weighted avg 0.54 0.53 0.52 191
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.486 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.486 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.505 +/- 0.039 (in 3 folds) au-PRC (macro OvO): 0.505 +/- 0.039 (in 3 folds) Accuracy: 0.503 +/- 0.043 (in 3 folds) MCC: -0.054 +/- 0.101 (in 3 folds) Global scores: Accuracy: 0.503 MCC: 0.013 Global classification report:  precision recall f1-score support  F 0.53 0.40 0.46 99  M 0.49 0.61 0.54 92  accuracy 0.50 191  macro avg 0.51 0.51 0.50 191 weighted avg 0.51 0.50 0.50 191,Per-fold scores: ROC-AUC (weighted OvO): 0.486 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.486 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.505 +/- 0.039 (in 3 folds) au-PRC (macro OvO): 0.505 +/- 0.039 (in 3 folds) Accuracy: 0.520 +/- 0.050 (in 3 folds) MCC: 0.001 +/- 0.002 (in 3 folds) Global scores: Accuracy: 0.518 MCC: 0.056 Global classification report:  precision recall f1-score support  F 0.56 0.32 0.41 99  M 0.50 0.73 0.59 92  accuracy 0.52 191  macro avg 0.53 0.53 0.50 191 weighted avg 0.53 0.52 0.50 191
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.covid_vs_healthy trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.963 +/- 0.007 (in 3 folds),0.963 +/- 0.007 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.948 +/- 0.010 (in 3 folds),0.844 +/- 0.040 (in 3 folds),0.948,0.844,191.0,0.0,191.0,0.0,False
elasticnet_cv,0.954 +/- 0.023 (in 3 folds),0.954 +/- 0.023 (in 3 folds),0.983 +/- 0.011 (in 3 folds),0.983 +/- 0.011 (in 3 folds),0.822 +/- 0.058 (in 3 folds),0.227 +/- 0.394 (in 3 folds),0.822,0.394,191.0,0.0,191.0,0.0,False
linearsvm_ovr,0.950 +/- 0.024 (in 3 folds),0.950 +/- 0.024 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.947 +/- 0.025 (in 3 folds),0.843 +/- 0.084 (in 3 folds),0.948,0.844,191.0,0.0,191.0,0.0,False
rf_multiclass,0.948 +/- 0.031 (in 3 folds),0.948 +/- 0.031 (in 3 folds),0.972 +/- 0.019 (in 3 folds),0.972 +/- 0.019 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.760 +/- 0.061 (in 3 folds),0.921,0.762,191.0,0.0,191.0,0.0,False
xgboost,0.946 +/- 0.026 (in 3 folds),0.946 +/- 0.026 (in 3 folds),0.967 +/- 0.035 (in 3 folds),0.967 +/- 0.035 (in 3 folds),0.942 +/- 0.010 (in 3 folds),0.829 +/- 0.031 (in 3 folds),0.942,0.827,191.0,0.0,191.0,0.0,False
ridge_cv,0.935 +/- 0.011 (in 3 folds),0.935 +/- 0.011 (in 3 folds),0.971 +/- 0.006 (in 3 folds),0.971 +/- 0.006 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191.0,0.0,191.0,0.0,True
lasso_cv,0.929 +/- 0.026 (in 3 folds),0.929 +/- 0.026 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191.0,0.0,191.0,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191.0,0.0,191.0,0.0,True
dummy_stratified,0.415 +/- 0.006 (in 3 folds),0.415 +/- 0.006 (in 3 folds),0.752 +/- 0.019 (in 3 folds),0.752 +/- 0.019 (in 3 folds),0.607 +/- 0.017 (in 3 folds),-0.175 +/- 0.016 (in 3 folds),0.607,-0.175,191.0,0.0,191.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.963 +/- 0.007 (in 3 folds),0.963 +/- 0.007 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.948 +/- 0.010 (in 3 folds),0.844 +/- 0.040 (in 3 folds),0.948,0.844,191,0,191,0.0,False
elasticnet_cv,0.954 +/- 0.023 (in 3 folds),0.954 +/- 0.023 (in 3 folds),0.983 +/- 0.011 (in 3 folds),0.983 +/- 0.011 (in 3 folds),0.822 +/- 0.058 (in 3 folds),0.227 +/- 0.394 (in 3 folds),0.822,0.394,191,0,191,0.0,False
linearsvm_ovr,0.950 +/- 0.024 (in 3 folds),0.950 +/- 0.024 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.947 +/- 0.025 (in 3 folds),0.843 +/- 0.084 (in 3 folds),0.948,0.844,191,0,191,0.0,False
rf_multiclass,0.948 +/- 0.031 (in 3 folds),0.948 +/- 0.031 (in 3 folds),0.972 +/- 0.019 (in 3 folds),0.972 +/- 0.019 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.760 +/- 0.061 (in 3 folds),0.921,0.762,191,0,191,0.0,False
xgboost,0.946 +/- 0.026 (in 3 folds),0.946 +/- 0.026 (in 3 folds),0.967 +/- 0.035 (in 3 folds),0.967 +/- 0.035 (in 3 folds),0.942 +/- 0.010 (in 3 folds),0.829 +/- 0.031 (in 3 folds),0.942,0.827,191,0,191,0.0,False
ridge_cv,0.935 +/- 0.011 (in 3 folds),0.935 +/- 0.011 (in 3 folds),0.971 +/- 0.006 (in 3 folds),0.971 +/- 0.006 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191,0,191,0.0,True
lasso_cv,0.929 +/- 0.026 (in 3 folds),0.929 +/- 0.026 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191,0,191,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.780 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.78,0.0,191,0,191,0.0,True
dummy_stratified,0.415 +/- 0.006 (in 3 folds),0.415 +/- 0.006 (in 3 folds),0.752 +/- 0.019 (in 3 folds),0.752 +/- 0.019 (in 3 folds),0.607 +/- 0.017 (in 3 folds),-0.175 +/- 0.016 (in 3 folds),0.607,-0.175,191,0,191,0.0,False


lasso_multiclass,elasticnet_cv,linearsvm_ovr,rf_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.963 +/- 0.007 (in 3 folds) ROC-AUC (macro OvO): 0.963 +/- 0.007 (in 3 folds) au-PRC (weighted OvO): 0.986 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.986 +/- 0.004 (in 3 folds) Accuracy: 0.948 +/- 0.010 (in 3 folds) MCC: 0.844 +/- 0.040 (in 3 folds) Global scores: Accuracy: 0.948 MCC: 0.844 Global classification report:  precision recall f1-score support  Covid19 0.92 0.83 0.88 42 Healthy/Background 0.95 0.98 0.97 149  accuracy 0.95 191  macro avg 0.94 0.91 0.92 191  weighted avg 0.95 0.95 0.95 191,Per-fold scores: ROC-AUC (weighted OvO): 0.954 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.954 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.983 +/- 0.011 (in 3 folds) au-PRC (macro OvO): 0.983 +/- 0.011 (in 3 folds) Accuracy: 0.822 +/- 0.058 (in 3 folds) MCC: 0.227 +/- 0.394 (in 3 folds) Global scores: Accuracy: 0.822 MCC: 0.394 Global classification report:  precision recall f1-score support  Covid19 1.00 0.19 0.32 42 Healthy/Background 0.81 1.00 0.90 149  accuracy 0.82 191  macro avg 0.91 0.60 0.61 191  weighted avg 0.86 0.82 0.77 191,Per-fold scores: ROC-AUC (weighted OvO): 0.950 +/- 0.024 (in 3 folds) ROC-AUC (macro OvO): 0.950 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.980 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.980 +/- 0.012 (in 3 folds) Accuracy: 0.947 +/- 0.025 (in 3 folds) MCC: 0.843 +/- 0.084 (in 3 folds) Global scores: Accuracy: 0.948 MCC: 0.844 Global classification report:  precision recall f1-score support  Covid19 0.92 0.83 0.88 42 Healthy/Background 0.95 0.98 0.97 149  accuracy 0.95 191  macro avg 0.94 0.91 0.92 191  weighted avg 0.95 0.95 0.95 191,Per-fold scores: ROC-AUC (weighted OvO): 0.948 +/- 0.031 (in 3 folds) ROC-AUC (macro OvO): 0.948 +/- 0.031 (in 3 folds) au-PRC (weighted OvO): 0.972 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.972 +/- 0.019 (in 3 folds) Accuracy: 0.921 +/- 0.017 (in 3 folds) MCC: 0.760 +/- 0.061 (in 3 folds) Global scores: Accuracy: 0.921 MCC: 0.762 Global 
classification report:  precision recall f1-score support  Covid19 0.97 0.67 0.79 42 Healthy/Background 0.91 0.99 0.95 149  accuracy 0.92 191  macro avg 0.94 0.83 0.87 191  weighted avg 0.93 0.92 0.92 191
,,,
,,,
,,,


xgboost,ridge_cv,lasso_cv,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.946 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.946 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.967 +/- 0.035 (in 3 folds) au-PRC (macro OvO): 0.967 +/- 0.035 (in 3 folds) Accuracy: 0.942 +/- 0.010 (in 3 folds) MCC: 0.829 +/- 0.031 (in 3 folds) Global scores: Accuracy: 0.942 MCC: 0.827 Global classification report:  precision recall f1-score support  Covid19 0.94 0.79 0.86 42 Healthy/Background 0.94 0.99 0.96 149  accuracy 0.94 191  macro avg 0.94 0.89 0.91 191  weighted avg 0.94 0.94 0.94 191,Per-fold scores: ROC-AUC (weighted OvO): 0.935 +/- 0.011 (in 3 folds) ROC-AUC (macro OvO): 0.935 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.971 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.971 +/- 0.006 (in 3 folds) Accuracy: 0.780 +/- 0.016 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.780 MCC: 0.000 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 42 Healthy/Background 0.78 1.00 0.88 149  accuracy 0.78 191  macro avg 0.39 0.50 0.44 191  weighted avg 0.61 0.78 0.68 191,Per-fold scores: ROC-AUC (weighted OvO): 0.929 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.929 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.969 +/- 0.011 (in 3 folds) au-PRC (macro OvO): 0.969 +/- 0.011 (in 3 folds) Accuracy: 0.780 +/- 0.016 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.780 MCC: 0.000 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 42 Healthy/Background 0.78 1.00 0.88 149  accuracy 0.78 191  macro avg 0.39 0.50 0.44 191  weighted avg 0.61 0.78 0.68 191,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.780 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.780 +/- 0.016 (in 3 folds) Accuracy: 0.780 +/- 0.016 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.780 MCC: 0.000 Global 
classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 42 Healthy/Background 0.78 1.00 0.88 149  accuracy 0.78 191  macro avg 0.39 0.50 0.44 191  weighted avg 0.61 0.78 0.68 191
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.415 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.415 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.752 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.752 +/- 0.019 (in 3 folds) Accuracy: 0.607 +/- 0.017 (in 3 folds) MCC: -0.175 +/- 0.016 (in 3 folds) Global scores: Accuracy: 0.607 MCC: -0.175 Global classification report:  precision recall f1-score support  Covid19 0.08 0.07 0.07 42 Healthy/Background 0.74 0.76 0.75 149  accuracy 0.61 191  macro avg 0.41 0.41 0.41 191  weighted avg 0.60 0.61 0.60 191


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.971 +/- 0.026 (in 3 folds),0.971 +/- 0.026 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.943 +/- 0.025 (in 3 folds),0.832 +/- 0.078 (in 3 folds),0.944,0.833,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.971 +/- 0.026 (in 3 folds),0.971 +/- 0.026 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.936 +/- 0.037 (in 3 folds),0.813 +/- 0.114 (in 3 folds),0.937,0.816,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
xgboost,0.964 +/- 0.034 (in 3 folds),0.964 +/- 0.034 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.919 +/- 0.040 (in 3 folds),0.770 +/- 0.105 (in 3 folds),0.919,0.762,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
xgboost.decision_thresholds_tuned,0.964 +/- 0.034 (in 3 folds),0.964 +/- 0.034 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.929 +/- 0.043 (in 3 folds),0.805 +/- 0.115 (in 3 folds),0.93,0.801,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
lasso_multiclass,0.960 +/- 0.037 (in 3 folds),0.960 +/- 0.037 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.957 +/- 0.039 (in 3 folds),0.872 +/- 0.117 (in 3 folds),0.958,0.875,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.960 +/- 0.037 (in 3 folds),0.960 +/- 0.037 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.950 +/- 0.027 (in 3 folds),0.852 +/- 0.083 (in 3 folds),0.951,0.854,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
lasso_cv,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.778 +/- 0.010 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.778,0.0,disease.separate_past_exposures,284.0,0.0,284.0,0.0,True
lasso_cv.decision_thresholds_tuned,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.915 +/- 0.056 (in 3 folds),0.758 +/- 0.159 (in 3 folds),0.915,0.755,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
elasticnet_cv,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.813 +/- 0.071 (in 3 folds),0.221 +/- 0.383 (in 3 folds),0.813,0.358,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.940 +/- 0.038 (in 3 folds),0.819 +/- 0.115 (in 3 folds),0.94,0.823,disease.separate_past_exposures,284.0,0.0,284.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.971 +/- 0.026 (in 3 folds),0.971 +/- 0.026 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.943 +/- 0.025 (in 3 folds),0.832 +/- 0.078 (in 3 folds),0.944,0.833,disease.separate_past_exposures,284,0,284,0.0,False
rf_multiclass.decision_thresholds_tuned,0.971 +/- 0.026 (in 3 folds),0.971 +/- 0.026 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.936 +/- 0.037 (in 3 folds),0.813 +/- 0.114 (in 3 folds),0.937,0.816,disease.separate_past_exposures,284,0,284,0.0,False
xgboost,0.964 +/- 0.034 (in 3 folds),0.964 +/- 0.034 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.919 +/- 0.040 (in 3 folds),0.770 +/- 0.105 (in 3 folds),0.919,0.762,disease.separate_past_exposures,284,0,284,0.0,False
xgboost.decision_thresholds_tuned,0.964 +/- 0.034 (in 3 folds),0.964 +/- 0.034 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.987 +/- 0.014 (in 3 folds),0.929 +/- 0.043 (in 3 folds),0.805 +/- 0.115 (in 3 folds),0.93,0.801,disease.separate_past_exposures,284,0,284,0.0,False
lasso_multiclass,0.960 +/- 0.037 (in 3 folds),0.960 +/- 0.037 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.957 +/- 0.039 (in 3 folds),0.872 +/- 0.117 (in 3 folds),0.958,0.875,disease.separate_past_exposures,284,0,284,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.960 +/- 0.037 (in 3 folds),0.960 +/- 0.037 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.985 +/- 0.014 (in 3 folds),0.950 +/- 0.027 (in 3 folds),0.852 +/- 0.083 (in 3 folds),0.951,0.854,disease.separate_past_exposures,284,0,284,0.0,False
lasso_cv,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.778 +/- 0.010 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.778,0.0,disease.separate_past_exposures,284,0,284,0.0,True
lasso_cv.decision_thresholds_tuned,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.976 +/- 0.026 (in 3 folds),0.915 +/- 0.056 (in 3 folds),0.758 +/- 0.159 (in 3 folds),0.915,0.755,disease.separate_past_exposures,284,0,284,0.0,False
elasticnet_cv,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.813 +/- 0.071 (in 3 folds),0.221 +/- 0.383 (in 3 folds),0.813,0.358,disease.separate_past_exposures,284,0,284,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.956 +/- 0.039 (in 3 folds),0.956 +/- 0.039 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.982 +/- 0.016 (in 3 folds),0.940 +/- 0.038 (in 3 folds),0.819 +/- 0.115 (in 3 folds),0.94,0.823,disease.separate_past_exposures,284,0,284,0.0,False


rf_multiclass,rf_multiclass.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.971 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.971 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.988 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.988 +/- 0.012 (in 3 folds) Accuracy: 0.943 +/- 0.025 (in 3 folds) MCC: 0.832 +/- 0.078 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.944 MCC: 0.833 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.98 0.76 0.86 63 Healthy/Background 0.94 1.00 0.96 221  accuracy 0.94 284  macro avg 0.96 0.88 0.91 284  weighted avg 0.95 0.94 0.94 284,Per-fold scores: ROC-AUC (weighted OvO): 0.971 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.971 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.988 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.988 +/- 0.012 (in 3 folds) Accuracy: 0.936 +/- 0.037 (in 3 folds) MCC: 0.813 +/- 0.114 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.937 MCC: 0.816 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.86 0.86 0.86 63 Healthy/Background 0.96 0.96 0.96 221  accuracy 0.94 284  macro avg 0.91 0.91 0.91 284  weighted avg 0.94 0.94 0.94 284,Per-fold scores: ROC-AUC (weighted OvO): 0.964 +/- 0.034 (in 3 folds) ROC-AUC (macro OvO): 0.964 +/- 0.034 (in 3 folds) au-PRC (weighted OvO): 0.987 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.987 +/- 0.014 (in 3 folds) Accuracy: 0.919 +/- 0.040 (in 3 folds) MCC: 0.770 +/- 0.105 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.919 MCC: 0.762 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.83 
0.79 0.81 63 Healthy/Background 0.94 0.95 0.95 221  accuracy 0.92 284  macro avg 0.89 0.87 0.88 284  weighted avg 0.92 0.92 0.92 284,Per-fold scores: ROC-AUC (weighted OvO): 0.964 +/- 0.034 (in 3 folds) ROC-AUC (macro OvO): 0.964 +/- 0.034 (in 3 folds) au-PRC (weighted OvO): 0.987 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.987 +/- 0.014 (in 3 folds) Accuracy: 0.929 +/- 0.043 (in 3 folds) MCC: 0.805 +/- 0.115 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.930 MCC: 0.801 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.82 0.87 0.85 63 Healthy/Background 0.96 0.95 0.95 221  accuracy 0.93 284  macro avg 0.89 0.91 0.90 284  weighted avg 0.93 0.93 0.93 284
,,,
,,,


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.960 +/- 0.037 (in 3 folds) ROC-AUC (macro OvO): 0.960 +/- 0.037 (in 3 folds) au-PRC (weighted OvO): 0.985 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.985 +/- 0.014 (in 3 folds) Accuracy: 0.957 +/- 0.039 (in 3 folds) MCC: 0.872 +/- 0.117 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.958 MCC: 0.875 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.98 0.83 0.90 63 Healthy/Background 0.95 1.00 0.97 221  accuracy 0.96 284  macro avg 0.97 0.91 0.94 284  weighted avg 0.96 0.96 0.96 284,Per-fold scores: ROC-AUC (weighted OvO): 0.960 +/- 0.037 (in 3 folds) ROC-AUC (macro OvO): 0.960 +/- 0.037 (in 3 folds) au-PRC (weighted OvO): 0.985 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.985 +/- 0.014 (in 3 folds) Accuracy: 0.950 +/- 0.027 (in 3 folds) MCC: 0.852 +/- 0.083 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.951 MCC: 0.854 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.96 0.81 0.88 63 Healthy/Background 0.95 0.99 0.97 221  accuracy 0.95 284  macro avg 0.96 0.90 0.92 284  weighted avg 0.95 0.95 0.95 284,Per-fold scores: ROC-AUC (weighted OvO): 0.956 +/- 0.039 (in 3 folds) ROC-AUC (macro OvO): 0.956 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.976 +/- 0.026 (in 3 folds) au-PRC (macro OvO): 0.976 +/- 0.026 (in 3 folds) Accuracy: 0.778 +/- 0.010 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.778 MCC: 0.000 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.00 
0.00 0.00 63 Healthy/Background 0.78 1.00 0.88 221  accuracy 0.78 284  macro avg 0.39 0.50 0.44 284  weighted avg 0.61 0.78 0.68 284,Per-fold scores: ROC-AUC (weighted OvO): 0.956 +/- 0.039 (in 3 folds) ROC-AUC (macro OvO): 0.956 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.976 +/- 0.026 (in 3 folds) au-PRC (macro OvO): 0.976 +/- 0.026 (in 3 folds) Accuracy: 0.915 +/- 0.056 (in 3 folds) MCC: 0.758 +/- 0.159 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.915 MCC: 0.755 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.81 0.81 0.81 63 Healthy/Background 0.95 0.95 0.95 221  accuracy 0.92 284  macro avg 0.88 0.88 0.88 284  weighted avg 0.92 0.92 0.92 284
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.956 +/- 0.039 (in 3 folds) ROC-AUC (macro OvO): 0.956 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.982 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.982 +/- 0.016 (in 3 folds) Accuracy: 0.813 +/- 0.071 (in 3 folds) MCC: 0.221 +/- 0.383 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.813 MCC: 0.358 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 1.00 0.16 0.27 63 Healthy/Background 0.81 1.00 0.89 221  accuracy 0.81 284  macro avg 0.90 0.58 0.58 284  weighted avg 0.85 0.81 0.76 284,Per-fold scores: ROC-AUC (weighted OvO): 0.956 +/- 0.039 (in 3 folds) ROC-AUC (macro OvO): 0.956 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.982 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.982 +/- 0.016 (in 3 folds) Accuracy: 0.940 +/- 0.038 (in 3 folds) MCC: 0.819 +/- 0.115 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.940 MCC: 0.823 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.90 0.83 0.86 63 Healthy/Background 0.95 0.97 0.96 221  accuracy 0.94 284  macro avg 0.92 0.90 0.91 284  weighted avg 0.94 0.94 0.94 284,Per-fold scores: ROC-AUC (weighted OvO): 0.953 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.953 +/- 0.036 (in 3 folds) au-PRC (weighted OvO): 0.981 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.981 +/- 0.016 (in 3 folds) Accuracy: 0.958 +/- 0.022 (in 3 folds) MCC: 0.876 +/- 0.064 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.958 MCC: 0.875 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.95 
0.86 0.90 63 Healthy/Background 0.96 0.99 0.97 221  accuracy 0.96 284  macro avg 0.95 0.92 0.94 284  weighted avg 0.96 0.96 0.96 284,Per-fold scores: ROC-AUC (weighted OvO): 0.953 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.953 +/- 0.036 (in 3 folds) au-PRC (weighted OvO): 0.981 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.981 +/- 0.016 (in 3 folds) Accuracy: 0.940 +/- 0.006 (in 3 folds) MCC: 0.829 +/- 0.017 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.940 MCC: 0.823 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.90 0.83 0.86 63 Healthy/Background 0.95 0.97 0.96 221  accuracy 0.94 284  macro avg 0.92 0.90 0.91 284  weighted avg 0.94 0.94 0.94 284
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.949 +/- 0.048 (in 3 folds) ROC-AUC (macro OvO): 0.949 +/- 0.048 (in 3 folds) au-PRC (weighted OvO): 0.978 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.978 +/- 0.021 (in 3 folds) Accuracy: 0.778 +/- 0.010 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.778 MCC: 0.000 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.00 0.00 0.00 63 Healthy/Background 0.78 1.00 0.88 221  accuracy 0.78 284  macro avg 0.39 0.50 0.44 284  weighted avg 0.61 0.78 0.68 284,Per-fold scores: ROC-AUC (weighted OvO): 0.949 +/- 0.048 (in 3 folds) ROC-AUC (macro OvO): 0.949 +/- 0.048 (in 3 folds) au-PRC (weighted OvO): 0.978 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.978 +/- 0.021 (in 3 folds) Accuracy: 0.827 +/- 0.095 (in 3 folds) MCC: 0.268 +/- 0.464 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.827 MCC: 0.418 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.89 0.25 0.40 63 Healthy/Background 0.82 0.99 0.90 221  accuracy 0.83 284  macro avg 0.86 0.62 0.65 284  weighted avg 0.84 0.83 0.79 284
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.hiv_vs_healthy trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
xgboost,0.966 +/- 0.010 (in 3 folds),0.966 +/- 0.010 (in 3 folds),0.987 +/- 0.003 (in 3 folds),0.987 +/- 0.003 (in 3 folds),0.897 +/- 0.014 (in 3 folds),0.753 +/- 0.037 (in 3 folds),0.897,0.752,213.0,0.0,213.0,0.0,False
lasso_cv,0.965 +/- 0.027 (in 3 folds),0.965 +/- 0.027 (in 3 folds),0.986 +/- 0.010 (in 3 folds),0.986 +/- 0.010 (in 3 folds),0.907 +/- 0.052 (in 3 folds),0.771 +/- 0.134 (in 3 folds),0.906,0.772,213.0,0.0,213.0,0.0,False
rf_multiclass,0.964 +/- 0.016 (in 3 folds),0.964 +/- 0.016 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.902 +/- 0.012 (in 3 folds),0.761 +/- 0.033 (in 3 folds),0.901,0.761,213.0,0.0,213.0,0.0,False
elasticnet_cv,0.963 +/- 0.024 (in 3 folds),0.963 +/- 0.024 (in 3 folds),0.985 +/- 0.009 (in 3 folds),0.985 +/- 0.009 (in 3 folds),0.902 +/- 0.036 (in 3 folds),0.759 +/- 0.096 (in 3 folds),0.901,0.76,213.0,0.0,213.0,0.0,False
lasso_multiclass,0.961 +/- 0.030 (in 3 folds),0.961 +/- 0.030 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.925 +/- 0.031 (in 3 folds),0.824 +/- 0.073 (in 3 folds),0.925,0.823,213.0,0.0,213.0,0.0,False
ridge_cv,0.949 +/- 0.029 (in 3 folds),0.949 +/- 0.029 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.765 +/- 0.069 (in 3 folds),0.327 +/- 0.297 (in 3 folds),0.765,0.382,213.0,0.0,213.0,0.0,False
linearsvm_ovr,0.942 +/- 0.022 (in 3 folds),0.942 +/- 0.022 (in 3 folds),0.975 +/- 0.010 (in 3 folds),0.975 +/- 0.010 (in 3 folds),0.901 +/- 0.026 (in 3 folds),0.771 +/- 0.059 (in 3 folds),0.901,0.772,213.0,0.0,213.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.7,0.0,213.0,0.0,213.0,0.0,True
dummy_stratified,0.424 +/- 0.015 (in 3 folds),0.424 +/- 0.015 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.530 +/- 0.010 (in 3 folds),-0.159 +/- 0.031 (in 3 folds),0.531,-0.159,213.0,0.0,213.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
xgboost,0.966 +/- 0.010 (in 3 folds),0.966 +/- 0.010 (in 3 folds),0.987 +/- 0.003 (in 3 folds),0.987 +/- 0.003 (in 3 folds),0.897 +/- 0.014 (in 3 folds),0.753 +/- 0.037 (in 3 folds),0.897,0.752,213,0,213,0.0,False
lasso_cv,0.965 +/- 0.027 (in 3 folds),0.965 +/- 0.027 (in 3 folds),0.986 +/- 0.010 (in 3 folds),0.986 +/- 0.010 (in 3 folds),0.907 +/- 0.052 (in 3 folds),0.771 +/- 0.134 (in 3 folds),0.906,0.772,213,0,213,0.0,False
rf_multiclass,0.964 +/- 0.016 (in 3 folds),0.964 +/- 0.016 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.986 +/- 0.004 (in 3 folds),0.902 +/- 0.012 (in 3 folds),0.761 +/- 0.033 (in 3 folds),0.901,0.761,213,0,213,0.0,False
elasticnet_cv,0.963 +/- 0.024 (in 3 folds),0.963 +/- 0.024 (in 3 folds),0.985 +/- 0.009 (in 3 folds),0.985 +/- 0.009 (in 3 folds),0.902 +/- 0.036 (in 3 folds),0.759 +/- 0.096 (in 3 folds),0.901,0.76,213,0,213,0.0,False
lasso_multiclass,0.961 +/- 0.030 (in 3 folds),0.961 +/- 0.030 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.925 +/- 0.031 (in 3 folds),0.824 +/- 0.073 (in 3 folds),0.925,0.823,213,0,213,0.0,False
ridge_cv,0.949 +/- 0.029 (in 3 folds),0.949 +/- 0.029 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.765 +/- 0.069 (in 3 folds),0.327 +/- 0.297 (in 3 folds),0.765,0.382,213,0,213,0.0,False
linearsvm_ovr,0.942 +/- 0.022 (in 3 folds),0.942 +/- 0.022 (in 3 folds),0.975 +/- 0.010 (in 3 folds),0.975 +/- 0.010 (in 3 folds),0.901 +/- 0.026 (in 3 folds),0.771 +/- 0.059 (in 3 folds),0.901,0.772,213,0,213,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.699 +/- 0.008 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.7,0.0,213,0,213,0.0,True
dummy_stratified,0.424 +/- 0.015 (in 3 folds),0.424 +/- 0.015 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.530 +/- 0.010 (in 3 folds),-0.159 +/- 0.031 (in 3 folds),0.531,-0.159,213,0,213,0.0,False


xgboost,lasso_cv,rf_multiclass,elasticnet_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.966 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.966 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.987 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.987 +/- 0.003 (in 3 folds) Accuracy: 0.897 +/- 0.014 (in 3 folds) MCC: 0.753 +/- 0.037 (in 3 folds) Global scores: Accuracy: 0.897 MCC: 0.752 Global classification report:  precision recall f1-score support  HIV 0.84 0.81 0.83 64 Healthy/Background 0.92 0.93 0.93 149  accuracy 0.90 213  macro avg 0.88 0.87 0.88 213  weighted avg 0.90 0.90 0.90 213,Per-fold scores: ROC-AUC (weighted OvO): 0.965 +/- 0.027 (in 3 folds) ROC-AUC (macro OvO): 0.965 +/- 0.027 (in 3 folds) au-PRC (weighted OvO): 0.986 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.986 +/- 0.010 (in 3 folds) Accuracy: 0.907 +/- 0.052 (in 3 folds) MCC: 0.771 +/- 0.134 (in 3 folds) Global scores: Accuracy: 0.906 MCC: 0.772 Global classification report:  precision recall f1-score support  HIV 0.88 0.80 0.84 64 Healthy/Background 0.92 0.95 0.93 149  accuracy 0.91 213  macro avg 0.90 0.87 0.89 213  weighted avg 0.91 0.91 0.90 213,Per-fold scores: ROC-AUC (weighted OvO): 0.964 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.964 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.986 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.986 +/- 0.004 (in 3 folds) Accuracy: 0.902 +/- 0.012 (in 3 folds) MCC: 0.761 +/- 0.033 (in 3 folds) Global scores: Accuracy: 0.901 MCC: 0.761 Global classification report:  precision recall f1-score support  HIV 0.86 0.80 0.83 64 Healthy/Background 0.92 0.95 0.93 149  accuracy 0.90 213  macro avg 0.89 0.87 0.88 213  weighted avg 0.90 0.90 0.90 213,Per-fold scores: ROC-AUC (weighted OvO): 0.963 +/- 0.024 (in 3 folds) ROC-AUC (macro OvO): 0.963 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.985 +/- 0.009 (in 3 folds) au-PRC (macro OvO): 0.985 +/- 0.009 (in 3 folds) Accuracy: 0.902 +/- 0.036 (in 3 folds) MCC: 0.759 +/- 0.096 (in 3 folds) Global scores: Accuracy: 0.901 MCC: 0.760 Global classification report:  
precision recall f1-score support  HIV 0.89 0.77 0.82 64 Healthy/Background 0.91 0.96 0.93 149  accuracy 0.90 213  macro avg 0.90 0.86 0.88 213  weighted avg 0.90 0.90 0.90 213
,,,
,,,
,,,


lasso_multiclass,ridge_cv,linearsvm_ovr,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.961 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.961 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.984 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.984 +/- 0.013 (in 3 folds) Accuracy: 0.925 +/- 0.031 (in 3 folds) MCC: 0.824 +/- 0.073 (in 3 folds) Global scores: Accuracy: 0.925 MCC: 0.823 Global classification report:  precision recall f1-score support  HIV 0.86 0.89 0.88 64 Healthy/Background 0.95 0.94 0.95 149  accuracy 0.92 213  macro avg 0.91 0.92 0.91 213  weighted avg 0.93 0.92 0.93 213,Per-fold scores: ROC-AUC (weighted OvO): 0.949 +/- 0.029 (in 3 folds) ROC-AUC (macro OvO): 0.949 +/- 0.029 (in 3 folds) au-PRC (weighted OvO): 0.980 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.980 +/- 0.012 (in 3 folds) Accuracy: 0.765 +/- 0.069 (in 3 folds) MCC: 0.327 +/- 0.297 (in 3 folds) Global scores: Accuracy: 0.765 MCC: 0.382 Global classification report:  precision recall f1-score support  HIV 0.79 0.30 0.43 64 Healthy/Background 0.76 0.97 0.85 149  accuracy 0.77 213  macro avg 0.78 0.63 0.64 213  weighted avg 0.77 0.77 0.73 213,Per-fold scores: ROC-AUC (weighted OvO): 0.942 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.942 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.975 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.975 +/- 0.010 (in 3 folds) Accuracy: 0.901 +/- 0.026 (in 3 folds) MCC: 0.771 +/- 0.059 (in 3 folds) Global scores: Accuracy: 0.901 MCC: 0.772 Global classification report:  precision recall f1-score support  HIV 0.81 0.88 0.84 64 Healthy/Background 0.94 0.91 0.93 149  accuracy 0.90 213  macro avg 0.88 0.89 0.89 213  weighted avg 0.90 0.90 0.90 213,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.699 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.699 +/- 0.008 (in 3 folds) Accuracy: 0.699 +/- 0.008 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.700 MCC: 0.000 Global classification report:  
precision recall f1-score support  HIV 0.00 0.00 0.00 64 Healthy/Background 0.70 1.00 0.82 149  accuracy 0.70 213  macro avg 0.35 0.50 0.41 213  weighted avg 0.49 0.70 0.58 213
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.424 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.424 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.669 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.669 +/- 0.007 (in 3 folds) Accuracy: 0.530 +/- 0.010 (in 3 folds) MCC: -0.159 +/- 0.031 (in 3 folds) Global scores: Accuracy: 0.531 MCC: -0.159 Global classification report:  precision recall f1-score support  HIV 0.18 0.16 0.17 64 Healthy/Background 0.66 0.69 0.67 149  accuracy 0.53 213  macro avg 0.42 0.42 0.42 213  weighted avg 0.51 0.53 0.52 213


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.982 +/- 0.009 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.938 +/- 0.018 (in 3 folds),0.855 +/- 0.040 (in 3 folds),0.937,0.854,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.982 +/- 0.009 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.931 +/- 0.013 (in 3 folds),0.840 +/- 0.027 (in 3 folds),0.931,0.839,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
lasso_cv,0.979 +/- 0.005 (in 3 folds),0.979 +/- 0.005 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.931 +/- 0.016 (in 3 folds),0.836 +/- 0.038 (in 3 folds),0.931,0.836,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
lasso_cv.decision_thresholds_tuned,0.979 +/- 0.005 (in 3 folds),0.979 +/- 0.005 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.915 +/- 0.032 (in 3 folds),0.809 +/- 0.061 (in 3 folds),0.915,0.804,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
elasticnet_cv,0.978 +/- 0.007 (in 3 folds),0.978 +/- 0.007 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.928 +/- 0.015 (in 3 folds),0.828 +/- 0.037 (in 3 folds),0.928,0.828,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.978 +/- 0.007 (in 3 folds),0.978 +/- 0.007 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.919 +/- 0.023 (in 3 folds),0.814 +/- 0.049 (in 3 folds),0.918,0.81,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
rf_multiclass,0.968 +/- 0.009 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.915 +/- 0.015 (in 3 folds),0.800 +/- 0.036 (in 3 folds),0.915,0.799,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.968 +/- 0.009 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.893 +/- 0.021 (in 3 folds),0.772 +/- 0.015 (in 3 folds),0.893,0.765,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
linearsvm_ovr,0.967 +/- 0.016 (in 3 folds),0.967 +/- 0.016 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.913 +/- 0.031 (in 3 folds),0.800 +/- 0.068 (in 3 folds),0.912,0.798,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.967 +/- 0.016 (in 3 folds),0.967 +/- 0.016 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.903 +/- 0.062 (in 3 folds),0.804 +/- 0.095 (in 3 folds),0.903,0.783,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.982 +/- 0.009 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.938 +/- 0.018 (in 3 folds),0.855 +/- 0.040 (in 3 folds),0.937,0.854,disease.separate_past_exposures,319,0,319,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.982 +/- 0.009 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.931 +/- 0.013 (in 3 folds),0.840 +/- 0.027 (in 3 folds),0.931,0.839,disease.separate_past_exposures,319,0,319,0.0,False
lasso_cv,0.979 +/- 0.005 (in 3 folds),0.979 +/- 0.005 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.931 +/- 0.016 (in 3 folds),0.836 +/- 0.038 (in 3 folds),0.931,0.836,disease.separate_past_exposures,319,0,319,0.0,False
lasso_cv.decision_thresholds_tuned,0.979 +/- 0.005 (in 3 folds),0.979 +/- 0.005 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.915 +/- 0.032 (in 3 folds),0.809 +/- 0.061 (in 3 folds),0.915,0.804,disease.separate_past_exposures,319,0,319,0.0,False
elasticnet_cv,0.978 +/- 0.007 (in 3 folds),0.978 +/- 0.007 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.928 +/- 0.015 (in 3 folds),0.828 +/- 0.037 (in 3 folds),0.928,0.828,disease.separate_past_exposures,319,0,319,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.978 +/- 0.007 (in 3 folds),0.978 +/- 0.007 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.990 +/- 0.003 (in 3 folds),0.919 +/- 0.023 (in 3 folds),0.814 +/- 0.049 (in 3 folds),0.918,0.81,disease.separate_past_exposures,319,0,319,0.0,False
rf_multiclass,0.968 +/- 0.009 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.915 +/- 0.015 (in 3 folds),0.800 +/- 0.036 (in 3 folds),0.915,0.799,disease.separate_past_exposures,319,0,319,0.0,False
rf_multiclass.decision_thresholds_tuned,0.968 +/- 0.009 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.893 +/- 0.021 (in 3 folds),0.772 +/- 0.015 (in 3 folds),0.893,0.765,disease.separate_past_exposures,319,0,319,0.0,False
linearsvm_ovr,0.967 +/- 0.016 (in 3 folds),0.967 +/- 0.016 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.913 +/- 0.031 (in 3 folds),0.800 +/- 0.068 (in 3 folds),0.912,0.798,disease.separate_past_exposures,319,0,319,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.967 +/- 0.016 (in 3 folds),0.967 +/- 0.016 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.903 +/- 0.062 (in 3 folds),0.804 +/- 0.095 (in 3 folds),0.903,0.783,disease.separate_past_exposures,319,0,319,0.0,False


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.982 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.982 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.992 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.992 +/- 0.003 (in 3 folds) Accuracy: 0.938 +/- 0.018 (in 3 folds) MCC: 0.855 +/- 0.040 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.937 MCC: 0.854 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.89 0.91 0.90 98 Healthy/Background 0.96 0.95 0.95 221  accuracy 0.94 319  macro avg 0.92 0.93 0.93 319  weighted avg 0.94 0.94 0.94 319,Per-fold scores: ROC-AUC (weighted OvO): 0.982 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.982 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.992 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.992 +/- 0.003 (in 3 folds) Accuracy: 0.931 +/- 0.013 (in 3 folds) MCC: 0.840 +/- 0.027 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.931 MCC: 0.839 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.88 0.90 0.89 98 Healthy/Background 0.95 0.95 0.95 221  accuracy 0.93 319  macro avg 0.92 0.92 0.92 319  weighted avg 0.93 0.93 0.93 319,Per-fold scores: ROC-AUC (weighted OvO): 0.979 +/- 0.005 (in 3 folds) ROC-AUC (macro OvO): 0.979 +/- 0.005 (in 3 folds) au-PRC (weighted OvO): 0.991 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.991 +/- 0.002 (in 3 folds) Accuracy: 0.931 +/- 0.016 (in 3 folds) MCC: 0.836 +/- 0.038 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.931 MCC: 0.836 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.91 0.86 0.88 
98 Healthy/Background 0.94 0.96 0.95 221  accuracy 0.93 319  macro avg 0.93 0.91 0.92 319  weighted avg 0.93 0.93 0.93 319,Per-fold scores: ROC-AUC (weighted OvO): 0.979 +/- 0.005 (in 3 folds) ROC-AUC (macro OvO): 0.979 +/- 0.005 (in 3 folds) au-PRC (weighted OvO): 0.991 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.991 +/- 0.002 (in 3 folds) Accuracy: 0.915 +/- 0.032 (in 3 folds) MCC: 0.809 +/- 0.061 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.915 MCC: 0.804 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.84 0.89 0.87 98 Healthy/Background 0.95 0.93 0.94 221  accuracy 0.92 319  macro avg 0.90 0.91 0.90 319  weighted avg 0.92 0.92 0.92 319
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.978 +/- 0.007 (in 3 folds) ROC-AUC (macro OvO): 0.978 +/- 0.007 (in 3 folds) au-PRC (weighted OvO): 0.990 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.990 +/- 0.003 (in 3 folds) Accuracy: 0.928 +/- 0.015 (in 3 folds) MCC: 0.828 +/- 0.037 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.928 MCC: 0.828 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.92 0.84 0.88 98 Healthy/Background 0.93 0.97 0.95 221  accuracy 0.93 319  macro avg 0.93 0.90 0.91 319  weighted avg 0.93 0.93 0.93 319,Per-fold scores: ROC-AUC (weighted OvO): 0.978 +/- 0.007 (in 3 folds) ROC-AUC (macro OvO): 0.978 +/- 0.007 (in 3 folds) au-PRC (weighted OvO): 0.990 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.990 +/- 0.003 (in 3 folds) Accuracy: 0.919 +/- 0.023 (in 3 folds) MCC: 0.814 +/- 0.049 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.918 MCC: 0.810 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.86 0.88 0.87 98 Healthy/Background 0.95 0.94 0.94 221  accuracy 0.92 319  macro avg 0.90 0.91 0.90 319  weighted avg 0.92 0.92 0.92 319,Per-fold scores: ROC-AUC (weighted OvO): 0.968 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.968 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.985 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.985 +/- 0.004 (in 3 folds) Accuracy: 0.915 +/- 0.015 (in 3 folds) MCC: 0.800 +/- 0.036 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.915 MCC: 0.799 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.88 0.84 0.86 
98 Healthy/Background 0.93 0.95 0.94 221  accuracy 0.92 319  macro avg 0.91 0.89 0.90 319  weighted avg 0.91 0.92 0.91 319,Per-fold scores: ROC-AUC (weighted OvO): 0.968 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.968 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.985 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.985 +/- 0.004 (in 3 folds) Accuracy: 0.893 +/- 0.021 (in 3 folds) MCC: 0.772 +/- 0.015 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.893 MCC: 0.765 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.78 0.91 0.84 98 Healthy/Background 0.96 0.89 0.92 221  accuracy 0.89 319  macro avg 0.87 0.90 0.88 319  weighted avg 0.90 0.89 0.90 319
,,,
,,,


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.967 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.967 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.985 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.985 +/- 0.007 (in 3 folds) Accuracy: 0.913 +/- 0.031 (in 3 folds) MCC: 0.800 +/- 0.068 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.912 MCC: 0.798 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.84 0.89 0.86 98 Healthy/Background 0.95 0.92 0.94 221  accuracy 0.91 319  macro avg 0.89 0.91 0.90 319  weighted avg 0.91 0.91 0.91 319,Per-fold scores: ROC-AUC (weighted OvO): 0.967 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.967 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.985 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.985 +/- 0.007 (in 3 folds) Accuracy: 0.903 +/- 0.062 (in 3 folds) MCC: 0.804 +/- 0.095 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.903 MCC: 0.783 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.80 0.91 0.85 98 Healthy/Background 0.96 0.90 0.93 221  accuracy 0.90 319  macro avg 0.88 0.90 0.89 319  weighted avg 0.91 0.90 0.90 319,Per-fold scores: ROC-AUC (weighted OvO): 0.967 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.967 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.986 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.986 +/- 0.006 (in 3 folds) Accuracy: 0.797 +/- 0.095 (in 3 folds) MCC: 0.424 +/- 0.372 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.796 MCC: 0.498 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.92 0.37 0.53 
98 Healthy/Background 0.78 0.99 0.87 221  accuracy 0.80 319  macro avg 0.85 0.68 0.70 319  weighted avg 0.82 0.80 0.76 319,Per-fold scores: ROC-AUC (weighted OvO): 0.967 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.967 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.986 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.986 +/- 0.006 (in 3 folds) Accuracy: 0.900 +/- 0.022 (in 3 folds) MCC: 0.768 +/- 0.044 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.900 MCC: 0.764 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.84 0.84 0.84 98 Healthy/Background 0.93 0.93 0.93 221  accuracy 0.90 319  macro avg 0.88 0.88 0.88 319  weighted avg 0.90 0.90 0.90 319
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.967 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.967 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.986 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.986 +/- 0.004 (in 3 folds) Accuracy: 0.918 +/- 0.022 (in 3 folds) MCC: 0.807 +/- 0.052 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.918 MCC: 0.807 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.88 0.85 0.86 98 Healthy/Background 0.93 0.95 0.94 221  accuracy 0.92 319  macro avg 0.91 0.90 0.90 319  weighted avg 0.92 0.92 0.92 319,Per-fold scores: ROC-AUC (weighted OvO): 0.967 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.967 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.986 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.986 +/- 0.004 (in 3 folds) Accuracy: 0.900 +/- 0.013 (in 3 folds) MCC: 0.766 +/- 0.028 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.900 MCC: 0.766 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.83 0.85 0.84 98 Healthy/Background 0.93 0.92 0.93 221  accuracy 0.90 319  macro avg 0.88 0.89 0.88 319  weighted avg 0.90 0.90 0.90 319
,
,


---

# GeneLocus.BCR, TargetObsColumnEnum.lupus_vs_healthy trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.027 (in 3 folds),0.938 +/- 0.027 (in 3 folds),0.904 +/- 0.040 (in 3 folds),0.904 +/- 0.040 (in 3 folds),0.885 +/- 0.008 (in 3 folds),0.734 +/- 0.012 (in 3 folds),0.885,0.733,217.0,0.0,217.0,0.0,False
linearsvm_ovr,0.936 +/- 0.030 (in 3 folds),0.936 +/- 0.030 (in 3 folds),0.893 +/- 0.048 (in 3 folds),0.893 +/- 0.048 (in 3 folds),0.876 +/- 0.025 (in 3 folds),0.717 +/- 0.043 (in 3 folds),0.876,0.715,217.0,0.0,217.0,0.0,False
elasticnet_cv,0.934 +/- 0.022 (in 3 folds),0.934 +/- 0.022 (in 3 folds),0.905 +/- 0.036 (in 3 folds),0.905 +/- 0.036 (in 3 folds),0.885 +/- 0.021 (in 3 folds),0.725 +/- 0.044 (in 3 folds),0.885,0.725,217.0,0.0,217.0,0.0,False
ridge_cv,0.933 +/- 0.028 (in 3 folds),0.933 +/- 0.028 (in 3 folds),0.894 +/- 0.062 (in 3 folds),0.894 +/- 0.062 (in 3 folds),0.812 +/- 0.134 (in 3 folds),0.491 +/- 0.426 (in 3 folds),0.811,0.558,217.0,0.0,217.0,0.0,False
lasso_cv,0.930 +/- 0.023 (in 3 folds),0.930 +/- 0.023 (in 3 folds),0.894 +/- 0.040 (in 3 folds),0.894 +/- 0.040 (in 3 folds),0.880 +/- 0.015 (in 3 folds),0.715 +/- 0.027 (in 3 folds),0.88,0.715,217.0,0.0,217.0,0.0,False
xgboost,0.909 +/- 0.002 (in 3 folds),0.909 +/- 0.002 (in 3 folds),0.868 +/- 0.013 (in 3 folds),0.868 +/- 0.013 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.647 +/- 0.035 (in 3 folds),0.848,0.638,217.0,0.0,217.0,0.0,False
rf_multiclass,0.899 +/- 0.027 (in 3 folds),0.899 +/- 0.027 (in 3 folds),0.852 +/- 0.046 (in 3 folds),0.852 +/- 0.046 (in 3 folds),0.857 +/- 0.044 (in 3 folds),0.660 +/- 0.091 (in 3 folds),0.857,0.657,217.0,0.0,217.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.313 +/- 0.028 (in 3 folds),0.313 +/- 0.028 (in 3 folds),0.687 +/- 0.028 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.687,0.0,217.0,0.0,217.0,0.0,True
dummy_stratified,0.394 +/- 0.034 (in 3 folds),0.394 +/- 0.034 (in 3 folds),0.294 +/- 0.028 (in 3 folds),0.294 +/- 0.028 (in 3 folds),0.498 +/- 0.040 (in 3 folds),-0.223 +/- 0.075 (in 3 folds),0.498,-0.223,217.0,0.0,217.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.938 +/- 0.027 (in 3 folds),0.938 +/- 0.027 (in 3 folds),0.904 +/- 0.040 (in 3 folds),0.904 +/- 0.040 (in 3 folds),0.885 +/- 0.008 (in 3 folds),0.734 +/- 0.012 (in 3 folds),0.885,0.733,217,0,217,0.0,False
linearsvm_ovr,0.936 +/- 0.030 (in 3 folds),0.936 +/- 0.030 (in 3 folds),0.893 +/- 0.048 (in 3 folds),0.893 +/- 0.048 (in 3 folds),0.876 +/- 0.025 (in 3 folds),0.717 +/- 0.043 (in 3 folds),0.876,0.715,217,0,217,0.0,False
elasticnet_cv,0.934 +/- 0.022 (in 3 folds),0.934 +/- 0.022 (in 3 folds),0.905 +/- 0.036 (in 3 folds),0.905 +/- 0.036 (in 3 folds),0.885 +/- 0.021 (in 3 folds),0.725 +/- 0.044 (in 3 folds),0.885,0.725,217,0,217,0.0,False
ridge_cv,0.933 +/- 0.028 (in 3 folds),0.933 +/- 0.028 (in 3 folds),0.894 +/- 0.062 (in 3 folds),0.894 +/- 0.062 (in 3 folds),0.812 +/- 0.134 (in 3 folds),0.491 +/- 0.426 (in 3 folds),0.811,0.558,217,0,217,0.0,False
lasso_cv,0.930 +/- 0.023 (in 3 folds),0.930 +/- 0.023 (in 3 folds),0.894 +/- 0.040 (in 3 folds),0.894 +/- 0.040 (in 3 folds),0.880 +/- 0.015 (in 3 folds),0.715 +/- 0.027 (in 3 folds),0.88,0.715,217,0,217,0.0,False
xgboost,0.909 +/- 0.002 (in 3 folds),0.909 +/- 0.002 (in 3 folds),0.868 +/- 0.013 (in 3 folds),0.868 +/- 0.013 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.647 +/- 0.035 (in 3 folds),0.848,0.638,217,0,217,0.0,False
rf_multiclass,0.899 +/- 0.027 (in 3 folds),0.899 +/- 0.027 (in 3 folds),0.852 +/- 0.046 (in 3 folds),0.852 +/- 0.046 (in 3 folds),0.857 +/- 0.044 (in 3 folds),0.660 +/- 0.091 (in 3 folds),0.857,0.657,217,0,217,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.313 +/- 0.028 (in 3 folds),0.313 +/- 0.028 (in 3 folds),0.687 +/- 0.028 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.687,0.0,217,0,217,0.0,True
dummy_stratified,0.394 +/- 0.034 (in 3 folds),0.394 +/- 0.034 (in 3 folds),0.294 +/- 0.028 (in 3 folds),0.294 +/- 0.028 (in 3 folds),0.498 +/- 0.040 (in 3 folds),-0.223 +/- 0.075 (in 3 folds),0.498,-0.223,217,0,217,0.0,False


lasso_multiclass,linearsvm_ovr,elasticnet_cv,ridge_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.938 +/- 0.027 (in 3 folds) ROC-AUC (macro OvO): 0.938 +/- 0.027 (in 3 folds) au-PRC (weighted OvO): 0.904 +/- 0.040 (in 3 folds) au-PRC (macro OvO): 0.904 +/- 0.040 (in 3 folds) Accuracy: 0.885 +/- 0.008 (in 3 folds) MCC: 0.734 +/- 0.012 (in 3 folds) Global scores: Accuracy: 0.885 MCC: 0.733 Global classification report:  precision recall f1-score support Healthy/Background 0.92 0.91 0.92 149  Lupus 0.81 0.82 0.82 68  accuracy 0.88 217  macro avg 0.87 0.87 0.87 217  weighted avg 0.89 0.88 0.89 217,Per-fold scores: ROC-AUC (weighted OvO): 0.936 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.936 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.893 +/- 0.048 (in 3 folds) au-PRC (macro OvO): 0.893 +/- 0.048 (in 3 folds) Accuracy: 0.876 +/- 0.025 (in 3 folds) MCC: 0.717 +/- 0.043 (in 3 folds) Global scores: Accuracy: 0.876 MCC: 0.715 Global classification report:  precision recall f1-score support Healthy/Background 0.92 0.90 0.91 149  Lupus 0.79 0.82 0.81 68  accuracy 0.88 217  macro avg 0.85 0.86 0.86 217  weighted avg 0.88 0.88 0.88 217,Per-fold scores: ROC-AUC (weighted OvO): 0.934 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.934 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.905 +/- 0.036 (in 3 folds) au-PRC (macro OvO): 0.905 +/- 0.036 (in 3 folds) Accuracy: 0.885 +/- 0.021 (in 3 folds) MCC: 0.725 +/- 0.044 (in 3 folds) Global scores: Accuracy: 0.885 MCC: 0.725 Global classification report:  precision recall f1-score support Healthy/Background 0.88 0.96 0.92 149  Lupus 0.89 0.72 0.80 68  accuracy 0.88 217  macro avg 0.89 0.84 0.86 217  weighted avg 0.89 0.88 0.88 217,Per-fold scores: ROC-AUC (weighted OvO): 0.933 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.933 +/- 0.028 (in 3 folds) au-PRC (weighted OvO): 0.894 +/- 0.062 (in 3 folds) au-PRC (macro OvO): 0.894 +/- 0.062 (in 3 folds) Accuracy: 0.812 +/- 0.134 (in 3 folds) MCC: 0.491 +/- 0.426 (in 3 folds) Global scores: Accuracy: 0.811 MCC: 0.558 Global classification 
report:  precision recall f1-score support Healthy/Background 0.78 1.00 0.88 149  Lupus 1.00 0.40 0.57 68  accuracy 0.81 217  macro avg 0.89 0.70 0.72 217  weighted avg 0.85 0.81 0.78 217
,,,
,,,
,,,


lasso_cv,xgboost,rf_multiclass,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.930 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.930 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.894 +/- 0.040 (in 3 folds) au-PRC (macro OvO): 0.894 +/- 0.040 (in 3 folds) Accuracy: 0.880 +/- 0.015 (in 3 folds) MCC: 0.715 +/- 0.027 (in 3 folds) Global scores: Accuracy: 0.880 MCC: 0.715 Global classification report:  precision recall f1-score support Healthy/Background 0.89 0.94 0.92 149  Lupus 0.85 0.75 0.80 68  accuracy 0.88 217  macro avg 0.87 0.84 0.86 217  weighted avg 0.88 0.88 0.88 217,Per-fold scores: ROC-AUC (weighted OvO): 0.909 +/- 0.002 (in 3 folds) ROC-AUC (macro OvO): 0.909 +/- 0.002 (in 3 folds) au-PRC (weighted OvO): 0.868 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.868 +/- 0.013 (in 3 folds) Accuracy: 0.848 +/- 0.024 (in 3 folds) MCC: 0.647 +/- 0.035 (in 3 folds) Global scores: Accuracy: 0.848 MCC: 0.638 Global classification report:  precision recall f1-score support Healthy/Background 0.87 0.91 0.89 149  Lupus 0.79 0.71 0.74 68  accuracy 0.85 217  macro avg 0.83 0.81 0.82 217  weighted avg 0.85 0.85 0.85 217,Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.027 (in 3 folds) ROC-AUC (macro OvO): 0.899 +/- 0.027 (in 3 folds) au-PRC (weighted OvO): 0.852 +/- 0.046 (in 3 folds) au-PRC (macro OvO): 0.852 +/- 0.046 (in 3 folds) Accuracy: 0.857 +/- 0.044 (in 3 folds) MCC: 0.660 +/- 0.091 (in 3 folds) Global scores: Accuracy: 0.857 MCC: 0.657 Global classification report:  precision recall f1-score support Healthy/Background 0.85 0.96 0.90 149  Lupus 0.88 0.63 0.74 68  accuracy 0.86 217  macro avg 0.86 0.80 0.82 217  weighted avg 0.86 0.86 0.85 217,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.313 +/- 0.028 (in 3 folds) au-PRC (macro OvO): 0.313 +/- 0.028 (in 3 folds) Accuracy: 0.687 +/- 0.028 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.687 MCC: 0.000 Global classification 
report:  precision recall f1-score support Healthy/Background 0.69 1.00 0.81 149  Lupus 0.00 0.00 0.00 68  accuracy 0.69 217  macro avg 0.34 0.50 0.41 217  weighted avg 0.47 0.69 0.56 217
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.394 +/- 0.034 (in 3 folds) ROC-AUC (macro OvO): 0.394 +/- 0.034 (in 3 folds) au-PRC (weighted OvO): 0.294 +/- 0.028 (in 3 folds) au-PRC (macro OvO): 0.294 +/- 0.028 (in 3 folds) Accuracy: 0.498 +/- 0.040 (in 3 folds) MCC: -0.223 +/- 0.075 (in 3 folds) Global scores: Accuracy: 0.498 MCC: -0.223 Global classification report:  precision recall f1-score support Healthy/Background 0.62 0.67 0.65 149  Lupus 0.14 0.12 0.13 68  accuracy 0.50 217  macro avg 0.38 0.39 0.39 217  weighted avg 0.47 0.50 0.48 217


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.914 +/- 0.015 (in 3 folds),0.914 +/- 0.015 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.871 +/- 0.008 (in 3 folds),0.688 +/- 0.025 (in 3 folds),0.871,0.688,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.914 +/- 0.015 (in 3 folds),0.914 +/- 0.015 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.859 +/- 0.013 (in 3 folds),0.658 +/- 0.037 (in 3 folds),0.859,0.656,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
lasso_multiclass,0.914 +/- 0.010 (in 3 folds),0.914 +/- 0.010 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.850 +/- 0.014 (in 3 folds),0.655 +/- 0.022 (in 3 folds),0.85,0.653,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.914 +/- 0.010 (in 3 folds),0.914 +/- 0.010 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.843 +/- 0.019 (in 3 folds),0.629 +/- 0.041 (in 3 folds),0.843,0.626,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
ridge_cv,0.905 +/- 0.008 (in 3 folds),0.905 +/- 0.008 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.797 +/- 0.089 (in 3 folds),0.427 +/- 0.370 (in 3 folds),0.796,0.505,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
ridge_cv.decision_thresholds_tuned,0.905 +/- 0.008 (in 3 folds),0.905 +/- 0.008 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.807 +/- 0.097 (in 3 folds),0.445 +/- 0.386 (in 3 folds),0.806,0.515,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
lasso_cv,0.903 +/- 0.005 (in 3 folds),0.903 +/- 0.005 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.632 +/- 0.091 (in 3 folds),0.85,0.634,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
lasso_cv.decision_thresholds_tuned,0.903 +/- 0.005 (in 3 folds),0.903 +/- 0.005 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.846 +/- 0.029 (in 3 folds),0.623 +/- 0.080 (in 3 folds),0.846,0.624,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
linearsvm_ovr,0.899 +/- 0.004 (in 3 folds),0.899 +/- 0.004 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.834 +/- 0.009 (in 3 folds),0.615 +/- 0.017 (in 3 folds),0.834,0.613,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.899 +/- 0.004 (in 3 folds),0.899 +/- 0.004 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.825 +/- 0.026 (in 3 folds),0.625 +/- 0.049 (in 3 folds),0.824,0.601,disease.separate_past_exposures,319.0,0.0,319.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.914 +/- 0.015 (in 3 folds),0.914 +/- 0.015 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.871 +/- 0.008 (in 3 folds),0.688 +/- 0.025 (in 3 folds),0.871,0.688,disease.separate_past_exposures,319,0,319,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.914 +/- 0.015 (in 3 folds),0.914 +/- 0.015 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.873 +/- 0.019 (in 3 folds),0.859 +/- 0.013 (in 3 folds),0.658 +/- 0.037 (in 3 folds),0.859,0.656,disease.separate_past_exposures,319,0,319,0.0,False
lasso_multiclass,0.914 +/- 0.010 (in 3 folds),0.914 +/- 0.010 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.850 +/- 0.014 (in 3 folds),0.655 +/- 0.022 (in 3 folds),0.85,0.653,disease.separate_past_exposures,319,0,319,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.914 +/- 0.010 (in 3 folds),0.914 +/- 0.010 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.867 +/- 0.017 (in 3 folds),0.843 +/- 0.019 (in 3 folds),0.629 +/- 0.041 (in 3 folds),0.843,0.626,disease.separate_past_exposures,319,0,319,0.0,False
ridge_cv,0.905 +/- 0.008 (in 3 folds),0.905 +/- 0.008 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.797 +/- 0.089 (in 3 folds),0.427 +/- 0.370 (in 3 folds),0.796,0.505,disease.separate_past_exposures,319,0,319,0.0,False
ridge_cv.decision_thresholds_tuned,0.905 +/- 0.008 (in 3 folds),0.905 +/- 0.008 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.864 +/- 0.006 (in 3 folds),0.807 +/- 0.097 (in 3 folds),0.445 +/- 0.386 (in 3 folds),0.806,0.515,disease.separate_past_exposures,319,0,319,0.0,False
lasso_cv,0.903 +/- 0.005 (in 3 folds),0.903 +/- 0.005 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.632 +/- 0.091 (in 3 folds),0.85,0.634,disease.separate_past_exposures,319,0,319,0.0,False
lasso_cv.decision_thresholds_tuned,0.903 +/- 0.005 (in 3 folds),0.903 +/- 0.005 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.846 +/- 0.029 (in 3 folds),0.623 +/- 0.080 (in 3 folds),0.846,0.624,disease.separate_past_exposures,319,0,319,0.0,False
linearsvm_ovr,0.899 +/- 0.004 (in 3 folds),0.899 +/- 0.004 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.834 +/- 0.009 (in 3 folds),0.615 +/- 0.017 (in 3 folds),0.834,0.613,disease.separate_past_exposures,319,0,319,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.899 +/- 0.004 (in 3 folds),0.899 +/- 0.004 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.848 +/- 0.013 (in 3 folds),0.825 +/- 0.026 (in 3 folds),0.625 +/- 0.049 (in 3 folds),0.824,0.601,disease.separate_past_exposures,319,0,319,0.0,False


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.914 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.914 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.873 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.873 +/- 0.019 (in 3 folds) Accuracy: 0.871 +/- 0.008 (in 3 folds) MCC: 0.688 +/- 0.025 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.871 MCC: 0.688 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.87 0.96 0.91 221  Lupus 0.88 0.67 0.76 98  accuracy 0.87 319  macro avg 0.87 0.82 0.84 319  weighted avg 0.87 0.87 0.87 319,Per-fold scores: ROC-AUC (weighted OvO): 0.914 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.914 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.873 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.873 +/- 0.019 (in 3 folds) Accuracy: 0.859 +/- 0.013 (in 3 folds) MCC: 0.658 +/- 0.037 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.859 MCC: 0.656 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.86 0.95 0.90 221  Lupus 0.86 0.64 0.74 98  accuracy 0.86 319  macro avg 0.86 0.80 0.82 319  weighted avg 0.86 0.86 0.85 319,Per-fold scores: ROC-AUC (weighted OvO): 0.914 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.914 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.867 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.867 +/- 0.017 (in 3 folds) Accuracy: 0.850 +/- 0.014 (in 3 folds) MCC: 0.655 +/- 0.022 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.850 MCC: 0.653 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support 
Healthy/Background 0.90 0.88 0.89 221  Lupus 0.74 0.79 0.76 98  accuracy 0.85 319  macro avg 0.82 0.83 0.83 319  weighted avg 0.85 0.85 0.85 319,Per-fold scores: ROC-AUC (weighted OvO): 0.914 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.914 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.867 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.867 +/- 0.017 (in 3 folds) Accuracy: 0.843 +/- 0.019 (in 3 folds) MCC: 0.629 +/- 0.041 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.843 MCC: 0.626 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.88 0.90 0.89 221  Lupus 0.76 0.71 0.74 98  accuracy 0.84 319  macro avg 0.82 0.81 0.81 319  weighted avg 0.84 0.84 0.84 319
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.905 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.905 +/- 0.008 (in 3 folds) au-PRC (weighted OvO): 0.864 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.864 +/- 0.006 (in 3 folds) Accuracy: 0.797 +/- 0.089 (in 3 folds) MCC: 0.427 +/- 0.370 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.796 MCC: 0.505 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.77 1.00 0.87 221  Lupus 0.97 0.35 0.51 98  accuracy 0.80 319  macro avg 0.87 0.67 0.69 319  weighted avg 0.84 0.80 0.76 319,Per-fold scores: ROC-AUC (weighted OvO): 0.905 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.905 +/- 0.008 (in 3 folds) au-PRC (weighted OvO): 0.864 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.864 +/- 0.006 (in 3 folds) Accuracy: 0.807 +/- 0.097 (in 3 folds) MCC: 0.445 +/- 0.386 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.806 MCC: 0.515 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.80 0.96 0.87 221  Lupus 0.83 0.46 0.59 98  accuracy 0.81 319  macro avg 0.82 0.71 0.73 319  weighted avg 0.81 0.81 0.79 319,Per-fold scores: ROC-AUC (weighted OvO): 0.903 +/- 0.005 (in 3 folds) ROC-AUC (macro OvO): 0.903 +/- 0.005 (in 3 folds) au-PRC (weighted OvO): 0.855 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.855 +/- 0.017 (in 3 folds) Accuracy: 0.849 +/- 0.035 (in 3 folds) MCC: 0.632 +/- 0.091 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.850 MCC: 0.634 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support 
Healthy/Background 0.86 0.93 0.90 221  Lupus 0.81 0.66 0.73 98  accuracy 0.85 319  macro avg 0.84 0.80 0.81 319  weighted avg 0.85 0.85 0.84 319,Per-fold scores: ROC-AUC (weighted OvO): 0.903 +/- 0.005 (in 3 folds) ROC-AUC (macro OvO): 0.903 +/- 0.005 (in 3 folds) au-PRC (weighted OvO): 0.855 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.855 +/- 0.017 (in 3 folds) Accuracy: 0.846 +/- 0.029 (in 3 folds) MCC: 0.623 +/- 0.080 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.846 MCC: 0.624 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.85 0.95 0.90 221  Lupus 0.85 0.61 0.71 98  accuracy 0.85 319  macro avg 0.85 0.78 0.80 319  weighted avg 0.85 0.85 0.84 319
,,,
,,,


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.899 +/- 0.004 (in 3 folds) au-PRC (weighted OvO): 0.848 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.848 +/- 0.013 (in 3 folds) Accuracy: 0.834 +/- 0.009 (in 3 folds) MCC: 0.615 +/- 0.017 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.834 MCC: 0.613 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.89 0.87 0.88 221  Lupus 0.72 0.74 0.73 98  accuracy 0.83 319  macro avg 0.80 0.81 0.81 319  weighted avg 0.84 0.83 0.83 319,Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.899 +/- 0.004 (in 3 folds) au-PRC (weighted OvO): 0.848 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.848 +/- 0.013 (in 3 folds) Accuracy: 0.825 +/- 0.026 (in 3 folds) MCC: 0.625 +/- 0.049 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.824 MCC: 0.601 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.89 0.85 0.87 221  Lupus 0.69 0.77 0.73 98  accuracy 0.82 319  macro avg 0.79 0.81 0.80 319  weighted avg 0.83 0.82 0.83 319,Per-fold scores: ROC-AUC (weighted OvO): 0.896 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.896 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.836 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.836 +/- 0.016 (in 3 folds) Accuracy: 0.840 +/- 0.004 (in 3 folds) MCC: 0.618 +/- 0.021 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.840 MCC: 0.618 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support 
Healthy/Background 0.82 0.99 0.90 221  Lupus 0.96 0.50 0.66 98  accuracy 0.84 319  macro avg 0.89 0.75 0.78 319  weighted avg 0.86 0.84 0.82 319,Per-fold scores: ROC-AUC (weighted OvO): 0.896 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.896 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.836 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.836 +/- 0.016 (in 3 folds) Accuracy: 0.830 +/- 0.014 (in 3 folds) MCC: 0.585 +/- 0.037 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.831 MCC: 0.583 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.83 0.95 0.89 221  Lupus 0.84 0.55 0.67 98  accuracy 0.83 319  macro avg 0.84 0.75 0.78 319  weighted avg 0.83 0.83 0.82 319
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.861 +/- 0.021 (in 3 folds) ROC-AUC (macro OvO): 0.861 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.806 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.806 +/- 0.012 (in 3 folds) Accuracy: 0.831 +/- 0.028 (in 3 folds) MCC: 0.583 +/- 0.083 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.831 MCC: 0.583 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.83 0.95 0.89 221  Lupus 0.84 0.55 0.67 98  accuracy 0.83 319  macro avg 0.84 0.75 0.78 319  weighted avg 0.83 0.83 0.82 319,Per-fold scores: ROC-AUC (weighted OvO): 0.861 +/- 0.021 (in 3 folds) ROC-AUC (macro OvO): 0.861 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.806 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.806 +/- 0.012 (in 3 folds) Accuracy: 0.815 +/- 0.041 (in 3 folds) MCC: 0.585 +/- 0.064 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.815 MCC: 0.544 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.83 0.92 0.87 221  Lupus 0.76 0.58 0.66 98  accuracy 0.82 319  macro avg 0.80 0.75 0.77 319  weighted avg 0.81 0.82 0.81 319
,
,


---

{<TargetObsColumnEnum.disease: TargetObsColumn(obs_column_name='disease', is_target_binary_for_repertoire_composition_classifier=False, limited_to_disease=None, require_metadata_columns_present=None, confusion_matrix_expanded_column_name='disease.separate_past_exposures', blended_evaluation_column_name='disease.rollup')>: PosixPath('/users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/disease/train_smaller_model'), <TargetObsColumnEnum.disease_all_demographics_present: TargetObsColumn(obs_column_name='disease', is_target_binary_for_repertoire_composition_classifier=False, limited_to_disease=None, require_metadata_columns_present=['age', 'sex', 'ethnicity_condensed', 'age_group'], confusion_matrix_expanded_column_name='disease.separate_past_exposures', blended_evaluation_column_name='disease.rollup')>: PosixPath('/users/maximz/code/boyd-immune-repertoire-classification/out/repertoire_stats/TCR/disease_all_demographics_present/train_smaller_model'), <TargetOb

# GeneLocus.TCR, TargetObsColumnEnum.disease trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.945 +/- 0.008 (in 3 folds),0.946 +/- 0.006 (in 3 folds),0.934 +/- 0.008 (in 3 folds),0.937 +/- 0.005 (in 3 folds),0.759 +/- 0.036 (in 3 folds),0.669 +/- 0.047 (in 3 folds),0.759,0.667,274.0,0.0,274.0,0.0,False
ridge_cv,0.944 +/- 0.018 (in 3 folds),0.945 +/- 0.016 (in 3 folds),0.928 +/- 0.016 (in 3 folds),0.934 +/- 0.012 (in 3 folds),0.474 +/- 0.003 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.474,0.0,274.0,0.0,274.0,0.0,True
linearsvm_ovr,0.943 +/- 0.016 (in 3 folds),0.941 +/- 0.015 (in 3 folds),0.932 +/- 0.013 (in 3 folds),0.932 +/- 0.010 (in 3 folds),0.777 +/- 0.031 (in 3 folds),0.686 +/- 0.043 (in 3 folds),0.777,0.684,274.0,0.0,274.0,0.0,False
elasticnet_cv,0.941 +/- 0.015 (in 3 folds),0.941 +/- 0.013 (in 3 folds),0.912 +/- 0.010 (in 3 folds),0.917 +/- 0.007 (in 3 folds),0.730 +/- 0.060 (in 3 folds),0.595 +/- 0.103 (in 3 folds),0.73,0.594,274.0,0.0,274.0,0.0,False
rf_multiclass,0.936 +/- 0.013 (in 3 folds),0.938 +/- 0.011 (in 3 folds),0.926 +/- 0.009 (in 3 folds),0.931 +/- 0.008 (in 3 folds),0.752 +/- 0.040 (in 3 folds),0.629 +/- 0.059 (in 3 folds),0.752,0.627,274.0,0.0,274.0,0.0,False
lasso_cv,0.923 +/- 0.025 (in 3 folds),0.921 +/- 0.022 (in 3 folds),0.883 +/- 0.037 (in 3 folds),0.885 +/- 0.032 (in 3 folds),0.734 +/- 0.050 (in 3 folds),0.599 +/- 0.087 (in 3 folds),0.734,0.598,274.0,0.0,274.0,0.0,False
xgboost,0.916 +/- 0.008 (in 3 folds),0.916 +/- 0.008 (in 3 folds),0.916 +/- 0.003 (in 3 folds),0.918 +/- 0.004 (in 3 folds),0.748 +/- 0.034 (in 3 folds),0.627 +/- 0.048 (in 3 folds),0.748,0.625,274.0,0.0,274.0,0.0,False
dummy_stratified,0.502 +/- 0.017 (in 3 folds),0.509 +/- 0.012 (in 3 folds),0.512 +/- 0.006 (in 3 folds),0.516 +/- 0.005 (in 3 folds),0.336 +/- 0.029 (in 3 folds),-0.003 +/- 0.046 (in 3 folds),0.336,-0.003,274.0,0.0,274.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.474 +/- 0.003 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.474,0.0,274.0,0.0,274.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.945 +/- 0.008 (in 3 folds),0.946 +/- 0.006 (in 3 folds),0.934 +/- 0.008 (in 3 folds),0.937 +/- 0.005 (in 3 folds),0.759 +/- 0.036 (in 3 folds),0.669 +/- 0.047 (in 3 folds),0.759,0.667,274,0,274,0.0,False
ridge_cv,0.944 +/- 0.018 (in 3 folds),0.945 +/- 0.016 (in 3 folds),0.928 +/- 0.016 (in 3 folds),0.934 +/- 0.012 (in 3 folds),0.474 +/- 0.003 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.474,0.0,274,0,274,0.0,True
linearsvm_ovr,0.943 +/- 0.016 (in 3 folds),0.941 +/- 0.015 (in 3 folds),0.932 +/- 0.013 (in 3 folds),0.932 +/- 0.010 (in 3 folds),0.777 +/- 0.031 (in 3 folds),0.686 +/- 0.043 (in 3 folds),0.777,0.684,274,0,274,0.0,False
elasticnet_cv,0.941 +/- 0.015 (in 3 folds),0.941 +/- 0.013 (in 3 folds),0.912 +/- 0.010 (in 3 folds),0.917 +/- 0.007 (in 3 folds),0.730 +/- 0.060 (in 3 folds),0.595 +/- 0.103 (in 3 folds),0.73,0.594,274,0,274,0.0,False
rf_multiclass,0.936 +/- 0.013 (in 3 folds),0.938 +/- 0.011 (in 3 folds),0.926 +/- 0.009 (in 3 folds),0.931 +/- 0.008 (in 3 folds),0.752 +/- 0.040 (in 3 folds),0.629 +/- 0.059 (in 3 folds),0.752,0.627,274,0,274,0.0,False
lasso_cv,0.923 +/- 0.025 (in 3 folds),0.921 +/- 0.022 (in 3 folds),0.883 +/- 0.037 (in 3 folds),0.885 +/- 0.032 (in 3 folds),0.734 +/- 0.050 (in 3 folds),0.599 +/- 0.087 (in 3 folds),0.734,0.598,274,0,274,0.0,False
xgboost,0.916 +/- 0.008 (in 3 folds),0.916 +/- 0.008 (in 3 folds),0.916 +/- 0.003 (in 3 folds),0.918 +/- 0.004 (in 3 folds),0.748 +/- 0.034 (in 3 folds),0.627 +/- 0.048 (in 3 folds),0.748,0.625,274,0,274,0.0,False
dummy_stratified,0.502 +/- 0.017 (in 3 folds),0.509 +/- 0.012 (in 3 folds),0.512 +/- 0.006 (in 3 folds),0.516 +/- 0.005 (in 3 folds),0.336 +/- 0.029 (in 3 folds),-0.003 +/- 0.046 (in 3 folds),0.336,-0.003,274,0,274,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.474 +/- 0.003 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.474,0.0,274,0,274,0.0,True


lasso_multiclass,ridge_cv,linearsvm_ovr,elasticnet_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.945 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.946 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.934 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.937 +/- 0.005 (in 3 folds) Accuracy: 0.759 +/- 0.036 (in 3 folds) MCC: 0.669 +/- 0.047 (in 3 folds) Global scores: Accuracy: 0.759 MCC: 0.667 Global classification report:  precision recall f1-score support  Covid19 0.71 0.76 0.73 38  HIV 0.68 0.86 0.76 64 Healthy/Background 0.90 0.70 0.79 130  Lupus 0.65 0.79 0.71 42  accuracy 0.76 274  macro avg 0.73 0.78 0.75 274  weighted avg 0.78 0.76 0.76 274,Per-fold scores: ROC-AUC (weighted OvO): 0.944 +/- 0.018 (in 3 folds) ROC-AUC (macro OvO): 0.945 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.928 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.934 +/- 0.012 (in 3 folds) Accuracy: 0.474 +/- 0.003 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.474 MCC: 0.000 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 38  HIV 0.00 0.00 0.00 64 Healthy/Background 0.47 1.00 0.64 130  Lupus 0.00 0.00 0.00 42  accuracy 0.47 274  macro avg 0.12 0.25 0.16 274  weighted avg 0.23 0.47 0.31 274,Per-fold scores: ROC-AUC (weighted OvO): 0.943 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.941 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.932 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.932 +/- 0.010 (in 3 folds) Accuracy: 0.777 +/- 0.031 (in 3 folds) MCC: 0.686 +/- 0.043 (in 3 folds) Global scores: Accuracy: 0.777 MCC: 0.684 Global classification report:  precision recall f1-score support  Covid19 0.78 0.74 0.76 38  HIV 0.68 0.88 0.77 64 Healthy/Background 0.88 0.75 0.81 130  Lupus 0.69 0.74 0.71 42  accuracy 0.78 274  macro avg 0.76 0.78 0.76 274  weighted avg 0.79 0.78 0.78 274,Per-fold scores: ROC-AUC (weighted OvO): 0.941 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.941 +/- 0.013 (in 3 folds) au-PRC (weighted OvO): 0.912 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.917 +/- 0.007 (in 
3 folds) Accuracy: 0.730 +/- 0.060 (in 3 folds) MCC: 0.595 +/- 0.103 (in 3 folds) Global scores: Accuracy: 0.730 MCC: 0.594 Global classification report:  precision recall f1-score support  Covid19 0.77 0.45 0.57 38  HIV 0.68 0.78 0.72 64 Healthy/Background 0.74 0.82 0.78 130  Lupus 0.77 0.64 0.70 42  accuracy 0.73 274  macro avg 0.74 0.67 0.69 274  weighted avg 0.73 0.73 0.72 274
,,,
,,,
,,,


rf_multiclass,lasso_cv,xgboost,dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.936 +/- 0.013 (in 3 folds) ROC-AUC (macro OvO): 0.938 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.926 +/- 0.009 (in 3 folds) au-PRC (macro OvO): 0.931 +/- 0.008 (in 3 folds) Accuracy: 0.752 +/- 0.040 (in 3 folds) MCC: 0.629 +/- 0.059 (in 3 folds) Global scores: Accuracy: 0.752 MCC: 0.627 Global classification report:  precision recall f1-score support  Covid19 0.86 0.66 0.75 38  HIV 0.71 0.72 0.71 64 Healthy/Background 0.75 0.82 0.78 130  Lupus 0.76 0.67 0.71 42  accuracy 0.75 274  macro avg 0.77 0.72 0.74 274  weighted avg 0.76 0.75 0.75 274,Per-fold scores: ROC-AUC (weighted OvO): 0.923 +/- 0.025 (in 3 folds) ROC-AUC (macro OvO): 0.921 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.883 +/- 0.037 (in 3 folds) au-PRC (macro OvO): 0.885 +/- 0.032 (in 3 folds) Accuracy: 0.734 +/- 0.050 (in 3 folds) MCC: 0.599 +/- 0.087 (in 3 folds) Global scores: Accuracy: 0.734 MCC: 0.598 Global classification report:  precision recall f1-score support  Covid19 0.88 0.39 0.55 38  HIV 0.68 0.80 0.73 64 Healthy/Background 0.74 0.85 0.79 130  Lupus 0.74 0.60 0.66 42  accuracy 0.73 274  macro avg 0.76 0.66 0.68 274  weighted avg 0.75 0.73 0.72 274,Per-fold scores: ROC-AUC (weighted OvO): 0.916 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.916 +/- 0.008 (in 3 folds) au-PRC (weighted OvO): 0.916 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.918 +/- 0.004 (in 3 folds) Accuracy: 0.748 +/- 0.034 (in 3 folds) MCC: 0.627 +/- 0.048 (in 3 folds) Global scores: Accuracy: 0.748 MCC: 0.625 Global classification report:  precision recall f1-score support  Covid19 0.81 0.66 0.72 38  HIV 0.65 0.70 0.68 64 Healthy/Background 0.78 0.81 0.80 130  Lupus 0.75 0.71 0.73 42  accuracy 0.75 274  macro avg 0.75 0.72 0.73 274  weighted avg 0.75 0.75 0.75 274,Per-fold scores: ROC-AUC (weighted OvO): 0.502 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.509 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.512 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.516 +/- 0.005 (in 
3 folds) Accuracy: 0.336 +/- 0.029 (in 3 folds) MCC: -0.003 +/- 0.046 (in 3 folds) Global scores: Accuracy: 0.336 MCC: -0.003 Global classification report:  precision recall f1-score support  Covid19 0.24 0.21 0.22 38  HIV 0.30 0.30 0.30 64 Healthy/Background 0.43 0.48 0.45 130  Lupus 0.09 0.07 0.08 42  accuracy 0.34 274  macro avg 0.26 0.26 0.26 274  weighted avg 0.32 0.34 0.33 274
,,,
,,,
,,,


dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 (in 3 folds) Accuracy: 0.474 +/- 0.003 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.474 MCC: 0.000 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 38  HIV 0.00 0.00 0.00 64 Healthy/Background 0.47 1.00 0.64 130  Lupus 0.00 0.00 0.00 42  accuracy 0.47 274  macro avg 0.12 0.25 0.16 274  weighted avg 0.23 0.47 0.31 274


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.940 +/- 0.009 (in 3 folds),0.940 +/- 0.010 (in 3 folds),0.927 +/- 0.013 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.761 +/- 0.023 (in 3 folds),0.666 +/- 0.024 (in 3 folds),0.761,0.665,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.940 +/- 0.009 (in 3 folds),0.940 +/- 0.010 (in 3 folds),0.927 +/- 0.013 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.771 +/- 0.028 (in 3 folds),0.672 +/- 0.034 (in 3 folds),0.771,0.669,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
rf_multiclass,0.940 +/- 0.008 (in 3 folds),0.941 +/- 0.005 (in 3 folds),0.931 +/- 0.006 (in 3 folds),0.934 +/- 0.003 (in 3 folds),0.761 +/- 0.010 (in 3 folds),0.642 +/- 0.014 (in 3 folds),0.761,0.641,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.940 +/- 0.008 (in 3 folds),0.941 +/- 0.005 (in 3 folds),0.931 +/- 0.006 (in 3 folds),0.934 +/- 0.003 (in 3 folds),0.756 +/- 0.045 (in 3 folds),0.660 +/- 0.061 (in 3 folds),0.756,0.656,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
linearsvm_ovr,0.937 +/- 0.014 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.923 +/- 0.019 (in 3 folds),0.766 +/- 0.043 (in 3 folds),0.666 +/- 0.053 (in 3 folds),0.766,0.664,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.937 +/- 0.014 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.923 +/- 0.019 (in 3 folds),0.723 +/- 0.050 (in 3 folds),0.618 +/- 0.046 (in 3 folds),0.722,0.6,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
xgboost,0.933 +/- 0.015 (in 3 folds),0.931 +/- 0.011 (in 3 folds),0.931 +/- 0.012 (in 3 folds),0.930 +/- 0.008 (in 3 folds),0.778 +/- 0.013 (in 3 folds),0.674 +/- 0.017 (in 3 folds),0.778,0.673,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
xgboost.decision_thresholds_tuned,0.933 +/- 0.015 (in 3 folds),0.931 +/- 0.011 (in 3 folds),0.931 +/- 0.012 (in 3 folds),0.930 +/- 0.008 (in 3 folds),0.773 +/- 0.037 (in 3 folds),0.680 +/- 0.050 (in 3 folds),0.773,0.678,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False
ridge_cv,0.933 +/- 0.014 (in 3 folds),0.932 +/- 0.013 (in 3 folds),0.914 +/- 0.017 (in 3 folds),0.917 +/- 0.016 (in 3 folds),0.469 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.469,0.0,disease.separate_past_exposures,414.0,0.0,414.0,0.0,True
ridge_cv.decision_thresholds_tuned,0.933 +/- 0.014 (in 3 folds),0.932 +/- 0.013 (in 3 folds),0.914 +/- 0.017 (in 3 folds),0.917 +/- 0.016 (in 3 folds),0.478 +/- 0.284 (in 3 folds),0.352 +/- 0.313 (in 3 folds),0.481,0.357,disease.separate_past_exposures,414.0,0.0,414.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.940 +/- 0.009 (in 3 folds),0.940 +/- 0.010 (in 3 folds),0.927 +/- 0.013 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.761 +/- 0.023 (in 3 folds),0.666 +/- 0.024 (in 3 folds),0.761,0.665,disease.separate_past_exposures,414,0,414,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.940 +/- 0.009 (in 3 folds),0.940 +/- 0.010 (in 3 folds),0.927 +/- 0.013 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.771 +/- 0.028 (in 3 folds),0.672 +/- 0.034 (in 3 folds),0.771,0.669,disease.separate_past_exposures,414,0,414,0.0,False
rf_multiclass,0.940 +/- 0.008 (in 3 folds),0.941 +/- 0.005 (in 3 folds),0.931 +/- 0.006 (in 3 folds),0.934 +/- 0.003 (in 3 folds),0.761 +/- 0.010 (in 3 folds),0.642 +/- 0.014 (in 3 folds),0.761,0.641,disease.separate_past_exposures,414,0,414,0.0,False
rf_multiclass.decision_thresholds_tuned,0.940 +/- 0.008 (in 3 folds),0.941 +/- 0.005 (in 3 folds),0.931 +/- 0.006 (in 3 folds),0.934 +/- 0.003 (in 3 folds),0.756 +/- 0.045 (in 3 folds),0.660 +/- 0.061 (in 3 folds),0.756,0.656,disease.separate_past_exposures,414,0,414,0.0,False
linearsvm_ovr,0.937 +/- 0.014 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.923 +/- 0.019 (in 3 folds),0.766 +/- 0.043 (in 3 folds),0.666 +/- 0.053 (in 3 folds),0.766,0.664,disease.separate_past_exposures,414,0,414,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.937 +/- 0.014 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.921 +/- 0.017 (in 3 folds),0.923 +/- 0.019 (in 3 folds),0.723 +/- 0.050 (in 3 folds),0.618 +/- 0.046 (in 3 folds),0.722,0.6,disease.separate_past_exposures,414,0,414,0.0,False
xgboost,0.933 +/- 0.015 (in 3 folds),0.931 +/- 0.011 (in 3 folds),0.931 +/- 0.012 (in 3 folds),0.930 +/- 0.008 (in 3 folds),0.778 +/- 0.013 (in 3 folds),0.674 +/- 0.017 (in 3 folds),0.778,0.673,disease.separate_past_exposures,414,0,414,0.0,False
xgboost.decision_thresholds_tuned,0.933 +/- 0.015 (in 3 folds),0.931 +/- 0.011 (in 3 folds),0.931 +/- 0.012 (in 3 folds),0.930 +/- 0.008 (in 3 folds),0.773 +/- 0.037 (in 3 folds),0.680 +/- 0.050 (in 3 folds),0.773,0.678,disease.separate_past_exposures,414,0,414,0.0,False
ridge_cv,0.933 +/- 0.014 (in 3 folds),0.932 +/- 0.013 (in 3 folds),0.914 +/- 0.017 (in 3 folds),0.917 +/- 0.016 (in 3 folds),0.469 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.469,0.0,disease.separate_past_exposures,414,0,414,0.0,True
ridge_cv.decision_thresholds_tuned,0.933 +/- 0.014 (in 3 folds),0.932 +/- 0.013 (in 3 folds),0.914 +/- 0.017 (in 3 folds),0.917 +/- 0.016 (in 3 folds),0.478 +/- 0.284 (in 3 folds),0.352 +/- 0.313 (in 3 folds),0.481,0.357,disease.separate_past_exposures,414,0,414,0.0,False


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.940 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.940 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.927 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.930 +/- 0.013 (in 3 folds) Accuracy: 0.761 +/- 0.023 (in 3 folds) MCC: 0.666 +/- 0.024 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.761 MCC: 0.665 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.67 0.66 0.66 58  HIV 0.73 0.87 0.79 98 Healthy/Background 0.89 0.73 0.80 194  Lupus 0.62 0.78 0.69 64  accuracy 0.76 414  macro avg 0.73 0.76 0.74 414  weighted avg 0.78 0.76 0.76 414,Per-fold scores: ROC-AUC (weighted OvO): 0.940 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.940 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.927 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.930 +/- 0.013 (in 3 folds) Accuracy: 0.771 +/- 0.028 (in 3 folds) MCC: 0.672 +/- 0.034 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.771 MCC: 0.669 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.67 0.64 0.65 58  HIV 0.75 0.85 0.80 98 Healthy/Background 0.85 0.78 0.82 194  Lupus 0.66 0.73 0.70 64  accuracy 0.77 414  macro avg 0.74 0.75 0.74 414  weighted avg 0.78 0.77 0.77 414,Per-fold scores: ROC-AUC (weighted OvO): 0.940 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.941 +/- 0.005 (in 3 folds) au-PRC (weighted OvO): 0.931 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.934 +/- 0.003 (in 3 folds) Accuracy: 0.761 +/- 0.010 (in 3 folds) MCC: 0.642 +/- 0.014 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.761 MCC: 0.641 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.89 0.57 0.69 58  HIV 0.74 0.77 0.75 98 Healthy/Background 0.74 0.86 0.80 194  Lupus 0.77 0.64 0.70 64  accuracy 0.76 414  macro avg 0.79 0.71 0.74 414  weighted avg 0.77 0.76 0.76 414,Per-fold scores: ROC-AUC (weighted OvO): 0.940 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.941 +/- 0.005 (in 3 folds) au-PRC (weighted OvO): 0.931 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.934 +/- 0.003 (in 3 folds) Accuracy: 0.756 +/- 0.045 (in 3 folds) MCC: 0.660 +/- 0.061 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.756 MCC: 0.656 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.78 0.72 0.75 58  HIV 0.69 0.86 0.77 98 Healthy/Background 0.85 0.72 0.78 194  Lupus 0.63 0.75 0.69 64  accuracy 0.76 414  macro avg 0.74 0.76 0.75 414  weighted avg 0.77 0.76 0.76 414
,,,
,,,


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.937 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.934 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.921 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.923 +/- 0.019 (in 3 folds) Accuracy: 0.766 +/- 0.043 (in 3 folds) MCC: 0.666 +/- 0.053 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.766 MCC: 0.664 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.66 0.64 0.65 58  HIV 0.72 0.86 0.79 98 Healthy/Background 0.87 0.78 0.82 194  Lupus 0.66 0.70 0.68 64  accuracy 0.77 414  macro avg 0.73 0.74 0.73 414  weighted avg 0.77 0.77 0.77 414,Per-fold scores: ROC-AUC (weighted OvO): 0.937 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.934 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.921 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.923 +/- 0.019 (in 3 folds) Accuracy: 0.723 +/- 0.050 (in 3 folds) MCC: 0.618 +/- 0.046 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.722 MCC: 0.600 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.69 0.59 0.64 58  HIV 0.65 0.71 0.68 98 Healthy/Background 0.81 0.75 0.78 194  Lupus 0.63 0.78 0.70 64  accuracy 0.72 414  macro avg 0.70 0.71 0.70 414  weighted avg 0.73 0.72 0.72 414,Per-fold scores: ROC-AUC (weighted OvO): 0.933 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.931 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.931 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.930 +/- 0.008 (in 3 folds) Accuracy: 0.778 +/- 0.013 (in 3 folds) MCC: 0.674 +/- 0.017 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.778 MCC: 0.673 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.77 0.69 0.73 58  HIV 0.74 0.81 0.77 98 Healthy/Background 0.82 0.82 0.82 194  Lupus 0.72 0.69 0.70 64  accuracy 0.78 414  macro avg 0.76 0.75 0.76 414  weighted avg 0.78 0.78 0.78 414,Per-fold scores: ROC-AUC (weighted OvO): 0.933 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.931 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.931 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.930 +/- 0.008 (in 3 folds) Accuracy: 0.773 +/- 0.037 (in 3 folds) MCC: 0.680 +/- 0.050 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.773 MCC: 0.678 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.63 0.78 0.70 58  HIV 0.74 0.83 0.78 98 Healthy/Background 0.88 0.76 0.81 194  Lupus 0.71 0.73 0.72 64  accuracy 0.77 414  macro avg 0.74 0.77 0.75 414  weighted avg 0.79 0.77 0.78 414
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned,elasticnet_cv,elasticnet_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.933 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.932 +/- 0.013 (in 3 folds) au-PRC (weighted OvO): 0.914 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.917 +/- 0.016 (in 3 folds) Accuracy: 0.469 +/- 0.002 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.469 MCC: 0.000 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.00 0.00 0.00 58  HIV 0.00 0.00 0.00 98 Healthy/Background 0.47 1.00 0.64 194  Lupus 0.00 0.00 0.00 64  accuracy 0.47 414  macro avg 0.12 0.25 0.16 414  weighted avg 0.22 0.47 0.30 414,Per-fold scores: ROC-AUC (weighted OvO): 0.933 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.932 +/- 0.013 (in 3 folds) au-PRC (weighted OvO): 0.914 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.917 +/- 0.016 (in 3 folds) Accuracy: 0.478 +/- 0.284 (in 3 folds) MCC: 0.352 +/- 0.313 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.481 MCC: 0.357 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.62 0.45 0.52 58  HIV 0.55 0.33 0.41 98 Healthy/Background 0.82 0.44 0.57 194  Lupus 0.27 0.88 0.41 64  accuracy 0.48 414  macro avg 0.56 0.52 0.48 414  weighted avg 0.64 0.48 0.50 414,Per-fold scores: ROC-AUC (weighted OvO): 0.922 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.896 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.898 +/- 0.018 (in 3 folds) Accuracy: 0.722 +/- 0.032 (in 3 folds) MCC: 0.586 +/- 0.053 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.722 MCC: 0.584 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.68 0.36 0.47 58  HIV 0.68 0.82 0.74 98 Healthy/Background 0.74 0.82 0.78 194  Lupus 0.76 0.61 0.68 64  accuracy 0.72 414  macro avg 0.72 0.65 0.67 414  weighted avg 0.72 0.72 0.71 414,Per-fold scores: ROC-AUC (weighted OvO): 0.922 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.918 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.896 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.898 +/- 0.018 (in 3 folds) Accuracy: 0.739 +/- 0.038 (in 3 folds) MCC: 0.647 +/- 0.037 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.739 MCC: 0.638 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.58 0.60 0.59 58  HIV 0.69 0.89 0.77 98 Healthy/Background 0.91 0.72 0.80 194  Lupus 0.60 0.70 0.65 64  accuracy 0.74 414  macro avg 0.70 0.73 0.70 414  weighted avg 0.77 0.74 0.74 414
,,,
,,,


lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.901 +/- 0.032 (in 3 folds) ROC-AUC (macro OvO): 0.896 +/- 0.038 (in 3 folds) au-PRC (weighted OvO): 0.869 +/- 0.031 (in 3 folds) au-PRC (macro OvO): 0.869 +/- 0.039 (in 3 folds) Accuracy: 0.719 +/- 0.047 (in 3 folds) MCC: 0.582 +/- 0.082 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.720 MCC: 0.580 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.72 0.31 0.43 58  HIV 0.72 0.80 0.76 98 Healthy/Background 0.74 0.84 0.78 194  Lupus 0.66 0.62 0.64 64  accuracy 0.72 414  macro avg 0.71 0.64 0.65 414  weighted avg 0.72 0.72 0.71 414,Per-fold scores: ROC-AUC (weighted OvO): 0.901 +/- 0.032 (in 3 folds) ROC-AUC (macro OvO): 0.896 +/- 0.038 (in 3 folds) au-PRC (weighted OvO): 0.869 +/- 0.031 (in 3 folds) au-PRC (macro OvO): 0.869 +/- 0.039 (in 3 folds) Accuracy: 0.718 +/- 0.026 (in 3 folds) MCC: 0.612 +/- 0.009 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.717 MCC: 0.598 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.58 0.62 0.60 58  HIV 0.69 0.77 0.72 98 Healthy/Background 0.83 0.72 0.77 194  Lupus 0.61 0.72 0.66 64  accuracy 0.72 414  macro avg 0.68 0.71 0.69 414  weighted avg 0.73 0.72 0.72 414
,
,


---

# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.946 +/- 0.027 (in 3 folds),0.947 +/- 0.028 (in 3 folds),0.942 +/- 0.027 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.808 +/- 0.033 (in 3 folds),0.723 +/- 0.057 (in 3 folds),0.808,0.724,240.0,0.0,240.0,0.0,False
ridge_cv,0.944 +/- 0.026 (in 3 folds),0.944 +/- 0.025 (in 3 folds),0.932 +/- 0.029 (in 3 folds),0.936 +/- 0.028 (in 3 folds),0.483 +/- 0.009 (in 3 folds),0.039 +/- 0.068 (in 3 folds),0.483,0.07,240.0,0.0,240.0,0.0,True
lasso_multiclass,0.944 +/- 0.024 (in 3 folds),0.946 +/- 0.022 (in 3 folds),0.940 +/- 0.016 (in 3 folds),0.944 +/- 0.015 (in 3 folds),0.817 +/- 0.033 (in 3 folds),0.741 +/- 0.051 (in 3 folds),0.817,0.741,240.0,0.0,240.0,0.0,False
elasticnet_cv,0.936 +/- 0.031 (in 3 folds),0.938 +/- 0.029 (in 3 folds),0.922 +/- 0.029 (in 3 folds),0.926 +/- 0.028 (in 3 folds),0.758 +/- 0.065 (in 3 folds),0.634 +/- 0.118 (in 3 folds),0.758,0.636,240.0,0.0,240.0,0.0,False
rf_multiclass,0.925 +/- 0.029 (in 3 folds),0.928 +/- 0.024 (in 3 folds),0.910 +/- 0.030 (in 3 folds),0.918 +/- 0.025 (in 3 folds),0.771 +/- 0.052 (in 3 folds),0.655 +/- 0.085 (in 3 folds),0.771,0.656,240.0,0.0,240.0,0.0,False
xgboost,0.919 +/- 0.013 (in 3 folds),0.921 +/- 0.011 (in 3 folds),0.912 +/- 0.013 (in 3 folds),0.917 +/- 0.012 (in 3 folds),0.762 +/- 0.048 (in 3 folds),0.649 +/- 0.079 (in 3 folds),0.762,0.649,240.0,0.0,240.0,0.0,False
dummy_stratified,0.531 +/- 0.035 (in 3 folds),0.523 +/- 0.032 (in 3 folds),0.529 +/- 0.023 (in 3 folds),0.526 +/- 0.020 (in 3 folds),0.405 +/- 0.062 (in 3 folds),0.088 +/- 0.087 (in 3 folds),0.404,0.086,240.0,0.0,240.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.479 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.479,0.0,240.0,0.0,240.0,0.0,True
"All results, sorted",,,,,,,,,,,,,
ROC-AUC (weighted OvO) per fold  ROC-AUC (macro OvO) per fold  au-PRC (weighted OvO) per fold  au-PRC (macro OvO) per fold  Accuracy per fold  MCC per fold  Accuracy global  MCC global  sample_size  n_abstentions  sample_size including abstentions  abstention_rate  missing_classes  linearsvm_ovr  0.946 +/- 0.027 (in 3 folds)  0.947 +/- 0.028 (in 3 folds)  0.942 +/- 0.027 (in 3 folds)  0.944 +/- 0.026 (in 3 folds)  0.808 +/- 0.033 (in 3 folds)  0.723 +/- 0.057 (in 3 folds)  0.808  0.724  240  0  240  0.0  False  ridge_cv  0.944 +/- 0.026 (in 3 folds)  0.944 +/- 0.025 (in 3 folds)  0.932 +/- 0.029 (in 3 folds)  0.936 +/- 0.028 (in 3 folds)  0.483 +/- 0.009 (in 3 folds)  0.039 +/- 0.068 (in 3 folds)  0.483  0.070  240  0  240  0.0  True  lasso_multiclass  0.944 +/- 0.024 (in 3 folds)  0.946 +/- 0.022 (in 3 folds)  0.940 +/- 0.016 (in 3 folds)  0.944 +/- 0.015 (in 3 folds)  0.817 +/- 0.033 (in 3 folds)  0.741 +/- 0.051 (in 3 folds)  0.817  0.741  240  0  240  0.0  False  elasticnet_cv  0.936 +/- 0.031 (in 3 folds)  0.938 +/- 0.029 (in 3 folds)  0.922 +/- 0.029 (in 3 folds)  0.926 +/- 0.028 (in 3 folds)  0.758 +/- 0.065 (in 3 folds)  0.634 +/- 0.118 (in 3 folds)  0.758  0.636  240  0  240  0.0  False  rf_multiclass  0.925 +/- 0.029 (in 3 folds)  0.928 +/- 0.024 (in 3 folds)  0.910 +/- 0.030 (in 3 folds)  0.918 +/- 0.025 (in 3 folds)  0.771 +/- 0.052 (in 3 folds)  0.655 +/- 0.085 (in 3 folds)  0.771  0.656  240  0  240  0.0  False  xgboost  0.919 +/- 0.013 (in 3 folds)  0.921 +/- 0.011 (in 3 folds)  0.912 +/- 0.013 (in 3 folds)  0.917 +/- 0.012 (in 3 folds)  0.762 +/- 0.048 (in 3 folds)  0.649 +/- 0.079 (in 3 folds)  0.762  0.649  240  0  240  0.0  False  dummy_stratified  0.531 +/- 0.035 (in 3 folds)  0.523 +/- 0.032 (in 3 folds)  0.529 +/- 0.023 (in 3 folds)  0.526 +/- 0.020 (in 3 folds)  0.405 +/- 0.062 (in 3 folds)  0.088 +/- 0.087 (in 3 folds)  0.404  0.086  240  0  240  0.0  False  dummy_most_frequent  0.500 +/- 0.000 (in 3 folds)  0.500 +/- 0.000 (in 3 folds)  
0.500 +/- 0.000 (in 3 folds)  0.500 +/- 0.000 (in 3 folds)  0.479 +/- 0.016 (in 3 folds)  0.000 +/- 0.000 (in 3 folds)  0.479  0.000  240  0  240  0.0  True,,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.946 +/- 0.027 (in 3 folds),0.947 +/- 0.028 (in 3 folds),0.942 +/- 0.027 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.808 +/- 0.033 (in 3 folds),0.723 +/- 0.057 (in 3 folds),0.808,0.724,240,0,240,0.0,False
ridge_cv,0.944 +/- 0.026 (in 3 folds),0.944 +/- 0.025 (in 3 folds),0.932 +/- 0.029 (in 3 folds),0.936 +/- 0.028 (in 3 folds),0.483 +/- 0.009 (in 3 folds),0.039 +/- 0.068 (in 3 folds),0.483,0.07,240,0,240,0.0,True
lasso_multiclass,0.944 +/- 0.024 (in 3 folds),0.946 +/- 0.022 (in 3 folds),0.940 +/- 0.016 (in 3 folds),0.944 +/- 0.015 (in 3 folds),0.817 +/- 0.033 (in 3 folds),0.741 +/- 0.051 (in 3 folds),0.817,0.741,240,0,240,0.0,False
elasticnet_cv,0.936 +/- 0.031 (in 3 folds),0.938 +/- 0.029 (in 3 folds),0.922 +/- 0.029 (in 3 folds),0.926 +/- 0.028 (in 3 folds),0.758 +/- 0.065 (in 3 folds),0.634 +/- 0.118 (in 3 folds),0.758,0.636,240,0,240,0.0,False
rf_multiclass,0.925 +/- 0.029 (in 3 folds),0.928 +/- 0.024 (in 3 folds),0.910 +/- 0.030 (in 3 folds),0.918 +/- 0.025 (in 3 folds),0.771 +/- 0.052 (in 3 folds),0.655 +/- 0.085 (in 3 folds),0.771,0.656,240,0,240,0.0,False
xgboost,0.919 +/- 0.013 (in 3 folds),0.921 +/- 0.011 (in 3 folds),0.912 +/- 0.013 (in 3 folds),0.917 +/- 0.012 (in 3 folds),0.762 +/- 0.048 (in 3 folds),0.649 +/- 0.079 (in 3 folds),0.762,0.649,240,0,240,0.0,False
dummy_stratified,0.531 +/- 0.035 (in 3 folds),0.523 +/- 0.032 (in 3 folds),0.529 +/- 0.023 (in 3 folds),0.526 +/- 0.020 (in 3 folds),0.405 +/- 0.062 (in 3 folds),0.088 +/- 0.087 (in 3 folds),0.404,0.086,240,0,240,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.479 +/- 0.016 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.479,0.0,240,0,240,0.0,True


linearsvm_ovr,ridge_cv,lasso_multiclass,elasticnet_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.946 +/- 0.027 (in 3 folds) ROC-AUC (macro OvO): 0.947 +/- 0.028 (in 3 folds) au-PRC (weighted OvO): 0.942 +/- 0.027 (in 3 folds) au-PRC (macro OvO): 0.944 +/- 0.026 (in 3 folds) Accuracy: 0.808 +/- 0.033 (in 3 folds) MCC: 0.723 +/- 0.057 (in 3 folds) Global scores: Accuracy: 0.808 MCC: 0.724 Global classification report:  precision recall f1-score support  Covid19 0.91 0.70 0.79 30  HIV 0.69 0.91 0.79 55 Healthy/Background 0.88 0.79 0.83 115  Lupus 0.78 0.80 0.79 40  accuracy 0.81 240  macro avg 0.82 0.80 0.80 240  weighted avg 0.82 0.81 0.81 240,Per-fold scores: ROC-AUC (weighted OvO): 0.944 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.944 +/- 0.025 (in 3 folds) au-PRC (weighted OvO): 0.932 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.936 +/- 0.028 (in 3 folds) Accuracy: 0.483 +/- 0.009 (in 3 folds) MCC: 0.039 +/- 0.068 (in 3 folds) Global scores: Accuracy: 0.483 MCC: 0.070 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 30  HIV 1.00 0.02 0.04 55 Healthy/Background 0.48 1.00 0.65 115  Lupus 0.00 0.00 0.00 40  accuracy 0.48 240  macro avg 0.37 0.25 0.17 240  weighted avg 0.46 0.48 0.32 240,Per-fold scores: ROC-AUC (weighted OvO): 0.944 +/- 0.024 (in 3 folds) ROC-AUC (macro OvO): 0.946 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.940 +/- 0.016 (in 3 folds) au-PRC (macro OvO): 0.944 +/- 0.015 (in 3 folds) Accuracy: 0.817 +/- 0.033 (in 3 folds) MCC: 0.741 +/- 0.051 (in 3 folds) Global scores: Accuracy: 0.817 MCC: 0.741 Global classification report:  precision recall f1-score support  Covid19 0.91 0.70 0.79 30  HIV 0.70 0.93 0.80 55 Healthy/Background 0.92 0.78 0.85 115  Lupus 0.74 0.85 0.79 40  accuracy 0.82 240  macro avg 0.82 0.81 0.81 240  weighted avg 0.84 0.82 0.82 240,Per-fold scores: ROC-AUC (weighted OvO): 0.936 +/- 0.031 (in 3 folds) ROC-AUC (macro OvO): 0.938 +/- 0.029 (in 3 folds) au-PRC (weighted OvO): 0.922 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.926 +/- 0.028 (in 
3 folds) Accuracy: 0.758 +/- 0.065 (in 3 folds) MCC: 0.634 +/- 0.118 (in 3 folds) Global scores: Accuracy: 0.758 MCC: 0.636 Global classification report:  precision recall f1-score support  Covid19 0.80 0.40 0.53 30  HIV 0.71 0.87 0.78 55 Healthy/Background 0.76 0.83 0.80 115  Lupus 0.84 0.65 0.73 40  accuracy 0.76 240  macro avg 0.78 0.69 0.71 240  weighted avg 0.77 0.76 0.75 240
,,,
,,,
,,,


rf_multiclass,xgboost,dummy_stratified,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.925 +/- 0.029 (in 3 folds) ROC-AUC (macro OvO): 0.928 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.910 +/- 0.030 (in 3 folds) au-PRC (macro OvO): 0.918 +/- 0.025 (in 3 folds) Accuracy: 0.771 +/- 0.052 (in 3 folds) MCC: 0.655 +/- 0.085 (in 3 folds) Global scores: Accuracy: 0.771 MCC: 0.656 Global classification report:  precision recall f1-score support  Covid19 0.88 0.47 0.61 30  HIV 0.71 0.82 0.76 55 Healthy/Background 0.78 0.83 0.80 115  Lupus 0.79 0.78 0.78 40  accuracy 0.77 240  macro avg 0.79 0.72 0.74 240  weighted avg 0.78 0.77 0.77 240,Per-fold scores: ROC-AUC (weighted OvO): 0.919 +/- 0.013 (in 3 folds) ROC-AUC (macro OvO): 0.921 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.912 +/- 0.013 (in 3 folds) au-PRC (macro OvO): 0.917 +/- 0.012 (in 3 folds) Accuracy: 0.762 +/- 0.048 (in 3 folds) MCC: 0.649 +/- 0.079 (in 3 folds) Global scores: Accuracy: 0.762 MCC: 0.649 Global classification report:  precision recall f1-score support  Covid19 0.82 0.60 0.69 30  HIV 0.70 0.80 0.75 55 Healthy/Background 0.81 0.80 0.81 115  Lupus 0.69 0.72 0.71 40  accuracy 0.76 240  macro avg 0.76 0.73 0.74 240  weighted avg 0.77 0.76 0.76 240,Per-fold scores: ROC-AUC (weighted OvO): 0.531 +/- 0.035 (in 3 folds) ROC-AUC (macro OvO): 0.523 +/- 0.032 (in 3 folds) au-PRC (weighted OvO): 0.529 +/- 0.023 (in 3 folds) au-PRC (macro OvO): 0.526 +/- 0.020 (in 3 folds) Accuracy: 0.405 +/- 0.062 (in 3 folds) MCC: 0.088 +/- 0.087 (in 3 folds) Global scores: Accuracy: 0.404 MCC: 0.086 Global classification report:  precision recall f1-score support  Covid19 0.06 0.03 0.04 30  HIV 0.39 0.45 0.42 55 Healthy/Background 0.53 0.60 0.57 115  Lupus 0.07 0.05 0.06 40  accuracy 0.40 240  macro avg 0.26 0.28 0.27 240  weighted avg 0.36 0.40 0.38 240,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 (in 
3 folds) Accuracy: 0.479 +/- 0.016 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.479 MCC: 0.000 Global classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 30  HIV 0.00 0.00 0.00 55 Healthy/Background 0.48 1.00 0.65 115  Lupus 0.00 0.00 0.00 40  accuracy 0.48 240  macro avg 0.12 0.25 0.16 240  weighted avg 0.23 0.48 0.31 240
,,,
,,,
,,,


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.947 +/- 0.013 (in 3 folds),0.947 +/- 0.017 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.942 +/- 0.023 (in 3 folds),0.796 +/- 0.038 (in 3 folds),0.707 +/- 0.051 (in 3 folds),0.796,0.706,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.947 +/- 0.013 (in 3 folds),0.947 +/- 0.017 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.942 +/- 0.023 (in 3 folds),0.774 +/- 0.046 (in 3 folds),0.687 +/- 0.070 (in 3 folds),0.774,0.683,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
lasso_multiclass,0.947 +/- 0.012 (in 3 folds),0.949 +/- 0.015 (in 3 folds),0.938 +/- 0.021 (in 3 folds),0.942 +/- 0.022 (in 3 folds),0.796 +/- 0.034 (in 3 folds),0.714 +/- 0.050 (in 3 folds),0.796,0.713,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.947 +/- 0.012 (in 3 folds),0.949 +/- 0.015 (in 3 folds),0.938 +/- 0.021 (in 3 folds),0.942 +/- 0.022 (in 3 folds),0.785 +/- 0.026 (in 3 folds),0.705 +/- 0.021 (in 3 folds),0.785,0.702,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
rf_multiclass,0.942 +/- 0.006 (in 3 folds),0.943 +/- 0.006 (in 3 folds),0.934 +/- 0.007 (in 3 folds),0.937 +/- 0.007 (in 3 folds),0.796 +/- 0.032 (in 3 folds),0.699 +/- 0.045 (in 3 folds),0.796,0.697,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.942 +/- 0.006 (in 3 folds),0.943 +/- 0.006 (in 3 folds),0.934 +/- 0.007 (in 3 folds),0.937 +/- 0.007 (in 3 folds),0.754 +/- 0.021 (in 3 folds),0.662 +/- 0.029 (in 3 folds),0.754,0.659,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
ridge_cv,0.939 +/- 0.009 (in 3 folds),0.939 +/- 0.010 (in 3 folds),0.928 +/- 0.017 (in 3 folds),0.932 +/- 0.018 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,disease.separate_past_exposures,358.0,0.0,358.0,0.0,True
ridge_cv.decision_thresholds_tuned,0.939 +/- 0.009 (in 3 folds),0.939 +/- 0.010 (in 3 folds),0.928 +/- 0.017 (in 3 folds),0.932 +/- 0.018 (in 3 folds),0.710 +/- 0.052 (in 3 folds),0.592 +/- 0.096 (in 3 folds),0.709,0.584,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
xgboost,0.927 +/- 0.010 (in 3 folds),0.925 +/- 0.011 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.765 +/- 0.025 (in 3 folds),0.654 +/- 0.042 (in 3 folds),0.765,0.654,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False
xgboost.decision_thresholds_tuned,0.927 +/- 0.010 (in 3 folds),0.925 +/- 0.011 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.774 +/- 0.047 (in 3 folds),0.679 +/- 0.074 (in 3 folds),0.774,0.679,disease.separate_past_exposures,358.0,0.0,358.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.947 +/- 0.013 (in 3 folds),0.947 +/- 0.017 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.942 +/- 0.023 (in 3 folds),0.796 +/- 0.038 (in 3 folds),0.707 +/- 0.051 (in 3 folds),0.796,0.706,disease.separate_past_exposures,358,0,358,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.947 +/- 0.013 (in 3 folds),0.947 +/- 0.017 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.942 +/- 0.023 (in 3 folds),0.774 +/- 0.046 (in 3 folds),0.687 +/- 0.070 (in 3 folds),0.774,0.683,disease.separate_past_exposures,358,0,358,0.0,False
lasso_multiclass,0.947 +/- 0.012 (in 3 folds),0.949 +/- 0.015 (in 3 folds),0.938 +/- 0.021 (in 3 folds),0.942 +/- 0.022 (in 3 folds),0.796 +/- 0.034 (in 3 folds),0.714 +/- 0.050 (in 3 folds),0.796,0.713,disease.separate_past_exposures,358,0,358,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.947 +/- 0.012 (in 3 folds),0.949 +/- 0.015 (in 3 folds),0.938 +/- 0.021 (in 3 folds),0.942 +/- 0.022 (in 3 folds),0.785 +/- 0.026 (in 3 folds),0.705 +/- 0.021 (in 3 folds),0.785,0.702,disease.separate_past_exposures,358,0,358,0.0,False
rf_multiclass,0.942 +/- 0.006 (in 3 folds),0.943 +/- 0.006 (in 3 folds),0.934 +/- 0.007 (in 3 folds),0.937 +/- 0.007 (in 3 folds),0.796 +/- 0.032 (in 3 folds),0.699 +/- 0.045 (in 3 folds),0.796,0.697,disease.separate_past_exposures,358,0,358,0.0,False
rf_multiclass.decision_thresholds_tuned,0.942 +/- 0.006 (in 3 folds),0.943 +/- 0.006 (in 3 folds),0.934 +/- 0.007 (in 3 folds),0.937 +/- 0.007 (in 3 folds),0.754 +/- 0.021 (in 3 folds),0.662 +/- 0.029 (in 3 folds),0.754,0.659,disease.separate_past_exposures,358,0,358,0.0,False
ridge_cv,0.939 +/- 0.009 (in 3 folds),0.939 +/- 0.010 (in 3 folds),0.928 +/- 0.017 (in 3 folds),0.932 +/- 0.018 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,disease.separate_past_exposures,358,0,358,0.0,True
ridge_cv.decision_thresholds_tuned,0.939 +/- 0.009 (in 3 folds),0.939 +/- 0.010 (in 3 folds),0.928 +/- 0.017 (in 3 folds),0.932 +/- 0.018 (in 3 folds),0.710 +/- 0.052 (in 3 folds),0.592 +/- 0.096 (in 3 folds),0.709,0.584,disease.separate_past_exposures,358,0,358,0.0,False
xgboost,0.927 +/- 0.010 (in 3 folds),0.925 +/- 0.011 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.765 +/- 0.025 (in 3 folds),0.654 +/- 0.042 (in 3 folds),0.765,0.654,disease.separate_past_exposures,358,0,358,0.0,False
xgboost.decision_thresholds_tuned,0.927 +/- 0.010 (in 3 folds),0.925 +/- 0.011 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.774 +/- 0.047 (in 3 folds),0.679 +/- 0.074 (in 3 folds),0.774,0.679,disease.separate_past_exposures,358,0,358,0.0,False


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.947 +/- 0.013 (in 3 folds) ROC-AUC (macro OvO): 0.947 +/- 0.017 (in 3 folds) au-PRC (weighted OvO): 0.939 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.942 +/- 0.023 (in 3 folds) Accuracy: 0.796 +/- 0.038 (in 3 folds) MCC: 0.707 +/- 0.051 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.796 MCC: 0.706 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.78 0.72 0.75 43  HIV 0.74 0.89 0.81 87 Healthy/Background 0.85 0.79 0.82 165  Lupus 0.77 0.75 0.76 63  accuracy 0.80 358  macro avg 0.78 0.78 0.78 358  weighted avg 0.80 0.80 0.80 358,Per-fold scores: ROC-AUC (weighted OvO): 0.947 +/- 0.013 (in 3 folds) ROC-AUC (macro OvO): 0.947 +/- 0.017 (in 3 folds) au-PRC (weighted OvO): 0.939 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.942 +/- 0.023 (in 3 folds) Accuracy: 0.774 +/- 0.046 (in 3 folds) MCC: 0.687 +/- 0.070 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.774 MCC: 0.683 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.65 0.65 0.65 43  HIV 0.73 0.86 0.79 87 Healthy/Background 0.89 0.74 0.81 165  Lupus 0.69 0.83 0.75 63  accuracy 0.77 358  macro avg 0.74 0.77 0.75 358  weighted avg 0.79 0.77 0.78 358,Per-fold scores: ROC-AUC (weighted OvO): 0.947 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.949 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.938 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.942 +/- 0.022 (in 3 folds) Accuracy: 0.796 +/- 0.034 (in 3 folds) MCC: 0.714 +/- 0.050 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.796 MCC: 0.713 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.73 0.74 0.74 43  HIV 0.75 0.92 0.82 87 Healthy/Background 0.88 0.75 0.81 165  Lupus 0.74 0.79 0.76 63  accuracy 0.80 358  macro avg 0.77 0.80 0.78 358  weighted avg 0.81 0.80 0.80 358,Per-fold scores: ROC-AUC (weighted OvO): 0.947 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.949 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.938 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.942 +/- 0.022 (in 3 folds) Accuracy: 0.785 +/- 0.026 (in 3 folds) MCC: 0.705 +/- 0.021 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.785 MCC: 0.702 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.68 0.74 0.71 43  HIV 0.74 0.92 0.82 87 Healthy/Background 0.89 0.72 0.79 165  Lupus 0.72 0.81 0.76 63  accuracy 0.78 358  macro avg 0.76 0.80 0.77 358  weighted avg 0.80 0.78 0.78 358
,,,
,,,


rf_multiclass,rf_multiclass.decision_thresholds_tuned,ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.942 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.943 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.934 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.937 +/- 0.007 (in 3 folds) Accuracy: 0.796 +/- 0.032 (in 3 folds) MCC: 0.699 +/- 0.045 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.796 MCC: 0.697 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.96 0.51 0.67 43  HIV 0.78 0.85 0.81 87 Healthy/Background 0.77 0.86 0.81 165  Lupus 0.84 0.75 0.79 63  accuracy 0.80 358  macro avg 0.84 0.74 0.77 358  weighted avg 0.81 0.80 0.79 358,Per-fold scores: ROC-AUC (weighted OvO): 0.942 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.943 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.934 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.937 +/- 0.007 (in 3 folds) Accuracy: 0.754 +/- 0.021 (in 3 folds) MCC: 0.662 +/- 0.029 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.754 MCC: 0.659 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.72 0.67 0.70 43  HIV 0.68 0.94 0.79 87 Healthy/Background 0.87 0.69 0.77 165  Lupus 0.67 0.71 0.69 63  accuracy 0.75 358  macro avg 0.74 0.76 0.74 358  weighted avg 0.77 0.75 0.75 358,Per-fold scores: ROC-AUC (weighted OvO): 0.939 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.939 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.928 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.932 +/- 0.018 (in 3 folds) Accuracy: 0.461 +/- 0.034 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.461 MCC: 0.000 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.00 0.00 0.00 43  HIV 0.00 0.00 0.00 87 Healthy/Background 0.46 1.00 0.63 165  Lupus 0.00 0.00 0.00 63  accuracy 0.46 358  macro avg 0.12 0.25 0.16 358  weighted avg 0.21 0.46 0.29 358,Per-fold scores: ROC-AUC (weighted OvO): 0.939 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.939 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.928 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.932 +/- 0.018 (in 3 folds) Accuracy: 0.710 +/- 0.052 (in 3 folds) MCC: 0.592 +/- 0.096 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.709 MCC: 0.584 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.60 0.56 0.58 43  HIV 0.61 0.86 0.71 87 Healthy/Background 0.83 0.75 0.79 165  Lupus 0.69 0.49 0.57 63  accuracy 0.71 358  macro avg 0.68 0.67 0.66 358  weighted avg 0.72 0.71 0.71 358
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned,elasticnet_cv,elasticnet_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.927 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.925 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.923 +/- 0.018 (in 3 folds) Accuracy: 0.765 +/- 0.025 (in 3 folds) MCC: 0.654 +/- 0.042 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.765 MCC: 0.654 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.68 0.49 0.57 43  HIV 0.77 0.86 0.82 87 Healthy/Background 0.81 0.83 0.82 165  Lupus 0.68 0.65 0.67 63  accuracy 0.77 358  macro avg 0.73 0.71 0.72 358  weighted avg 0.76 0.77 0.76 358,Per-fold scores: ROC-AUC (weighted OvO): 0.927 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.925 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.923 +/- 0.018 (in 3 folds) Accuracy: 0.774 +/- 0.047 (in 3 folds) MCC: 0.679 +/- 0.074 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.774 MCC: 0.679 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.76 0.58 0.66 43  HIV 0.73 0.86 0.79 87 Healthy/Background 0.88 0.76 0.82 165  Lupus 0.65 0.81 0.72 63  accuracy 0.77 358  macro avg 0.75 0.75 0.75 358  weighted avg 0.79 0.77 0.77 358,Per-fold scores: ROC-AUC (weighted OvO): 0.923 +/- 0.007 (in 3 folds) ROC-AUC (macro OvO): 0.922 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.908 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.911 +/- 0.015 (in 3 folds) Accuracy: 0.759 +/- 0.078 (in 3 folds) MCC: 0.645 +/- 0.105 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.760 MCC: 0.644 Global evaluation column name: disease.separate_past_exposures Global classification report 
using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.85 0.40 0.54 43  HIV 0.71 0.89 0.79 87 Healthy/Background 0.76 0.82 0.79 165  Lupus 0.83 0.68 0.75 63  accuracy 0.76 358  macro avg 0.79 0.70 0.72 358  weighted avg 0.77 0.76 0.75 358,Per-fold scores: ROC-AUC (weighted OvO): 0.923 +/- 0.007 (in 3 folds) ROC-AUC (macro OvO): 0.922 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.908 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.911 +/- 0.015 (in 3 folds) Accuracy: 0.771 +/- 0.019 (in 3 folds) MCC: 0.693 +/- 0.025 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.771 MCC: 0.691 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.62 0.74 0.67 43  HIV 0.72 0.95 0.82 87 Healthy/Background 0.95 0.69 0.80 165  Lupus 0.66 0.75 0.70 63  accuracy 0.77 358  macro avg 0.74 0.78 0.75 358  weighted avg 0.80 0.77 0.77 358
,,,
,,,


---

# GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.726 +/- 0.021 (in 3 folds),0.745 +/- 0.033 (in 3 folds),0.761 +/- 0.004 (in 3 folds),0.776 +/- 0.010 (in 3 folds),0.618 +/- 0.169 (in 3 folds),0.375 +/- 0.326 (in 3 folds),0.617,0.415,115.0,0.0,115.0,0.0,True
ridge_cv,0.723 +/- 0.009 (in 3 folds),0.750 +/- 0.018 (in 3 folds),0.746 +/- 0.021 (in 3 folds),0.762 +/- 0.032 (in 3 folds),0.504 +/- 0.098 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.504,0.0,115.0,0.0,115.0,0.0,True
elasticnet_cv,0.721 +/- 0.017 (in 3 folds),0.738 +/- 0.018 (in 3 folds),0.759 +/- 0.007 (in 3 folds),0.775 +/- 0.006 (in 3 folds),0.567 +/- 0.114 (in 3 folds),0.178 +/- 0.308 (in 3 folds),0.565,0.296,115.0,0.0,115.0,0.0,True
lasso_multiclass,0.689 +/- 0.033 (in 3 folds),0.713 +/- 0.045 (in 3 folds),0.731 +/- 0.022 (in 3 folds),0.750 +/- 0.039 (in 3 folds),0.592 +/- 0.114 (in 3 folds),0.370 +/- 0.119 (in 3 folds),0.591,0.362,115.0,0.0,115.0,0.0,False
linearsvm_ovr,0.683 +/- 0.019 (in 3 folds),0.704 +/- 0.028 (in 3 folds),0.726 +/- 0.019 (in 3 folds),0.744 +/- 0.041 (in 3 folds),0.584 +/- 0.110 (in 3 folds),0.326 +/- 0.170 (in 3 folds),0.583,0.32,115.0,0.0,115.0,0.0,False
rf_multiclass,0.656 +/- 0.043 (in 3 folds),0.655 +/- 0.050 (in 3 folds),0.691 +/- 0.024 (in 3 folds),0.679 +/- 0.029 (in 3 folds),0.687 +/- 0.052 (in 3 folds),0.532 +/- 0.023 (in 3 folds),0.687,0.532,115.0,0.0,115.0,0.0,True
xgboost,0.610 +/- 0.080 (in 3 folds),0.619 +/- 0.088 (in 3 folds),0.695 +/- 0.038 (in 3 folds),0.695 +/- 0.043 (in 3 folds),0.653 +/- 0.079 (in 3 folds),0.431 +/- 0.094 (in 3 folds),0.652,0.431,115.0,0.0,115.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.504 +/- 0.098 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.504,0.0,115.0,0.0,115.0,0.0,True
dummy_stratified,0.482 +/- 0.031 (in 3 folds),0.481 +/- 0.030 (in 3 folds),0.502 +/- 0.008 (in 3 folds),0.501 +/- 0.008 (in 3 folds),0.401 +/- 0.082 (in 3 folds),-0.034 +/- 0.088 (in 3 folds),0.4,-0.035,115.0,0.0,115.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.726 +/- 0.021 (in 3 folds),0.745 +/- 0.033 (in 3 folds),0.761 +/- 0.004 (in 3 folds),0.776 +/- 0.010 (in 3 folds),0.618 +/- 0.169 (in 3 folds),0.375 +/- 0.326 (in 3 folds),0.617,0.415,115,0,115,0.0,True
ridge_cv,0.723 +/- 0.009 (in 3 folds),0.750 +/- 0.018 (in 3 folds),0.746 +/- 0.021 (in 3 folds),0.762 +/- 0.032 (in 3 folds),0.504 +/- 0.098 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.504,0.0,115,0,115,0.0,True
elasticnet_cv,0.721 +/- 0.017 (in 3 folds),0.738 +/- 0.018 (in 3 folds),0.759 +/- 0.007 (in 3 folds),0.775 +/- 0.006 (in 3 folds),0.567 +/- 0.114 (in 3 folds),0.178 +/- 0.308 (in 3 folds),0.565,0.296,115,0,115,0.0,True
lasso_multiclass,0.689 +/- 0.033 (in 3 folds),0.713 +/- 0.045 (in 3 folds),0.731 +/- 0.022 (in 3 folds),0.750 +/- 0.039 (in 3 folds),0.592 +/- 0.114 (in 3 folds),0.370 +/- 0.119 (in 3 folds),0.591,0.362,115,0,115,0.0,False
linearsvm_ovr,0.683 +/- 0.019 (in 3 folds),0.704 +/- 0.028 (in 3 folds),0.726 +/- 0.019 (in 3 folds),0.744 +/- 0.041 (in 3 folds),0.584 +/- 0.110 (in 3 folds),0.326 +/- 0.170 (in 3 folds),0.583,0.32,115,0,115,0.0,False
rf_multiclass,0.656 +/- 0.043 (in 3 folds),0.655 +/- 0.050 (in 3 folds),0.691 +/- 0.024 (in 3 folds),0.679 +/- 0.029 (in 3 folds),0.687 +/- 0.052 (in 3 folds),0.532 +/- 0.023 (in 3 folds),0.687,0.532,115,0,115,0.0,True
xgboost,0.610 +/- 0.080 (in 3 folds),0.619 +/- 0.088 (in 3 folds),0.695 +/- 0.038 (in 3 folds),0.695 +/- 0.043 (in 3 folds),0.653 +/- 0.079 (in 3 folds),0.431 +/- 0.094 (in 3 folds),0.652,0.431,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.504 +/- 0.098 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.504,0.0,115,0,115,0.0,True
dummy_stratified,0.482 +/- 0.031 (in 3 folds),0.481 +/- 0.030 (in 3 folds),0.502 +/- 0.008 (in 3 folds),0.501 +/- 0.008 (in 3 folds),0.401 +/- 0.082 (in 3 folds),-0.034 +/- 0.088 (in 3 folds),0.4,-0.035,115,0,115,0.0,True


lasso_cv,ridge_cv,elasticnet_cv,lasso_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.726 +/- 0.021 (in 3 folds) ROC-AUC (macro OvO): 0.745 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.761 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.776 +/- 0.010 (in 3 folds) Accuracy: 0.618 +/- 0.169 (in 3 folds) MCC: 0.375 +/- 0.326 (in 3 folds) Global scores: Accuracy: 0.617 MCC: 0.415 Global classification report:  precision recall f1-score support  African 1.00 0.59 0.74 22  Asian 0.00 0.00 0.00 30  Caucasian 0.57 1.00 0.72 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.62 115  macro avg 0.39 0.40 0.37 115  weighted avg 0.48 0.62 0.51 115,Per-fold scores: ROC-AUC (weighted OvO): 0.723 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.750 +/- 0.018 (in 3 folds) au-PRC (weighted OvO): 0.746 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.762 +/- 0.032 (in 3 folds) Accuracy: 0.504 +/- 0.098 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.504 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 22  Asian 0.00 0.00 0.00 30  Caucasian 0.50 1.00 0.67 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.50 115  macro avg 0.13 0.25 0.17 115  weighted avg 0.25 0.50 0.34 115,Per-fold scores: ROC-AUC (weighted OvO): 0.721 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.738 +/- 0.018 (in 3 folds) au-PRC (weighted OvO): 0.759 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.775 +/- 0.006 (in 3 folds) Accuracy: 0.567 +/- 0.114 (in 3 folds) MCC: 0.178 +/- 0.308 (in 3 folds) Global scores: Accuracy: 0.565 MCC: 0.296 Global classification report:  precision recall f1-score support  African 1.00 0.32 0.48 22  Asian 0.00 0.00 0.00 30  Caucasian 0.54 1.00 0.70 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.57 115  macro avg 0.38 0.33 0.30 115  weighted avg 0.46 0.57 0.44 115,Per-fold scores: ROC-AUC (weighted OvO): 0.689 +/- 0.033 (in 3 folds) ROC-AUC (macro OvO): 0.713 +/- 0.045 (in 3 folds) au-PRC (weighted OvO): 0.731 +/- 0.022 (in 3 folds) au-PRC (macro OvO): 0.750 +/- 0.039 
(in 3 folds) Accuracy: 0.592 +/- 0.114 (in 3 folds) MCC: 0.370 +/- 0.119 (in 3 folds) Global scores: Accuracy: 0.591 MCC: 0.362 Global classification report:  precision recall f1-score support  African 1.00 0.95 0.98 22  Asian 0.39 0.37 0.38 30  Caucasian 0.61 0.62 0.62 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.59 115  macro avg 0.50 0.49 0.49 115  weighted avg 0.60 0.59 0.60 115
,,,
,,,
,,,


linearsvm_ovr,rf_multiclass,xgboost,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.683 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.704 +/- 0.028 (in 3 folds) au-PRC (weighted OvO): 0.726 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.744 +/- 0.041 (in 3 folds) Accuracy: 0.584 +/- 0.110 (in 3 folds) MCC: 0.326 +/- 0.170 (in 3 folds) Global scores: Accuracy: 0.583 MCC: 0.320 Global classification report:  precision recall f1-score support  African 0.95 0.95 0.95 22  Asian 0.27 0.20 0.23 30  Caucasian 0.58 0.69 0.63 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.58 115  macro avg 0.45 0.46 0.45 115  weighted avg 0.55 0.58 0.56 115,Per-fold scores: ROC-AUC (weighted OvO): 0.656 +/- 0.043 (in 3 folds) ROC-AUC (macro OvO): 0.655 +/- 0.050 (in 3 folds) au-PRC (weighted OvO): 0.691 +/- 0.024 (in 3 folds) au-PRC (macro OvO): 0.679 +/- 0.029 (in 3 folds) Accuracy: 0.687 +/- 0.052 (in 3 folds) MCC: 0.532 +/- 0.023 (in 3 folds) Global scores: Accuracy: 0.687 MCC: 0.532 Global classification report:  precision recall f1-score support  African 0.95 0.95 0.95 22  Asian 0.50 0.03 0.06 30  Caucasian 0.63 0.98 0.77 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.69 115  macro avg 0.52 0.49 0.45 115  weighted avg 0.63 0.69 0.58 115,Per-fold scores: ROC-AUC (weighted OvO): 0.610 +/- 0.080 (in 3 folds) ROC-AUC (macro OvO): 0.619 +/- 0.088 (in 3 folds) au-PRC (weighted OvO): 0.695 +/- 0.038 (in 3 folds) au-PRC (macro OvO): 0.695 +/- 0.043 (in 3 folds) Accuracy: 0.653 +/- 0.079 (in 3 folds) MCC: 0.431 +/- 0.094 (in 3 folds) Global scores: Accuracy: 0.652 MCC: 0.431 Global classification report:  precision recall f1-score support  African 0.95 0.91 0.93 22  Asian 0.36 0.17 0.23 30  Caucasian 0.63 0.86 0.73 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.65 115  macro avg 0.49 0.48 0.47 115  weighted avg 0.59 0.65 0.61 115,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 
(in 3 folds) Accuracy: 0.504 +/- 0.098 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.504 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 22  Asian 0.00 0.00 0.00 30  Caucasian 0.50 1.00 0.67 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.50 115  macro avg 0.13 0.25 0.17 115  weighted avg 0.25 0.50 0.34 115
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.482 +/- 0.031 (in 3 folds) ROC-AUC (macro OvO): 0.481 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.502 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.501 +/- 0.008 (in 3 folds) Accuracy: 0.401 +/- 0.082 (in 3 folds) MCC: -0.034 +/- 0.088 (in 3 folds) Global scores: Accuracy: 0.400 MCC: -0.035 Global classification report:  precision recall f1-score support  African 0.09 0.05 0.06 22  Asian 0.15 0.13 0.14 30  Caucasian 0.53 0.71 0.60 58 Hispanic/Latino 0.00 0.00 0.00 5  accuracy 0.40 115  macro avg 0.19 0.22 0.20 115  weighted avg 0.32 0.40 0.35 115


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.751 +/- 0.030 (in 3 folds),0.766 +/- 0.021 (in 3 folds),0.747 +/- 0.028 (in 3 folds),0.764 +/- 0.028 (in 3 folds),0.584 +/- 0.083 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.588,0.0,165.0,0.0,165.0,0.0,True
ridge_cv.decision_thresholds_tuned,0.751 +/- 0.030 (in 3 folds),0.766 +/- 0.021 (in 3 folds),0.747 +/- 0.028 (in 3 folds),0.764 +/- 0.028 (in 3 folds),0.504 +/- 0.300 (in 3 folds),0.191 +/- 0.332 (in 3 folds),0.509,0.145,165.0,0.0,165.0,0.0,True
linearsvm_ovr,0.734 +/- 0.071 (in 3 folds),0.747 +/- 0.077 (in 3 folds),0.761 +/- 0.060 (in 3 folds),0.780 +/- 0.071 (in 3 folds),0.705 +/- 0.050 (in 3 folds),0.469 +/- 0.123 (in 3 folds),0.703,0.465,165.0,0.0,165.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.734 +/- 0.071 (in 3 folds),0.747 +/- 0.077 (in 3 folds),0.761 +/- 0.060 (in 3 folds),0.780 +/- 0.071 (in 3 folds),0.732 +/- 0.060 (in 3 folds),0.519 +/- 0.075 (in 3 folds),0.733,0.52,165.0,0.0,165.0,0.0,False
lasso_multiclass,0.733 +/- 0.058 (in 3 folds),0.762 +/- 0.065 (in 3 folds),0.753 +/- 0.046 (in 3 folds),0.786 +/- 0.052 (in 3 folds),0.592 +/- 0.067 (in 3 folds),0.341 +/- 0.131 (in 3 folds),0.588,0.33,165.0,0.0,165.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.733 +/- 0.058 (in 3 folds),0.762 +/- 0.065 (in 3 folds),0.753 +/- 0.046 (in 3 folds),0.786 +/- 0.052 (in 3 folds),0.738 +/- 0.052 (in 3 folds),0.544 +/- 0.037 (in 3 folds),0.739,0.544,165.0,0.0,165.0,0.0,True
rf_multiclass,0.730 +/- 0.030 (in 3 folds),0.747 +/- 0.033 (in 3 folds),0.731 +/- 0.027 (in 3 folds),0.746 +/- 0.063 (in 3 folds),0.725 +/- 0.051 (in 3 folds),0.499 +/- 0.031 (in 3 folds),0.727,0.501,165.0,0.0,165.0,0.0,True
rf_multiclass.decision_thresholds_tuned,0.730 +/- 0.030 (in 3 folds),0.747 +/- 0.033 (in 3 folds),0.731 +/- 0.027 (in 3 folds),0.746 +/- 0.063 (in 3 folds),0.713 +/- 0.061 (in 3 folds),0.477 +/- 0.074 (in 3 folds),0.715,0.473,165.0,0.0,165.0,0.0,True
elasticnet_cv,0.695 +/- 0.011 (in 3 folds),0.714 +/- 0.030 (in 3 folds),0.721 +/- 0.027 (in 3 folds),0.736 +/- 0.045 (in 3 folds),0.631 +/- 0.097 (in 3 folds),0.168 +/- 0.290 (in 3 folds),0.636,0.294,165.0,0.0,165.0,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.695 +/- 0.011 (in 3 folds),0.714 +/- 0.030 (in 3 folds),0.721 +/- 0.027 (in 3 folds),0.736 +/- 0.045 (in 3 folds),0.701 +/- 0.043 (in 3 folds),0.452 +/- 0.044 (in 3 folds),0.703,0.455,165.0,0.0,165.0,0.0,True

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.751 +/- 0.030 (in 3 folds),0.766 +/- 0.021 (in 3 folds),0.747 +/- 0.028 (in 3 folds),0.764 +/- 0.028 (in 3 folds),0.584 +/- 0.083 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.588,0.0,165,0,165,0.0,True
ridge_cv.decision_thresholds_tuned,0.751 +/- 0.030 (in 3 folds),0.766 +/- 0.021 (in 3 folds),0.747 +/- 0.028 (in 3 folds),0.764 +/- 0.028 (in 3 folds),0.504 +/- 0.300 (in 3 folds),0.191 +/- 0.332 (in 3 folds),0.509,0.145,165,0,165,0.0,True
linearsvm_ovr,0.734 +/- 0.071 (in 3 folds),0.747 +/- 0.077 (in 3 folds),0.761 +/- 0.060 (in 3 folds),0.780 +/- 0.071 (in 3 folds),0.705 +/- 0.050 (in 3 folds),0.469 +/- 0.123 (in 3 folds),0.703,0.465,165,0,165,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.734 +/- 0.071 (in 3 folds),0.747 +/- 0.077 (in 3 folds),0.761 +/- 0.060 (in 3 folds),0.780 +/- 0.071 (in 3 folds),0.732 +/- 0.060 (in 3 folds),0.519 +/- 0.075 (in 3 folds),0.733,0.52,165,0,165,0.0,False
lasso_multiclass,0.733 +/- 0.058 (in 3 folds),0.762 +/- 0.065 (in 3 folds),0.753 +/- 0.046 (in 3 folds),0.786 +/- 0.052 (in 3 folds),0.592 +/- 0.067 (in 3 folds),0.341 +/- 0.131 (in 3 folds),0.588,0.33,165,0,165,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.733 +/- 0.058 (in 3 folds),0.762 +/- 0.065 (in 3 folds),0.753 +/- 0.046 (in 3 folds),0.786 +/- 0.052 (in 3 folds),0.738 +/- 0.052 (in 3 folds),0.544 +/- 0.037 (in 3 folds),0.739,0.544,165,0,165,0.0,True
rf_multiclass,0.730 +/- 0.030 (in 3 folds),0.747 +/- 0.033 (in 3 folds),0.731 +/- 0.027 (in 3 folds),0.746 +/- 0.063 (in 3 folds),0.725 +/- 0.051 (in 3 folds),0.499 +/- 0.031 (in 3 folds),0.727,0.501,165,0,165,0.0,True
rf_multiclass.decision_thresholds_tuned,0.730 +/- 0.030 (in 3 folds),0.747 +/- 0.033 (in 3 folds),0.731 +/- 0.027 (in 3 folds),0.746 +/- 0.063 (in 3 folds),0.713 +/- 0.061 (in 3 folds),0.477 +/- 0.074 (in 3 folds),0.715,0.473,165,0,165,0.0,True
elasticnet_cv,0.695 +/- 0.011 (in 3 folds),0.714 +/- 0.030 (in 3 folds),0.721 +/- 0.027 (in 3 folds),0.736 +/- 0.045 (in 3 folds),0.631 +/- 0.097 (in 3 folds),0.168 +/- 0.290 (in 3 folds),0.636,0.294,165,0,165,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.695 +/- 0.011 (in 3 folds),0.714 +/- 0.030 (in 3 folds),0.721 +/- 0.027 (in 3 folds),0.736 +/- 0.045 (in 3 folds),0.701 +/- 0.043 (in 3 folds),0.452 +/- 0.044 (in 3 folds),0.703,0.455,165,0,165,0.0,True


ridge_cv,ridge_cv.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.751 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.766 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.747 +/- 0.028 (in 3 folds) au-PRC (macro OvO): 0.764 +/- 0.028 (in 3 folds) Accuracy: 0.584 +/- 0.083 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.588 MCC: 0.000 Global classification report:  precision recall f1-score support  African 0.00 0.00 0.00 30  Asian 0.00 0.00 0.00 32  Caucasian 0.59 1.00 0.74 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.59 165  macro avg 0.15 0.25 0.19 165  weighted avg 0.35 0.59 0.44 165,Per-fold scores: ROC-AUC (weighted OvO): 0.751 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.766 +/- 0.021 (in 3 folds) au-PRC (weighted OvO): 0.747 +/- 0.028 (in 3 folds) au-PRC (macro OvO): 0.764 +/- 0.028 (in 3 folds) Accuracy: 0.504 +/- 0.300 (in 3 folds) MCC: 0.191 +/- 0.332 (in 3 folds) Global scores: Accuracy: 0.509 MCC: 0.145 Global classification report:  precision recall f1-score support  African 0.29 0.60 0.39 30  Asian 0.00 0.00 0.00 32  Caucasian 0.65 0.68 0.66 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.51 165  macro avg 0.23 0.32 0.26 165  weighted avg 0.43 0.51 0.46 165,Per-fold scores: ROC-AUC (weighted OvO): 0.734 +/- 0.071 (in 3 folds) ROC-AUC (macro OvO): 0.747 +/- 0.077 (in 3 folds) au-PRC (weighted OvO): 0.761 +/- 0.060 (in 3 folds) au-PRC (macro OvO): 0.780 +/- 0.071 (in 3 folds) Accuracy: 0.705 +/- 0.050 (in 3 folds) MCC: 0.469 +/- 0.123 (in 3 folds) Global scores: Accuracy: 0.703 MCC: 0.465 Global classification report:  precision recall f1-score support  African 0.87 0.87 0.87 30  Asian 0.42 0.34 0.38 32  Caucasian 0.73 0.81 0.77 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.70 165  macro avg 0.51 0.51 0.50 165  weighted avg 0.67 0.70 0.68 165,Per-fold scores: ROC-AUC (weighted OvO): 0.734 +/- 0.071 (in 3 folds) ROC-AUC (macro OvO): 0.747 +/- 0.077 (in 3 folds) au-PRC (weighted OvO): 0.761 +/- 0.060 (in 3 folds) au-PRC (macro OvO): 0.780 +/- 0.071 
(in 3 folds) Accuracy: 0.732 +/- 0.060 (in 3 folds) MCC: 0.519 +/- 0.075 (in 3 folds) Global scores: Accuracy: 0.733 MCC: 0.520 Global classification report:  precision recall f1-score support  African 0.96 0.87 0.91 30  Asian 0.33 0.03 0.06 32  Caucasian 0.71 0.97 0.82 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.73 165  macro avg 0.50 0.47 0.45 165  weighted avg 0.66 0.73 0.66 165
,,,
,,,


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.733 +/- 0.058 (in 3 folds) ROC-AUC (macro OvO): 0.762 +/- 0.065 (in 3 folds) au-PRC (weighted OvO): 0.753 +/- 0.046 (in 3 folds) au-PRC (macro OvO): 0.786 +/- 0.052 (in 3 folds) Accuracy: 0.592 +/- 0.067 (in 3 folds) MCC: 0.341 +/- 0.131 (in 3 folds) Global scores: Accuracy: 0.588 MCC: 0.330 Global classification report:  precision recall f1-score support  African 0.87 0.87 0.87 30  Asian 0.28 0.38 0.32 32  Caucasian 0.69 0.61 0.65 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.59 165  macro avg 0.46 0.46 0.46 165  weighted avg 0.62 0.59 0.60 165,Per-fold scores: ROC-AUC (weighted OvO): 0.733 +/- 0.058 (in 3 folds) ROC-AUC (macro OvO): 0.762 +/- 0.065 (in 3 folds) au-PRC (weighted OvO): 0.753 +/- 0.046 (in 3 folds) au-PRC (macro OvO): 0.786 +/- 0.052 (in 3 folds) Accuracy: 0.738 +/- 0.052 (in 3 folds) MCC: 0.544 +/- 0.037 (in 3 folds) Global scores: Accuracy: 0.739 MCC: 0.544 Global classification report:  precision recall f1-score support  African 0.96 0.87 0.91 30  Asian 0.00 0.00 0.00 32  Caucasian 0.70 0.99 0.82 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.74 165  macro avg 0.42 0.46 0.43 165  weighted avg 0.59 0.74 0.65 165,Per-fold scores: ROC-AUC (weighted OvO): 0.730 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.747 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.731 +/- 0.027 (in 3 folds) au-PRC (macro OvO): 0.746 +/- 0.063 (in 3 folds) Accuracy: 0.725 +/- 0.051 (in 3 folds) MCC: 0.499 +/- 0.031 (in 3 folds) Global scores: Accuracy: 0.727 MCC: 0.501 Global classification report:  precision recall f1-score support  African 0.96 0.83 0.89 30  Asian 0.33 0.06 0.11 32  Caucasian 0.70 0.96 0.81 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.73 165  macro avg 0.50 0.46 0.45 165  weighted avg 0.65 0.73 0.66 165,Per-fold scores: ROC-AUC (weighted OvO): 0.730 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.747 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.731 +/- 0.027 (in 3 folds) au-PRC (macro OvO): 0.746 +/- 0.063 
(in 3 folds) Accuracy: 0.713 +/- 0.061 (in 3 folds) MCC: 0.477 +/- 0.074 (in 3 folds) Global scores: Accuracy: 0.715 MCC: 0.473 Global classification report:  precision recall f1-score support  African 0.93 0.83 0.88 30  Asian 0.33 0.06 0.11 32  Caucasian 0.69 0.94 0.79 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.72 165  macro avg 0.49 0.46 0.44 165  weighted avg 0.64 0.72 0.65 165
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.695 +/- 0.011 (in 3 folds) ROC-AUC (macro OvO): 0.714 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.721 +/- 0.027 (in 3 folds) au-PRC (macro OvO): 0.736 +/- 0.045 (in 3 folds) Accuracy: 0.631 +/- 0.097 (in 3 folds) MCC: 0.168 +/- 0.290 (in 3 folds) Global scores: Accuracy: 0.636 MCC: 0.294 Global classification report:  precision recall f1-score support  African 1.00 0.27 0.42 30  Asian 0.00 0.00 0.00 32  Caucasian 0.62 1.00 0.76 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.64 165  macro avg 0.40 0.32 0.30 165  weighted avg 0.55 0.64 0.53 165,Per-fold scores: ROC-AUC (weighted OvO): 0.695 +/- 0.011 (in 3 folds) ROC-AUC (macro OvO): 0.714 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.721 +/- 0.027 (in 3 folds) au-PRC (macro OvO): 0.736 +/- 0.045 (in 3 folds) Accuracy: 0.701 +/- 0.043 (in 3 folds) MCC: 0.452 +/- 0.044 (in 3 folds) Global scores: Accuracy: 0.703 MCC: 0.455 Global classification report:  precision recall f1-score support  African 0.85 0.77 0.81 30  Asian 0.00 0.00 0.00 32  Caucasian 0.67 0.96 0.79 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.70 165  macro avg 0.38 0.43 0.40 165  weighted avg 0.55 0.70 0.61 165,Per-fold scores: ROC-AUC (weighted OvO): 0.691 +/- 0.078 (in 3 folds) ROC-AUC (macro OvO): 0.695 +/- 0.092 (in 3 folds) au-PRC (weighted OvO): 0.692 +/- 0.047 (in 3 folds) au-PRC (macro OvO): 0.698 +/- 0.075 (in 3 folds) Accuracy: 0.673 +/- 0.011 (in 3 folds) MCC: 0.386 +/- 0.081 (in 3 folds) Global scores: Accuracy: 0.673 MCC: 0.385 Global classification report:  precision recall f1-score support  African 0.88 0.73 0.80 30  Asian 0.29 0.16 0.20 32  Caucasian 0.69 0.87 0.77 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.67 165  macro avg 0.47 0.44 0.44 165  weighted avg 0.62 0.67 0.64 165,Per-fold scores: ROC-AUC (weighted OvO): 0.691 +/- 0.078 (in 3 folds) ROC-AUC (macro OvO): 0.695 +/- 0.092 (in 3 folds) au-PRC (weighted OvO): 0.692 +/- 0.047 (in 3 folds) au-PRC (macro OvO): 0.698 +/- 0.075 
(in 3 folds) Accuracy: 0.685 +/- 0.068 (in 3 folds) MCC: 0.412 +/- 0.145 (in 3 folds) Global scores: Accuracy: 0.685 MCC: 0.407 Global classification report:  precision recall f1-score support  African 0.88 0.73 0.80 30  Asian 0.25 0.03 0.06 32  Caucasian 0.67 0.93 0.78 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.68 165  macro avg 0.45 0.42 0.41 165  weighted avg 0.60 0.68 0.61 165
,,,
,,,


lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.691 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.710 +/- 0.038 (in 3 folds) au-PRC (weighted OvO): 0.723 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.735 +/- 0.048 (in 3 folds) Accuracy: 0.698 +/- 0.021 (in 3 folds) MCC: 0.353 +/- 0.307 (in 3 folds) Global scores: Accuracy: 0.697 MCC: 0.449 Global classification report:  precision recall f1-score support  African 0.95 0.63 0.76 30  Asian 0.00 0.00 0.00 32  Caucasian 0.66 0.99 0.79 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.70 165  macro avg 0.40 0.41 0.39 165  weighted avg 0.56 0.70 0.60 165,Per-fold scores: ROC-AUC (weighted OvO): 0.691 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.710 +/- 0.038 (in 3 folds) au-PRC (weighted OvO): 0.723 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.735 +/- 0.048 (in 3 folds) Accuracy: 0.721 +/- 0.025 (in 3 folds) MCC: 0.498 +/- 0.075 (in 3 folds) Global scores: Accuracy: 0.721 MCC: 0.502 Global classification report:  precision recall f1-score support  African 0.92 0.80 0.86 30  Asian 0.00 0.00 0.00 32  Caucasian 0.68 0.98 0.81 97 Hispanic/Latino 0.00 0.00 0.00 6  accuracy 0.72 165  macro avg 0.40 0.44 0.42 165  weighted avg 0.57 0.72 0.63 165
,
,


---

# GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.699 +/- 0.066 (in 3 folds),0.677 +/- 0.089 (in 3 folds),0.731 +/- 0.069 (in 3 folds),0.715 +/- 0.086 (in 3 folds),0.427 +/- 0.038 (in 3 folds),0.316 +/- 0.048 (in 3 folds),0.426,0.315,115.0,0.0,115.0,0.0,True
linearsvm_ovr,0.692 +/- 0.066 (in 3 folds),0.668 +/- 0.093 (in 3 folds),0.730 +/- 0.064 (in 3 folds),0.713 +/- 0.084 (in 3 folds),0.427 +/- 0.053 (in 3 folds),0.313 +/- 0.058 (in 3 folds),0.426,0.309,115.0,0.0,115.0,0.0,True
xgboost,0.689 +/- 0.053 (in 3 folds),0.675 +/- 0.057 (in 3 folds),0.722 +/- 0.057 (in 3 folds),0.709 +/- 0.063 (in 3 folds),0.384 +/- 0.067 (in 3 folds),0.272 +/- 0.075 (in 3 folds),0.383,0.258,115.0,0.0,115.0,0.0,True
rf_multiclass,0.687 +/- 0.118 (in 3 folds),0.668 +/- 0.141 (in 3 folds),0.731 +/- 0.097 (in 3 folds),0.717 +/- 0.112 (in 3 folds),0.453 +/- 0.065 (in 3 folds),0.362 +/- 0.076 (in 3 folds),0.452,0.34,115.0,0.0,115.0,0.0,True
ridge_cv,0.672 +/- 0.151 (in 3 folds),0.669 +/- 0.149 (in 3 folds),0.698 +/- 0.172 (in 3 folds),0.696 +/- 0.170 (in 3 folds),0.225 +/- 0.034 (in 3 folds),0.046 +/- 0.080 (in 3 folds),0.226,0.047,115.0,0.0,115.0,0.0,True
lasso_cv,0.667 +/- 0.020 (in 3 folds),0.654 +/- 0.028 (in 3 folds),0.722 +/- 0.019 (in 3 folds),0.713 +/- 0.028 (in 3 folds),0.340 +/- 0.062 (in 3 folds),0.257 +/- 0.077 (in 3 folds),0.339,0.205,115.0,0.0,115.0,0.0,True
elasticnet_cv,0.665 +/- 0.020 (in 3 folds),0.652 +/- 0.030 (in 3 folds),0.720 +/- 0.023 (in 3 folds),0.711 +/- 0.032 (in 3 folds),0.305 +/- 0.048 (in 3 folds),0.237 +/- 0.074 (in 3 folds),0.304,0.162,115.0,0.0,115.0,0.0,True
dummy_stratified,0.515 +/- 0.025 (in 3 folds),0.511 +/- 0.027 (in 3 folds),0.535 +/- 0.009 (in 3 folds),0.534 +/- 0.009 (in 3 folds),0.201 +/- 0.045 (in 3 folds),0.042 +/- 0.051 (in 3 folds),0.2,0.035,115.0,0.0,115.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.208 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.209,0.012,115.0,0.0,115.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.699 +/- 0.066 (in 3 folds),0.677 +/- 0.089 (in 3 folds),0.731 +/- 0.069 (in 3 folds),0.715 +/- 0.086 (in 3 folds),0.427 +/- 0.038 (in 3 folds),0.316 +/- 0.048 (in 3 folds),0.426,0.315,115,0,115,0.0,True
linearsvm_ovr,0.692 +/- 0.066 (in 3 folds),0.668 +/- 0.093 (in 3 folds),0.730 +/- 0.064 (in 3 folds),0.713 +/- 0.084 (in 3 folds),0.427 +/- 0.053 (in 3 folds),0.313 +/- 0.058 (in 3 folds),0.426,0.309,115,0,115,0.0,True
xgboost,0.689 +/- 0.053 (in 3 folds),0.675 +/- 0.057 (in 3 folds),0.722 +/- 0.057 (in 3 folds),0.709 +/- 0.063 (in 3 folds),0.384 +/- 0.067 (in 3 folds),0.272 +/- 0.075 (in 3 folds),0.383,0.258,115,0,115,0.0,True
rf_multiclass,0.687 +/- 0.118 (in 3 folds),0.668 +/- 0.141 (in 3 folds),0.731 +/- 0.097 (in 3 folds),0.717 +/- 0.112 (in 3 folds),0.453 +/- 0.065 (in 3 folds),0.362 +/- 0.076 (in 3 folds),0.452,0.34,115,0,115,0.0,True
ridge_cv,0.672 +/- 0.151 (in 3 folds),0.669 +/- 0.149 (in 3 folds),0.698 +/- 0.172 (in 3 folds),0.696 +/- 0.170 (in 3 folds),0.225 +/- 0.034 (in 3 folds),0.046 +/- 0.080 (in 3 folds),0.226,0.047,115,0,115,0.0,True
lasso_cv,0.667 +/- 0.020 (in 3 folds),0.654 +/- 0.028 (in 3 folds),0.722 +/- 0.019 (in 3 folds),0.713 +/- 0.028 (in 3 folds),0.340 +/- 0.062 (in 3 folds),0.257 +/- 0.077 (in 3 folds),0.339,0.205,115,0,115,0.0,True
elasticnet_cv,0.665 +/- 0.020 (in 3 folds),0.652 +/- 0.030 (in 3 folds),0.720 +/- 0.023 (in 3 folds),0.711 +/- 0.032 (in 3 folds),0.305 +/- 0.048 (in 3 folds),0.237 +/- 0.074 (in 3 folds),0.304,0.162,115,0,115,0.0,True
dummy_stratified,0.515 +/- 0.025 (in 3 folds),0.511 +/- 0.027 (in 3 folds),0.535 +/- 0.009 (in 3 folds),0.534 +/- 0.009 (in 3 folds),0.201 +/- 0.045 (in 3 folds),0.042 +/- 0.051 (in 3 folds),0.2,0.035,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.208 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.209,0.012,115,0,115,0.0,True


lasso_multiclass,linearsvm_ovr,xgboost,rf_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.699 +/- 0.066 (in 3 folds) ROC-AUC (macro OvO): 0.677 +/- 0.089 (in 3 folds) au-PRC (weighted OvO): 0.731 +/- 0.069 (in 3 folds) au-PRC (macro OvO): 0.715 +/- 0.086 (in 3 folds) Accuracy: 0.427 +/- 0.038 (in 3 folds) MCC: 0.316 +/- 0.048 (in 3 folds) Global scores: Accuracy: 0.426 MCC: 0.315 Global classification report:  precision recall f1-score support  20-30 0.48 0.45 0.47 22  30-40 0.00 0.00 0.00 14  40-50 0.30 0.40 0.34 15  50-60 0.35 0.26 0.30 23  60-70 0.27 0.47 0.34 15  70-80 0.00 0.00 0.00 2  <20 0.91 0.83 0.87 24  accuracy 0.43 115  macro avg 0.33 0.35 0.33 115 weighted avg 0.43 0.43 0.42 115,Per-fold scores: ROC-AUC (weighted OvO): 0.692 +/- 0.066 (in 3 folds) ROC-AUC (macro OvO): 0.668 +/- 0.093 (in 3 folds) au-PRC (weighted OvO): 0.730 +/- 0.064 (in 3 folds) au-PRC (macro OvO): 0.713 +/- 0.084 (in 3 folds) Accuracy: 0.427 +/- 0.053 (in 3 folds) MCC: 0.313 +/- 0.058 (in 3 folds) Global scores: Accuracy: 0.426 MCC: 0.309 Global classification report:  precision recall f1-score support  20-30 0.46 0.50 0.48 22  30-40 0.00 0.00 0.00 14  40-50 0.31 0.33 0.32 15  50-60 0.39 0.30 0.34 23  60-70 0.29 0.40 0.33 15  70-80 0.00 0.00 0.00 2  <20 0.80 0.83 0.82 24  accuracy 0.43 115  macro avg 0.32 0.34 0.33 115 weighted avg 0.41 0.43 0.42 115,Per-fold scores: ROC-AUC (weighted OvO): 0.689 +/- 0.053 (in 3 folds) ROC-AUC (macro OvO): 0.675 +/- 0.057 (in 3 folds) au-PRC (weighted OvO): 0.722 +/- 0.057 (in 3 folds) au-PRC (macro OvO): 0.709 +/- 0.063 (in 3 folds) Accuracy: 0.384 +/- 0.067 (in 3 folds) MCC: 0.272 +/- 0.075 (in 3 folds) Global scores: Accuracy: 0.383 MCC: 0.258 Global classification report:  precision recall f1-score support  20-30 0.50 0.59 0.54 22  30-40 0.14 0.07 0.10 14  40-50 0.24 0.27 0.25 15  50-60 0.32 0.30 0.31 23  60-70 0.08 0.13 0.10 15  70-80 0.00 0.00 0.00 2  <20 0.89 0.71 0.79 24  accuracy 0.38 115  macro avg 0.31 0.30 0.30 115 weighted avg 0.40 0.38 0.39 115,Per-fold scores: ROC-AUC (weighted 
OvO): 0.687 +/- 0.118 (in 3 folds) ROC-AUC (macro OvO): 0.668 +/- 0.141 (in 3 folds) au-PRC (weighted OvO): 0.731 +/- 0.097 (in 3 folds) au-PRC (macro OvO): 0.717 +/- 0.112 (in 3 folds) Accuracy: 0.453 +/- 0.065 (in 3 folds) MCC: 0.362 +/- 0.076 (in 3 folds) Global scores: Accuracy: 0.452 MCC: 0.340 Global classification report:  precision recall f1-score support  20-30 0.54 0.64 0.58 22  30-40 0.00 0.00 0.00 14  40-50 0.21 0.27 0.24 15  50-60 0.42 0.43 0.43 23  60-70 0.20 0.27 0.23 15  70-80 0.00 0.00 0.00 2  <20 0.91 0.83 0.87 24  accuracy 0.45 115  macro avg 0.32 0.35 0.33 115 weighted avg 0.43 0.45 0.44 115
,,,
,,,
,,,


ridge_cv,lasso_cv,elasticnet_cv,dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.672 +/- 0.151 (in 3 folds) ROC-AUC (macro OvO): 0.669 +/- 0.149 (in 3 folds) au-PRC (weighted OvO): 0.698 +/- 0.172 (in 3 folds) au-PRC (macro OvO): 0.696 +/- 0.170 (in 3 folds) Accuracy: 0.225 +/- 0.034 (in 3 folds) MCC: 0.046 +/- 0.080 (in 3 folds) Global scores: Accuracy: 0.226 MCC: 0.047 Global classification report:  precision recall f1-score support  20-30 0.30 0.27 0.29 22  30-40 0.00 0.00 0.00 14  40-50 0.00 0.00 0.00 15  50-60 0.19 0.30 0.23 23  60-70 0.16 0.20 0.18 15  70-80 0.00 0.00 0.00 2  <20 0.26 0.42 0.32 24  accuracy 0.23 115  macro avg 0.13 0.17 0.14 115 weighted avg 0.17 0.23 0.19 115,Per-fold scores: ROC-AUC (weighted OvO): 0.667 +/- 0.020 (in 3 folds) ROC-AUC (macro OvO): 0.654 +/- 0.028 (in 3 folds) au-PRC (weighted OvO): 0.722 +/- 0.019 (in 3 folds) au-PRC (macro OvO): 0.713 +/- 0.028 (in 3 folds) Accuracy: 0.340 +/- 0.062 (in 3 folds) MCC: 0.257 +/- 0.077 (in 3 folds) Global scores: Accuracy: 0.339 MCC: 0.205 Global classification report:  precision recall f1-score support  20-30 0.56 0.41 0.47 22  30-40 0.00 0.00 0.00 14  40-50 0.10 0.13 0.11 15  50-60 0.26 0.26 0.26 23  60-70 0.19 0.33 0.24 15  70-80 0.00 0.00 0.00 2  <20 0.59 0.71 0.64 24  accuracy 0.34 115  macro avg 0.24 0.26 0.25 115 weighted avg 0.32 0.34 0.32 115,Per-fold scores: ROC-AUC (weighted OvO): 0.665 +/- 0.020 (in 3 folds) ROC-AUC (macro OvO): 0.652 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.720 +/- 0.023 (in 3 folds) au-PRC (macro OvO): 0.711 +/- 0.032 (in 3 folds) Accuracy: 0.305 +/- 0.048 (in 3 folds) MCC: 0.237 +/- 0.074 (in 3 folds) Global scores: Accuracy: 0.304 MCC: 0.162 Global classification report:  precision recall f1-score support  20-30 0.50 0.27 0.35 22  30-40 0.00 0.00 0.00 14  40-50 0.08 0.13 0.10 15  50-60 0.24 0.30 0.27 23  60-70 0.14 0.20 0.17 15  70-80 0.00 0.00 0.00 2  <20 0.61 0.71 0.65 24  accuracy 0.30 115  macro avg 0.22 0.23 0.22 115 weighted avg 0.30 0.30 0.29 115,Per-fold scores: ROC-AUC (weighted 
OvO): 0.515 +/- 0.025 (in 3 folds) ROC-AUC (macro OvO): 0.511 +/- 0.027 (in 3 folds) au-PRC (weighted OvO): 0.535 +/- 0.009 (in 3 folds) au-PRC (macro OvO): 0.534 +/- 0.009 (in 3 folds) Accuracy: 0.201 +/- 0.045 (in 3 folds) MCC: 0.042 +/- 0.051 (in 3 folds) Global scores: Accuracy: 0.200 MCC: 0.035 Global classification report:  precision recall f1-score support  20-30 0.29 0.18 0.22 22  30-40 0.00 0.00 0.00 14  40-50 0.33 0.20 0.25 15  50-60 0.28 0.39 0.33 23  60-70 0.05 0.07 0.06 15  70-80 0.00 0.00 0.00 2  <20 0.25 0.25 0.25 24  accuracy 0.20 115  macro avg 0.17 0.16 0.16 115 weighted avg 0.21 0.20 0.20 115
,,,
,,,
,,,


dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.500 +/- 0.000 (in 3 folds) Accuracy: 0.208 +/- 0.042 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.209 MCC: 0.012 Global classification report:  precision recall f1-score support  20-30 0.18 0.32 0.23 22  30-40 0.00 0.00 0.00 14  40-50 0.00 0.00 0.00 15  50-60 0.19 0.30 0.23 23  60-70 0.00 0.00 0.00 15  70-80 0.00 0.00 0.00 2  <20 0.26 0.42 0.32 24  accuracy 0.21 115  macro avg 0.09 0.15 0.11 115 weighted avg 0.13 0.21 0.16 115


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.746 +/- 0.042 (in 3 folds),0.731 +/- 0.045 (in 3 folds),0.771 +/- 0.044 (in 3 folds),0.758 +/- 0.051 (in 3 folds),0.439 +/- 0.063 (in 3 folds),0.338 +/- 0.068 (in 3 folds),0.436,0.326,165.0,0.0,165.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.746 +/- 0.042 (in 3 folds),0.731 +/- 0.045 (in 3 folds),0.771 +/- 0.044 (in 3 folds),0.758 +/- 0.051 (in 3 folds),0.437 +/- 0.085 (in 3 folds),0.360 +/- 0.087 (in 3 folds),0.436,0.317,165.0,0.0,165.0,0.0,True
lasso_multiclass,0.731 +/- 0.040 (in 3 folds),0.716 +/- 0.039 (in 3 folds),0.763 +/- 0.043 (in 3 folds),0.750 +/- 0.050 (in 3 folds),0.475 +/- 0.075 (in 3 folds),0.378 +/- 0.089 (in 3 folds),0.473,0.372,165.0,0.0,165.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.731 +/- 0.040 (in 3 folds),0.716 +/- 0.039 (in 3 folds),0.763 +/- 0.043 (in 3 folds),0.750 +/- 0.050 (in 3 folds),0.445 +/- 0.094 (in 3 folds),0.348 +/- 0.064 (in 3 folds),0.442,0.329,165.0,0.0,165.0,0.0,False
rf_multiclass,0.720 +/- 0.062 (in 3 folds),0.705 +/- 0.065 (in 3 folds),0.740 +/- 0.063 (in 3 folds),0.725 +/- 0.069 (in 3 folds),0.404 +/- 0.117 (in 3 folds),0.315 +/- 0.114 (in 3 folds),0.4,0.272,165.0,0.0,165.0,0.0,True
rf_multiclass.decision_thresholds_tuned,0.720 +/- 0.062 (in 3 folds),0.705 +/- 0.065 (in 3 folds),0.740 +/- 0.063 (in 3 folds),0.725 +/- 0.069 (in 3 folds),0.335 +/- 0.047 (in 3 folds),0.253 +/- 0.140 (in 3 folds),0.333,0.197,165.0,0.0,165.0,0.0,True
xgboost,0.713 +/- 0.046 (in 3 folds),0.701 +/- 0.039 (in 3 folds),0.746 +/- 0.040 (in 3 folds),0.736 +/- 0.041 (in 3 folds),0.389 +/- 0.061 (in 3 folds),0.290 +/- 0.070 (in 3 folds),0.388,0.265,165.0,0.0,165.0,0.0,False
xgboost.decision_thresholds_tuned,0.713 +/- 0.046 (in 3 folds),0.701 +/- 0.039 (in 3 folds),0.746 +/- 0.040 (in 3 folds),0.736 +/- 0.041 (in 3 folds),0.321 +/- 0.035 (in 3 folds),0.251 +/- 0.073 (in 3 folds),0.321,0.196,165.0,0.0,165.0,0.0,False
elasticnet_cv,0.676 +/- 0.033 (in 3 folds),0.663 +/- 0.024 (in 3 folds),0.719 +/- 0.023 (in 3 folds),0.708 +/- 0.031 (in 3 folds),0.308 +/- 0.088 (in 3 folds),0.268 +/- 0.082 (in 3 folds),0.309,0.162,165.0,0.0,165.0,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.676 +/- 0.033 (in 3 folds),0.663 +/- 0.024 (in 3 folds),0.719 +/- 0.023 (in 3 folds),0.708 +/- 0.031 (in 3 folds),0.323 +/- 0.075 (in 3 folds),0.279 +/- 0.061 (in 3 folds),0.327,0.194,165.0,0.0,165.0,0.0,True

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.746 +/- 0.042 (in 3 folds),0.731 +/- 0.045 (in 3 folds),0.771 +/- 0.044 (in 3 folds),0.758 +/- 0.051 (in 3 folds),0.439 +/- 0.063 (in 3 folds),0.338 +/- 0.068 (in 3 folds),0.436,0.326,165,0,165,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.746 +/- 0.042 (in 3 folds),0.731 +/- 0.045 (in 3 folds),0.771 +/- 0.044 (in 3 folds),0.758 +/- 0.051 (in 3 folds),0.437 +/- 0.085 (in 3 folds),0.360 +/- 0.087 (in 3 folds),0.436,0.317,165,0,165,0.0,True
lasso_multiclass,0.731 +/- 0.040 (in 3 folds),0.716 +/- 0.039 (in 3 folds),0.763 +/- 0.043 (in 3 folds),0.750 +/- 0.050 (in 3 folds),0.475 +/- 0.075 (in 3 folds),0.378 +/- 0.089 (in 3 folds),0.473,0.372,165,0,165,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.731 +/- 0.040 (in 3 folds),0.716 +/- 0.039 (in 3 folds),0.763 +/- 0.043 (in 3 folds),0.750 +/- 0.050 (in 3 folds),0.445 +/- 0.094 (in 3 folds),0.348 +/- 0.064 (in 3 folds),0.442,0.329,165,0,165,0.0,False
rf_multiclass,0.720 +/- 0.062 (in 3 folds),0.705 +/- 0.065 (in 3 folds),0.740 +/- 0.063 (in 3 folds),0.725 +/- 0.069 (in 3 folds),0.404 +/- 0.117 (in 3 folds),0.315 +/- 0.114 (in 3 folds),0.4,0.272,165,0,165,0.0,True
rf_multiclass.decision_thresholds_tuned,0.720 +/- 0.062 (in 3 folds),0.705 +/- 0.065 (in 3 folds),0.740 +/- 0.063 (in 3 folds),0.725 +/- 0.069 (in 3 folds),0.335 +/- 0.047 (in 3 folds),0.253 +/- 0.140 (in 3 folds),0.333,0.197,165,0,165,0.0,True
xgboost,0.713 +/- 0.046 (in 3 folds),0.701 +/- 0.039 (in 3 folds),0.746 +/- 0.040 (in 3 folds),0.736 +/- 0.041 (in 3 folds),0.389 +/- 0.061 (in 3 folds),0.290 +/- 0.070 (in 3 folds),0.388,0.265,165,0,165,0.0,False
xgboost.decision_thresholds_tuned,0.713 +/- 0.046 (in 3 folds),0.701 +/- 0.039 (in 3 folds),0.746 +/- 0.040 (in 3 folds),0.736 +/- 0.041 (in 3 folds),0.321 +/- 0.035 (in 3 folds),0.251 +/- 0.073 (in 3 folds),0.321,0.196,165,0,165,0.0,False
elasticnet_cv,0.676 +/- 0.033 (in 3 folds),0.663 +/- 0.024 (in 3 folds),0.719 +/- 0.023 (in 3 folds),0.708 +/- 0.031 (in 3 folds),0.308 +/- 0.088 (in 3 folds),0.268 +/- 0.082 (in 3 folds),0.309,0.162,165,0,165,0.0,True
elasticnet_cv.decision_thresholds_tuned,0.676 +/- 0.033 (in 3 folds),0.663 +/- 0.024 (in 3 folds),0.719 +/- 0.023 (in 3 folds),0.708 +/- 0.031 (in 3 folds),0.323 +/- 0.075 (in 3 folds),0.279 +/- 0.061 (in 3 folds),0.327,0.194,165,0,165,0.0,True


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.746 +/- 0.042 (in 3 folds) ROC-AUC (macro OvO): 0.731 +/- 0.045 (in 3 folds) au-PRC (weighted OvO): 0.771 +/- 0.044 (in 3 folds) au-PRC (macro OvO): 0.758 +/- 0.051 (in 3 folds) Accuracy: 0.439 +/- 0.063 (in 3 folds) MCC: 0.338 +/- 0.068 (in 3 folds) Global scores: Accuracy: 0.436 MCC: 0.326 Global classification report:  precision recall f1-score support  20-30 0.61 0.47 0.53 30  30-40 0.22 0.22 0.22 18  40-50 0.24 0.29 0.26 24  50-60 0.27 0.22 0.24 32  60-70 0.36 0.42 0.38 24  70-80 0.00 0.00 0.00 2  <20 0.83 0.86 0.85 35  accuracy 0.44 165  macro avg 0.36 0.35 0.36 165 weighted avg 0.45 0.44 0.44 165,Per-fold scores: ROC-AUC (weighted OvO): 0.746 +/- 0.042 (in 3 folds) ROC-AUC (macro OvO): 0.731 +/- 0.045 (in 3 folds) au-PRC (weighted OvO): 0.771 +/- 0.044 (in 3 folds) au-PRC (macro OvO): 0.758 +/- 0.051 (in 3 folds) Accuracy: 0.437 +/- 0.085 (in 3 folds) MCC: 0.360 +/- 0.087 (in 3 folds) Global scores: Accuracy: 0.436 MCC: 0.317 Global classification report:  precision recall f1-score support  20-30 0.57 0.57 0.57 30  30-40 0.00 0.00 0.00 18  40-50 0.22 0.17 0.19 24  50-60 0.30 0.41 0.35 32  60-70 0.24 0.33 0.28 24  70-80 0.00 0.00 0.00 2  <20 0.81 0.86 0.83 35  accuracy 0.44 165  macro avg 0.31 0.33 0.32 165 weighted avg 0.40 0.44 0.41 165,Per-fold scores: ROC-AUC (weighted OvO): 0.731 +/- 0.040 (in 3 folds) ROC-AUC (macro OvO): 0.716 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.763 +/- 0.043 (in 3 folds) au-PRC (macro OvO): 0.750 +/- 0.050 (in 3 folds) Accuracy: 0.475 +/- 0.075 (in 3 folds) MCC: 0.378 +/- 0.089 (in 3 folds) Global scores: Accuracy: 0.473 MCC: 0.372 Global classification report:  precision recall f1-score support  20-30 0.70 0.47 0.56 30  30-40 0.30 0.33 0.32 18  40-50 0.26 0.33 0.29 24  50-60 0.30 0.25 0.27 32  60-70 0.39 0.50 0.44 24  70-80 0.00 0.00 0.00 2  <20 0.94 0.86 0.90 35  accuracy 0.47 165  macro avg 0.41 0.39 0.40 165 weighted avg 0.51 0.47 0.48 165,Per-fold scores: ROC-AUC (weighted 
OvO): 0.731 +/- 0.040 (in 3 folds) ROC-AUC (macro OvO): 0.716 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.763 +/- 0.043 (in 3 folds) au-PRC (macro OvO): 0.750 +/- 0.050 (in 3 folds) Accuracy: 0.445 +/- 0.094 (in 3 folds) MCC: 0.348 +/- 0.064 (in 3 folds) Global scores: Accuracy: 0.442 MCC: 0.329 Global classification report:  precision recall f1-score support  20-30 0.47 0.60 0.53 30  30-40 0.20 0.06 0.09 18  40-50 0.20 0.17 0.18 24  50-60 0.29 0.44 0.35 32  60-70 0.38 0.25 0.30 24  70-80 0.00 0.00 0.00 2  <20 0.97 0.86 0.91 35  accuracy 0.44 165  macro avg 0.36 0.34 0.34 165 weighted avg 0.45 0.44 0.44 165
,,,
,,,


rf_multiclass,rf_multiclass.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.720 +/- 0.062 (in 3 folds) ROC-AUC (macro OvO): 0.705 +/- 0.065 (in 3 folds) au-PRC (weighted OvO): 0.740 +/- 0.063 (in 3 folds) au-PRC (macro OvO): 0.725 +/- 0.069 (in 3 folds) Accuracy: 0.404 +/- 0.117 (in 3 folds) MCC: 0.315 +/- 0.114 (in 3 folds) Global scores: Accuracy: 0.400 MCC: 0.272 Global classification report:  precision recall f1-score support  20-30 0.47 0.63 0.54 30  30-40 0.14 0.06 0.08 18  40-50 0.14 0.12 0.13 24  50-60 0.22 0.25 0.24 32  60-70 0.25 0.25 0.25 24  70-80 0.00 0.00 0.00 2  <20 0.81 0.83 0.82 35  accuracy 0.40 165  macro avg 0.29 0.31 0.29 165 weighted avg 0.37 0.40 0.38 165,Per-fold scores: ROC-AUC (weighted OvO): 0.720 +/- 0.062 (in 3 folds) ROC-AUC (macro OvO): 0.705 +/- 0.065 (in 3 folds) au-PRC (weighted OvO): 0.740 +/- 0.063 (in 3 folds) au-PRC (macro OvO): 0.725 +/- 0.069 (in 3 folds) Accuracy: 0.335 +/- 0.047 (in 3 folds) MCC: 0.253 +/- 0.140 (in 3 folds) Global scores: Accuracy: 0.333 MCC: 0.197 Global classification report:  precision recall f1-score support  20-30 0.44 0.40 0.42 30  30-40 0.20 0.06 0.09 18  40-50 0.03 0.04 0.04 24  50-60 0.14 0.12 0.13 32  60-70 0.19 0.33 0.24 24  70-80 0.00 0.00 0.00 2  <20 0.88 0.83 0.85 35  accuracy 0.33 165  macro avg 0.27 0.25 0.25 165 weighted avg 0.35 0.33 0.33 165,Per-fold scores: ROC-AUC (weighted OvO): 0.713 +/- 0.046 (in 3 folds) ROC-AUC (macro OvO): 0.701 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.746 +/- 0.040 (in 3 folds) au-PRC (macro OvO): 0.736 +/- 0.041 (in 3 folds) Accuracy: 0.389 +/- 0.061 (in 3 folds) MCC: 0.290 +/- 0.070 (in 3 folds) Global scores: Accuracy: 0.388 MCC: 0.265 Global classification report:  precision recall f1-score support  20-30 0.50 0.50 0.50 30  30-40 0.20 0.17 0.18 18  40-50 0.14 0.12 0.13 24  50-60 0.23 0.19 0.21 32  60-70 0.31 0.46 0.37 24  70-80 0.00 0.00 0.00 2  <20 0.76 0.74 0.75 35  accuracy 0.39 165  macro avg 0.31 0.31 0.31 165 weighted avg 0.38 0.39 0.38 165,Per-fold scores: ROC-AUC (weighted 
OvO): 0.713 +/- 0.046 (in 3 folds) ROC-AUC (macro OvO): 0.701 +/- 0.039 (in 3 folds) au-PRC (weighted OvO): 0.746 +/- 0.040 (in 3 folds) au-PRC (macro OvO): 0.736 +/- 0.041 (in 3 folds) Accuracy: 0.321 +/- 0.035 (in 3 folds) MCC: 0.251 +/- 0.073 (in 3 folds) Global scores: Accuracy: 0.321 MCC: 0.196 Global classification report:  precision recall f1-score support  20-30 0.57 0.27 0.36 30  30-40 0.21 0.22 0.22 18  40-50 0.00 0.00 0.00 24  50-60 0.15 0.12 0.14 32  60-70 0.16 0.42 0.23 24  70-80 0.00 0.00 0.00 2  <20 0.75 0.77 0.76 35  accuracy 0.32 165  macro avg 0.26 0.26 0.24 165 weighted avg 0.34 0.32 0.31 165
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.676 +/- 0.033 (in 3 folds) ROC-AUC (macro OvO): 0.663 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.719 +/- 0.023 (in 3 folds) au-PRC (macro OvO): 0.708 +/- 0.031 (in 3 folds) Accuracy: 0.308 +/- 0.088 (in 3 folds) MCC: 0.268 +/- 0.082 (in 3 folds) Global scores: Accuracy: 0.309 MCC: 0.162 Global classification report:  precision recall f1-score support  20-30 0.27 0.13 0.18 30  30-40 0.00 0.00 0.00 18  40-50 0.15 0.21 0.18 24  50-60 0.12 0.16 0.14 32  60-70 0.23 0.29 0.25 24  70-80 0.00 0.00 0.00 2  <20 0.67 0.86 0.75 35  accuracy 0.31 165  macro avg 0.20 0.24 0.21 165 weighted avg 0.27 0.31 0.28 165,Per-fold scores: ROC-AUC (weighted OvO): 0.676 +/- 0.033 (in 3 folds) ROC-AUC (macro OvO): 0.663 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.719 +/- 0.023 (in 3 folds) au-PRC (macro OvO): 0.708 +/- 0.031 (in 3 folds) Accuracy: 0.323 +/- 0.075 (in 3 folds) MCC: 0.279 +/- 0.061 (in 3 folds) Global scores: Accuracy: 0.327 MCC: 0.194 Global classification report:  precision recall f1-score support  20-30 0.67 0.27 0.38 30  30-40 0.10 0.06 0.07 18  40-50 0.00 0.00 0.00 24  50-60 0.22 0.56 0.32 32  60-70 0.16 0.21 0.18 24  70-80 0.00 0.00 0.00 2  <20 0.76 0.63 0.69 35  accuracy 0.33 165  macro avg 0.27 0.25 0.23 165 weighted avg 0.36 0.33 0.31 165,Per-fold scores: ROC-AUC (weighted OvO): 0.676 +/- 0.032 (in 3 folds) ROC-AUC (macro OvO): 0.663 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.718 +/- 0.020 (in 3 folds) au-PRC (macro OvO): 0.705 +/- 0.028 (in 3 folds) Accuracy: 0.344 +/- 0.043 (in 3 folds) MCC: 0.305 +/- 0.035 (in 3 folds) Global scores: Accuracy: 0.345 MCC: 0.207 Global classification report:  precision recall f1-score support  20-30 0.47 0.30 0.37 30  30-40 0.00 0.00 0.00 18  40-50 0.19 0.25 0.21 24  50-60 0.15 0.16 0.15 32  60-70 0.20 0.29 0.24 24  70-80 0.00 0.00 0.00 2  <20 0.67 0.86 0.75 35  accuracy 0.35 165  macro avg 0.24 0.27 0.25 165 weighted avg 0.31 0.35 0.32 165,Per-fold scores: ROC-AUC (weighted 
OvO): 0.676 +/- 0.032 (in 3 folds) ROC-AUC (macro OvO): 0.663 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.718 +/- 0.020 (in 3 folds) au-PRC (macro OvO): 0.705 +/- 0.028 (in 3 folds) Accuracy: 0.342 +/- 0.074 (in 3 folds) MCC: 0.304 +/- 0.057 (in 3 folds) Global scores: Accuracy: 0.345 MCC: 0.218 Global classification report:  precision recall f1-score support  20-30 0.67 0.33 0.44 30  30-40 0.11 0.06 0.07 18  40-50 0.00 0.00 0.00 24  50-60 0.22 0.56 0.32 32  60-70 0.16 0.21 0.18 24  70-80 0.00 0.00 0.00 2  <20 0.82 0.66 0.73 35  accuracy 0.35 165  macro avg 0.28 0.26 0.25 165 weighted avg 0.37 0.35 0.33 165
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.633 +/- 0.120 (in 3 folds) ROC-AUC (macro OvO): 0.622 +/- 0.109 (in 3 folds) au-PRC (weighted OvO): 0.647 +/- 0.129 (in 3 folds) au-PRC (macro OvO): 0.636 +/- 0.119 (in 3 folds) Accuracy: 0.159 +/- 0.060 (in 3 folds) MCC: 0.044 +/- 0.077 (in 3 folds) Global scores: Accuracy: 0.158 MCC: -0.038 Global classification report:  precision recall f1-score support  20-30 0.27 0.20 0.23 30  30-40 0.00 0.00 0.00 18  40-50 0.00 0.00 0.00 24  50-60 0.09 0.16 0.11 32  60-70 0.14 0.21 0.16 24  70-80 0.00 0.00 0.00 2  <20 0.20 0.29 0.24 35  accuracy 0.16 165  macro avg 0.10 0.12 0.11 165 weighted avg 0.13 0.16 0.14 165,Per-fold scores: ROC-AUC (weighted OvO): 0.633 +/- 0.120 (in 3 folds) ROC-AUC (macro OvO): 0.622 +/- 0.109 (in 3 folds) au-PRC (weighted OvO): 0.647 +/- 0.129 (in 3 folds) au-PRC (macro OvO): 0.636 +/- 0.119 (in 3 folds) Accuracy: 0.238 +/- 0.111 (in 3 folds) MCC: 0.110 +/- 0.190 (in 3 folds) Global scores: Accuracy: 0.242 MCC: 0.075 Global classification report:  precision recall f1-score support  20-30 0.75 0.20 0.32 30  30-40 0.00 0.00 0.00 18  40-50 0.12 0.25 0.16 24  50-60 0.14 0.16 0.15 32  60-70 0.00 0.00 0.00 24  70-80 0.00 0.00 0.00 2  <20 0.32 0.66 0.43 35  accuracy 0.24 165  macro avg 0.19 0.18 0.15 165 weighted avg 0.25 0.24 0.20 165
,
,


---

# GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.804 +/- 0.085 (in 3 folds),0.804 +/- 0.085 (in 3 folds),0.896 +/- 0.053 (in 3 folds),0.896 +/- 0.053 (in 3 folds),0.706 +/- 0.092 (in 3 folds),0.353 +/- 0.252 (in 3 folds),0.704,0.315,115.0,0.0,115.0,0.0,False
lasso_multiclass,0.786 +/- 0.136 (in 3 folds),0.786 +/- 0.136 (in 3 folds),0.893 +/- 0.067 (in 3 folds),0.893 +/- 0.067 (in 3 folds),0.705 +/- 0.059 (in 3 folds),0.393 +/- 0.164 (in 3 folds),0.704,0.373,115.0,0.0,115.0,0.0,False
linearsvm_ovr,0.776 +/- 0.159 (in 3 folds),0.776 +/- 0.159 (in 3 folds),0.884 +/- 0.084 (in 3 folds),0.884 +/- 0.084 (in 3 folds),0.723 +/- 0.059 (in 3 folds),0.419 +/- 0.162 (in 3 folds),0.722,0.402,115.0,0.0,115.0,0.0,False
xgboost,0.734 +/- 0.117 (in 3 folds),0.734 +/- 0.117 (in 3 folds),0.850 +/- 0.069 (in 3 folds),0.850 +/- 0.069 (in 3 folds),0.671 +/- 0.051 (in 3 folds),0.270 +/- 0.139 (in 3 folds),0.67,0.239,115.0,0.0,115.0,0.0,False
elasticnet_cv,0.721 +/- 0.123 (in 3 folds),0.721 +/- 0.123 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.670 +/- 0.031 (in 3 folds),0.093 +/- 0.161 (in 3 folds),0.67,0.182,115.0,0.0,115.0,0.0,False
lasso_cv,0.720 +/- 0.121 (in 3 folds),0.720 +/- 0.121 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.670 +/- 0.031 (in 3 folds),0.093 +/- 0.161 (in 3 folds),0.67,0.182,115.0,0.0,115.0,0.0,False
ridge_cv,0.682 +/- 0.201 (in 3 folds),0.682 +/- 0.201 (in 3 folds),0.795 +/- 0.189 (in 3 folds),0.795 +/- 0.189 (in 3 folds),0.644 +/- 0.057 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.643,-0.005,115.0,0.0,115.0,0.0,False
dummy_stratified,0.549 +/- 0.015 (in 3 folds),0.549 +/- 0.015 (in 3 folds),0.677 +/- 0.049 (in 3 folds),0.677 +/- 0.049 (in 3 folds),0.565 +/- 0.025 (in 3 folds),0.096 +/- 0.035 (in 3 folds),0.565,0.085,115.0,0.0,115.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.652,0.0,115.0,0.0,115.0,0.0,True
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.804 +/- 0.085 (in 3 folds),0.804 +/- 0.085 (in 3 folds),0.896 +/- 0.053 (in 3 folds),0.896 +/- 0.053 (in 3 folds),0.706 +/- 0.092 (in 3 folds),0.353 +/- 0.252 (in 3 folds),0.704,0.315,115,0,115,0.0,False
lasso_multiclass,0.786 +/- 0.136 (in 3 folds),0.786 +/- 0.136 (in 3 folds),0.893 +/- 0.067 (in 3 folds),0.893 +/- 0.067 (in 3 folds),0.705 +/- 0.059 (in 3 folds),0.393 +/- 0.164 (in 3 folds),0.704,0.373,115,0,115,0.0,False
linearsvm_ovr,0.776 +/- 0.159 (in 3 folds),0.776 +/- 0.159 (in 3 folds),0.884 +/- 0.084 (in 3 folds),0.884 +/- 0.084 (in 3 folds),0.723 +/- 0.059 (in 3 folds),0.419 +/- 0.162 (in 3 folds),0.722,0.402,115,0,115,0.0,False
xgboost,0.734 +/- 0.117 (in 3 folds),0.734 +/- 0.117 (in 3 folds),0.850 +/- 0.069 (in 3 folds),0.850 +/- 0.069 (in 3 folds),0.671 +/- 0.051 (in 3 folds),0.270 +/- 0.139 (in 3 folds),0.67,0.239,115,0,115,0.0,False
elasticnet_cv,0.721 +/- 0.123 (in 3 folds),0.721 +/- 0.123 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.670 +/- 0.031 (in 3 folds),0.093 +/- 0.161 (in 3 folds),0.67,0.182,115,0,115,0.0,False
lasso_cv,0.720 +/- 0.121 (in 3 folds),0.720 +/- 0.121 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.869 +/- 0.046 (in 3 folds),0.670 +/- 0.031 (in 3 folds),0.093 +/- 0.161 (in 3 folds),0.67,0.182,115,0,115,0.0,False
ridge_cv,0.682 +/- 0.201 (in 3 folds),0.682 +/- 0.201 (in 3 folds),0.795 +/- 0.189 (in 3 folds),0.795 +/- 0.189 (in 3 folds),0.644 +/- 0.057 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.643,-0.005,115,0,115,0.0,False
dummy_stratified,0.549 +/- 0.015 (in 3 folds),0.549 +/- 0.015 (in 3 folds),0.677 +/- 0.049 (in 3 folds),0.677 +/- 0.049 (in 3 folds),0.565 +/- 0.025 (in 3 folds),0.096 +/- 0.035 (in 3 folds),0.565,0.085,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.653 +/- 0.058 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.652,0.0,115,0,115,0.0,True


rf_multiclass,lasso_multiclass,linearsvm_ovr,xgboost
Per-fold scores: ROC-AUC (weighted OvO): 0.804 +/- 0.085 (in 3 folds) ROC-AUC (macro OvO): 0.804 +/- 0.085 (in 3 folds) au-PRC (weighted OvO): 0.896 +/- 0.053 (in 3 folds) au-PRC (macro OvO): 0.896 +/- 0.053 (in 3 folds) Accuracy: 0.706 +/- 0.092 (in 3 folds) MCC: 0.353 +/- 0.252 (in 3 folds) Global scores: Accuracy: 0.704 MCC: 0.315 Global classification report:  precision recall f1-score support  50+ 0.60 0.45 0.51 40  under 50 0.74 0.84 0.79 75  accuracy 0.70 115  macro avg 0.67 0.65 0.65 115 weighted avg 0.69 0.70 0.69 115,Per-fold scores: ROC-AUC (weighted OvO): 0.786 +/- 0.136 (in 3 folds) ROC-AUC (macro OvO): 0.786 +/- 0.136 (in 3 folds) au-PRC (weighted OvO): 0.893 +/- 0.067 (in 3 folds) au-PRC (macro OvO): 0.893 +/- 0.067 (in 3 folds) Accuracy: 0.705 +/- 0.059 (in 3 folds) MCC: 0.393 +/- 0.164 (in 3 folds) Global scores: Accuracy: 0.704 MCC: 0.373 Global classification report:  precision recall f1-score support  50+ 0.57 0.65 0.60 40  under 50 0.80 0.73 0.76 75  accuracy 0.70 115  macro avg 0.68 0.69 0.68 115 weighted avg 0.72 0.70 0.71 115,Per-fold scores: ROC-AUC (weighted OvO): 0.776 +/- 0.159 (in 3 folds) ROC-AUC (macro OvO): 0.776 +/- 0.159 (in 3 folds) au-PRC (weighted OvO): 0.884 +/- 0.084 (in 3 folds) au-PRC (macro OvO): 0.884 +/- 0.084 (in 3 folds) Accuracy: 0.723 +/- 0.059 (in 3 folds) MCC: 0.419 +/- 0.162 (in 3 folds) Global scores: Accuracy: 0.722 MCC: 0.402 Global classification report:  precision recall f1-score support  50+ 0.59 0.65 0.62 40  under 50 0.80 0.76 0.78 75  accuracy 0.72 115  macro avg 0.70 0.71 0.70 115 weighted avg 0.73 0.72 0.72 115,Per-fold scores: ROC-AUC (weighted OvO): 0.734 +/- 0.117 (in 3 folds) ROC-AUC (macro OvO): 0.734 +/- 0.117 (in 3 folds) au-PRC (weighted OvO): 0.850 +/- 0.069 (in 3 folds) au-PRC (macro OvO): 0.850 +/- 0.069 (in 3 folds) Accuracy: 0.671 +/- 0.051 (in 3 folds) MCC: 0.270 +/- 0.139 (in 3 folds) Global scores: Accuracy: 0.670 MCC: 0.239 Global classification report:  precision recall f1-score support 
 50+ 0.53 0.42 0.47 40  under 50 0.72 0.80 0.76 75  accuracy 0.67 115  macro avg 0.63 0.61 0.62 115 weighted avg 0.66 0.67 0.66 115
,,,
,,,
,,,


elasticnet_cv,lasso_cv,ridge_cv,dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.721 +/- 0.123 (in 3 folds) ROC-AUC (macro OvO): 0.721 +/- 0.123 (in 3 folds) au-PRC (weighted OvO): 0.869 +/- 0.046 (in 3 folds) au-PRC (macro OvO): 0.869 +/- 0.046 (in 3 folds) Accuracy: 0.670 +/- 0.031 (in 3 folds) MCC: 0.093 +/- 0.161 (in 3 folds) Global scores: Accuracy: 0.670 MCC: 0.182 Global classification report:  precision recall f1-score support  50+ 1.00 0.05 0.10 40  under 50 0.66 1.00 0.80 75  accuracy 0.67 115  macro avg 0.83 0.53 0.45 115 weighted avg 0.78 0.67 0.55 115,Per-fold scores: ROC-AUC (weighted OvO): 0.720 +/- 0.121 (in 3 folds) ROC-AUC (macro OvO): 0.720 +/- 0.121 (in 3 folds) au-PRC (weighted OvO): 0.869 +/- 0.046 (in 3 folds) au-PRC (macro OvO): 0.869 +/- 0.046 (in 3 folds) Accuracy: 0.670 +/- 0.031 (in 3 folds) MCC: 0.093 +/- 0.161 (in 3 folds) Global scores: Accuracy: 0.670 MCC: 0.182 Global classification report:  precision recall f1-score support  50+ 1.00 0.05 0.10 40  under 50 0.66 1.00 0.80 75  accuracy 0.67 115  macro avg 0.83 0.53 0.45 115 weighted avg 0.78 0.67 0.55 115,Per-fold scores: ROC-AUC (weighted OvO): 0.682 +/- 0.201 (in 3 folds) ROC-AUC (macro OvO): 0.682 +/- 0.201 (in 3 folds) au-PRC (weighted OvO): 0.795 +/- 0.189 (in 3 folds) au-PRC (macro OvO): 0.795 +/- 0.189 (in 3 folds) Accuracy: 0.644 +/- 0.057 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.643 MCC: -0.005 Global classification report:  precision recall f1-score support  50+ 0.33 0.03 0.05 40  under 50 0.65 0.97 0.78 75  accuracy 0.64 115  macro avg 0.49 0.50 0.41 115 weighted avg 0.54 0.64 0.53 115,Per-fold scores: ROC-AUC (weighted OvO): 0.549 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.549 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.677 +/- 0.049 (in 3 folds) au-PRC (macro OvO): 0.677 +/- 0.049 (in 3 folds) Accuracy: 0.565 +/- 0.025 (in 3 folds) MCC: 0.096 +/- 0.035 (in 3 folds) Global scores: Accuracy: 0.565 MCC: 0.085 Global classification report:  precision recall f1-score 
support  50+ 0.40 0.47 0.43 40  under 50 0.69 0.61 0.65 75  accuracy 0.57 115  macro avg 0.54 0.54 0.54 115 weighted avg 0.59 0.57 0.57 115
,,,
,,,
,,,


dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.653 +/- 0.058 (in 3 folds) au-PRC (macro OvO): 0.653 +/- 0.058 (in 3 folds) Accuracy: 0.653 +/- 0.058 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.652 MCC: 0.000 Global classification report:  precision recall f1-score support  50+ 0.00 0.00 0.00 40  under 50 0.65 1.00 0.79 75  accuracy 0.65 115  macro avg 0.33 0.50 0.39 115 weighted avg 0.43 0.65 0.51 115


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.779 +/- 0.046 (in 3 folds),0.779 +/- 0.046 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.695 +/- 0.098 (in 3 folds),0.375 +/- 0.201 (in 3 folds),0.691,0.366,165.0,0.0,165.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.779 +/- 0.046 (in 3 folds),0.779 +/- 0.046 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.661 +/- 0.047 (in 3 folds),0.350 +/- 0.111 (in 3 folds),0.661,0.342,165.0,0.0,165.0,0.0,False
lasso_multiclass,0.776 +/- 0.051 (in 3 folds),0.776 +/- 0.051 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.669 +/- 0.076 (in 3 folds),0.323 +/- 0.138 (in 3 folds),0.667,0.315,165.0,0.0,165.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.776 +/- 0.051 (in 3 folds),0.776 +/- 0.051 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.636 +/- 0.099 (in 3 folds),0.382 +/- 0.139 (in 3 folds),0.63,0.362,165.0,0.0,165.0,0.0,False
rf_multiclass,0.774 +/- 0.058 (in 3 folds),0.774 +/- 0.058 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.671 +/- 0.050 (in 3 folds),0.258 +/- 0.076 (in 3 folds),0.673,0.23,165.0,0.0,165.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.774 +/- 0.058 (in 3 folds),0.774 +/- 0.058 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.694 +/- 0.057 (in 3 folds),0.380 +/- 0.182 (in 3 folds),0.691,0.38,165.0,0.0,165.0,0.0,False
xgboost,0.743 +/- 0.061 (in 3 folds),0.743 +/- 0.061 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.652 +/- 0.052 (in 3 folds),0.233 +/- 0.087 (in 3 folds),0.655,0.216,165.0,0.0,165.0,0.0,False
xgboost.decision_thresholds_tuned,0.743 +/- 0.061 (in 3 folds),0.743 +/- 0.061 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.707 +/- 0.082 (in 3 folds),0.429 +/- 0.135 (in 3 folds),0.703,0.412,165.0,0.0,165.0,0.0,False
elasticnet_cv,0.695 +/- 0.088 (in 3 folds),0.695 +/- 0.088 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.667 +/- 0.010 (in 3 folds),0.103 +/- 0.179 (in 3 folds),0.667,0.185,165.0,0.0,165.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.695 +/- 0.088 (in 3 folds),0.695 +/- 0.088 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.607 +/- 0.120 (in 3 folds),0.425 +/- 0.118 (in 3 folds),0.6,0.408,165.0,0.0,165.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.779 +/- 0.046 (in 3 folds),0.779 +/- 0.046 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.695 +/- 0.098 (in 3 folds),0.375 +/- 0.201 (in 3 folds),0.691,0.366,165,0,165,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.779 +/- 0.046 (in 3 folds),0.779 +/- 0.046 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.886 +/- 0.010 (in 3 folds),0.661 +/- 0.047 (in 3 folds),0.350 +/- 0.111 (in 3 folds),0.661,0.342,165,0,165,0.0,False
lasso_multiclass,0.776 +/- 0.051 (in 3 folds),0.776 +/- 0.051 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.669 +/- 0.076 (in 3 folds),0.323 +/- 0.138 (in 3 folds),0.667,0.315,165,0,165,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.776 +/- 0.051 (in 3 folds),0.776 +/- 0.051 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.885 +/- 0.014 (in 3 folds),0.636 +/- 0.099 (in 3 folds),0.382 +/- 0.139 (in 3 folds),0.63,0.362,165,0,165,0.0,False
rf_multiclass,0.774 +/- 0.058 (in 3 folds),0.774 +/- 0.058 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.671 +/- 0.050 (in 3 folds),0.258 +/- 0.076 (in 3 folds),0.673,0.23,165,0,165,0.0,False
rf_multiclass.decision_thresholds_tuned,0.774 +/- 0.058 (in 3 folds),0.774 +/- 0.058 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.882 +/- 0.017 (in 3 folds),0.694 +/- 0.057 (in 3 folds),0.380 +/- 0.182 (in 3 folds),0.691,0.38,165,0,165,0.0,False
xgboost,0.743 +/- 0.061 (in 3 folds),0.743 +/- 0.061 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.652 +/- 0.052 (in 3 folds),0.233 +/- 0.087 (in 3 folds),0.655,0.216,165,0,165,0.0,False
xgboost.decision_thresholds_tuned,0.743 +/- 0.061 (in 3 folds),0.743 +/- 0.061 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.849 +/- 0.035 (in 3 folds),0.707 +/- 0.082 (in 3 folds),0.429 +/- 0.135 (in 3 folds),0.703,0.412,165,0,165,0.0,False
elasticnet_cv,0.695 +/- 0.088 (in 3 folds),0.695 +/- 0.088 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.667 +/- 0.010 (in 3 folds),0.103 +/- 0.179 (in 3 folds),0.667,0.185,165,0,165,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.695 +/- 0.088 (in 3 folds),0.695 +/- 0.088 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.852 +/- 0.031 (in 3 folds),0.607 +/- 0.120 (in 3 folds),0.425 +/- 0.118 (in 3 folds),0.6,0.408,165,0,165,0.0,False


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.779 +/- 0.046 (in 3 folds) ROC-AUC (macro OvO): 0.779 +/- 0.046 (in 3 folds) au-PRC (weighted OvO): 0.886 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.886 +/- 0.010 (in 3 folds) Accuracy: 0.695 +/- 0.098 (in 3 folds) MCC: 0.375 +/- 0.201 (in 3 folds) Global scores: Accuracy: 0.691 MCC: 0.366 Global classification report:  precision recall f1-score support  50+ 0.55 0.69 0.61 58  under 50 0.80 0.69 0.74 107  accuracy 0.69 165  macro avg 0.68 0.69 0.68 165 weighted avg 0.71 0.69 0.70 165,Per-fold scores: ROC-AUC (weighted OvO): 0.779 +/- 0.046 (in 3 folds) ROC-AUC (macro OvO): 0.779 +/- 0.046 (in 3 folds) au-PRC (weighted OvO): 0.886 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.886 +/- 0.010 (in 3 folds) Accuracy: 0.661 +/- 0.047 (in 3 folds) MCC: 0.350 +/- 0.111 (in 3 folds) Global scores: Accuracy: 0.661 MCC: 0.342 Global classification report:  precision recall f1-score support  50+ 0.51 0.74 0.61 58  under 50 0.81 0.62 0.70 107  accuracy 0.66 165  macro avg 0.66 0.68 0.65 165 weighted avg 0.71 0.66 0.67 165,Per-fold scores: ROC-AUC (weighted OvO): 0.776 +/- 0.051 (in 3 folds) ROC-AUC (macro OvO): 0.776 +/- 0.051 (in 3 folds) au-PRC (weighted OvO): 0.885 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.885 +/- 0.014 (in 3 folds) Accuracy: 0.669 +/- 0.076 (in 3 folds) MCC: 0.323 +/- 0.138 (in 3 folds) Global scores: Accuracy: 0.667 MCC: 0.315 Global classification report:  precision recall f1-score support  50+ 0.52 0.66 0.58 58  under 50 0.78 0.67 0.72 107  accuracy 0.67 165  macro avg 0.65 0.66 0.65 165 weighted avg 0.69 0.67 0.67 165,Per-fold scores: ROC-AUC (weighted OvO): 0.776 +/- 0.051 (in 3 folds) ROC-AUC (macro OvO): 0.776 +/- 0.051 (in 3 folds) au-PRC (weighted OvO): 0.885 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.885 +/- 0.014 (in 3 folds) Accuracy: 0.636 +/- 0.099 (in 3 folds) MCC: 0.382 +/- 0.139 (in 3 folds) Global scores: Accuracy: 0.630 MCC: 0.362 Global classification report:  precision recall f1-score 
support  50+ 0.49 0.86 0.62 58  under 50 0.87 0.50 0.64 107  accuracy 0.63 165  macro avg 0.68 0.68 0.63 165 weighted avg 0.74 0.63 0.63 165
,,,
,,,


rf_multiclass,rf_multiclass.decision_thresholds_tuned,xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.774 +/- 0.058 (in 3 folds) ROC-AUC (macro OvO): 0.774 +/- 0.058 (in 3 folds) au-PRC (weighted OvO): 0.882 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.882 +/- 0.017 (in 3 folds) Accuracy: 0.671 +/- 0.050 (in 3 folds) MCC: 0.258 +/- 0.076 (in 3 folds) Global scores: Accuracy: 0.673 MCC: 0.230 Global classification report:  precision recall f1-score support  50+ 0.55 0.36 0.44 58  under 50 0.71 0.84 0.77 107  accuracy 0.67 165  macro avg 0.63 0.60 0.60 165 weighted avg 0.65 0.67 0.65 165,Per-fold scores: ROC-AUC (weighted OvO): 0.774 +/- 0.058 (in 3 folds) ROC-AUC (macro OvO): 0.774 +/- 0.058 (in 3 folds) au-PRC (weighted OvO): 0.882 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.882 +/- 0.017 (in 3 folds) Accuracy: 0.694 +/- 0.057 (in 3 folds) MCC: 0.380 +/- 0.182 (in 3 folds) Global scores: Accuracy: 0.691 MCC: 0.380 Global classification report:  precision recall f1-score support  50+ 0.55 0.72 0.62 58  under 50 0.82 0.67 0.74 107  accuracy 0.69 165  macro avg 0.68 0.70 0.68 165 weighted avg 0.72 0.69 0.70 165,Per-fold scores: ROC-AUC (weighted OvO): 0.743 +/- 0.061 (in 3 folds) ROC-AUC (macro OvO): 0.743 +/- 0.061 (in 3 folds) au-PRC (weighted OvO): 0.849 +/- 0.035 (in 3 folds) au-PRC (macro OvO): 0.849 +/- 0.035 (in 3 folds) Accuracy: 0.652 +/- 0.052 (in 3 folds) MCC: 0.233 +/- 0.087 (in 3 folds) Global scores: Accuracy: 0.655 MCC: 0.216 Global classification report:  precision recall f1-score support  50+ 0.51 0.43 0.47 58  under 50 0.72 0.78 0.74 107  accuracy 0.65 165  macro avg 0.61 0.60 0.61 165 weighted avg 0.64 0.65 0.65 165,Per-fold scores: ROC-AUC (weighted OvO): 0.743 +/- 0.061 (in 3 folds) ROC-AUC (macro OvO): 0.743 +/- 0.061 (in 3 folds) au-PRC (weighted OvO): 0.849 +/- 0.035 (in 3 folds) au-PRC (macro OvO): 0.849 +/- 0.035 (in 3 folds) Accuracy: 0.707 +/- 0.082 (in 3 folds) MCC: 0.429 +/- 0.135 (in 3 folds) Global scores: Accuracy: 0.703 MCC: 0.412 Global classification report:  precision recall f1-score 
support  50+ 0.56 0.76 0.64 58  under 50 0.84 0.67 0.75 107  accuracy 0.70 165  macro avg 0.70 0.72 0.69 165 weighted avg 0.74 0.70 0.71 165
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.695 +/- 0.088 (in 3 folds) ROC-AUC (macro OvO): 0.695 +/- 0.088 (in 3 folds) au-PRC (weighted OvO): 0.852 +/- 0.031 (in 3 folds) au-PRC (macro OvO): 0.852 +/- 0.031 (in 3 folds) Accuracy: 0.667 +/- 0.010 (in 3 folds) MCC: 0.103 +/- 0.179 (in 3 folds) Global scores: Accuracy: 0.667 MCC: 0.185 Global classification report:  precision recall f1-score support  50+ 1.00 0.05 0.10 58  under 50 0.66 1.00 0.80 107  accuracy 0.67 165  macro avg 0.83 0.53 0.45 165 weighted avg 0.78 0.67 0.55 165,Per-fold scores: ROC-AUC (weighted OvO): 0.695 +/- 0.088 (in 3 folds) ROC-AUC (macro OvO): 0.695 +/- 0.088 (in 3 folds) au-PRC (weighted OvO): 0.852 +/- 0.031 (in 3 folds) au-PRC (macro OvO): 0.852 +/- 0.031 (in 3 folds) Accuracy: 0.607 +/- 0.120 (in 3 folds) MCC: 0.425 +/- 0.118 (in 3 folds) Global scores: Accuracy: 0.600 MCC: 0.408 Global classification report:  precision recall f1-score support  50+ 0.47 0.98 0.63 58  under 50 0.98 0.39 0.56 107  accuracy 0.60 165  macro avg 0.72 0.69 0.60 165 weighted avg 0.80 0.60 0.59 165,Per-fold scores: ROC-AUC (weighted OvO): 0.693 +/- 0.085 (in 3 folds) ROC-AUC (macro OvO): 0.693 +/- 0.085 (in 3 folds) au-PRC (weighted OvO): 0.851 +/- 0.028 (in 3 folds) au-PRC (macro OvO): 0.851 +/- 0.028 (in 3 folds) Accuracy: 0.667 +/- 0.010 (in 3 folds) MCC: 0.103 +/- 0.179 (in 3 folds) Global scores: Accuracy: 0.667 MCC: 0.185 Global classification report:  precision recall f1-score support  50+ 1.00 0.05 0.10 58  under 50 0.66 1.00 0.80 107  accuracy 0.67 165  macro avg 0.83 0.53 0.45 165 weighted avg 0.78 0.67 0.55 165,Per-fold scores: ROC-AUC (weighted OvO): 0.693 +/- 0.085 (in 3 folds) ROC-AUC (macro OvO): 0.693 +/- 0.085 (in 3 folds) au-PRC (weighted OvO): 0.851 +/- 0.028 (in 3 folds) au-PRC (macro OvO): 0.851 +/- 0.028 (in 3 folds) Accuracy: 0.629 +/- 0.096 (in 3 folds) MCC: 0.435 +/- 0.063 (in 3 folds) Global scores: Accuracy: 0.624 MCC: 0.410 Global classification report:  precision recall f1-score 
support  50+ 0.48 0.95 0.64 58  under 50 0.94 0.45 0.61 107  accuracy 0.62 165  macro avg 0.71 0.70 0.62 165 weighted avg 0.78 0.62 0.62 165
,,,
,,,


ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.667 +/- 0.157 (in 3 folds) ROC-AUC (macro OvO): 0.667 +/- 0.157 (in 3 folds) au-PRC (weighted OvO): 0.787 +/- 0.163 (in 3 folds) au-PRC (macro OvO): 0.787 +/- 0.163 (in 3 folds) Accuracy: 0.669 +/- 0.065 (in 3 folds) MCC: 0.115 +/- 0.199 (in 3 folds) Global scores: Accuracy: 0.673 MCC: 0.185 Global classification report:  precision recall f1-score support  50+ 0.70 0.12 0.21 58  under 50 0.67 0.97 0.79 107  accuracy 0.67 165  macro avg 0.69 0.55 0.50 165 weighted avg 0.68 0.67 0.59 165,Per-fold scores: ROC-AUC (weighted OvO): 0.667 +/- 0.157 (in 3 folds) ROC-AUC (macro OvO): 0.667 +/- 0.157 (in 3 folds) au-PRC (weighted OvO): 0.787 +/- 0.163 (in 3 folds) au-PRC (macro OvO): 0.787 +/- 0.163 (in 3 folds) Accuracy: 0.618 +/- 0.054 (in 3 folds) MCC: 0.119 +/- 0.206 (in 3 folds) Global scores: Accuracy: 0.618 MCC: 0.112 Global classification report:  precision recall f1-score support  50+ 0.44 0.33 0.38 58  under 50 0.68 0.78 0.72 107  accuracy 0.62 165  macro avg 0.56 0.55 0.55 165 weighted avg 0.60 0.62 0.60 165
,
,


---

# GeneLocus.TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.978 +/- 0.022 (in 3 folds),0.978 +/- 0.022 (in 3 folds),0.937 +/- 0.063 (in 3 folds),0.937 +/- 0.063 (in 3 folds),0.826 +/- 0.052 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.826,0.0,115.0,0.0,115.0,0.0,True
linearsvm_ovr,0.971 +/- 0.030 (in 3 folds),0.971 +/- 0.030 (in 3 folds),0.925 +/- 0.072 (in 3 folds),0.925 +/- 0.072 (in 3 folds),0.948 +/- 0.026 (in 3 folds),0.838 +/- 0.061 (in 3 folds),0.948,0.827,115.0,0.0,115.0,0.0,False
elasticnet_cv,0.971 +/- 0.030 (in 3 folds),0.971 +/- 0.030 (in 3 folds),0.945 +/- 0.049 (in 3 folds),0.945 +/- 0.049 (in 3 folds),0.912 +/- 0.069 (in 3 folds),0.553 +/- 0.482 (in 3 folds),0.913,0.673,115.0,0.0,115.0,0.0,False
lasso_multiclass,0.968 +/- 0.038 (in 3 folds),0.968 +/- 0.038 (in 3 folds),0.932 +/- 0.064 (in 3 folds),0.932 +/- 0.064 (in 3 folds),0.965 +/- 0.016 (in 3 folds),0.888 +/- 0.046 (in 3 folds),0.965,0.885,115.0,0.0,115.0,0.0,False
rf_multiclass,0.958 +/- 0.019 (in 3 folds),0.958 +/- 0.019 (in 3 folds),0.809 +/- 0.138 (in 3 folds),0.809 +/- 0.138 (in 3 folds),0.922 +/- 0.024 (in 3 folds),0.702 +/- 0.097 (in 3 folds),0.922,0.706,115.0,0.0,115.0,0.0,False
lasso_cv,0.937 +/- 0.032 (in 3 folds),0.937 +/- 0.032 (in 3 folds),0.860 +/- 0.096 (in 3 folds),0.860 +/- 0.096 (in 3 folds),0.886 +/- 0.057 (in 3 folds),0.285 +/- 0.493 (in 3 folds),0.887,0.555,115.0,0.0,115.0,0.0,False
xgboost,0.936 +/- 0.037 (in 3 folds),0.936 +/- 0.037 (in 3 folds),0.846 +/- 0.082 (in 3 folds),0.846 +/- 0.082 (in 3 folds),0.939 +/- 0.014 (in 3 folds),0.761 +/- 0.118 (in 3 folds),0.939,0.778,115.0,0.0,115.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.174 +/- 0.052 (in 3 folds),0.174 +/- 0.052 (in 3 folds),0.826 +/- 0.052 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.826,0.0,115.0,0.0,115.0,0.0,True
dummy_stratified,0.425 +/- 0.106 (in 3 folds),0.425 +/- 0.106 (in 3 folds),0.171 +/- 0.046 (in 3 folds),0.171 +/- 0.046 (in 3 folds),0.633 +/- 0.115 (in 3 folds),-0.127 +/- 0.165 (in 3 folds),0.635,-0.138,115.0,0.0,115.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.978 +/- 0.022 (in 3 folds),0.978 +/- 0.022 (in 3 folds),0.937 +/- 0.063 (in 3 folds),0.937 +/- 0.063 (in 3 folds),0.826 +/- 0.052 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.826,0.0,115,0,115,0.0,True
linearsvm_ovr,0.971 +/- 0.030 (in 3 folds),0.971 +/- 0.030 (in 3 folds),0.925 +/- 0.072 (in 3 folds),0.925 +/- 0.072 (in 3 folds),0.948 +/- 0.026 (in 3 folds),0.838 +/- 0.061 (in 3 folds),0.948,0.827,115,0,115,0.0,False
elasticnet_cv,0.971 +/- 0.030 (in 3 folds),0.971 +/- 0.030 (in 3 folds),0.945 +/- 0.049 (in 3 folds),0.945 +/- 0.049 (in 3 folds),0.912 +/- 0.069 (in 3 folds),0.553 +/- 0.482 (in 3 folds),0.913,0.673,115,0,115,0.0,False
lasso_multiclass,0.968 +/- 0.038 (in 3 folds),0.968 +/- 0.038 (in 3 folds),0.932 +/- 0.064 (in 3 folds),0.932 +/- 0.064 (in 3 folds),0.965 +/- 0.016 (in 3 folds),0.888 +/- 0.046 (in 3 folds),0.965,0.885,115,0,115,0.0,False
rf_multiclass,0.958 +/- 0.019 (in 3 folds),0.958 +/- 0.019 (in 3 folds),0.809 +/- 0.138 (in 3 folds),0.809 +/- 0.138 (in 3 folds),0.922 +/- 0.024 (in 3 folds),0.702 +/- 0.097 (in 3 folds),0.922,0.706,115,0,115,0.0,False
lasso_cv,0.937 +/- 0.032 (in 3 folds),0.937 +/- 0.032 (in 3 folds),0.860 +/- 0.096 (in 3 folds),0.860 +/- 0.096 (in 3 folds),0.886 +/- 0.057 (in 3 folds),0.285 +/- 0.493 (in 3 folds),0.887,0.555,115,0,115,0.0,False
xgboost,0.936 +/- 0.037 (in 3 folds),0.936 +/- 0.037 (in 3 folds),0.846 +/- 0.082 (in 3 folds),0.846 +/- 0.082 (in 3 folds),0.939 +/- 0.014 (in 3 folds),0.761 +/- 0.118 (in 3 folds),0.939,0.778,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.174 +/- 0.052 (in 3 folds),0.174 +/- 0.052 (in 3 folds),0.826 +/- 0.052 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.826,0.0,115,0,115,0.0,True
dummy_stratified,0.425 +/- 0.106 (in 3 folds),0.425 +/- 0.106 (in 3 folds),0.171 +/- 0.046 (in 3 folds),0.171 +/- 0.046 (in 3 folds),0.633 +/- 0.115 (in 3 folds),-0.127 +/- 0.165 (in 3 folds),0.635,-0.138,115,0,115,0.0,False


ridge_cv,linearsvm_ovr,elasticnet_cv,lasso_multiclass
Per-fold scores: ROC-AUC (weighted OvO): 0.978 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.978 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.937 +/- 0.063 (in 3 folds) au-PRC (macro OvO): 0.937 +/- 0.063 (in 3 folds) Accuracy: 0.826 +/- 0.052 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.826 MCC: 0.000 Global classification report:  precision recall f1-score support  18+ 0.83 1.00 0.90 95  under 18 0.00 0.00 0.00 20  accuracy 0.83 115  macro avg 0.41 0.50 0.45 115 weighted avg 0.68 0.83 0.75 115,Per-fold scores: ROC-AUC (weighted OvO): 0.971 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.971 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.925 +/- 0.072 (in 3 folds) au-PRC (macro OvO): 0.925 +/- 0.072 (in 3 folds) Accuracy: 0.948 +/- 0.026 (in 3 folds) MCC: 0.838 +/- 0.061 (in 3 folds) Global scores: Accuracy: 0.948 MCC: 0.827 Global classification report:  precision recall f1-score support  18+ 0.98 0.96 0.97 95  under 18 0.82 0.90 0.86 20  accuracy 0.95 115  macro avg 0.90 0.93 0.91 115 weighted avg 0.95 0.95 0.95 115,Per-fold scores: ROC-AUC (weighted OvO): 0.971 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.971 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.945 +/- 0.049 (in 3 folds) au-PRC (macro OvO): 0.945 +/- 0.049 (in 3 folds) Accuracy: 0.912 +/- 0.069 (in 3 folds) MCC: 0.553 +/- 0.482 (in 3 folds) Global scores: Accuracy: 0.913 MCC: 0.673 Global classification report:  precision recall f1-score support  18+ 0.90 1.00 0.95 95  under 18 1.00 0.50 0.67 20  accuracy 0.91 115  macro avg 0.95 0.75 0.81 115 weighted avg 0.92 0.91 0.90 115,Per-fold scores: ROC-AUC (weighted OvO): 0.968 +/- 0.038 (in 3 folds) ROC-AUC (macro OvO): 0.968 +/- 0.038 (in 3 folds) au-PRC (weighted OvO): 0.932 +/- 0.064 (in 3 folds) au-PRC (macro OvO): 0.932 +/- 0.064 (in 3 folds) Accuracy: 0.965 +/- 0.016 (in 3 folds) MCC: 0.888 +/- 0.046 (in 3 folds) Global scores: Accuracy: 0.965 MCC: 0.885 Global classification report:  precision recall f1-score support 
 18+ 0.99 0.97 0.98 95  under 18 0.86 0.95 0.90 20  accuracy 0.97 115  macro avg 0.93 0.96 0.94 115 weighted avg 0.97 0.97 0.97 115
,,,
,,,
,,,


rf_multiclass,lasso_cv,xgboost,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.958 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.958 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.809 +/- 0.138 (in 3 folds) au-PRC (macro OvO): 0.809 +/- 0.138 (in 3 folds) Accuracy: 0.922 +/- 0.024 (in 3 folds) MCC: 0.702 +/- 0.097 (in 3 folds) Global scores: Accuracy: 0.922 MCC: 0.706 Global classification report:  precision recall f1-score support  18+ 0.92 0.99 0.95 95  under 18 0.92 0.60 0.73 20  accuracy 0.92 115  macro avg 0.92 0.79 0.84 115 weighted avg 0.92 0.92 0.91 115,Per-fold scores: ROC-AUC (weighted OvO): 0.937 +/- 0.032 (in 3 folds) ROC-AUC (macro OvO): 0.937 +/- 0.032 (in 3 folds) au-PRC (weighted OvO): 0.860 +/- 0.096 (in 3 folds) au-PRC (macro OvO): 0.860 +/- 0.096 (in 3 folds) Accuracy: 0.886 +/- 0.057 (in 3 folds) MCC: 0.285 +/- 0.493 (in 3 folds) Global scores: Accuracy: 0.887 MCC: 0.555 Global classification report:  precision recall f1-score support  18+ 0.88 1.00 0.94 95  under 18 1.00 0.35 0.52 20  accuracy 0.89 115  macro avg 0.94 0.68 0.73 115 weighted avg 0.90 0.89 0.86 115,Per-fold scores: ROC-AUC (weighted OvO): 0.936 +/- 0.037 (in 3 folds) ROC-AUC (macro OvO): 0.936 +/- 0.037 (in 3 folds) au-PRC (weighted OvO): 0.846 +/- 0.082 (in 3 folds) au-PRC (macro OvO): 0.846 +/- 0.082 (in 3 folds) Accuracy: 0.939 +/- 0.014 (in 3 folds) MCC: 0.761 +/- 0.118 (in 3 folds) Global scores: Accuracy: 0.939 MCC: 0.778 Global classification report:  precision recall f1-score support  18+ 0.95 0.98 0.96 95  under 18 0.88 0.75 0.81 20  accuracy 0.94 115  macro avg 0.92 0.86 0.89 115 weighted avg 0.94 0.94 0.94 115,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.174 +/- 0.052 (in 3 folds) au-PRC (macro OvO): 0.174 +/- 0.052 (in 3 folds) Accuracy: 0.826 +/- 0.052 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.826 MCC: 0.000 Global classification report:  precision recall f1-score support 
 18+ 0.83 1.00 0.90 95  under 18 0.00 0.00 0.00 20  accuracy 0.83 115  macro avg 0.41 0.50 0.45 115 weighted avg 0.68 0.83 0.75 115
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.425 +/- 0.106 (in 3 folds) ROC-AUC (macro OvO): 0.425 +/- 0.106 (in 3 folds) au-PRC (weighted OvO): 0.171 +/- 0.046 (in 3 folds) au-PRC (macro OvO): 0.171 +/- 0.046 (in 3 folds) Accuracy: 0.633 +/- 0.115 (in 3 folds) MCC: -0.127 +/- 0.165 (in 3 folds) Global scores: Accuracy: 0.635 MCC: -0.138 Global classification report:  precision recall f1-score support  18+ 0.80 0.75 0.77 95  under 18 0.08 0.10 0.09 20  accuracy 0.63 115  macro avg 0.44 0.42 0.43 115 weighted avg 0.67 0.63 0.65 115


## Apply train-smaller model -- Test set performance - With and without tuning on validation set



---

# GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.569 +/- 0.023 (in 3 folds),0.569 +/- 0.023 (in 3 folds),0.613 +/- 0.148 (in 3 folds),0.613 +/- 0.148 (in 3 folds),0.496 +/- 0.053 (in 3 folds),0.003 +/- 0.063 (in 3 folds),0.496,-0.011,115.0,0.0,115.0,0.0,False
rf_multiclass,0.558 +/- 0.046 (in 3 folds),0.558 +/- 0.046 (in 3 folds),0.590 +/- 0.197 (in 3 folds),0.590 +/- 0.197 (in 3 folds),0.539 +/- 0.001 (in 3 folds),0.153 +/- 0.072 (in 3 folds),0.539,0.075,115.0,0.0,115.0,0.0,False
linearsvm_ovr,0.552 +/- 0.023 (in 3 folds),0.552 +/- 0.023 (in 3 folds),0.607 +/- 0.158 (in 3 folds),0.607 +/- 0.158 (in 3 folds),0.496 +/- 0.054 (in 3 folds),-0.004 +/- 0.077 (in 3 folds),0.496,-0.012,115.0,0.0,115.0,0.0,False
lasso_cv,0.521 +/- 0.036 (in 3 folds),0.521 +/- 0.036 (in 3 folds),0.542 +/- 0.216 (in 3 folds),0.542 +/- 0.216 (in 3 folds),0.461 +/- 0.092 (in 3 folds),0.017 +/- 0.030 (in 3 folds),0.461,-0.132,115.0,0.0,115.0,0.0,False
ridge_cv,0.519 +/- 0.032 (in 3 folds),0.519 +/- 0.032 (in 3 folds),0.536 +/- 0.207 (in 3 folds),0.536 +/- 0.207 (in 3 folds),0.384 +/- 0.092 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.383,-0.257,115.0,0.0,115.0,0.0,False
xgboost,0.517 +/- 0.030 (in 3 folds),0.517 +/- 0.030 (in 3 folds),0.565 +/- 0.204 (in 3 folds),0.565 +/- 0.204 (in 3 folds),0.522 +/- 0.064 (in 3 folds),0.133 +/- 0.145 (in 3 folds),0.522,0.041,115.0,0.0,115.0,0.0,False
elasticnet_cv,0.502 +/- 0.062 (in 3 folds),0.502 +/- 0.062 (in 3 folds),0.566 +/- 0.184 (in 3 folds),0.566 +/- 0.184 (in 3 folds),0.461 +/- 0.092 (in 3 folds),0.017 +/- 0.030 (in 3 folds),0.461,-0.132,115.0,0.0,115.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.513 +/- 0.168 (in 3 folds),0.513 +/- 0.168 (in 3 folds),0.384 +/- 0.092 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.383,-0.257,115.0,0.0,115.0,0.0,False
dummy_stratified,0.497 +/- 0.052 (in 3 folds),0.497 +/- 0.052 (in 3 folds),0.513 +/- 0.152 (in 3 folds),0.513 +/- 0.152 (in 3 folds),0.522 +/- 0.016 (in 3 folds),-0.006 +/- 0.102 (in 3 folds),0.522,0.041,115.0,0.0,115.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.569 +/- 0.023 (in 3 folds),0.569 +/- 0.023 (in 3 folds),0.613 +/- 0.148 (in 3 folds),0.613 +/- 0.148 (in 3 folds),0.496 +/- 0.053 (in 3 folds),0.003 +/- 0.063 (in 3 folds),0.496,-0.011,115,0,115,0.0,False
rf_multiclass,0.558 +/- 0.046 (in 3 folds),0.558 +/- 0.046 (in 3 folds),0.590 +/- 0.197 (in 3 folds),0.590 +/- 0.197 (in 3 folds),0.539 +/- 0.001 (in 3 folds),0.153 +/- 0.072 (in 3 folds),0.539,0.075,115,0,115,0.0,False
linearsvm_ovr,0.552 +/- 0.023 (in 3 folds),0.552 +/- 0.023 (in 3 folds),0.607 +/- 0.158 (in 3 folds),0.607 +/- 0.158 (in 3 folds),0.496 +/- 0.054 (in 3 folds),-0.004 +/- 0.077 (in 3 folds),0.496,-0.012,115,0,115,0.0,False
lasso_cv,0.521 +/- 0.036 (in 3 folds),0.521 +/- 0.036 (in 3 folds),0.542 +/- 0.216 (in 3 folds),0.542 +/- 0.216 (in 3 folds),0.461 +/- 0.092 (in 3 folds),0.017 +/- 0.030 (in 3 folds),0.461,-0.132,115,0,115,0.0,False
ridge_cv,0.519 +/- 0.032 (in 3 folds),0.519 +/- 0.032 (in 3 folds),0.536 +/- 0.207 (in 3 folds),0.536 +/- 0.207 (in 3 folds),0.384 +/- 0.092 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.383,-0.257,115,0,115,0.0,False
xgboost,0.517 +/- 0.030 (in 3 folds),0.517 +/- 0.030 (in 3 folds),0.565 +/- 0.204 (in 3 folds),0.565 +/- 0.204 (in 3 folds),0.522 +/- 0.064 (in 3 folds),0.133 +/- 0.145 (in 3 folds),0.522,0.041,115,0,115,0.0,False
elasticnet_cv,0.502 +/- 0.062 (in 3 folds),0.502 +/- 0.062 (in 3 folds),0.566 +/- 0.184 (in 3 folds),0.566 +/- 0.184 (in 3 folds),0.461 +/- 0.092 (in 3 folds),0.017 +/- 0.030 (in 3 folds),0.461,-0.132,115,0,115,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.513 +/- 0.168 (in 3 folds),0.513 +/- 0.168 (in 3 folds),0.384 +/- 0.092 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.383,-0.257,115,0,115,0.0,False
dummy_stratified,0.497 +/- 0.052 (in 3 folds),0.497 +/- 0.052 (in 3 folds),0.513 +/- 0.152 (in 3 folds),0.513 +/- 0.152 (in 3 folds),0.522 +/- 0.016 (in 3 folds),-0.006 +/- 0.102 (in 3 folds),0.522,0.041,115,0,115,0.0,False


lasso_multiclass,rf_multiclass,linearsvm_ovr,lasso_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.569 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.569 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.613 +/- 0.148 (in 3 folds) au-PRC (macro OvO): 0.613 +/- 0.148 (in 3 folds) Accuracy: 0.496 +/- 0.053 (in 3 folds) MCC: 0.003 +/- 0.063 (in 3 folds) Global scores: Accuracy: 0.496 MCC: -0.011 Global classification report:  precision recall f1-score support  F 0.48 0.45 0.46 56  M 0.51 0.54 0.52 59  accuracy 0.50 115  macro avg 0.49 0.49 0.49 115 weighted avg 0.49 0.50 0.49 115,Per-fold scores: ROC-AUC (weighted OvO): 0.558 +/- 0.046 (in 3 folds) ROC-AUC (macro OvO): 0.558 +/- 0.046 (in 3 folds) au-PRC (weighted OvO): 0.590 +/- 0.197 (in 3 folds) au-PRC (macro OvO): 0.590 +/- 0.197 (in 3 folds) Accuracy: 0.539 +/- 0.001 (in 3 folds) MCC: 0.153 +/- 0.072 (in 3 folds) Global scores: Accuracy: 0.539 MCC: 0.075 Global classification report:  precision recall f1-score support  F 0.53 0.45 0.49 56  M 0.54 0.63 0.58 59  accuracy 0.54 115  macro avg 0.54 0.54 0.53 115 weighted avg 0.54 0.54 0.54 115,Per-fold scores: ROC-AUC (weighted OvO): 0.552 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.552 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.607 +/- 0.158 (in 3 folds) au-PRC (macro OvO): 0.607 +/- 0.158 (in 3 folds) Accuracy: 0.496 +/- 0.054 (in 3 folds) MCC: -0.004 +/- 0.077 (in 3 folds) Global scores: Accuracy: 0.496 MCC: -0.012 Global classification report:  precision recall f1-score support  F 0.48 0.43 0.45 56  M 0.51 0.56 0.53 59  accuracy 0.50 115  macro avg 0.49 0.49 0.49 115 weighted avg 0.49 0.50 0.49 115,Per-fold scores: ROC-AUC (weighted OvO): 0.521 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.521 +/- 0.036 (in 3 folds) au-PRC (weighted OvO): 0.542 +/- 0.216 (in 3 folds) au-PRC (macro OvO): 0.542 +/- 0.216 (in 3 folds) Accuracy: 0.461 +/- 0.092 (in 3 folds) MCC: 0.017 +/- 0.030 (in 3 folds) Global scores: Accuracy: 0.461 MCC: -0.132 Global classification report:  precision recall f1-score support  F 0.33 0.11 0.16 56  
M 0.48 0.80 0.60 59  accuracy 0.46 115  macro avg 0.41 0.45 0.38 115 weighted avg 0.41 0.46 0.39 115
,,,
,,,
,,,


ridge_cv,xgboost,elasticnet_cv,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.519 +/- 0.032 (in 3 folds) ROC-AUC (macro OvO): 0.519 +/- 0.032 (in 3 folds) au-PRC (weighted OvO): 0.536 +/- 0.207 (in 3 folds) au-PRC (macro OvO): 0.536 +/- 0.207 (in 3 folds) Accuracy: 0.384 +/- 0.092 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.383 MCC: -0.257 Global classification report:  precision recall f1-score support  F 0.31 0.21 0.25 56  M 0.42 0.54 0.47 59  accuracy 0.38 115  macro avg 0.36 0.38 0.36 115 weighted avg 0.37 0.38 0.37 115,Per-fold scores: ROC-AUC (weighted OvO): 0.517 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.517 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.565 +/- 0.204 (in 3 folds) au-PRC (macro OvO): 0.565 +/- 0.204 (in 3 folds) Accuracy: 0.522 +/- 0.064 (in 3 folds) MCC: 0.133 +/- 0.145 (in 3 folds) Global scores: Accuracy: 0.522 MCC: 0.041 Global classification report:  precision recall f1-score support  F 0.51 0.46 0.49 56  M 0.53 0.58 0.55 59  accuracy 0.52 115  macro avg 0.52 0.52 0.52 115 weighted avg 0.52 0.52 0.52 115,Per-fold scores: ROC-AUC (weighted OvO): 0.502 +/- 0.062 (in 3 folds) ROC-AUC (macro OvO): 0.502 +/- 0.062 (in 3 folds) au-PRC (weighted OvO): 0.566 +/- 0.184 (in 3 folds) au-PRC (macro OvO): 0.566 +/- 0.184 (in 3 folds) Accuracy: 0.461 +/- 0.092 (in 3 folds) MCC: 0.017 +/- 0.030 (in 3 folds) Global scores: Accuracy: 0.461 MCC: -0.132 Global classification report:  precision recall f1-score support  F 0.33 0.11 0.16 56  M 0.48 0.80 0.60 59  accuracy 0.46 115  macro avg 0.41 0.45 0.38 115 weighted avg 0.41 0.46 0.39 115,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.513 +/- 0.168 (in 3 folds) au-PRC (macro OvO): 0.513 +/- 0.168 (in 3 folds) Accuracy: 0.384 +/- 0.092 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.383 MCC: -0.257 Global classification report:  precision recall f1-score support  F 0.31 0.21 0.25 56  M 
0.42 0.54 0.47 59  accuracy 0.38 115  macro avg 0.36 0.38 0.36 115 weighted avg 0.37 0.38 0.37 115
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.497 +/- 0.052 (in 3 folds) ROC-AUC (macro OvO): 0.497 +/- 0.052 (in 3 folds) au-PRC (weighted OvO): 0.513 +/- 0.152 (in 3 folds) au-PRC (macro OvO): 0.513 +/- 0.152 (in 3 folds) Accuracy: 0.522 +/- 0.016 (in 3 folds) MCC: -0.006 +/- 0.102 (in 3 folds) Global scores: Accuracy: 0.522 MCC: 0.041 Global classification report:  precision recall f1-score support  F 0.51 0.46 0.49 56  M 0.53 0.58 0.55 59  accuracy 0.52 115  macro avg 0.52 0.52 0.52 115 weighted avg 0.52 0.52 0.52 115


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.495 +/- 0.009 (in 3 folds),0.495 +/- 0.009 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.543 +/- 0.068 (in 3 folds),-0.005 +/- 0.008 (in 3 folds),0.539,0.033,165.0,0.0,165.0,0.0,False
lasso_cv.decision_thresholds_tuned,0.495 +/- 0.009 (in 3 folds),0.495 +/- 0.009 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.463 +/- 0.073 (in 3 folds),-0.006 +/- 0.011 (in 3 folds),0.467,-0.067,165.0,0.0,165.0,0.0,False
ridge_cv,0.490 +/- 0.017 (in 3 folds),0.490 +/- 0.017 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.549 +/- 0.062 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.545,0.072,165.0,0.0,165.0,0.0,False
ridge_cv.decision_thresholds_tuned,0.490 +/- 0.017 (in 3 folds),0.490 +/- 0.017 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.543 +/- 0.068 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.539,0.0,165.0,0.0,165.0,0.0,True
elasticnet_cv,0.467 +/- 0.045 (in 3 folds),0.467 +/- 0.045 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.537 +/- 0.075 (in 3 folds),-0.016 +/- 0.028 (in 3 folds),0.533,0.014,165.0,0.0,165.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.467 +/- 0.045 (in 3 folds),0.467 +/- 0.045 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.456 +/- 0.085 (in 3 folds),-0.056 +/- 0.081 (in 3 folds),0.461,-0.082,165.0,0.0,165.0,0.0,False
linearsvm_ovr,0.464 +/- 0.033 (in 3 folds),0.464 +/- 0.033 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.445 +/- 0.048 (in 3 folds),-0.099 +/- 0.113 (in 3 folds),0.442,-0.11,165.0,0.0,165.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.464 +/- 0.033 (in 3 folds),0.464 +/- 0.033 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.440 +/- 0.079 (in 3 folds),-0.109 +/- 0.101 (in 3 folds),0.442,-0.104,165.0,0.0,165.0,0.0,False
lasso_multiclass,0.457 +/- 0.053 (in 3 folds),0.457 +/- 0.053 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.449 +/- 0.080 (in 3 folds),-0.090 +/- 0.173 (in 3 folds),0.448,-0.097,165.0,0.0,165.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.457 +/- 0.053 (in 3 folds),0.457 +/- 0.053 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.452 +/- 0.063 (in 3 folds),-0.046 +/- 0.040 (in 3 folds),0.455,-0.079,165.0,0.0,165.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.495 +/- 0.009 (in 3 folds),0.495 +/- 0.009 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.543 +/- 0.068 (in 3 folds),-0.005 +/- 0.008 (in 3 folds),0.539,0.033,165,0,165,0.0,False
lasso_cv.decision_thresholds_tuned,0.495 +/- 0.009 (in 3 folds),0.495 +/- 0.009 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.556 +/- 0.056 (in 3 folds),0.463 +/- 0.073 (in 3 folds),-0.006 +/- 0.011 (in 3 folds),0.467,-0.067,165,0,165,0.0,False
ridge_cv,0.490 +/- 0.017 (in 3 folds),0.490 +/- 0.017 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.549 +/- 0.062 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.545,0.072,165,0,165,0.0,False
ridge_cv.decision_thresholds_tuned,0.490 +/- 0.017 (in 3 folds),0.490 +/- 0.017 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.554 +/- 0.057 (in 3 folds),0.543 +/- 0.068 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.539,0.0,165,0,165,0.0,True
elasticnet_cv,0.467 +/- 0.045 (in 3 folds),0.467 +/- 0.045 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.537 +/- 0.075 (in 3 folds),-0.016 +/- 0.028 (in 3 folds),0.533,0.014,165,0,165,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.467 +/- 0.045 (in 3 folds),0.467 +/- 0.045 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.536 +/- 0.022 (in 3 folds),0.456 +/- 0.085 (in 3 folds),-0.056 +/- 0.081 (in 3 folds),0.461,-0.082,165,0,165,0.0,False
linearsvm_ovr,0.464 +/- 0.033 (in 3 folds),0.464 +/- 0.033 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.445 +/- 0.048 (in 3 folds),-0.099 +/- 0.113 (in 3 folds),0.442,-0.11,165,0,165,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.464 +/- 0.033 (in 3 folds),0.464 +/- 0.033 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.534 +/- 0.061 (in 3 folds),0.440 +/- 0.079 (in 3 folds),-0.109 +/- 0.101 (in 3 folds),0.442,-0.104,165,0,165,0.0,False
lasso_multiclass,0.457 +/- 0.053 (in 3 folds),0.457 +/- 0.053 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.449 +/- 0.080 (in 3 folds),-0.090 +/- 0.173 (in 3 folds),0.448,-0.097,165,0,165,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.457 +/- 0.053 (in 3 folds),0.457 +/- 0.053 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.530 +/- 0.072 (in 3 folds),0.452 +/- 0.063 (in 3 folds),-0.046 +/- 0.040 (in 3 folds),0.455,-0.079,165,0,165,0.0,False


lasso_cv,lasso_cv.decision_thresholds_tuned,ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.495 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.495 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.556 +/- 0.056 (in 3 folds) au-PRC (macro OvO): 0.556 +/- 0.056 (in 3 folds) Accuracy: 0.543 +/- 0.068 (in 3 folds) MCC: -0.005 +/- 0.008 (in 3 folds) Global scores: Accuracy: 0.539 MCC: 0.033 Global classification report:  precision recall f1-score support  F 0.50 0.16 0.24 76  M 0.55 0.87 0.67 89  accuracy 0.54 165  macro avg 0.52 0.51 0.45 165 weighted avg 0.52 0.54 0.47 165,Per-fold scores: ROC-AUC (weighted OvO): 0.495 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.495 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.556 +/- 0.056 (in 3 folds) au-PRC (macro OvO): 0.556 +/- 0.056 (in 3 folds) Accuracy: 0.463 +/- 0.073 (in 3 folds) MCC: -0.006 +/- 0.011 (in 3 folds) Global scores: Accuracy: 0.467 MCC: -0.067 Global classification report:  precision recall f1-score support  F 0.43 0.46 0.44 76  M 0.51 0.47 0.49 89  accuracy 0.47 165  macro avg 0.47 0.47 0.47 165 weighted avg 0.47 0.47 0.47 165,Per-fold scores: ROC-AUC (weighted OvO): 0.490 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.490 +/- 0.017 (in 3 folds) au-PRC (weighted OvO): 0.554 +/- 0.057 (in 3 folds) au-PRC (macro OvO): 0.554 +/- 0.057 (in 3 folds) Accuracy: 0.549 +/- 0.062 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.545 MCC: 0.072 Global classification report:  precision recall f1-score support  F 0.51 0.39 0.44 76  M 0.57 0.67 0.62 89  accuracy 0.55 165  macro avg 0.54 0.53 0.53 165 weighted avg 0.54 0.55 0.54 165,Per-fold scores: ROC-AUC (weighted OvO): 0.490 +/- 0.017 (in 3 folds) ROC-AUC (macro OvO): 0.490 +/- 0.017 (in 3 folds) au-PRC (weighted OvO): 0.554 +/- 0.057 (in 3 folds) au-PRC (macro OvO): 0.554 +/- 0.057 (in 3 folds) Accuracy: 0.543 +/- 0.068 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.539 MCC: 0.000 Global classification report:  precision recall f1-score support  F 0.00 0.00 0.00 76  M 
0.54 1.00 0.70 89  accuracy 0.54 165  macro avg 0.27 0.50 0.35 165 weighted avg 0.29 0.54 0.38 165
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.467 +/- 0.045 (in 3 folds) ROC-AUC (macro OvO): 0.467 +/- 0.045 (in 3 folds) au-PRC (weighted OvO): 0.536 +/- 0.022 (in 3 folds) au-PRC (macro OvO): 0.536 +/- 0.022 (in 3 folds) Accuracy: 0.537 +/- 0.075 (in 3 folds) MCC: -0.016 +/- 0.028 (in 3 folds) Global scores: Accuracy: 0.533 MCC: 0.014 Global classification report:  precision recall f1-score support  F 0.48 0.14 0.22 76  M 0.54 0.87 0.67 89  accuracy 0.53 165  macro avg 0.51 0.50 0.44 165 weighted avg 0.51 0.53 0.46 165,Per-fold scores: ROC-AUC (weighted OvO): 0.467 +/- 0.045 (in 3 folds) ROC-AUC (macro OvO): 0.467 +/- 0.045 (in 3 folds) au-PRC (weighted OvO): 0.536 +/- 0.022 (in 3 folds) au-PRC (macro OvO): 0.536 +/- 0.022 (in 3 folds) Accuracy: 0.456 +/- 0.085 (in 3 folds) MCC: -0.056 +/- 0.081 (in 3 folds) Global scores: Accuracy: 0.461 MCC: -0.082 Global classification report:  precision recall f1-score support  F 0.42 0.43 0.43 76  M 0.50 0.48 0.49 89  accuracy 0.46 165  macro avg 0.46 0.46 0.46 165 weighted avg 0.46 0.46 0.46 165,Per-fold scores: ROC-AUC (weighted OvO): 0.464 +/- 0.033 (in 3 folds) ROC-AUC (macro OvO): 0.464 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.534 +/- 0.061 (in 3 folds) au-PRC (macro OvO): 0.534 +/- 0.061 (in 3 folds) Accuracy: 0.445 +/- 0.048 (in 3 folds) MCC: -0.099 +/- 0.113 (in 3 folds) Global scores: Accuracy: 0.442 MCC: -0.110 Global classification report:  precision recall f1-score support  F 0.41 0.47 0.44 76  M 0.48 0.42 0.45 89  accuracy 0.44 165  macro avg 0.44 0.44 0.44 165 weighted avg 0.45 0.44 0.44 165,Per-fold scores: ROC-AUC (weighted OvO): 0.464 +/- 0.033 (in 3 folds) ROC-AUC (macro OvO): 0.464 +/- 0.033 (in 3 folds) au-PRC (weighted OvO): 0.534 +/- 0.061 (in 3 folds) au-PRC (macro OvO): 0.534 +/- 0.061 (in 3 folds) Accuracy: 0.440 +/- 0.079 (in 3 folds) MCC: -0.109 +/- 0.101 (in 3 folds) Global scores: Accuracy: 0.442 MCC: -0.104 Global classification report:  precision recall f1-score support  F 0.42 0.53 0.47 
76  M 0.48 0.37 0.42 89  accuracy 0.44 165  macro avg 0.45 0.45 0.44 165 weighted avg 0.45 0.44 0.44 165
,,,
,,,


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.457 +/- 0.053 (in 3 folds) ROC-AUC (macro OvO): 0.457 +/- 0.053 (in 3 folds) au-PRC (weighted OvO): 0.530 +/- 0.072 (in 3 folds) au-PRC (macro OvO): 0.530 +/- 0.072 (in 3 folds) Accuracy: 0.449 +/- 0.080 (in 3 folds) MCC: -0.090 +/- 0.173 (in 3 folds) Global scores: Accuracy: 0.448 MCC: -0.097 Global classification report:  precision recall f1-score support  F 0.42 0.49 0.45 76  M 0.49 0.42 0.45 89  accuracy 0.45 165  macro avg 0.45 0.45 0.45 165 weighted avg 0.45 0.45 0.45 165,Per-fold scores: ROC-AUC (weighted OvO): 0.457 +/- 0.053 (in 3 folds) ROC-AUC (macro OvO): 0.457 +/- 0.053 (in 3 folds) au-PRC (weighted OvO): 0.530 +/- 0.072 (in 3 folds) au-PRC (macro OvO): 0.530 +/- 0.072 (in 3 folds) Accuracy: 0.452 +/- 0.063 (in 3 folds) MCC: -0.046 +/- 0.040 (in 3 folds) Global scores: Accuracy: 0.455 MCC: -0.079 Global classification report:  precision recall f1-score support  F 0.43 0.54 0.48 76  M 0.49 0.38 0.43 89  accuracy 0.45 165  macro avg 0.46 0.46 0.45 165 weighted avg 0.46 0.45 0.45 165,Per-fold scores: ROC-AUC (weighted OvO): 0.433 +/- 0.074 (in 3 folds) ROC-AUC (macro OvO): 0.433 +/- 0.074 (in 3 folds) au-PRC (weighted OvO): 0.513 +/- 0.076 (in 3 folds) au-PRC (macro OvO): 0.513 +/- 0.076 (in 3 folds) Accuracy: 0.456 +/- 0.056 (in 3 folds) MCC: -0.095 +/- 0.114 (in 3 folds) Global scores: Accuracy: 0.455 MCC: -0.098 Global classification report:  precision recall f1-score support  F 0.41 0.41 0.41 76  M 0.49 0.49 0.49 89  accuracy 0.45 165  macro avg 0.45 0.45 0.45 165 weighted avg 0.45 0.45 0.45 165,Per-fold scores: ROC-AUC (weighted OvO): 0.433 +/- 0.074 (in 3 folds) ROC-AUC (macro OvO): 0.433 +/- 0.074 (in 3 folds) au-PRC (weighted OvO): 0.513 +/- 0.076 (in 3 folds) au-PRC (macro OvO): 0.513 +/- 0.076 (in 3 folds) Accuracy: 0.478 +/- 0.048 (in 3 folds) MCC: -0.049 +/- 0.101 (in 3 folds) Global scores: Accuracy: 0.479 MCC: -0.019 Global classification report:  precision recall f1-score support  F 0.45 0.64 0.53 
76  M 0.53 0.34 0.41 89  accuracy 0.48 165  macro avg 0.49 0.49 0.47 165 weighted avg 0.49 0.48 0.47 165
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.429 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.429 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.543 +/- 0.051 (in 3 folds) au-PRC (macro OvO): 0.543 +/- 0.051 (in 3 folds) Accuracy: 0.443 +/- 0.028 (in 3 folds) MCC: -0.114 +/- 0.062 (in 3 folds) Global scores: Accuracy: 0.442 MCC: -0.120 Global classification report:  precision recall f1-score support  F 0.40 0.41 0.40 76  M 0.48 0.47 0.48 89  accuracy 0.44 165  macro avg 0.44 0.44 0.44 165 weighted avg 0.44 0.44 0.44 165,Per-fold scores: ROC-AUC (weighted OvO): 0.429 +/- 0.019 (in 3 folds) ROC-AUC (macro OvO): 0.429 +/- 0.019 (in 3 folds) au-PRC (weighted OvO): 0.543 +/- 0.051 (in 3 folds) au-PRC (macro OvO): 0.543 +/- 0.051 (in 3 folds) Accuracy: 0.484 +/- 0.102 (in 3 folds) MCC: -0.101 +/- 0.103 (in 3 folds) Global scores: Accuracy: 0.479 MCC: -0.086 Global classification report:  precision recall f1-score support  F 0.39 0.24 0.30 76  M 0.51 0.69 0.59 89  accuracy 0.48 165  macro avg 0.45 0.46 0.44 165 weighted avg 0.46 0.48 0.45 165
,
,


---

# GeneLocus.TCR, TargetObsColumnEnum.covid_vs_healthy trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.803 +/- 0.049 (in 3 folds),0.190 +/- 0.329 (in 3 folds),0.804,0.324,168.0,0.0,168.0,0.0,False
lasso_multiclass,0.991 +/- 0.005 (in 3 folds),0.991 +/- 0.005 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.952 +/- 0.028 (in 3 folds),0.861 +/- 0.086 (in 3 folds),0.952,0.862,168.0,0.0,168.0,0.0,False
rf_multiclass,0.990 +/- 0.004 (in 3 folds),0.990 +/- 0.004 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.868 +/- 0.069 (in 3 folds),0.578 +/- 0.233 (in 3 folds),0.869,0.595,168.0,0.0,168.0,0.0,False
elasticnet_cv,0.988 +/- 0.012 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.874 +/- 0.084 (in 3 folds),0.577 +/- 0.320 (in 3 folds),0.875,0.613,168.0,0.0,168.0,0.0,False
xgboost,0.981 +/- 0.020 (in 3 folds),0.981 +/- 0.020 (in 3 folds),0.995 +/- 0.006 (in 3 folds),0.995 +/- 0.006 (in 3 folds),0.910 +/- 0.048 (in 3 folds),0.727 +/- 0.156 (in 3 folds),0.911,0.732,168.0,0.0,168.0,0.0,False
lasso_cv,0.980 +/- 0.025 (in 3 folds),0.980 +/- 0.025 (in 3 folds),0.995 +/- 0.007 (in 3 folds),0.995 +/- 0.007 (in 3 folds),0.904 +/- 0.090 (in 3 folds),0.670 +/- 0.355 (in 3 folds),0.905,0.713,168.0,0.0,168.0,0.0,False
linearsvm_ovr,0.974 +/- 0.026 (in 3 folds),0.974 +/- 0.026 (in 3 folds),0.991 +/- 0.010 (in 3 folds),0.991 +/- 0.010 (in 3 folds),0.947 +/- 0.017 (in 3 folds),0.850 +/- 0.044 (in 3 folds),0.946,0.844,168.0,0.0,168.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.774,0.0,168.0,0.0,168.0,0.0,True
dummy_stratified,0.407 +/- 0.032 (in 3 folds),0.407 +/- 0.032 (in 3 folds),0.743 +/- 0.018 (in 3 folds),0.743 +/- 0.018 (in 3 folds),0.572 +/- 0.025 (in 3 folds),-0.180 +/- 0.063 (in 3 folds),0.571,-0.181,168.0,0.0,168.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.803 +/- 0.049 (in 3 folds),0.190 +/- 0.329 (in 3 folds),0.804,0.324,168,0,168,0.0,False
lasso_multiclass,0.991 +/- 0.005 (in 3 folds),0.991 +/- 0.005 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.952 +/- 0.028 (in 3 folds),0.861 +/- 0.086 (in 3 folds),0.952,0.862,168,0,168,0.0,False
rf_multiclass,0.990 +/- 0.004 (in 3 folds),0.990 +/- 0.004 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.997 +/- 0.001 (in 3 folds),0.868 +/- 0.069 (in 3 folds),0.578 +/- 0.233 (in 3 folds),0.869,0.595,168,0,168,0.0,False
elasticnet_cv,0.988 +/- 0.012 (in 3 folds),0.988 +/- 0.012 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.874 +/- 0.084 (in 3 folds),0.577 +/- 0.320 (in 3 folds),0.875,0.613,168,0,168,0.0,False
xgboost,0.981 +/- 0.020 (in 3 folds),0.981 +/- 0.020 (in 3 folds),0.995 +/- 0.006 (in 3 folds),0.995 +/- 0.006 (in 3 folds),0.910 +/- 0.048 (in 3 folds),0.727 +/- 0.156 (in 3 folds),0.911,0.732,168,0,168,0.0,False
lasso_cv,0.980 +/- 0.025 (in 3 folds),0.980 +/- 0.025 (in 3 folds),0.995 +/- 0.007 (in 3 folds),0.995 +/- 0.007 (in 3 folds),0.904 +/- 0.090 (in 3 folds),0.670 +/- 0.355 (in 3 folds),0.905,0.713,168,0,168,0.0,False
linearsvm_ovr,0.974 +/- 0.026 (in 3 folds),0.974 +/- 0.026 (in 3 folds),0.991 +/- 0.010 (in 3 folds),0.991 +/- 0.010 (in 3 folds),0.947 +/- 0.017 (in 3 folds),0.850 +/- 0.044 (in 3 folds),0.946,0.844,168,0,168,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.774 +/- 0.007 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.774,0.0,168,0,168,0.0,True
dummy_stratified,0.407 +/- 0.032 (in 3 folds),0.407 +/- 0.032 (in 3 folds),0.743 +/- 0.018 (in 3 folds),0.743 +/- 0.018 (in 3 folds),0.572 +/- 0.025 (in 3 folds),-0.180 +/- 0.063 (in 3 folds),0.571,-0.181,168,0,168,0.0,False


ridge_cv,lasso_multiclass,rf_multiclass,elasticnet_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.997 +/- 0.003 (in 3 folds) ROC-AUC (macro OvO): 0.997 +/- 0.003 (in 3 folds) au-PRC (weighted OvO): 0.999 +/- 0.001 (in 3 folds) au-PRC (macro OvO): 0.999 +/- 0.001 (in 3 folds) Accuracy: 0.803 +/- 0.049 (in 3 folds) MCC: 0.190 +/- 0.329 (in 3 folds) Global scores: Accuracy: 0.804 MCC: 0.324 Global classification report:  precision recall f1-score support  Covid19 1.00 0.13 0.23 38 Healthy/Background 0.80 1.00 0.89 130  accuracy 0.80 168  macro avg 0.90 0.57 0.56 168  weighted avg 0.84 0.80 0.74 168,Per-fold scores: ROC-AUC (weighted OvO): 0.991 +/- 0.005 (in 3 folds) ROC-AUC (macro OvO): 0.991 +/- 0.005 (in 3 folds) au-PRC (weighted OvO): 0.997 +/- 0.001 (in 3 folds) au-PRC (macro OvO): 0.997 +/- 0.001 (in 3 folds) Accuracy: 0.952 +/- 0.028 (in 3 folds) MCC: 0.861 +/- 0.086 (in 3 folds) Global scores: Accuracy: 0.952 MCC: 0.862 Global classification report:  precision recall f1-score support  Covid19 0.92 0.87 0.89 38 Healthy/Background 0.96 0.98 0.97 130  accuracy 0.95 168  macro avg 0.94 0.92 0.93 168  weighted avg 0.95 0.95 0.95 168,Per-fold scores: ROC-AUC (weighted OvO): 0.990 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.990 +/- 0.004 (in 3 folds) au-PRC (weighted OvO): 0.997 +/- 0.001 (in 3 folds) au-PRC (macro OvO): 0.997 +/- 0.001 (in 3 folds) Accuracy: 0.868 +/- 0.069 (in 3 folds) MCC: 0.578 +/- 0.233 (in 3 folds) Global scores: Accuracy: 0.869 MCC: 0.595 Global classification report:  precision recall f1-score support  Covid19 0.94 0.45 0.61 38 Healthy/Background 0.86 0.99 0.92 130  accuracy 0.87 168  macro avg 0.90 0.72 0.76 168  weighted avg 0.88 0.87 0.85 168,Per-fold scores: ROC-AUC (weighted OvO): 0.988 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.988 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.997 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.997 +/- 0.003 (in 3 folds) Accuracy: 0.874 +/- 0.084 (in 3 folds) MCC: 0.577 +/- 0.320 (in 3 folds) Global scores: Accuracy: 0.875 MCC: 0.613 Global 
classification report:  precision recall f1-score support  Covid19 0.90 0.50 0.64 38 Healthy/Background 0.87 0.98 0.92 130  accuracy 0.88 168  macro avg 0.89 0.74 0.78 168  weighted avg 0.88 0.88 0.86 168
,,,
,,,
,,,


xgboost,lasso_cv,linearsvm_ovr,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.981 +/- 0.020 (in 3 folds) ROC-AUC (macro OvO): 0.981 +/- 0.020 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.006 (in 3 folds) Accuracy: 0.910 +/- 0.048 (in 3 folds) MCC: 0.727 +/- 0.156 (in 3 folds) Global scores: Accuracy: 0.911 MCC: 0.732 Global classification report:  precision recall f1-score support  Covid19 0.93 0.66 0.77 38 Healthy/Background 0.91 0.98 0.94 130  accuracy 0.91 168  macro avg 0.92 0.82 0.86 168  weighted avg 0.91 0.91 0.90 168,Per-fold scores: ROC-AUC (weighted OvO): 0.980 +/- 0.025 (in 3 folds) ROC-AUC (macro OvO): 0.980 +/- 0.025 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.007 (in 3 folds) Accuracy: 0.904 +/- 0.090 (in 3 folds) MCC: 0.670 +/- 0.355 (in 3 folds) Global scores: Accuracy: 0.905 MCC: 0.713 Global classification report:  precision recall f1-score support  Covid19 0.92 0.63 0.75 38 Healthy/Background 0.90 0.98 0.94 130  accuracy 0.90 168  macro avg 0.91 0.81 0.85 168  weighted avg 0.91 0.90 0.90 168,Per-fold scores: ROC-AUC (weighted OvO): 0.974 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.974 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.991 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.991 +/- 0.010 (in 3 folds) Accuracy: 0.947 +/- 0.017 (in 3 folds) MCC: 0.850 +/- 0.044 (in 3 folds) Global scores: Accuracy: 0.946 MCC: 0.844 Global classification report:  precision recall f1-score support  Covid19 0.91 0.84 0.88 38 Healthy/Background 0.95 0.98 0.97 130  accuracy 0.95 168  macro avg 0.93 0.91 0.92 168  weighted avg 0.95 0.95 0.95 168,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.774 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.774 +/- 0.007 (in 3 folds) Accuracy: 0.774 +/- 0.007 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.774 MCC: 0.000 Global 
classification report:  precision recall f1-score support  Covid19 0.00 0.00 0.00 38 Healthy/Background 0.77 1.00 0.87 130  accuracy 0.77 168  macro avg 0.39 0.50 0.44 168  weighted avg 0.60 0.77 0.68 168
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.407 +/- 0.032 (in 3 folds) ROC-AUC (macro OvO): 0.407 +/- 0.032 (in 3 folds) au-PRC (weighted OvO): 0.743 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.743 +/- 0.018 (in 3 folds) Accuracy: 0.572 +/- 0.025 (in 3 folds) MCC: -0.180 +/- 0.063 (in 3 folds) Global scores: Accuracy: 0.571 MCC: -0.181 Global classification report:  precision recall f1-score support  Covid19 0.10 0.11 0.10 38 Healthy/Background 0.73 0.71 0.72 130  accuracy 0.57 168  macro avg 0.41 0.41 0.41 168  weighted avg 0.59 0.57 0.58 168


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.983 +/- 0.006 (in 3 folds),0.983 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.897 +/- 0.035 (in 3 folds),0.696 +/- 0.105 (in 3 folds),0.897,0.695,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.983 +/- 0.006 (in 3 folds),0.983 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.933 +/- 0.014 (in 3 folds),0.820 +/- 0.037 (in 3 folds),0.933,0.82,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
ridge_cv,0.983 +/- 0.003 (in 3 folds),0.983 +/- 0.003 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.798 +/- 0.039 (in 3 folds),0.236 +/- 0.258 (in 3 folds),0.798,0.309,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
ridge_cv.decision_thresholds_tuned,0.983 +/- 0.003 (in 3 folds),0.983 +/- 0.003 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.940 +/- 0.024 (in 3 folds),0.835 +/- 0.069 (in 3 folds),0.94,0.835,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
lasso_multiclass,0.982 +/- 0.006 (in 3 folds),0.982 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.940 +/- 0.013 (in 3 folds),0.836 +/- 0.033 (in 3 folds),0.94,0.835,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.982 +/- 0.006 (in 3 folds),0.982 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.913 +/- 0.026 (in 3 folds),0.778 +/- 0.039 (in 3 folds),0.913,0.772,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
rf_multiclass,0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.897 +/- 0.018 (in 3 folds),0.697 +/- 0.057 (in 3 folds),0.897,0.698,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
rf_multiclass.decision_thresholds_tuned,0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.928 +/- 0.021 (in 3 folds),0.801 +/- 0.064 (in 3 folds),0.929,0.798,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
lasso_cv,0.979 +/- 0.001 (in 3 folds),0.979 +/- 0.001 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.901 +/- 0.048 (in 3 folds),0.706 +/- 0.148 (in 3 folds),0.901,0.704,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False
lasso_cv.decision_thresholds_tuned,0.979 +/- 0.001 (in 3 folds),0.979 +/- 0.001 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.917 +/- 0.020 (in 3 folds),0.762 +/- 0.056 (in 3 folds),0.917,0.761,disease.separate_past_exposures,252.0,0.0,252.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.983 +/- 0.006 (in 3 folds),0.983 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.897 +/- 0.035 (in 3 folds),0.696 +/- 0.105 (in 3 folds),0.897,0.695,disease.separate_past_exposures,252,0,252,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.983 +/- 0.006 (in 3 folds),0.983 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.933 +/- 0.014 (in 3 folds),0.820 +/- 0.037 (in 3 folds),0.933,0.82,disease.separate_past_exposures,252,0,252,0.0,False
ridge_cv,0.983 +/- 0.003 (in 3 folds),0.983 +/- 0.003 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.798 +/- 0.039 (in 3 folds),0.236 +/- 0.258 (in 3 folds),0.798,0.309,disease.separate_past_exposures,252,0,252,0.0,False
ridge_cv.decision_thresholds_tuned,0.983 +/- 0.003 (in 3 folds),0.983 +/- 0.003 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.995 +/- 0.000 (in 3 folds),0.940 +/- 0.024 (in 3 folds),0.835 +/- 0.069 (in 3 folds),0.94,0.835,disease.separate_past_exposures,252,0,252,0.0,False
lasso_multiclass,0.982 +/- 0.006 (in 3 folds),0.982 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.940 +/- 0.013 (in 3 folds),0.836 +/- 0.033 (in 3 folds),0.94,0.835,disease.separate_past_exposures,252,0,252,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.982 +/- 0.006 (in 3 folds),0.982 +/- 0.006 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.913 +/- 0.026 (in 3 folds),0.778 +/- 0.039 (in 3 folds),0.913,0.772,disease.separate_past_exposures,252,0,252,0.0,False
rf_multiclass,0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.897 +/- 0.018 (in 3 folds),0.697 +/- 0.057 (in 3 folds),0.897,0.698,disease.separate_past_exposures,252,0,252,0.0,False
rf_multiclass.decision_thresholds_tuned,0.980 +/- 0.012 (in 3 folds),0.980 +/- 0.012 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.928 +/- 0.021 (in 3 folds),0.801 +/- 0.064 (in 3 folds),0.929,0.798,disease.separate_past_exposures,252,0,252,0.0,False
lasso_cv,0.979 +/- 0.001 (in 3 folds),0.979 +/- 0.001 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.901 +/- 0.048 (in 3 folds),0.706 +/- 0.148 (in 3 folds),0.901,0.704,disease.separate_past_exposures,252,0,252,0.0,False
lasso_cv.decision_thresholds_tuned,0.979 +/- 0.001 (in 3 folds),0.979 +/- 0.001 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.994 +/- 0.000 (in 3 folds),0.917 +/- 0.020 (in 3 folds),0.762 +/- 0.056 (in 3 folds),0.917,0.761,disease.separate_past_exposures,252,0,252,0.0,False


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,ridge_cv,ridge_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.983 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.983 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.002 (in 3 folds) Accuracy: 0.897 +/- 0.035 (in 3 folds) MCC: 0.696 +/- 0.105 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.897 MCC: 0.695 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.97 0.57 0.72 58 Healthy/Background 0.89 0.99 0.94 194  accuracy 0.90 252  macro avg 0.93 0.78 0.83 252  weighted avg 0.90 0.90 0.89 252,Per-fold scores: ROC-AUC (weighted OvO): 0.983 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.983 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.002 (in 3 folds) Accuracy: 0.933 +/- 0.014 (in 3 folds) MCC: 0.820 +/- 0.037 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.933 MCC: 0.820 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.82 0.91 0.86 58 Healthy/Background 0.97 0.94 0.96 194  accuracy 0.93 252  macro avg 0.89 0.93 0.91 252  weighted avg 0.94 0.93 0.93 252,Per-fold scores: ROC-AUC (weighted OvO): 0.983 +/- 0.003 (in 3 folds) ROC-AUC (macro OvO): 0.983 +/- 0.003 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.000 (in 3 folds) Accuracy: 0.798 +/- 0.039 (in 3 folds) MCC: 0.236 +/- 0.258 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.798 MCC: 0.309 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 1.00 
0.12 0.22 58 Healthy/Background 0.79 1.00 0.88 194  accuracy 0.80 252  macro avg 0.90 0.56 0.55 252  weighted avg 0.84 0.80 0.73 252,Per-fold scores: ROC-AUC (weighted OvO): 0.983 +/- 0.003 (in 3 folds) ROC-AUC (macro OvO): 0.983 +/- 0.003 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.000 (in 3 folds) Accuracy: 0.940 +/- 0.024 (in 3 folds) MCC: 0.835 +/- 0.069 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.940 MCC: 0.835 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.85 0.90 0.87 58 Healthy/Background 0.97 0.95 0.96 194  accuracy 0.94 252  macro avg 0.91 0.93 0.92 252  weighted avg 0.94 0.94 0.94 252
,,,
,,,


lasso_multiclass,lasso_multiclass.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.982 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.982 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.002 (in 3 folds) Accuracy: 0.940 +/- 0.013 (in 3 folds) MCC: 0.836 +/- 0.033 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.940 MCC: 0.835 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.85 0.90 0.87 58 Healthy/Background 0.97 0.95 0.96 194  accuracy 0.94 252  macro avg 0.91 0.93 0.92 252  weighted avg 0.94 0.94 0.94 252,Per-fold scores: ROC-AUC (weighted OvO): 0.982 +/- 0.006 (in 3 folds) ROC-AUC (macro OvO): 0.982 +/- 0.006 (in 3 folds) au-PRC (weighted OvO): 0.995 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.995 +/- 0.002 (in 3 folds) Accuracy: 0.913 +/- 0.026 (in 3 folds) MCC: 0.778 +/- 0.039 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.913 MCC: 0.772 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.76 0.90 0.83 58 Healthy/Background 0.97 0.92 0.94 194  accuracy 0.91 252  macro avg 0.87 0.91 0.88 252  weighted avg 0.92 0.91 0.92 252,Per-fold scores: ROC-AUC (weighted OvO): 0.980 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.980 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.994 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.994 +/- 0.004 (in 3 folds) Accuracy: 0.897 +/- 0.018 (in 3 folds) MCC: 0.697 +/- 0.057 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.897 MCC: 0.698 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 1.00 
0.55 0.71 58 Healthy/Background 0.88 1.00 0.94 194  accuracy 0.90 252  macro avg 0.94 0.78 0.82 252  weighted avg 0.91 0.90 0.89 252,Per-fold scores: ROC-AUC (weighted OvO): 0.980 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.980 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.994 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.994 +/- 0.004 (in 3 folds) Accuracy: 0.928 +/- 0.021 (in 3 folds) MCC: 0.801 +/- 0.064 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.929 MCC: 0.798 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.84 0.84 0.84 58 Healthy/Background 0.95 0.95 0.95 194  accuracy 0.93 252  macro avg 0.90 0.90 0.90 252  weighted avg 0.93 0.93 0.93 252
,,,
,,,


lasso_cv,lasso_cv.decision_thresholds_tuned,linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.979 +/- 0.001 (in 3 folds) ROC-AUC (macro OvO): 0.979 +/- 0.001 (in 3 folds) au-PRC (weighted OvO): 0.994 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.994 +/- 0.000 (in 3 folds) Accuracy: 0.901 +/- 0.048 (in 3 folds) MCC: 0.706 +/- 0.148 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.901 MCC: 0.704 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.92 0.62 0.74 58 Healthy/Background 0.90 0.98 0.94 194  accuracy 0.90 252  macro avg 0.91 0.80 0.84 252  weighted avg 0.90 0.90 0.89 252,Per-fold scores: ROC-AUC (weighted OvO): 0.979 +/- 0.001 (in 3 folds) ROC-AUC (macro OvO): 0.979 +/- 0.001 (in 3 folds) au-PRC (weighted OvO): 0.994 +/- 0.000 (in 3 folds) au-PRC (macro OvO): 0.994 +/- 0.000 (in 3 folds) Accuracy: 0.917 +/- 0.020 (in 3 folds) MCC: 0.762 +/- 0.056 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.917 MCC: 0.761 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.84 0.79 0.81 58 Healthy/Background 0.94 0.95 0.95 194  accuracy 0.92 252  macro avg 0.89 0.87 0.88 252  weighted avg 0.92 0.92 0.92 252,Per-fold scores: ROC-AUC (weighted OvO): 0.974 +/- 0.024 (in 3 folds) ROC-AUC (macro OvO): 0.974 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.992 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.992 +/- 0.008 (in 3 folds) Accuracy: 0.941 +/- 0.012 (in 3 folds) MCC: 0.836 +/- 0.029 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.940 MCC: 0.833 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.86 
0.88 0.87 58 Healthy/Background 0.96 0.96 0.96 194  accuracy 0.94 252  macro avg 0.91 0.92 0.92 252  weighted avg 0.94 0.94 0.94 252,Per-fold scores: ROC-AUC (weighted OvO): 0.974 +/- 0.024 (in 3 folds) ROC-AUC (macro OvO): 0.974 +/- 0.024 (in 3 folds) au-PRC (weighted OvO): 0.992 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.992 +/- 0.008 (in 3 folds) Accuracy: 0.933 +/- 0.030 (in 3 folds) MCC: 0.805 +/- 0.088 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.933 MCC: 0.804 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.90 0.79 0.84 58 Healthy/Background 0.94 0.97 0.96 194  accuracy 0.93 252  macro avg 0.92 0.88 0.90 252  weighted avg 0.93 0.93 0.93 252
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.973 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.973 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.992 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.992 +/- 0.004 (in 3 folds) Accuracy: 0.913 +/- 0.008 (in 3 folds) MCC: 0.745 +/- 0.024 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.913 MCC: 0.742 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.91 0.69 0.78 58 Healthy/Background 0.91 0.98 0.95 194  accuracy 0.91 252  macro avg 0.91 0.83 0.86 252  weighted avg 0.91 0.91 0.91 252,Per-fold scores: ROC-AUC (weighted OvO): 0.973 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.973 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.992 +/- 0.004 (in 3 folds) au-PRC (macro OvO): 0.992 +/- 0.004 (in 3 folds) Accuracy: 0.901 +/- 0.029 (in 3 folds) MCC: 0.719 +/- 0.076 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.901 MCC: 0.718 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  Covid19 0.79 0.78 0.78 58 Healthy/Background 0.93 0.94 0.94 194  accuracy 0.90 252  macro avg 0.86 0.86 0.86 252  weighted avg 0.90 0.90 0.90 252
,
,


---

# GeneLocus.TCR, TargetObsColumnEnum.hiv_vs_healthy trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.932 +/- 0.036 (in 3 folds),0.932 +/- 0.036 (in 3 folds),0.972 +/- 0.015 (in 3 folds),0.972 +/- 0.015 (in 3 folds),0.851 +/- 0.017 (in 3 folds),0.698 +/- 0.055 (in 3 folds),0.851,0.695,194.0,0.0,194.0,0.0,False
linearsvm_ovr,0.932 +/- 0.028 (in 3 folds),0.932 +/- 0.028 (in 3 folds),0.971 +/- 0.012 (in 3 folds),0.971 +/- 0.012 (in 3 folds),0.861 +/- 0.030 (in 3 folds),0.709 +/- 0.078 (in 3 folds),0.861,0.708,194.0,0.0,194.0,0.0,False
lasso_cv,0.924 +/- 0.022 (in 3 folds),0.924 +/- 0.022 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.845 +/- 0.014 (in 3 folds),0.680 +/- 0.056 (in 3 folds),0.845,0.677,194.0,0.0,194.0,0.0,False
elasticnet_cv,0.922 +/- 0.026 (in 3 folds),0.922 +/- 0.026 (in 3 folds),0.967 +/- 0.011 (in 3 folds),0.967 +/- 0.011 (in 3 folds),0.845 +/- 0.014 (in 3 folds),0.680 +/- 0.056 (in 3 folds),0.845,0.677,194.0,0.0,194.0,0.0,False
ridge_cv,0.921 +/- 0.036 (in 3 folds),0.921 +/- 0.036 (in 3 folds),0.967 +/- 0.014 (in 3 folds),0.967 +/- 0.014 (in 3 folds),0.753 +/- 0.081 (in 3 folds),0.348 +/- 0.304 (in 3 folds),0.753,0.4,194.0,0.0,194.0,0.0,False
xgboost,0.911 +/- 0.018 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.960 +/- 0.008 (in 3 folds),0.960 +/- 0.008 (in 3 folds),0.830 +/- 0.028 (in 3 folds),0.614 +/- 0.053 (in 3 folds),0.83,0.611,194.0,0.0,194.0,0.0,False
rf_multiclass,0.899 +/- 0.034 (in 3 folds),0.899 +/- 0.034 (in 3 folds),0.957 +/- 0.017 (in 3 folds),0.957 +/- 0.017 (in 3 folds),0.794 +/- 0.039 (in 3 folds),0.524 +/- 0.080 (in 3 folds),0.794,0.521,194.0,0.0,194.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.67,0.0,194.0,0.0,194.0,0.0,True
dummy_stratified,0.467 +/- 0.016 (in 3 folds),0.467 +/- 0.016 (in 3 folds),0.656 +/- 0.003 (in 3 folds),0.656 +/- 0.003 (in 3 folds),0.541 +/- 0.012 (in 3 folds),-0.068 +/- 0.032 (in 3 folds),0.541,-0.067,194.0,0.0,194.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.932 +/- 0.036 (in 3 folds),0.932 +/- 0.036 (in 3 folds),0.972 +/- 0.015 (in 3 folds),0.972 +/- 0.015 (in 3 folds),0.851 +/- 0.017 (in 3 folds),0.698 +/- 0.055 (in 3 folds),0.851,0.695,194,0,194,0.0,False
linearsvm_ovr,0.932 +/- 0.028 (in 3 folds),0.932 +/- 0.028 (in 3 folds),0.971 +/- 0.012 (in 3 folds),0.971 +/- 0.012 (in 3 folds),0.861 +/- 0.030 (in 3 folds),0.709 +/- 0.078 (in 3 folds),0.861,0.708,194,0,194,0.0,False
lasso_cv,0.924 +/- 0.022 (in 3 folds),0.924 +/- 0.022 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.968 +/- 0.009 (in 3 folds),0.845 +/- 0.014 (in 3 folds),0.680 +/- 0.056 (in 3 folds),0.845,0.677,194,0,194,0.0,False
elasticnet_cv,0.922 +/- 0.026 (in 3 folds),0.922 +/- 0.026 (in 3 folds),0.967 +/- 0.011 (in 3 folds),0.967 +/- 0.011 (in 3 folds),0.845 +/- 0.014 (in 3 folds),0.680 +/- 0.056 (in 3 folds),0.845,0.677,194,0,194,0.0,False
ridge_cv,0.921 +/- 0.036 (in 3 folds),0.921 +/- 0.036 (in 3 folds),0.967 +/- 0.014 (in 3 folds),0.967 +/- 0.014 (in 3 folds),0.753 +/- 0.081 (in 3 folds),0.348 +/- 0.304 (in 3 folds),0.753,0.4,194,0,194,0.0,False
xgboost,0.911 +/- 0.018 (in 3 folds),0.911 +/- 0.018 (in 3 folds),0.960 +/- 0.008 (in 3 folds),0.960 +/- 0.008 (in 3 folds),0.830 +/- 0.028 (in 3 folds),0.614 +/- 0.053 (in 3 folds),0.83,0.611,194,0,194,0.0,False
rf_multiclass,0.899 +/- 0.034 (in 3 folds),0.899 +/- 0.034 (in 3 folds),0.957 +/- 0.017 (in 3 folds),0.957 +/- 0.017 (in 3 folds),0.794 +/- 0.039 (in 3 folds),0.524 +/- 0.080 (in 3 folds),0.794,0.521,194,0,194,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.670 +/- 0.008 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.67,0.0,194,0,194,0.0,True
dummy_stratified,0.467 +/- 0.016 (in 3 folds),0.467 +/- 0.016 (in 3 folds),0.656 +/- 0.003 (in 3 folds),0.656 +/- 0.003 (in 3 folds),0.541 +/- 0.012 (in 3 folds),-0.068 +/- 0.032 (in 3 folds),0.541,-0.067,194,0,194,0.0,False


lasso_multiclass,linearsvm_ovr,lasso_cv,elasticnet_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.932 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.932 +/- 0.036 (in 3 folds) au-PRC (weighted OvO): 0.972 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.972 +/- 0.015 (in 3 folds) Accuracy: 0.851 +/- 0.017 (in 3 folds) MCC: 0.698 +/- 0.055 (in 3 folds) Global scores: Accuracy: 0.851 MCC: 0.695 Global classification report:  precision recall f1-score support  HIV 0.72 0.91 0.80 64 Healthy/Background 0.95 0.82 0.88 130  accuracy 0.85 194  macro avg 0.83 0.86 0.84 194  weighted avg 0.87 0.85 0.85 194,Per-fold scores: ROC-AUC (weighted OvO): 0.932 +/- 0.028 (in 3 folds) ROC-AUC (macro OvO): 0.932 +/- 0.028 (in 3 folds) au-PRC (weighted OvO): 0.971 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.971 +/- 0.012 (in 3 folds) Accuracy: 0.861 +/- 0.030 (in 3 folds) MCC: 0.709 +/- 0.078 (in 3 folds) Global scores: Accuracy: 0.861 MCC: 0.708 Global classification report:  precision recall f1-score support  HIV 0.74 0.89 0.81 64 Healthy/Background 0.94 0.85 0.89 130  accuracy 0.86 194  macro avg 0.84 0.87 0.85 194  weighted avg 0.87 0.86 0.86 194,Per-fold scores: ROC-AUC (weighted OvO): 0.924 +/- 0.022 (in 3 folds) ROC-AUC (macro OvO): 0.924 +/- 0.022 (in 3 folds) au-PRC (weighted OvO): 0.968 +/- 0.009 (in 3 folds) au-PRC (macro OvO): 0.968 +/- 0.009 (in 3 folds) Accuracy: 0.845 +/- 0.014 (in 3 folds) MCC: 0.680 +/- 0.056 (in 3 folds) Global scores: Accuracy: 0.845 MCC: 0.677 Global classification report:  precision recall f1-score support  HIV 0.72 0.88 0.79 64 Healthy/Background 0.93 0.83 0.88 130  accuracy 0.85 194  macro avg 0.82 0.85 0.83 194  weighted avg 0.86 0.85 0.85 194,Per-fold scores: ROC-AUC (weighted OvO): 0.922 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.922 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.967 +/- 0.011 (in 3 folds) au-PRC (macro OvO): 0.967 +/- 0.011 (in 3 folds) Accuracy: 0.845 +/- 0.014 (in 3 folds) MCC: 0.680 +/- 0.056 (in 3 folds) Global scores: Accuracy: 0.845 MCC: 0.677 Global classification report:  
precision recall f1-score support  HIV 0.72 0.88 0.79 64 Healthy/Background 0.93 0.83 0.88 130  accuracy 0.85 194  macro avg 0.82 0.85 0.83 194  weighted avg 0.86 0.85 0.85 194
,,,
,,,
,,,


ridge_cv,xgboost,rf_multiclass,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.921 +/- 0.036 (in 3 folds) ROC-AUC (macro OvO): 0.921 +/- 0.036 (in 3 folds) au-PRC (weighted OvO): 0.967 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.967 +/- 0.014 (in 3 folds) Accuracy: 0.753 +/- 0.081 (in 3 folds) MCC: 0.348 +/- 0.304 (in 3 folds) Global scores: Accuracy: 0.753 MCC: 0.400 Global classification report:  precision recall f1-score support  HIV 0.81 0.33 0.47 64 Healthy/Background 0.74 0.96 0.84 130  accuracy 0.75 194  macro avg 0.78 0.64 0.65 194  weighted avg 0.77 0.75 0.72 194,Per-fold scores: ROC-AUC (weighted OvO): 0.911 +/- 0.018 (in 3 folds) ROC-AUC (macro OvO): 0.911 +/- 0.018 (in 3 folds) au-PRC (weighted OvO): 0.960 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.960 +/- 0.008 (in 3 folds) Accuracy: 0.830 +/- 0.028 (in 3 folds) MCC: 0.614 +/- 0.053 (in 3 folds) Global scores: Accuracy: 0.830 MCC: 0.611 Global classification report:  precision recall f1-score support  HIV 0.75 0.72 0.74 64 Healthy/Background 0.86 0.88 0.87 130  accuracy 0.83 194  macro avg 0.81 0.80 0.81 194  weighted avg 0.83 0.83 0.83 194,Per-fold scores: ROC-AUC (weighted OvO): 0.899 +/- 0.034 (in 3 folds) ROC-AUC (macro OvO): 0.899 +/- 0.034 (in 3 folds) au-PRC (weighted OvO): 0.957 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.957 +/- 0.017 (in 3 folds) Accuracy: 0.794 +/- 0.039 (in 3 folds) MCC: 0.524 +/- 0.080 (in 3 folds) Global scores: Accuracy: 0.794 MCC: 0.521 Global classification report:  precision recall f1-score support  HIV 0.71 0.62 0.67 64 Healthy/Background 0.83 0.88 0.85 130  accuracy 0.79 194  macro avg 0.77 0.75 0.76 194  weighted avg 0.79 0.79 0.79 194,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.670 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.670 +/- 0.008 (in 3 folds) Accuracy: 0.670 +/- 0.008 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.670 MCC: 0.000 Global classification report:  
precision recall f1-score support  HIV 0.00 0.00 0.00 64 Healthy/Background 0.67 1.00 0.80 130  accuracy 0.67 194  macro avg 0.34 0.50 0.40 194  weighted avg 0.45 0.67 0.54 194
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.467 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.467 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.656 +/- 0.003 (in 3 folds) au-PRC (macro OvO): 0.656 +/- 0.003 (in 3 folds) Accuracy: 0.541 +/- 0.012 (in 3 folds) MCC: -0.068 +/- 0.032 (in 3 folds) Global scores: Accuracy: 0.541 MCC: -0.067 Global classification report:  precision recall f1-score support  HIV 0.28 0.25 0.26 64 Healthy/Background 0.65 0.68 0.67 130  accuracy 0.54 194  macro avg 0.47 0.47 0.47 194  weighted avg 0.53 0.54 0.53 194


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.934 +/- 0.016 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.747 +/- 0.073 (in 3 folds),0.335 +/- 0.291 (in 3 folds),0.747,0.392,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
ridge_cv.decision_thresholds_tuned,0.934 +/- 0.016 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.774 +/- 0.097 (in 3 folds),0.410 +/- 0.356 (in 3 folds),0.774,0.464,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
lasso_multiclass,0.930 +/- 0.014 (in 3 folds),0.930 +/- 0.014 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.857 +/- 0.056 (in 3 folds),0.714 +/- 0.099 (in 3 folds),0.856,0.71,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.930 +/- 0.014 (in 3 folds),0.930 +/- 0.014 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.833 +/- 0.081 (in 3 folds),0.676 +/- 0.126 (in 3 folds),0.832,0.654,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
linearsvm_ovr,0.928 +/- 0.011 (in 3 folds),0.928 +/- 0.011 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.853 +/- 0.028 (in 3 folds),0.690 +/- 0.052 (in 3 folds),0.853,0.689,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.928 +/- 0.011 (in 3 folds),0.928 +/- 0.011 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.826 +/- 0.050 (in 3 folds),0.653 +/- 0.067 (in 3 folds),0.825,0.642,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
lasso_cv,0.927 +/- 0.026 (in 3 folds),0.927 +/- 0.026 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.863 +/- 0.035 (in 3 folds),0.706 +/- 0.071 (in 3 folds),0.863,0.705,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
lasso_cv.decision_thresholds_tuned,0.927 +/- 0.026 (in 3 folds),0.927 +/- 0.026 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.840 +/- 0.081 (in 3 folds),0.700 +/- 0.116 (in 3 folds),0.839,0.686,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
elasticnet_cv,0.927 +/- 0.023 (in 3 folds),0.927 +/- 0.023 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.832 +/- 0.005 (in 3 folds),0.628 +/- 0.017 (in 3 folds),0.832,0.627,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.927 +/- 0.023 (in 3 folds),0.927 +/- 0.023 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.829 +/- 0.082 (in 3 folds),0.676 +/- 0.132 (in 3 folds),0.829,0.662,disease.separate_past_exposures,292.0,0.0,292.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.934 +/- 0.016 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.747 +/- 0.073 (in 3 folds),0.335 +/- 0.291 (in 3 folds),0.747,0.392,disease.separate_past_exposures,292,0,292,0.0,False
ridge_cv.decision_thresholds_tuned,0.934 +/- 0.016 (in 3 folds),0.934 +/- 0.016 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.774 +/- 0.097 (in 3 folds),0.410 +/- 0.356 (in 3 folds),0.774,0.464,disease.separate_past_exposures,292,0,292,0.0,False
lasso_multiclass,0.930 +/- 0.014 (in 3 folds),0.930 +/- 0.014 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.857 +/- 0.056 (in 3 folds),0.714 +/- 0.099 (in 3 folds),0.856,0.71,disease.separate_past_exposures,292,0,292,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.930 +/- 0.014 (in 3 folds),0.930 +/- 0.014 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.970 +/- 0.007 (in 3 folds),0.833 +/- 0.081 (in 3 folds),0.676 +/- 0.126 (in 3 folds),0.832,0.654,disease.separate_past_exposures,292,0,292,0.0,False
linearsvm_ovr,0.928 +/- 0.011 (in 3 folds),0.928 +/- 0.011 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.853 +/- 0.028 (in 3 folds),0.690 +/- 0.052 (in 3 folds),0.853,0.689,disease.separate_past_exposures,292,0,292,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.928 +/- 0.011 (in 3 folds),0.928 +/- 0.011 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.826 +/- 0.050 (in 3 folds),0.653 +/- 0.067 (in 3 folds),0.825,0.642,disease.separate_past_exposures,292,0,292,0.0,False
lasso_cv,0.927 +/- 0.026 (in 3 folds),0.927 +/- 0.026 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.863 +/- 0.035 (in 3 folds),0.706 +/- 0.071 (in 3 folds),0.863,0.705,disease.separate_past_exposures,292,0,292,0.0,False
lasso_cv.decision_thresholds_tuned,0.927 +/- 0.026 (in 3 folds),0.927 +/- 0.026 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.840 +/- 0.081 (in 3 folds),0.700 +/- 0.116 (in 3 folds),0.839,0.686,disease.separate_past_exposures,292,0,292,0.0,False
elasticnet_cv,0.927 +/- 0.023 (in 3 folds),0.927 +/- 0.023 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.832 +/- 0.005 (in 3 folds),0.628 +/- 0.017 (in 3 folds),0.832,0.627,disease.separate_past_exposures,292,0,292,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.927 +/- 0.023 (in 3 folds),0.927 +/- 0.023 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.968 +/- 0.010 (in 3 folds),0.829 +/- 0.082 (in 3 folds),0.676 +/- 0.132 (in 3 folds),0.829,0.662,disease.separate_past_exposures,292,0,292,0.0,False


ridge_cv,ridge_cv.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.934 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.934 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.972 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.972 +/- 0.007 (in 3 folds) Accuracy: 0.747 +/- 0.073 (in 3 folds) MCC: 0.335 +/- 0.291 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.747 MCC: 0.392 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.80 0.33 0.46 98 Healthy/Background 0.74 0.96 0.83 194  accuracy 0.75 292  macro avg 0.77 0.64 0.65 292  weighted avg 0.76 0.75 0.71 292,Per-fold scores: ROC-AUC (weighted OvO): 0.934 +/- 0.016 (in 3 folds) ROC-AUC (macro OvO): 0.934 +/- 0.016 (in 3 folds) au-PRC (weighted OvO): 0.972 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.972 +/- 0.007 (in 3 folds) Accuracy: 0.774 +/- 0.097 (in 3 folds) MCC: 0.410 +/- 0.356 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.774 MCC: 0.464 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.76 0.48 0.59 98 Healthy/Background 0.78 0.92 0.84 194  accuracy 0.77 292  macro avg 0.77 0.70 0.72 292  weighted avg 0.77 0.77 0.76 292,Per-fold scores: ROC-AUC (weighted OvO): 0.930 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.930 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.970 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.970 +/- 0.007 (in 3 folds) Accuracy: 0.857 +/- 0.056 (in 3 folds) MCC: 0.714 +/- 0.099 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.856 MCC: 0.710 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.73 0.92 0.81 
98 Healthy/Background 0.95 0.82 0.88 194  accuracy 0.86 292  macro avg 0.84 0.87 0.85 292  weighted avg 0.88 0.86 0.86 292,Per-fold scores: ROC-AUC (weighted OvO): 0.930 +/- 0.014 (in 3 folds) ROC-AUC (macro OvO): 0.930 +/- 0.014 (in 3 folds) au-PRC (weighted OvO): 0.970 +/- 0.007 (in 3 folds) au-PRC (macro OvO): 0.970 +/- 0.007 (in 3 folds) Accuracy: 0.833 +/- 0.081 (in 3 folds) MCC: 0.676 +/- 0.126 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.832 MCC: 0.654 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.70 0.87 0.78 98 Healthy/Background 0.92 0.81 0.87 194  accuracy 0.83 292  macro avg 0.81 0.84 0.82 292  weighted avg 0.85 0.83 0.84 292
,,,
,,,


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,lasso_cv,lasso_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.928 +/- 0.011 (in 3 folds) ROC-AUC (macro OvO): 0.928 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.968 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.968 +/- 0.006 (in 3 folds) Accuracy: 0.853 +/- 0.028 (in 3 folds) MCC: 0.690 +/- 0.052 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.853 MCC: 0.689 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.74 0.87 0.80 98 Healthy/Background 0.93 0.85 0.88 194  accuracy 0.85 292  macro avg 0.83 0.86 0.84 292  weighted avg 0.86 0.85 0.86 292,Per-fold scores: ROC-AUC (weighted OvO): 0.928 +/- 0.011 (in 3 folds) ROC-AUC (macro OvO): 0.928 +/- 0.011 (in 3 folds) au-PRC (weighted OvO): 0.968 +/- 0.006 (in 3 folds) au-PRC (macro OvO): 0.968 +/- 0.006 (in 3 folds) Accuracy: 0.826 +/- 0.050 (in 3 folds) MCC: 0.653 +/- 0.067 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.825 MCC: 0.642 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.69 0.87 0.77 98 Healthy/Background 0.92 0.80 0.86 194  accuracy 0.83 292  macro avg 0.81 0.84 0.81 292  weighted avg 0.85 0.83 0.83 292,Per-fold scores: ROC-AUC (weighted OvO): 0.927 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.927 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.969 +/- 0.011 (in 3 folds) au-PRC (macro OvO): 0.969 +/- 0.011 (in 3 folds) Accuracy: 0.863 +/- 0.035 (in 3 folds) MCC: 0.706 +/- 0.071 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.863 MCC: 0.705 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.76 0.86 0.81 
98 Healthy/Background 0.92 0.87 0.89 194  accuracy 0.86 292  macro avg 0.84 0.86 0.85 292  weighted avg 0.87 0.86 0.86 292,Per-fold scores: ROC-AUC (weighted OvO): 0.927 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.927 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.969 +/- 0.011 (in 3 folds) au-PRC (macro OvO): 0.969 +/- 0.011 (in 3 folds) Accuracy: 0.840 +/- 0.081 (in 3 folds) MCC: 0.700 +/- 0.116 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.839 MCC: 0.686 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.69 0.93 0.79 98 Healthy/Background 0.96 0.79 0.87 194  accuracy 0.84 292  macro avg 0.83 0.86 0.83 292  weighted avg 0.87 0.84 0.84 292
,,,
,,,


elasticnet_cv,elasticnet_cv.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.927 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.927 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.968 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.968 +/- 0.010 (in 3 folds) Accuracy: 0.832 +/- 0.005 (in 3 folds) MCC: 0.628 +/- 0.017 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.832 MCC: 0.627 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.74 0.77 0.75 98 Healthy/Background 0.88 0.87 0.87 194  accuracy 0.83 292  macro avg 0.81 0.82 0.81 292  weighted avg 0.83 0.83 0.83 292,Per-fold scores: ROC-AUC (weighted OvO): 0.927 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.927 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.968 +/- 0.010 (in 3 folds) au-PRC (macro OvO): 0.968 +/- 0.010 (in 3 folds) Accuracy: 0.829 +/- 0.082 (in 3 folds) MCC: 0.676 +/- 0.132 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.829 MCC: 0.662 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.68 0.91 0.78 98 Healthy/Background 0.94 0.79 0.86 194  accuracy 0.83 292  macro avg 0.81 0.85 0.82 292  weighted avg 0.86 0.83 0.83 292,Per-fold scores: ROC-AUC (weighted OvO): 0.923 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.923 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.963 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.963 +/- 0.014 (in 3 folds) Accuracy: 0.839 +/- 0.056 (in 3 folds) MCC: 0.635 +/- 0.126 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.839 MCC: 0.632 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.79 0.71 0.75 
98 Healthy/Background 0.86 0.90 0.88 194  accuracy 0.84 292  macro avg 0.82 0.81 0.82 292  weighted avg 0.84 0.84 0.84 292,Per-fold scores: ROC-AUC (weighted OvO): 0.923 +/- 0.030 (in 3 folds) ROC-AUC (macro OvO): 0.923 +/- 0.030 (in 3 folds) au-PRC (weighted OvO): 0.963 +/- 0.014 (in 3 folds) au-PRC (macro OvO): 0.963 +/- 0.014 (in 3 folds) Accuracy: 0.809 +/- 0.074 (in 3 folds) MCC: 0.651 +/- 0.075 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.808 MCC: 0.639 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.65 0.93 0.76 98 Healthy/Background 0.95 0.75 0.84 194  accuracy 0.81 292  macro avg 0.80 0.84 0.80 292  weighted avg 0.85 0.81 0.81 292
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.923 +/- 0.029 (in 3 folds) ROC-AUC (macro OvO): 0.923 +/- 0.029 (in 3 folds) au-PRC (weighted OvO): 0.963 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.963 +/- 0.015 (in 3 folds) Accuracy: 0.836 +/- 0.029 (in 3 folds) MCC: 0.645 +/- 0.039 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.836 MCC: 0.638 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.74 0.79 0.76 98 Healthy/Background 0.89 0.86 0.87 194  accuracy 0.84 292  macro avg 0.81 0.82 0.82 292  weighted avg 0.84 0.84 0.84 292,Per-fold scores: ROC-AUC (weighted OvO): 0.923 +/- 0.029 (in 3 folds) ROC-AUC (macro OvO): 0.923 +/- 0.029 (in 3 folds) au-PRC (weighted OvO): 0.963 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.963 +/- 0.015 (in 3 folds) Accuracy: 0.815 +/- 0.066 (in 3 folds) MCC: 0.655 +/- 0.066 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.815 MCC: 0.641 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support  HIV 0.66 0.91 0.77 98 Healthy/Background 0.94 0.77 0.85 194  accuracy 0.82 292  macro avg 0.80 0.84 0.81 292  weighted avg 0.85 0.82 0.82 292
,
,


---

# GeneLocus.TCR, TargetObsColumnEnum.lupus_vs_healthy trained on train_smaller set

## Specimen predictions on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.985 +/- 0.010 (in 3 folds),0.985 +/- 0.010 (in 3 folds),0.960 +/- 0.023 (in 3 folds),0.960 +/- 0.023 (in 3 folds),0.918 +/- 0.037 (in 3 folds),0.773 +/- 0.105 (in 3 folds),0.919,0.773,172.0,0.0,172.0,0.0,False
rf_multiclass,0.984 +/- 0.013 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.962 +/- 0.029 (in 3 folds),0.962 +/- 0.029 (in 3 folds),0.913 +/- 0.047 (in 3 folds),0.755 +/- 0.135 (in 3 folds),0.913,0.756,172.0,0.0,172.0,0.0,False
lasso_cv,0.984 +/- 0.008 (in 3 folds),0.984 +/- 0.008 (in 3 folds),0.954 +/- 0.022 (in 3 folds),0.954 +/- 0.022 (in 3 folds),0.913 +/- 0.047 (in 3 folds),0.753 +/- 0.143 (in 3 folds),0.913,0.755,172.0,0.0,172.0,0.0,False
ridge_cv,0.983 +/- 0.008 (in 3 folds),0.983 +/- 0.008 (in 3 folds),0.959 +/- 0.012 (in 3 folds),0.959 +/- 0.012 (in 3 folds),0.872 +/- 0.103 (in 3 folds),0.537 +/- 0.468 (in 3 folds),0.872,0.633,172.0,0.0,172.0,0.0,False
lasso_multiclass,0.979 +/- 0.009 (in 3 folds),0.979 +/- 0.009 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.924 +/- 0.009 (in 3 folds),0.796 +/- 0.021 (in 3 folds),0.924,0.794,172.0,0.0,172.0,0.0,False
linearsvm_ovr,0.973 +/- 0.012 (in 3 folds),0.973 +/- 0.012 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.919 +/- 0.010 (in 3 folds),0.777 +/- 0.026 (in 3 folds),0.919,0.774,172.0,0.0,172.0,0.0,False
xgboost,0.952 +/- 0.029 (in 3 folds),0.952 +/- 0.029 (in 3 folds),0.874 +/- 0.059 (in 3 folds),0.874 +/- 0.059 (in 3 folds),0.889 +/- 0.037 (in 3 folds),0.697 +/- 0.118 (in 3 folds),0.89,0.694,172.0,0.0,172.0,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.244 +/- 0.002 (in 3 folds),0.244 +/- 0.002 (in 3 folds),0.756 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.756,0.0,172.0,0.0,172.0,0.0,True
dummy_stratified,0.401 +/- 0.026 (in 3 folds),0.401 +/- 0.026 (in 3 folds),0.231 +/- 0.002 (in 3 folds),0.231 +/- 0.002 (in 3 folds),0.558 +/- 0.018 (in 3 folds),-0.197 +/- 0.053 (in 3 folds),0.558,-0.197,172.0,0.0,172.0,0.0,False
"All results, sorted",,,,,,,,,,,,,

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.985 +/- 0.010 (in 3 folds),0.985 +/- 0.010 (in 3 folds),0.960 +/- 0.023 (in 3 folds),0.960 +/- 0.023 (in 3 folds),0.918 +/- 0.037 (in 3 folds),0.773 +/- 0.105 (in 3 folds),0.919,0.773,172,0,172,0.0,False
rf_multiclass,0.984 +/- 0.013 (in 3 folds),0.984 +/- 0.013 (in 3 folds),0.962 +/- 0.029 (in 3 folds),0.962 +/- 0.029 (in 3 folds),0.913 +/- 0.047 (in 3 folds),0.755 +/- 0.135 (in 3 folds),0.913,0.756,172,0,172,0.0,False
lasso_cv,0.984 +/- 0.008 (in 3 folds),0.984 +/- 0.008 (in 3 folds),0.954 +/- 0.022 (in 3 folds),0.954 +/- 0.022 (in 3 folds),0.913 +/- 0.047 (in 3 folds),0.753 +/- 0.143 (in 3 folds),0.913,0.755,172,0,172,0.0,False
ridge_cv,0.983 +/- 0.008 (in 3 folds),0.983 +/- 0.008 (in 3 folds),0.959 +/- 0.012 (in 3 folds),0.959 +/- 0.012 (in 3 folds),0.872 +/- 0.103 (in 3 folds),0.537 +/- 0.468 (in 3 folds),0.872,0.633,172,0,172,0.0,False
lasso_multiclass,0.979 +/- 0.009 (in 3 folds),0.979 +/- 0.009 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.944 +/- 0.026 (in 3 folds),0.924 +/- 0.009 (in 3 folds),0.796 +/- 0.021 (in 3 folds),0.924,0.794,172,0,172,0.0,False
linearsvm_ovr,0.973 +/- 0.012 (in 3 folds),0.973 +/- 0.012 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.939 +/- 0.021 (in 3 folds),0.919 +/- 0.010 (in 3 folds),0.777 +/- 0.026 (in 3 folds),0.919,0.774,172,0,172,0.0,False
xgboost,0.952 +/- 0.029 (in 3 folds),0.952 +/- 0.029 (in 3 folds),0.874 +/- 0.059 (in 3 folds),0.874 +/- 0.059 (in 3 folds),0.889 +/- 0.037 (in 3 folds),0.697 +/- 0.118 (in 3 folds),0.89,0.694,172,0,172,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.244 +/- 0.002 (in 3 folds),0.244 +/- 0.002 (in 3 folds),0.756 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.756,0.0,172,0,172,0.0,True
dummy_stratified,0.401 +/- 0.026 (in 3 folds),0.401 +/- 0.026 (in 3 folds),0.231 +/- 0.002 (in 3 folds),0.231 +/- 0.002 (in 3 folds),0.558 +/- 0.018 (in 3 folds),-0.197 +/- 0.053 (in 3 folds),0.558,-0.197,172,0,172,0.0,False


elasticnet_cv,rf_multiclass,lasso_cv,ridge_cv
Per-fold scores: ROC-AUC (weighted OvO): 0.985 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.985 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.960 +/- 0.023 (in 3 folds) au-PRC (macro OvO): 0.960 +/- 0.023 (in 3 folds) Accuracy: 0.918 +/- 0.037 (in 3 folds) MCC: 0.773 +/- 0.105 (in 3 folds) Global scores: Accuracy: 0.919 MCC: 0.773 Global classification report:  precision recall f1-score support Healthy/Background 0.91 0.99 0.95 130  Lupus 0.97 0.69 0.81 42  accuracy 0.92 172  macro avg 0.94 0.84 0.88 172  weighted avg 0.92 0.92 0.91 172,Per-fold scores: ROC-AUC (weighted OvO): 0.984 +/- 0.013 (in 3 folds) ROC-AUC (macro OvO): 0.984 +/- 0.013 (in 3 folds) au-PRC (weighted OvO): 0.962 +/- 0.029 (in 3 folds) au-PRC (macro OvO): 0.962 +/- 0.029 (in 3 folds) Accuracy: 0.913 +/- 0.047 (in 3 folds) MCC: 0.755 +/- 0.135 (in 3 folds) Global scores: Accuracy: 0.913 MCC: 0.756 Global classification report:  precision recall f1-score support Healthy/Background 0.90 0.99 0.95 130  Lupus 0.97 0.67 0.79 42  accuracy 0.91 172  macro avg 0.93 0.83 0.87 172  weighted avg 0.92 0.91 0.91 172,Per-fold scores: ROC-AUC (weighted OvO): 0.984 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.984 +/- 0.008 (in 3 folds) au-PRC (weighted OvO): 0.954 +/- 0.022 (in 3 folds) au-PRC (macro OvO): 0.954 +/- 0.022 (in 3 folds) Accuracy: 0.913 +/- 0.047 (in 3 folds) MCC: 0.753 +/- 0.143 (in 3 folds) Global scores: Accuracy: 0.913 MCC: 0.755 Global classification report:  precision recall f1-score support Healthy/Background 0.92 0.97 0.94 130  Lupus 0.89 0.74 0.81 42  accuracy 0.91 172  macro avg 0.90 0.85 0.87 172  weighted avg 0.91 0.91 0.91 172,Per-fold scores: ROC-AUC (weighted OvO): 0.983 +/- 0.008 (in 3 folds) ROC-AUC (macro OvO): 0.983 +/- 0.008 (in 3 folds) au-PRC (weighted OvO): 0.959 +/- 0.012 (in 3 folds) au-PRC (macro OvO): 0.959 +/- 0.012 (in 3 folds) Accuracy: 0.872 +/- 0.103 (in 3 folds) MCC: 0.537 +/- 0.468 (in 3 folds) Global scores: Accuracy: 0.872 MCC: 0.633 Global classification 
report:  precision recall f1-score support Healthy/Background 0.86 0.99 0.92 130  Lupus 0.95 0.50 0.66 42  accuracy 0.87 172  macro avg 0.91 0.75 0.79 172  weighted avg 0.88 0.87 0.86 172
,,,
,,,
,,,


lasso_multiclass,linearsvm_ovr,xgboost,dummy_most_frequent
Per-fold scores: ROC-AUC (weighted OvO): 0.979 +/- 0.009 (in 3 folds) ROC-AUC (macro OvO): 0.979 +/- 0.009 (in 3 folds) au-PRC (weighted OvO): 0.944 +/- 0.026 (in 3 folds) au-PRC (macro OvO): 0.944 +/- 0.026 (in 3 folds) Accuracy: 0.924 +/- 0.009 (in 3 folds) MCC: 0.796 +/- 0.021 (in 3 folds) Global scores: Accuracy: 0.924 MCC: 0.794 Global classification report:  precision recall f1-score support Healthy/Background 0.95 0.95 0.95 130  Lupus 0.85 0.83 0.84 42  accuracy 0.92 172  macro avg 0.90 0.89 0.90 172  weighted avg 0.92 0.92 0.92 172,Per-fold scores: ROC-AUC (weighted OvO): 0.973 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.973 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.939 +/- 0.021 (in 3 folds) au-PRC (macro OvO): 0.939 +/- 0.021 (in 3 folds) Accuracy: 0.919 +/- 0.010 (in 3 folds) MCC: 0.777 +/- 0.026 (in 3 folds) Global scores: Accuracy: 0.919 MCC: 0.774 Global classification report:  precision recall f1-score support Healthy/Background 0.93 0.96 0.95 130  Lupus 0.87 0.79 0.82 42  accuracy 0.92 172  macro avg 0.90 0.87 0.89 172  weighted avg 0.92 0.92 0.92 172,Per-fold scores: ROC-AUC (weighted OvO): 0.952 +/- 0.029 (in 3 folds) ROC-AUC (macro OvO): 0.952 +/- 0.029 (in 3 folds) au-PRC (weighted OvO): 0.874 +/- 0.059 (in 3 folds) au-PRC (macro OvO): 0.874 +/- 0.059 (in 3 folds) Accuracy: 0.889 +/- 0.037 (in 3 folds) MCC: 0.697 +/- 0.118 (in 3 folds) Global scores: Accuracy: 0.890 MCC: 0.694 Global classification report:  precision recall f1-score support Healthy/Background 0.92 0.94 0.93 130  Lupus 0.79 0.74 0.77 42  accuracy 0.89 172  macro avg 0.86 0.84 0.85 172  weighted avg 0.89 0.89 0.89 172,Per-fold scores: ROC-AUC (weighted OvO): 0.500 +/- 0.000 (in 3 folds) ROC-AUC (macro OvO): 0.500 +/- 0.000 (in 3 folds) au-PRC (weighted OvO): 0.244 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.244 +/- 0.002 (in 3 folds) Accuracy: 0.756 +/- 0.002 (in 3 folds) MCC: 0.000 +/- 0.000 (in 3 folds) Global scores: Accuracy: 0.756 MCC: 0.000 Global classification 
report:  precision recall f1-score support Healthy/Background 0.76 1.00 0.86 130  Lupus 0.00 0.00 0.00 42  accuracy 0.76 172  macro avg 0.38 0.50 0.43 172  weighted avg 0.57 0.76 0.65 172
,,,
,,,
,,,


dummy_stratified
Per-fold scores: ROC-AUC (weighted OvO): 0.401 +/- 0.026 (in 3 folds) ROC-AUC (macro OvO): 0.401 +/- 0.026 (in 3 folds) au-PRC (weighted OvO): 0.231 +/- 0.002 (in 3 folds) au-PRC (macro OvO): 0.231 +/- 0.002 (in 3 folds) Accuracy: 0.558 +/- 0.018 (in 3 folds) MCC: -0.197 +/- 0.053 (in 3 folds) Global scores: Accuracy: 0.558 MCC: -0.197 Global classification report:  precision recall f1-score support Healthy/Background 0.71 0.71 0.71 130  Lupus 0.10 0.10 0.10 42  accuracy 0.56 172  macro avg 0.40 0.40 0.40 172  weighted avg 0.56 0.56 0.56 172


## Apply train-smaller model -- Test set performance - With and without tuning on validation set

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.976 +/- 0.010 (in 3 folds),0.976 +/- 0.010 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.857 +/- 0.097 (in 3 folds),0.504 +/- 0.439 (in 3 folds),0.857,0.591,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
ridge_cv.decision_thresholds_tuned,0.976 +/- 0.010 (in 3 folds),0.976 +/- 0.010 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.873 +/- 0.109 (in 3 folds),0.550 +/- 0.477 (in 3 folds),0.872,0.635,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
lasso_multiclass,0.976 +/- 0.002 (in 3 folds),0.976 +/- 0.002 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.791 +/- 0.015 (in 3 folds),0.919,0.788,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.976 +/- 0.002 (in 3 folds),0.976 +/- 0.002 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.788 +/- 0.017 (in 3 folds),0.919,0.779,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
lasso_cv,0.975 +/- 0.004 (in 3 folds),0.975 +/- 0.004 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.774 +/- 0.035 (in 3 folds),0.919,0.774,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
lasso_cv.decision_thresholds_tuned,0.975 +/- 0.004 (in 3 folds),0.975 +/- 0.004 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.915 +/- 0.006 (in 3 folds),0.777 +/- 0.009 (in 3 folds),0.915,0.769,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
elasticnet_cv,0.975 +/- 0.003 (in 3 folds),0.975 +/- 0.003 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.888 +/- 0.023 (in 3 folds),0.683 +/- 0.065 (in 3 folds),0.888,0.682,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.975 +/- 0.003 (in 3 folds),0.975 +/- 0.003 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.796 +/- 0.018 (in 3 folds),0.919,0.791,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
linearsvm_ovr,0.966 +/- 0.012 (in 3 folds),0.966 +/- 0.012 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.926 +/- 0.008 (in 3 folds),0.802 +/- 0.025 (in 3 folds),0.926,0.8,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.966 +/- 0.012 (in 3 folds),0.966 +/- 0.012 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.930 +/- 0.011 (in 3 folds),0.818 +/- 0.018 (in 3 folds),0.93,0.811,disease.separate_past_exposures,258.0,0.0,258.0,0.0,False

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Global evaluation column name global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.976 +/- 0.010 (in 3 folds),0.976 +/- 0.010 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.857 +/- 0.097 (in 3 folds),0.504 +/- 0.439 (in 3 folds),0.857,0.591,disease.separate_past_exposures,258,0,258,0.0,False
ridge_cv.decision_thresholds_tuned,0.976 +/- 0.010 (in 3 folds),0.976 +/- 0.010 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.930 +/- 0.017 (in 3 folds),0.873 +/- 0.109 (in 3 folds),0.550 +/- 0.477 (in 3 folds),0.872,0.635,disease.separate_past_exposures,258,0,258,0.0,False
lasso_multiclass,0.976 +/- 0.002 (in 3 folds),0.976 +/- 0.002 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.791 +/- 0.015 (in 3 folds),0.919,0.788,disease.separate_past_exposures,258,0,258,0.0,False
lasso_multiclass.decision_thresholds_tuned,0.976 +/- 0.002 (in 3 folds),0.976 +/- 0.002 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.927 +/- 0.008 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.788 +/- 0.017 (in 3 folds),0.919,0.779,disease.separate_past_exposures,258,0,258,0.0,False
lasso_cv,0.975 +/- 0.004 (in 3 folds),0.975 +/- 0.004 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.774 +/- 0.035 (in 3 folds),0.919,0.774,disease.separate_past_exposures,258,0,258,0.0,False
lasso_cv.decision_thresholds_tuned,0.975 +/- 0.004 (in 3 folds),0.975 +/- 0.004 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.924 +/- 0.026 (in 3 folds),0.915 +/- 0.006 (in 3 folds),0.777 +/- 0.009 (in 3 folds),0.915,0.769,disease.separate_past_exposures,258,0,258,0.0,False
elasticnet_cv,0.975 +/- 0.003 (in 3 folds),0.975 +/- 0.003 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.888 +/- 0.023 (in 3 folds),0.683 +/- 0.065 (in 3 folds),0.888,0.682,disease.separate_past_exposures,258,0,258,0.0,False
elasticnet_cv.decision_thresholds_tuned,0.975 +/- 0.003 (in 3 folds),0.975 +/- 0.003 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.923 +/- 0.024 (in 3 folds),0.919 +/- 0.011 (in 3 folds),0.796 +/- 0.018 (in 3 folds),0.919,0.791,disease.separate_past_exposures,258,0,258,0.0,False
linearsvm_ovr,0.966 +/- 0.012 (in 3 folds),0.966 +/- 0.012 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.926 +/- 0.008 (in 3 folds),0.802 +/- 0.025 (in 3 folds),0.926,0.8,disease.separate_past_exposures,258,0,258,0.0,False
linearsvm_ovr.decision_thresholds_tuned,0.966 +/- 0.012 (in 3 folds),0.966 +/- 0.012 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.914 +/- 0.018 (in 3 folds),0.930 +/- 0.011 (in 3 folds),0.818 +/- 0.018 (in 3 folds),0.93,0.811,disease.separate_past_exposures,258,0,258,0.0,False


ridge_cv,ridge_cv.decision_thresholds_tuned,lasso_multiclass,lasso_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.976 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.976 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.930 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.930 +/- 0.017 (in 3 folds) Accuracy: 0.857 +/- 0.097 (in 3 folds) MCC: 0.504 +/- 0.439 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.857 MCC: 0.591 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.84 0.99 0.91 194  Lupus 0.97 0.44 0.60 64  accuracy 0.86 258  macro avg 0.90 0.72 0.76 258  weighted avg 0.87 0.86 0.84 258,Per-fold scores: ROC-AUC (weighted OvO): 0.976 +/- 0.010 (in 3 folds) ROC-AUC (macro OvO): 0.976 +/- 0.010 (in 3 folds) au-PRC (weighted OvO): 0.930 +/- 0.017 (in 3 folds) au-PRC (macro OvO): 0.930 +/- 0.017 (in 3 folds) Accuracy: 0.873 +/- 0.109 (in 3 folds) MCC: 0.550 +/- 0.477 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.872 MCC: 0.635 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.87 0.98 0.92 194  Lupus 0.90 0.55 0.68 64  accuracy 0.87 258  macro avg 0.88 0.76 0.80 258  weighted avg 0.87 0.87 0.86 258,Per-fold scores: ROC-AUC (weighted OvO): 0.976 +/- 0.002 (in 3 folds) ROC-AUC (macro OvO): 0.976 +/- 0.002 (in 3 folds) au-PRC (weighted OvO): 0.927 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.927 +/- 0.008 (in 3 folds) Accuracy: 0.919 +/- 0.011 (in 3 folds) MCC: 0.791 +/- 0.015 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.919 MCC: 0.788 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support 
Healthy/Background 0.96 0.93 0.95 194  Lupus 0.81 0.88 0.84 64  accuracy 0.92 258  macro avg 0.88 0.90 0.89 258  weighted avg 0.92 0.92 0.92 258,Per-fold scores: ROC-AUC (weighted OvO): 0.976 +/- 0.002 (in 3 folds) ROC-AUC (macro OvO): 0.976 +/- 0.002 (in 3 folds) au-PRC (weighted OvO): 0.927 +/- 0.008 (in 3 folds) au-PRC (macro OvO): 0.927 +/- 0.008 (in 3 folds) Accuracy: 0.919 +/- 0.011 (in 3 folds) MCC: 0.788 +/- 0.017 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.919 MCC: 0.779 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.94 0.95 0.95 194  Lupus 0.85 0.81 0.83 64  accuracy 0.92 258  macro avg 0.90 0.88 0.89 258  weighted avg 0.92 0.92 0.92 258
,,,
,,,


lasso_cv,lasso_cv.decision_thresholds_tuned,elasticnet_cv,elasticnet_cv.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.975 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.975 +/- 0.004 (in 3 folds) au-PRC (weighted OvO): 0.924 +/- 0.026 (in 3 folds) au-PRC (macro OvO): 0.924 +/- 0.026 (in 3 folds) Accuracy: 0.919 +/- 0.011 (in 3 folds) MCC: 0.774 +/- 0.035 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.919 MCC: 0.774 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.92 0.97 0.95 194  Lupus 0.91 0.75 0.82 64  accuracy 0.92 258  macro avg 0.91 0.86 0.88 258  weighted avg 0.92 0.92 0.92 258,Per-fold scores: ROC-AUC (weighted OvO): 0.975 +/- 0.004 (in 3 folds) ROC-AUC (macro OvO): 0.975 +/- 0.004 (in 3 folds) au-PRC (weighted OvO): 0.924 +/- 0.026 (in 3 folds) au-PRC (macro OvO): 0.924 +/- 0.026 (in 3 folds) Accuracy: 0.915 +/- 0.006 (in 3 folds) MCC: 0.777 +/- 0.009 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.915 MCC: 0.769 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.94 0.95 0.94 194  Lupus 0.84 0.81 0.83 64  accuracy 0.91 258  macro avg 0.89 0.88 0.88 258  weighted avg 0.91 0.91 0.91 258,Per-fold scores: ROC-AUC (weighted OvO): 0.975 +/- 0.003 (in 3 folds) ROC-AUC (macro OvO): 0.975 +/- 0.003 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.024 (in 3 folds) au-PRC (macro OvO): 0.923 +/- 0.024 (in 3 folds) Accuracy: 0.888 +/- 0.023 (in 3 folds) MCC: 0.683 +/- 0.065 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.888 MCC: 0.682 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support 
Healthy/Background 0.88 0.98 0.93 194  Lupus 0.91 0.61 0.73 64  accuracy 0.89 258  macro avg 0.90 0.79 0.83 258  weighted avg 0.89 0.89 0.88 258,Per-fold scores: ROC-AUC (weighted OvO): 0.975 +/- 0.003 (in 3 folds) ROC-AUC (macro OvO): 0.975 +/- 0.003 (in 3 folds) au-PRC (weighted OvO): 0.923 +/- 0.024 (in 3 folds) au-PRC (macro OvO): 0.923 +/- 0.024 (in 3 folds) Accuracy: 0.919 +/- 0.011 (in 3 folds) MCC: 0.796 +/- 0.018 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.919 MCC: 0.791 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.96 0.93 0.94 194  Lupus 0.80 0.89 0.84 64  accuracy 0.92 258  macro avg 0.88 0.91 0.89 258  weighted avg 0.92 0.92 0.92 258
,,,
,,,


linearsvm_ovr,linearsvm_ovr.decision_thresholds_tuned,rf_multiclass,rf_multiclass.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.966 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.966 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.914 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.914 +/- 0.018 (in 3 folds) Accuracy: 0.926 +/- 0.008 (in 3 folds) MCC: 0.802 +/- 0.025 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.926 MCC: 0.800 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.94 0.96 0.95 194  Lupus 0.87 0.83 0.85 64  accuracy 0.93 258  macro avg 0.91 0.89 0.90 258  weighted avg 0.93 0.93 0.93 258,Per-fold scores: ROC-AUC (weighted OvO): 0.966 +/- 0.012 (in 3 folds) ROC-AUC (macro OvO): 0.966 +/- 0.012 (in 3 folds) au-PRC (weighted OvO): 0.914 +/- 0.018 (in 3 folds) au-PRC (macro OvO): 0.914 +/- 0.018 (in 3 folds) Accuracy: 0.930 +/- 0.011 (in 3 folds) MCC: 0.818 +/- 0.018 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.930 MCC: 0.811 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.95 0.96 0.95 194  Lupus 0.87 0.84 0.86 64  accuracy 0.93 258  macro avg 0.91 0.90 0.91 258  weighted avg 0.93 0.93 0.93 258,Per-fold scores: ROC-AUC (weighted OvO): 0.961 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.961 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.883 +/- 0.088 (in 3 folds) au-PRC (macro OvO): 0.883 +/- 0.088 (in 3 folds) Accuracy: 0.868 +/- 0.045 (in 3 folds) MCC: 0.620 +/- 0.145 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.868 MCC: 0.625 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support 
Healthy/Background 0.86 0.99 0.92 194  Lupus 0.94 0.50 0.65 64  accuracy 0.87 258  macro avg 0.90 0.74 0.79 258  weighted avg 0.88 0.87 0.85 258,Per-fold scores: ROC-AUC (weighted OvO): 0.961 +/- 0.023 (in 3 folds) ROC-AUC (macro OvO): 0.961 +/- 0.023 (in 3 folds) au-PRC (weighted OvO): 0.883 +/- 0.088 (in 3 folds) au-PRC (macro OvO): 0.883 +/- 0.088 (in 3 folds) Accuracy: 0.880 +/- 0.036 (in 3 folds) MCC: 0.673 +/- 0.125 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.880 MCC: 0.665 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.90 0.94 0.92 194  Lupus 0.80 0.69 0.74 64  accuracy 0.88 258  macro avg 0.85 0.82 0.83 258  weighted avg 0.88 0.88 0.88 258
,,,
,,,


xgboost,xgboost.decision_thresholds_tuned
Per-fold scores: ROC-AUC (weighted OvO): 0.941 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.941 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.876 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.876 +/- 0.015 (in 3 folds) Accuracy: 0.892 +/- 0.006 (in 3 folds) MCC: 0.698 +/- 0.016 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.891 MCC: 0.695 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.90 0.96 0.93 194  Lupus 0.86 0.67 0.75 64  accuracy 0.89 258  macro avg 0.88 0.82 0.84 258  weighted avg 0.89 0.89 0.89 258,Per-fold scores: ROC-AUC (weighted OvO): 0.941 +/- 0.015 (in 3 folds) ROC-AUC (macro OvO): 0.941 +/- 0.015 (in 3 folds) au-PRC (weighted OvO): 0.876 +/- 0.015 (in 3 folds) au-PRC (macro OvO): 0.876 +/- 0.015 (in 3 folds) Accuracy: 0.861 +/- 0.033 (in 3 folds) MCC: 0.621 +/- 0.088 (in 3 folds) Global scores using column name disease.separate_past_exposures: Accuracy: 0.860 MCC: 0.612 Global evaluation column name: disease.separate_past_exposures Global classification report using column name disease.separate_past_exposures:  precision recall f1-score support Healthy/Background 0.89 0.93 0.91 194  Lupus 0.75 0.66 0.70 64  accuracy 0.86 258  macro avg 0.82 0.79 0.80 258  weighted avg 0.86 0.86 0.86 258
,
,


---