In [1]:
from IPython.display import display, Markdown
from malid import config, logger
from malid.external import model_evaluation
from malid.trained_model_wrappers import SequenceClassifier
from malid.datamodels import (
    TargetObsColumnEnum,
    SampleWeightStrategy,
    combine_classification_option_names,
)

# Sample weight strategy shared by every model evaluated in this notebook: it is
# passed when resolving each trained model's base directory and when building the
# name of each per-target output directory.
sample_weight_strategy = SampleWeightStrategy.ISOTYPE_USAGE

In [2]:
# Summarize the trained sequence models for every (gene locus, classification target)
# combination. For each combination: load the saved experiment set, drop the global
# fold, export per-model reports plus a model comparison table, and show that table inline.
for gene_locus in config.gene_loci_used:
    for target_obs_column in config.classification_targets:
        # Location of the previously trained models (expected to exist already).
        models_base_dir = SequenceClassifier._get_model_base_dir(
            gene_locus=gene_locus,
            target_obs_column=target_obs_column,
            sample_weight_strategy=sample_weight_strategy,
        )
        model_location = models_base_dir / "train_smaller_model"

        # Destination for this combination's reports; may not exist yet, so create it.
        locus_output_root = config.paths.sequence_models_output_dir / gene_locus.name
        options_dirname = combine_classification_option_names(
            target_obs_column=target_obs_column,
            sample_weight_strategy=sample_weight_strategy,
        )
        output_dir = locus_output_root / options_dirname
        output_dir.mkdir(parents=True, exist_ok=True)

        def classification_report_fname(model_name):
            """Path of the classification report text file for one model."""
            return (
                output_dir
                / f"sequence_model.results_on_validation_set.{model_name}.classification_report.txt"
            )

        def confusion_matrix_fname(model_name):
            """Path of the confusion matrix image for one model."""
            return (
                output_dir
                / f"sequence_model.results_on_validation_set.{model_name}.confusion_matrix.png"
            )

        try:
            logger.info(
                f"{gene_locus}, {target_obs_column}, {sample_weight_strategy} -> {model_location} -> {output_dir}"
            )

            # Load the saved evaluation results.
            experiment_set = model_evaluation.ExperimentSet.load_from_disk(
                output_prefix=model_location
            )

            # A global fold model was trained too, but scores should come from the
            # cross-validation folds only, so discard every entry with fold ID -1.
            # TODO: make kdict support: del self.model_outputs[:, fold_id]
            global_fold_outputs = experiment_set.model_outputs[:, -1]
            for key in global_fold_outputs.keys():
                logger.debug(f"Removing {key} (global fold)")
                del experiment_set.model_outputs[key]

            # Summarize and write per-model reports.
            experiment_set_global_performance = experiment_set.summarize()
            experiment_set_global_performance.export_all_models(
                func_generate_classification_report_fname=classification_report_fname,
                func_generate_confusion_matrix_fname=confusion_matrix_fname,
                dpi=72,
            )

            # Write the model comparison table as a tab-separated file.
            combined_stats = (
                experiment_set_global_performance.get_model_comparison_stats()
            )
            comparison_fname = (
                output_dir
                / "sequence_model.results_on_validation_set.compare_model_scores.tsv"
            )
            combined_stats.to_csv(comparison_fname, sep="\t")

            # Show a heading followed by the comparison table.
            heading = Markdown(
                f"# {gene_locus}, {target_obs_column}, {sample_weight_strategy}"
            )
            display(heading)
            display(combined_stats)

        except Exception as err:
            # Best effort: log the failure with its traceback and continue with the next combination.
            logger.exception(
                f"Failed to analyze {gene_locus}, {target_obs_column}, {sample_weight_strategy}: {err}"
            )

        # Visual separator between combinations in the cell output.
        print("*" * 80)

2023-01-03 19:43:15,050 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.disease, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/disease_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/disease_sample_weight_strategy_ISOTYPE_USAGE


# GeneLocus.BCR, TargetObsColumnEnum.disease, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.598 +/- 0.013 (in 3 folds),0.606 +/- 0.016 (in 3 folds),0.574 +/- 0.009 (in 3 folds),0.582 +/- 0.013 (in 3 folds),0.312 +/- 0.011 (in 3 folds),0.350 +/- 0.011 (in 3 folds),0.093 +/- 0.008 (in 3 folds),0.121 +/- 0.013 (in 3 folds),0.313,0.35,0.094,0.122,9546430,0,9546430,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.354 +/- 0.014 (in 3 folds),0.354 +/- 0.014 (in 3 folds),0.000 +/- 0.000 (in 3 folds),-0.000 +/- 0.001 (in 3 folds),0.354,0.354,0.0,0.0,9546430,0,9546430,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.504 +/- 0.027 (in 3 folds),0.504 +/- 0.027 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.504,0.504,0.0,0.0,9546430,0,9546430,0.0,True


2023-01-03 20:04:54,015 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/disease_all_demographics_present_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/disease_all_demographics_present_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.600 +/- 0.018 (in 3 folds),0.609 +/- 0.021 (in 3 folds),0.573 +/- 0.011 (in 3 folds),0.580 +/- 0.015 (in 3 folds),0.311 +/- 0.015 (in 3 folds),0.350 +/- 0.010 (in 3 folds),0.094 +/- 0.012 (in 3 folds),0.123 +/- 0.013 (in 3 folds),0.311,0.35,0.095,0.124,8491527,0,8491527,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.365 +/- 0.020 (in 3 folds),0.365 +/- 0.020 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.364,0.364,0.0,0.0,8491527,0,8491527,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.518 +/- 0.040 (in 3 folds),0.518 +/- 0.040 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.517,0.517,0.0,0.0,8491527,0,8491527,0.0,True


2023-01-03 20:24:17,366 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.covid_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/covid_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/covid_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.BCR, TargetObsColumnEnum.covid_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.589 +/- 0.039 (in 3 folds),0.589 +/- 0.039 (in 3 folds),0.902 +/- 0.020 (in 3 folds),0.902 +/- 0.020 (in 3 folds),0.578 +/- 0.022 (in 3 folds),0.650 +/- 0.010 (in 3 folds),0.082 +/- 0.039 (in 3 folds),0.140 +/- 0.048 (in 3 folds),0.579,0.65,0.086,0.143,5471893,0,5471893,0.0,False
dummy_stratified,0.500 +/- 0.001 (in 3 folds),0.500 +/- 0.001 (in 3 folds),0.880 +/- 0.024 (in 3 folds),0.880 +/- 0.024 (in 3 folds),0.796 +/- 0.024 (in 3 folds),0.796 +/- 0.024 (in 3 folds),-0.000 +/- 0.001 (in 3 folds),-0.001 +/- 0.000 (in 3 folds),0.796,0.796,0.001,0.0,5471893,0,5471893,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.880 +/- 0.024 (in 3 folds),0.880 +/- 0.024 (in 3 folds),0.880 +/- 0.024 (in 3 folds),0.880 +/- 0.024 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.879,0.879,0.0,0.0,5471893,0,5471893,0.0,True


2023-01-03 20:41:24,033 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.hiv_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/hiv_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/hiv_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.BCR, TargetObsColumnEnum.hiv_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.653 +/- 0.002 (in 3 folds),0.653 +/- 0.002 (in 3 folds),0.834 +/- 0.032 (in 3 folds),0.834 +/- 0.032 (in 3 folds),0.619 +/- 0.003 (in 3 folds),0.610 +/- 0.002 (in 3 folds),0.187 +/- 0.011 (in 3 folds),0.172 +/- 0.011 (in 3 folds),0.619,0.61,0.188,0.173,6477673,0,6477673,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.742 +/- 0.043 (in 3 folds),0.742 +/- 0.043 (in 3 folds),0.605 +/- 0.022 (in 3 folds),0.605 +/- 0.022 (in 3 folds),0.000 +/- 0.001 (in 3 folds),0.001 +/- 0.001 (in 3 folds),0.605,0.606,0.0,0.0,6477673,0,6477673,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.742 +/- 0.043 (in 3 folds),0.742 +/- 0.043 (in 3 folds),0.742 +/- 0.043 (in 3 folds),0.742 +/- 0.043 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.743,0.743,0.0,0.0,6477673,0,6477673,0.0,True


2023-01-03 21:04:36,060 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.lupus_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/lupus_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/lupus_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.BCR, TargetObsColumnEnum.lupus_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.602 +/- 0.014 (in 3 folds),0.602 +/- 0.014 (in 3 folds),0.462 +/- 0.055 (in 3 folds),0.462 +/- 0.055 (in 3 folds),0.553 +/- 0.009 (in 3 folds),0.603 +/- 0.008 (in 3 folds),0.119 +/- 0.011 (in 3 folds),0.185 +/- 0.025 (in 3 folds),0.552,0.603,0.121,0.187,7218556,0,7218556,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.333 +/- 0.024 (in 3 folds),0.333 +/- 0.024 (in 3 folds),0.565 +/- 0.012 (in 3 folds),0.565 +/- 0.012 (in 3 folds),-0.000 +/- 0.001 (in 3 folds),-0.001 +/- 0.001 (in 3 folds),0.564,0.564,-0.0,-0.001,7218556,0,7218556,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.333 +/- 0.024 (in 3 folds),0.333 +/- 0.024 (in 3 folds),0.667 +/- 0.024 (in 3 folds),0.667 +/- 0.024 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.666,0.666,0.0,0.0,7218556,0,7218556,0.0,True


2023-01-03 21:33:55,838 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/ethnicity_condensed_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/ethnicity_condensed_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.560 +/- 0.010 (in 3 folds),0.571 +/- 0.016 (in 3 folds),0.551 +/- 0.011 (in 3 folds),0.559 +/- 0.013 (in 3 folds),0.271 +/- 0.023 (in 3 folds),0.272 +/- 0.018 (in 3 folds),0.050 +/- 0.007 (in 3 folds),0.043 +/- 0.008 (in 3 folds),0.271,0.273,0.046,0.04,4394190,0,4394190,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.388 +/- 0.064 (in 3 folds),0.387 +/- 0.064 (in 3 folds),0.000 +/- 0.000 (in 3 folds),-0.001 +/- 0.000 (in 3 folds),0.386,0.385,-0.009,-0.009,4394190,0,4394190,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.485 +/- 0.137 (in 3 folds),0.485 +/- 0.137 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.483,0.483,0.0,0.0,4394190,0,4394190,0.0,True


2023-01-03 21:49:30,948 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/age_group_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/age_group_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************










































































# GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.506 +/- 0.013 (in 3 folds),0.507 +/- 0.010 (in 3 folds),0.507 +/- 0.009 (in 3 folds),0.507 +/- 0.007 (in 3 folds),0.148 +/- 0.018 (in 3 folds),0.151 +/- 0.015 (in 3 folds),0.004 +/- 0.012 (in 3 folds),0.004 +/- 0.011 (in 3 folds),0.147,0.15,0.006,0.007,4394190,0,4394190,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.154 +/- 0.004 (in 3 folds),0.154 +/- 0.004 (in 3 folds),-0.000 +/- 0.000 (in 3 folds),-0.000 +/- 0.001 (in 3 folds),0.154,0.154,-0.014,-0.014,4394190,0,4394190,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.160 +/- 0.013 (in 3 folds),0.160 +/- 0.013 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.16,0.16,-0.065,-0.065,4394190,0,4394190,0.0,True


2023-01-03 22:06:18,326 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/age_group_binary_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/age_group_binary_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.504 +/- 0.003 (in 3 folds),0.504 +/- 0.003 (in 3 folds),0.612 +/- 0.080 (in 3 folds),0.612 +/- 0.080 (in 3 folds),0.502 +/- 0.002 (in 3 folds),0.507 +/- 0.004 (in 3 folds),0.005 +/- 0.005 (in 3 folds),0.008 +/- 0.004 (in 3 folds),0.502,0.507,0.004,0.008,4394190,0,4394190,0.0,False
dummy_stratified,0.500 +/- 0.001 (in 3 folds),0.500 +/- 0.001 (in 3 folds),0.604 +/- 0.081 (in 3 folds),0.604 +/- 0.081 (in 3 folds),0.510 +/- 0.005 (in 3 folds),0.509 +/- 0.004 (in 3 folds),0.001 +/- 0.001 (in 3 folds),-0.000 +/- 0.001 (in 3 folds),0.51,0.509,-0.008,-0.009,4394190,0,4394190,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.604 +/- 0.080 (in 3 folds),0.604 +/- 0.080 (in 3 folds),0.604 +/- 0.080 (in 3 folds),0.604 +/- 0.080 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.6,0.6,0.0,0.0,4394190,0,4394190,0.0,True


2023-01-03 22:27:51,496 - analyze_sequence_model.ipynb - INFO - GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/BCR/sex_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/BCR/sex_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,Accuracy with sample weights per fold,MCC per fold,MCC with sample weights per fold,Accuracy global,Accuracy with sample weights global,MCC global,MCC with sample weights global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.541 +/- 0.180 (in 3 folds),0.541 +/- 0.180 (in 3 folds),0.497 +/- 0.023 (in 3 folds),0.497 +/- 0.023 (in 3 folds),-0.000 +/- 0.001 (in 3 folds),-0.000 +/- 0.002 (in 3 folds),0.498,0.498,-0.019,-0.019,4394190,0,4394190,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.541 +/- 0.180 (in 3 folds),0.541 +/- 0.180 (in 3 folds),0.541 +/- 0.180 (in 3 folds),0.541 +/- 0.180 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.549,0.549,0.0,0.0,4394190,0,4394190,0.0,True
lasso_multiclass,0.497 +/- 0.012 (in 3 folds),0.497 +/- 0.012 (in 3 folds),0.538 +/- 0.187 (in 3 folds),0.538 +/- 0.187 (in 3 folds),0.495 +/- 0.006 (in 3 folds),0.494 +/- 0.005 (in 3 folds),-0.005 +/- 0.017 (in 3 folds),-0.007 +/- 0.014 (in 3 folds),0.495,0.495,-0.008,-0.01,4394190,0,4394190,0.0,False


2023-01-03 22:46:40,090 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.disease, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/disease_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/disease_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.disease, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.575 +/- 0.009 (in 3 folds),0.575 +/- 0.009 (in 3 folds),0.547 +/- 0.005 (in 3 folds),0.548 +/- 0.006 (in 3 folds),0.294 +/- 0.003 (in 3 folds),0.082 +/- 0.009 (in 3 folds),0.294,0.082,11942998,0,11942998,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.430 +/- 0.004 (in 3 folds),0.001 +/- 0.000 (in 3 folds),0.43,0.0,11942998,0,11942998,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.612 +/- 0.015 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.612,0.0,11942998,0,11942998,0.0,True


2023-01-03 23:27:08,622 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/disease_all_demographics_present_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/disease_all_demographics_present_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.579 +/- 0.011 (in 3 folds),0.578 +/- 0.010 (in 3 folds),0.549 +/- 0.007 (in 3 folds),0.550 +/- 0.007 (in 3 folds),0.317 +/- 0.009 (in 3 folds),0.090 +/- 0.012 (in 3 folds),0.317,0.091,10547971,0,10547971,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.424 +/- 0.008 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.424,-0.001,10547971,0,10547971,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.614 +/- 0.028 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.614,0.0,10547971,0,10547971,0.0,True


2023-01-04 00:03:41,349 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.covid_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/covid_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/covid_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.covid_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.566 +/- 0.018 (in 3 folds),0.566 +/- 0.018 (in 3 folds),0.946 +/- 0.009 (in 3 folds),0.946 +/- 0.009 (in 3 folds),0.544 +/- 0.015 (in 3 folds),0.046 +/- 0.014 (in 3 folds),0.544,0.047,7820089,0,7820089,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.874 +/- 0.005 (in 3 folds),-0.000 +/- 0.000 (in 3 folds),0.874,-0.001,7820089,0,7820089,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.934,0.0,7820089,0,7820089,0.0,True


2023-01-04 00:33:25,737 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.hiv_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/hiv_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/hiv_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.hiv_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.611 +/- 0.010 (in 3 folds),0.611 +/- 0.010 (in 3 folds),0.849 +/- 0.014 (in 3 folds),0.849 +/- 0.014 (in 3 folds),0.547 +/- 0.011 (in 3 folds),0.128 +/- 0.013 (in 3 folds),0.548,0.128,9292412,0,9292412,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.787 +/- 0.019 (in 3 folds),0.787 +/- 0.019 (in 3 folds),0.664 +/- 0.009 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.663,-0.0,9292412,0,9292412,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.787 +/- 0.019 (in 3 folds),0.787 +/- 0.019 (in 3 folds),0.787 +/- 0.019 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.786,0.0,9292412,0,9292412,0.0,True


2023-01-04 01:07:57,563 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.lupus_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/lupus_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/lupus_vs_healthy_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.lupus_vs_healthy, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.590 +/- 0.014 (in 3 folds),0.590 +/- 0.014 (in 3 folds),0.289 +/- 0.013 (in 3 folds),0.289 +/- 0.013 (in 3 folds),0.556 +/- 0.012 (in 3 folds),0.103 +/- 0.016 (in 3 folds),0.556,0.103,9443733,0,9443733,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.226 +/- 0.000 (in 3 folds),0.226 +/- 0.000 (in 3 folds),0.640 +/- 0.010 (in 3 folds),-0.000 +/- 0.000 (in 3 folds),0.64,-0.0,9443733,0,9443733,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.226 +/- 0.001 (in 3 folds),0.226 +/- 0.001 (in 3 folds),0.774 +/- 0.001 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.774,0.0,9443733,0,9443733,0.0,True


2023-01-04 01:44:47,504 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/ethnicity_condensed_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/ethnicity_condensed_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.559 +/- 0.005 (in 3 folds),0.574 +/- 0.001 (in 3 folds),0.538 +/- 0.007 (in 3 folds),0.552 +/- 0.001 (in 3 folds),0.212 +/- 0.030 (in 3 folds),0.046 +/- 0.009 (in 3 folds),0.211,0.049,6474391,0,6474391,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.440 +/- 0.052 (in 3 folds),-0.000 +/- 0.000 (in 3 folds),0.44,-0.005,6474391,0,6474391,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.564 +/- 0.117 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.567,0.0,6474391,0,6474391,0.0,True


2023-01-04 02:01:43,727 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/age_group_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/age_group_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


















































# GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.546 +/- 0.004 (in 3 folds),0.541 +/- 0.006 (in 3 folds),0.537 +/- 0.003 (in 3 folds),0.533 +/- 0.004 (in 3 folds),0.213 +/- 0.022 (in 3 folds),0.045 +/- 0.008 (in 3 folds),0.213,0.056,6474391,0,6474391,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.183 +/- 0.012 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.182,-0.006,6474391,0,6474391,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.272 +/- 0.042 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.274,0.0,6474391,0,6474391,0.0,True


2023-01-04 02:17:46,679 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/age_group_binary_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/age_group_binary_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.550 +/- 0.005 (in 3 folds),0.550 +/- 0.005 (in 3 folds),0.711 +/- 0.059 (in 3 folds),0.711 +/- 0.059 (in 3 folds),0.521 +/- 0.006 (in 3 folds),0.066 +/- 0.004 (in 3 folds),0.521,0.065,6474391,0,6474391,0.0,False
dummy_stratified,0.500 +/- 0.001 (in 3 folds),0.500 +/- 0.001 (in 3 folds),0.667 +/- 0.064 (in 3 folds),0.667 +/- 0.064 (in 3 folds),0.566 +/- 0.017 (in 3 folds),0.001 +/- 0.001 (in 3 folds),0.565,-0.004,6474391,0,6474391,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.666 +/- 0.064 (in 3 folds),0.666 +/- 0.064 (in 3 folds),0.666 +/- 0.064 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.664,0.0,6474391,0,6474391,0.0,True


2023-01-04 02:38:38,878 - analyze_sequence_model.ipynb - INFO - GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE -> /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/sequence_models/TCR/sex_healthy_only_sample_weight_strategy_ISOTYPE_USAGE/train_smaller_model -> /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/sequence_models/TCR/sex_healthy_only_sample_weight_strategy_ISOTYPE_USAGE


********************************************************************************


# GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only, SampleWeightStrategy.ISOTYPE_USAGE

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.510 +/- 0.010 (in 3 folds),0.510 +/- 0.010 (in 3 folds),0.586 +/- 0.130 (in 3 folds),0.586 +/- 0.130 (in 3 folds),0.509 +/- 0.006 (in 3 folds),0.014 +/- 0.014 (in 3 folds),0.509,0.013,6474391,0,6474391,0.0,False
dummy_stratified,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.579 +/- 0.137 (in 3 folds),0.579 +/- 0.137 (in 3 folds),0.506 +/- 0.014 (in 3 folds),-0.000 +/- 0.001 (in 3 folds),0.506,-0.017,6474391,0,6474391,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.579 +/- 0.137 (in 3 folds),0.579 +/- 0.137 (in 3 folds),0.579 +/- 0.137 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.577,0.0,6474391,0,6474391,0.0,True


********************************************************************************
