# Analyze metamodel performance on test set, with abstention

> Train patient-level rollup model using existing base models trained on train-smaller set.


In [1]:
from pathlib import Path
from typing import Dict, Generator, List, Optional, Tuple, Union
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

%matplotlib inline
import seaborn as sns
import genetools
from IPython.display import display, Markdown

In [2]:
from malid import config, logger
from malid.external.glmnet_wrapper import GlmnetLogitNetWrapper
from malid.train import train_metamodel
from malid.external import model_evaluation
from malid.datamodels import (
    TargetObsColumnEnum,
    GeneLocus,
)
from malid.trained_model_wrappers import BlendingMetamodel
from malid.external.genetools_plots import (
    plot_mean_and_standard_deviation_heatmap,
)

In [3]:
def _plot_feature_importances(
    plot_df: pd.DataFrame, model_name: str, xlabel: str, xmin_at_zero: bool
):
    """Draw per-feature importances for a model that has a single set of importances.

    Intended for binary/multiclass random forests or binary linear models, i.e. one
    model across all classes rather than a one-vs-rest multiclass model.

    Each column of plot_df is a feature and each row is one fold (the row count is
    reported as the number of folds in the title). With several rows, a horizontal
    boxplot is drawn; with exactly one row, a scatter plot is drawn instead.

    Returns the matplotlib figure. If anything fails while drawing, the figure is
    closed and the exception is re-raised.
    """
    n_features = plot_df.shape[1]
    # Figure height scales with the number of features.
    fig, ax = plt.subplots(figsize=(4, n_features / 2.5))

    try:
        # Translate raw metamodel feature names into friendly names
        # (a no-op for names that were already made friendly when grouping/summing subsets).
        friendly_df = plot_df.rename(
            columns=BlendingMetamodel.convert_feature_name_to_friendly_name
        )
        n_folds = friendly_df.shape[0]

        if n_folds != 1:
            # Usual case: distribution over folds shown as a boxplot
            sns.boxplot(data=friendly_df, orient="h", ax=ax)
        else:
            # A boxplot of a single observation is not useful: show points instead.
            only_row = friendly_df.iloc[0]
            ax.scatter(only_row.values, only_row.index)
            # Mimic the spacing and top-to-bottom feature order of the default boxplot
            buffer = 0.5
            ax.set_ylim(-buffer, n_features - 1 + buffer)
            ax.invert_yaxis()

        plural_suffix = "" if n_folds == 1 else "s"
        plt.title(f"{model_name} ({n_folds} fold{plural_suffix})")
        plt.xlabel(xlabel)
        if xmin_at_zero:
            # Pin only the left limit; the right limit stays automatic.
            plt.xlim(0)
        return fig
    except Exception:
        # Close the figure so that Jupyter does not try to display a broken figure,
        # then let the original exception propagate.
        plt.close(fig)
        raise


def _sum_subsets_of_feature_importances(
    df: pd.DataFrame,
    subset_names: Optional[Dict[str, str]],
    drop_empty_subsets: bool = True,
):
    """Sum up feature importances by subsets.
    Subset_names is a dict mapping friendly_subset_name to regex to match columns (we match with "contains" operation).
    Drop_empty_subsets is whether to drop empty subsets (i.e. where no columns match the regex).
    Pass through as-is without summing if subset_names is not provided
    """
    if subset_names is not None:
        # get relevant columns for each subset
        sum_parts = {
            name: df.loc[:, df.columns.str.contains(regex)]
            for name, regex in subset_names.items()
        }

        if drop_empty_subsets:
            # drop subsets where no columns have matched
            sum_parts = {
                name: df_part
                for name, df_part in sum_parts.items()
                if not df_part.empty
            }

        if len(sum_parts) == 0:
            raise ValueError(
                f"Subset names {subset_names} not found in df columns {df.columns}"
            )

        # do the sums
        return pd.DataFrame.from_dict(
            {name: df_part.sum(axis=1) for name, df_part in sum_parts.items()},
            orient="columns",
        )

    # pass through as-is without summing if subset_names is not provided
    return df


def get_feature_importance_subsets_to_plot(
    gene_locus: GeneLocus,
) -> Dict[str, Union[Dict[str, str], None]]:
    """Describe the ways feature importances can be grouped before plotting.

    Returns a dict keyed by grouping name ("all", "by_locus", "by_model_component",
    "by_locus_and_model_component"). Each value is either None (no grouping) or a dict
    mapping a friendly group name to the regex that selects the group's feature columns.
    Every grouping also includes the demographics and interaction groups.
    """
    # (raw component name used inside feature names, friendly name shown in plots)
    component_labels = [
        ("repertoire_stats", "Repertoire composition"),
        ("convergent_cluster_model", "CDR3 clustering"),
        ("sequence_model", "Language model"),
    ]
    # Groups appended to every grouping, in this order.
    shared_groups = {
        "Demographics": "^demographics",
        "Sequence x Demographic feature interactions": "^interaction",
    }

    # NOTE(review): in the patterns below, the trailing ":*" is a regex quantifier
    # (zero or more colons), not a glob, so e.g. "^BCR:*" also matches any name that
    # merely starts with "BCR". Confirm whether a literal colon was intended.
    by_locus = {}
    for locus in gene_locus:
        by_locus[f"{locus.name}"] = f"^{locus.name}:*"

    by_model_component = {}
    for raw_name, friendly_name in component_labels:
        # Don't match if starts with interaction
        # i.e. match "BCR:sequence_model:Covid19" but not "interaction|BCR:sequence_model:Covid19|demographics:age".
        by_model_component[f"{friendly_name}"] = f"^(?:(?!interaction).)*{raw_name}"

    by_locus_and_model_component = {}
    for raw_name, friendly_name in component_labels:
        for locus in gene_locus:
            by_locus_and_model_component[
                f"{friendly_name} ({locus.name})"
            ] = f"^{locus.name}:{raw_name}:*"

    return {
        "all": None,
        "by_locus": {**by_locus, **shared_groups},
        "by_model_component": {**by_model_component, **shared_groups},
        "by_locus_and_model_component": {
            **by_locus_and_model_component,
            **shared_groups,
        },
    }


def plot_multiclass_feature_importances(
    model_name: str,
    raw_coefs_mean: pd.DataFrame,
    raw_coefs_std: Optional[pd.DataFrame],
    gene_locus: GeneLocus,
    target_obs_column: TargetObsColumnEnum,
    metamodel_flavor: str,
    n_folds: int,
) -> Generator[Tuple[str, plt.Figure], None, None]:
    """Generate heatmaps of one-vs-rest linear model coefficients.

    Both coefficient dataframes have one row per class and one column per feature.

    Yields (figure_name, figure) pairs, in this order:
    - "raw_coefs.mean": heatmap of raw_coefs_mean.
    - "raw_coefs.stdev": heatmap of raw_coefs_std (only if raw_coefs_std is provided).
    - "raw_coefs": mean and standard deviation together (only if raw_coefs_std is provided).
    - "absval_coefs.<grouping>": percent contribution of each feature group, for every
      grouping returned by get_feature_importance_subsets_to_plot(gene_locus).

    A figure that fails to plot is skipped with a logged warning rather than aborting
    the remaining figures. Yielded figures are left open: the consumer is expected to
    save and close them.

    model_name, gene_locus, target_obs_column and metamodel_flavor are used for titles
    and log messages (gene_locus also determines the feature groupings).
    n_folds is only used in plot titles.
    """
    ## We will plot raw coefs, and also use absvals so we can combine features

    def _sort_plot_features(features_df: pd.DataFrame) -> pd.DataFrame:
        # Arrange feature columns in desired order:
        # 1. BCR : model1 : Covid19
        # 2. BCR : model2 : Covid19
        # 3. BCR : model3 : Covid19
        # 4. TCR : model1 : Covid19
        # 5. TCR : model2 : Covid19
        # 6. TCR : model3 : Covid19
        # 7. BCR : model1 : HIV
        # and so on
        column_order = features_df.columns.to_series().str.split(":", expand=True)
        if column_order.shape[1] >= 3:
            # this is true for the above examples
            column_order = column_order.sort_values([2, 0])
        else:
            # the demographics-only metamodel flavor has feature names with only one single colon
            column_order = column_order.sort_values([0])
        return features_df[column_order.index]

    # Sort once up front; every plot below reuses these sorted frames.
    raw_coefs_mean = _sort_plot_features(raw_coefs_mean)
    if raw_coefs_std is not None:
        raw_coefs_std = _sort_plot_features(raw_coefs_std)

    # Cut cmap by 15% from both sides, so that we don't have dark blue and dark red at the extremes, which are hard to distinguish
    # https://stackoverflow.com/a/18926541/130164
    # plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was deprecated in
    # matplotlib 3.7 and removed in 3.9.
    base_cmap = plt.get_cmap("RdBu_r")
    diverging_color_cmap = base_cmap.from_list(
        name=f"{base_cmap.name}_truncated",
        colors=base_cmap(np.linspace(0.15, 0.85, 256)),
        N=256,
    )

    def _plot(
        features_df: pd.DataFrame,
        label: str,
        cmap_diverging: bool,
        require_sum_to_1: bool,
        make_percentage: bool = False,
    ):
        """Draw a features-by-classes heatmap with a horizontal colorbar underneath.

        Returns (fig, ax). Raises ValueError if require_sum_to_1 is set and any row
        does not sum to 1, or if make_percentage is requested without require_sum_to_1.
        On a plotting failure the figure is closed and the exception re-raised.
        """
        # autosize
        figsize = (features_df.shape[0] * 1.0, features_df.shape[1] / 2.5)

        if require_sum_to_1 and not np.allclose(features_df.sum(axis=1), 1):
            raise ValueError("Sum of feature importances is not 1")
        if make_percentage:
            # Turn fractions into percentages
            if not require_sum_to_1:
                raise ValueError("make_percentage requires require_sum_to_1")
            features_df = features_df * 100

        # Convert any metamodel feature names to friendly names,
        # if they have not already been renamed to friendly names when grouping/summing subsets.
        features_df = features_df.rename(
            columns=BlendingMetamodel.convert_feature_name_to_friendly_name
        )

        fig, ax = plt.subplots(figsize=figsize)

        try:
            # Create dedicated colorbar axis
            colorbar_ax = inset_axes(
                ax,
                width="80%",  # relative unit
                height=0.25,  # in inches
                loc="lower center",
                borderpad=-5,  # create space
            )
            sns.heatmap(
                # plot transpose, so features are on y-axis
                features_df.T,
                center=0 if cmap_diverging else None,
                linewidths=0.5,
                cmap=diverging_color_cmap
                if cmap_diverging
                else "Blues",  # sns.color_palette("vlag", as_cmap=True) is another good diverging
                ax=ax,
                # Put colorbar on bottom
                cbar_kws={"label": label, "orientation": "horizontal"},
                cbar_ax=colorbar_ax,
                # plot all tick labels
                xticklabels=True,
                yticklabels=True,
            )

            # Adjust tick labels
            ax.set_xticklabels(ax.get_xticklabels(), rotation=0)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
            genetools.plots.wrap_tick_labels(
                ax=ax, wrap_x_axis=True, wrap_y_axis=False, wrap_amount=12
            )

            # set global "current axes" back to main axes,
            # so that any calls like plt.title target main ax rather than colorbar_ax
            plt.sca(ax)
            return fig, ax
        except Exception:
            # close figure just in case, so Jupyter does not try to display a broken figure
            plt.close(fig)
            # reraise with the original traceback
            raise

    # Plot mean
    try:
        fig, ax = _plot(
            raw_coefs_mean,
            label="Coefficient mean",
            cmap_diverging=True,
            require_sum_to_1=False,
        )
        ax.set_title(
            f"Feature coefficients, each class versus the rest (mean over {n_folds} folds)"
        )
        yield ("raw_coefs.mean", fig)
    except Exception as err:
        logger.warning(
            f"Failed to plot {model_name}, {gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor} multiclass raw_coefs.mean with error: {err}"
        )

    if raw_coefs_std is not None:
        # Plot std
        try:
            fig, ax = _plot(
                raw_coefs_std,
                label="Coefficient stdev",
                cmap_diverging=False,
                require_sum_to_1=False,
            )
            ax.set_title(
                f"Feature coefficients, each class versus the rest (stdev over {n_folds} folds)"
            )
            yield ("raw_coefs.stdev", fig)
        except Exception as err:
            logger.warning(
                f"Failed to plot {model_name}, {gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor} multiclass raw_coefs.stdev with error: {err}"
            )

        # Plot mean and standard deviation together:
        # reshape both frames to long format (one row per class+feature) and join them.
        combined = pd.merge(
            raw_coefs_mean.rename_axis(index="class")
            .reset_index()
            .melt(
                id_vars=["class"],
                value_vars=raw_coefs_mean.columns,
                var_name="feature",
                value_name="mean",
            ),
            raw_coefs_std.rename_axis(index="class")
            .reset_index()
            .melt(
                id_vars=["class"],
                value_vars=raw_coefs_std.columns,
                var_name="feature",
                value_name="stdev",
            ),
            on=["class", "feature"],
            how="inner",
            validate="1:1",
        )
        # Convert raw metamodel feature names to friendly names
        combined["feature"] = combined["feature"].apply(
            BlendingMetamodel.convert_feature_name_to_friendly_name
        )

        try:
            fig, ax = plot_mean_and_standard_deviation_heatmap(
                data=combined,
                x_axis_key="class",
                y_axis_key="feature",
                mean_key="mean",
                standard_deviation_key="stdev",
                color_cmap=diverging_color_cmap,
                color_vcenter=0,
            )
            # TODO: make hierarchical y-axis labels (https://stackoverflow.com/questions/19184484/how-to-add-group-labels-for-bar-charts, https://stackoverflow.com/questions/37934242/hierarchical-axis-labeling-in-matplotlib-python)
            ax.set_title(
                f"Feature coefficients, each class versus the rest (over {n_folds} folds)"
            )
            yield ("raw_coefs", fig)
        except Exception as err:
            logger.warning(
                f"Failed to plot {model_name}, {gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor} multiclass raw_coefs (mean+stdev together) with error: {err}"
            )

    ## Report aggregate feature importance of several features in a linear model
    # e.g. I'd like to say something about how much all the language model features contribute to the metamodel, vs all the CDR3 clustering features.
    # I believe you can [sum feature importances](https://stats.stackexchange.com/questions/311488/summing-feature-importance-in-scikit-learn-for-a-set-of-features) for a set of features in a random forest.
    # for a linear model, I suppose I could take the absolute value of the coefs and sum them for something like "overall effect strength from this set of features".

    # Convert to absolute value, and divide by the sum of absolute values of all coefficients for "percent contribution"
    normalized_coefs = genetools.stats.normalize_rows(np.abs(raw_coefs_mean))
    for fig_name, subset_names in get_feature_importance_subsets_to_plot(
        gene_locus
    ).items():
        # sum up by origin of feature importances and replot.
        try:
            logger.debug(f"{model_name} absval_coefs {fig_name} across folds")
            fig, ax = _plot(
                _sum_subsets_of_feature_importances(
                    df=normalized_coefs, subset_names=subset_names
                ),
                label="Percent contribution",  # "Coefficient absval, percent contribution",
                cmap_diverging=False,
                require_sum_to_1=True,
                make_percentage=True,
            )
            ax.set_title(
                f"{model_name} feature percent contributions\neach class versus the rest\n(averaged over {n_folds} folds)"
            )
            yield (f"absval_coefs.{fig_name}", fig)
        except Exception as err:
            # Skip broken figures
            # One possible cause is that the feature names for this metamodel flavor don't correspond to what get_feature_importance_subsets_to_plot() is producing.
            logger.warning(
                f"Failed to plot {model_name}, {gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor} feature percent contributions for figure name absval_coefs.{fig_name}, subset names {subset_names}: {err}"
            )


def analyze_feature_importances(
    model_name: str,
    model_global_performance: model_evaluation.ModelGlobalPerformance,
    gene_locus: GeneLocus,
    target_obs_column: TargetObsColumnEnum,
    metamodel_flavor: str,
    highres_results_output_prefix: Path,
    global_fold_classifier: Optional[BlendingMetamodel],
):
    """Get and analyze feature importances.

    Depending on model_name and on whether every cross-validation fold is binary:
    - tree models, and binary linear models: plot per-fold feature importances
      (and, if available, those of the global fold classifier) and save them as PNGs.
    - multiclass linear models: average the per-fold coefficient tables, write the
      mean and standard deviation as TSVs, and save the heatmaps produced by
      plot_multiclass_feature_importances(); the global fold classifier's
      coefficients are exported and plotted separately if available.
    - any other model name: log a warning and do nothing.

    All outputs are written to paths starting with highres_results_output_prefix.
    gene_locus, target_obs_column and metamodel_flavor are used for feature grouping
    and for log messages.

    global_fold_classifier is ignored (with a warning) when its number of classes
    disagrees with whether the cross-validation folds are binary.

    Raises ValueError when the expected feature importances are not available.
    """
    # First, check if model is binary in each fold
    is_binary = all(
        len(per_fold_output.class_names) == 2
        for per_fold_output in model_global_performance.per_fold_outputs.values()
    )
    if (
        global_fold_classifier is not None
        and (len(global_fold_classifier.classes_) == 2) != is_binary
    ):
        # Sanity check
        logger.warning(
            f"Ignoring global fold classifier for {model_name} because cross validation is_binary={is_binary} does not match global fold classes count = {len(global_fold_classifier.classes_)}"
        )
        global_fold_classifier = None

    # Depending on the model type (tree vs linear model; binary vs multiclass), we will retrieve and plot feature importances differently.
    # (Tree models are always a single model across all classes, regardless of whether classification target is binary or multiclass,
    # whereas multiclass linear models may be trained separately for each class.)
    is_tree = model_name in ["rf_multiclass", "xgboost"]
    is_linear_model = model_name in [
        "linearsvm_ovr",
        "lasso_cv",
        "ridge_cv",
        "elasticnet_cv",
        "lasso_multiclass",
    ]

    if is_tree or (is_linear_model and is_binary):
        # Get feature importances for each fold
        feature_importances_cross_validation_df: Union[
            pd.DataFrame, None
        ] = model_global_performance.feature_importances
        if feature_importances_cross_validation_df is None:
            raise ValueError(f"No feature importances available for {model_name}")
        # List of (importances dataframe, suffix inserted into output file names)
        feature_importances_to_plot = [(feature_importances_cross_validation_df, "")]

        if global_fold_classifier is not None:
            global_fold_feature_importances = (
                model_evaluation._extract_feature_importances(
                    global_fold_classifier._inner
                )
            )
            global_fold_feature_names = model_evaluation._get_feature_names(
                global_fold_classifier._inner
            )
            if global_fold_feature_importances is None:
                raise ValueError(
                    f"No feature importances available for {model_name} (global fold)"
                )
            # Shape the global fold importances like the cross-validation table:
            # a single row (labeled -1) with one column per feature.
            feature_importances_to_plot.append(
                (
                    pd.Series(
                        global_fold_feature_importances,
                        index=global_fold_feature_names,
                        name=-1,
                    )
                    .to_frame()
                    .T,
                    "_global_fold",
                )
            )

        # Plot feature importances.
        for feature_importances, overall_name in feature_importances_to_plot:
            if is_tree:
                for (
                    fig_name,
                    subset_names,
                ) in get_feature_importance_subsets_to_plot(gene_locus).items():
                    # sum up by origin of feature importances and replot.
                    try:
                        fig = _plot_feature_importances(
                            plot_df=_sum_subsets_of_feature_importances(
                                df=feature_importances,
                                subset_names=subset_names,
                            ),
                            model_name=model_name,
                            xlabel="Feature importance",
                            # Values are all positive for tree models
                            xmin_at_zero=True,
                        )
                        genetools.plots.savefig(
                            fig,
                            f"{highres_results_output_prefix}.feature_importances{overall_name}.{model_name}.{fig_name}.png",
                            dpi=300,
                        )
                        plt.close(fig)
                    except Exception as err:
                        # Skip broken figures
                        # One possible cause is that the feature names for this metamodel flavor don't correspond to what get_feature_importance_subsets_to_plot() is producing.
                        logger.warning(
                            f"Failed to plot {model_name} feature importances{overall_name} for {gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor}, with figure name {fig_name} and subset names {subset_names}: {err}"
                        )

            elif is_linear_model:
                # Each entry is plotted exactly once here. (Previously a second loop over
                # feature_importances_to_plot was nested at this point, which re-rendered
                # and overwrote every figure once per list entry.)
                # TODO: Add normalization of coefficients and summing of subsets (nontrivial for linear model)
                # For now only plot all the features - don't group by subset.
                fig = _plot_feature_importances(
                    plot_df=feature_importances,
                    model_name=model_name,
                    xlabel="Feature coefficient",
                    # coefficients are not necessarily positive
                    xmin_at_zero=False,
                )
                genetools.plots.savefig(
                    fig,
                    f"{highres_results_output_prefix}.feature_importances{overall_name}.{model_name}.all.png",
                    dpi=300,
                )
                plt.close(fig)

    elif is_linear_model and not is_binary:
        # Many OvR models for each class vs the rest
        raw_coefs: Optional[
            Dict[int, pd.DataFrame]
        ] = model_global_performance.multiclass_feature_importances
        if raw_coefs is None:
            raise ValueError(
                f"No feature importances available for multiclass {model_name}"
            )

        ## Combine multiclass feature importances across folds:
        # The coefs are comparable across folds because the inputs to the model were standardized.

        # Create 3D array from these 2D arrays - making sure that the index and column order is the same across folds.
        first_df = next(iter(raw_coefs.values()))
        try:
            raw_coefs_data: np.ndarray = np.array(
                [df.loc[first_df.index][first_df.columns] for df in raw_coefs.values()]
            )
        except Exception as err:
            logger.warning(
                f"Could not combine feature coefficients across folds for multiclass linear model {model_name} ({gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor}), possibly because of missing classes. Skipping feature importance plots with this error: {err}"
            )
            # skip this model
            return

        # Extract mean and standard deviation (across the fold axis), and repack in dataframe
        raw_coefs_mean: pd.DataFrame = pd.DataFrame(
            np.mean(raw_coefs_data, axis=0),
            index=first_df.index,
            columns=first_df.columns,
        )
        raw_coefs_std: pd.DataFrame = pd.DataFrame(
            np.std(raw_coefs_data, axis=0),
            index=first_df.index,
            columns=first_df.columns,
        )

        raw_coefs_mean.to_csv(
            f"{highres_results_output_prefix}.feature_importances.{model_name}.raw_coefs_mean.tsv",
            sep="\t",
        )
        raw_coefs_std.to_csv(
            f"{highres_results_output_prefix}.feature_importances.{model_name}.raw_coefs_std.tsv",
            sep="\t",
        )

        for fig_name, fig in plot_multiclass_feature_importances(
            model_name=model_name,
            raw_coefs_mean=raw_coefs_mean,
            raw_coefs_std=raw_coefs_std,
            gene_locus=gene_locus,
            target_obs_column=target_obs_column,
            metamodel_flavor=metamodel_flavor,
            n_folds=len(raw_coefs),
        ):
            fname = f"{highres_results_output_prefix}.feature_importances.{model_name}.{fig_name}.png"
            logger.debug(f"{fig_name} -> {fname}")
            genetools.plots.savefig(
                fig,
                fname,
                dpi=300,
            )
            plt.close(fig)

        if global_fold_classifier is not None:
            # Also plot global fold coefficients on their own. (We will pass them as raw_coefs_mean (without running a mean), with raw_coefs_std set to None)
            global_fold_feature_importances = (
                model_evaluation._extract_multiclass_feature_importances(
                    global_fold_classifier._inner
                )
            )
            global_fold_feature_names = model_evaluation._get_feature_names(
                global_fold_classifier._inner
            )
            if global_fold_feature_importances is None:
                raise ValueError(
                    f"No feature importances available for multiclass {model_name} (global fold)"
                )
            global_fold_feature_importances = pd.DataFrame(
                global_fold_feature_importances,
                index=global_fold_classifier.classes_,
                columns=global_fold_feature_names,
            )
            global_fold_feature_importances.to_csv(
                f"{highres_results_output_prefix}.feature_importances.{model_name}.raw_coefs.global_fold.tsv",
                sep="\t",
            )
            for fig_name, fig in plot_multiclass_feature_importances(
                model_name=model_name,
                raw_coefs_mean=global_fold_feature_importances,
                raw_coefs_std=None,
                gene_locus=gene_locus,
                target_obs_column=target_obs_column,
                metamodel_flavor=metamodel_flavor,
                n_folds=1,
            ):
                fname = f"{highres_results_output_prefix}.feature_importances_global_fold.{model_name}.{fig_name}.png"
                logger.debug(f"{fig_name} -> {fname}")
                genetools.plots.savefig(
                    fig,
                    fname,
                    dpi=300,
                )
                plt.close(fig)
    else:
        logger.warning(
            f"Feature importances not plotted for {model_name}: not a recognized tree or linear model."
        )

In [4]:
def run_analysis(gene_locus: GeneLocus, target_obs_column: TargetObsColumnEnum):
    base_model_train_fold_name = "train_smaller"
    metamodel_fold_label_train = "validation"

    try:
        flavors = train_metamodel.get_metamodel_flavors(
            gene_locus=gene_locus,
            target_obs_column=target_obs_column,
            fold_id=config.all_fold_ids[0],
            base_model_train_fold_name=base_model_train_fold_name,
        )
    except Exception as err:
        logger.warning(
            f"Failed to generate metamodel flavors for {gene_locus}, {target_obs_column}: {err}"
        )
        return
    for metamodel_flavor, metamodel_config in flavors.items():
        # should already exist:
        metamodels_base_dir = BlendingMetamodel._get_metamodel_base_dir(
            gene_locus=gene_locus,
            target_obs_column=target_obs_column,
            metamodel_flavor=metamodel_flavor,
        )

        _output_suffix = (
            Path(gene_locus.name) / target_obs_column.name / metamodel_flavor
        )
        # might not exist yet:
        output_base_dir = (
            config.paths.second_stage_blending_metamodel_output_dir / _output_suffix
        )
        highres_output_base_dir = (
            config.paths.high_res_outputs_dir / "metamodel" / _output_suffix
        )
        output_base_dir.mkdir(parents=True, exist_ok=True)
        highres_output_base_dir.mkdir(parents=True, exist_ok=True)

        fname_prefix = f"{base_model_train_fold_name}_applied_to_{metamodel_fold_label_train}_model"
        model_prefix = metamodels_base_dir / fname_prefix
        results_output_prefix = output_base_dir / fname_prefix
        highres_results_output_prefix = highres_output_base_dir / fname_prefix

        try:
            # Load and summarize
            experiment_set = model_evaluation.ExperimentSet.load_from_disk(
                output_prefix=model_prefix
            )

            # Note that default y_true from BlendingMetamodel._featurize() is target_obs_column.value.blended_evaluation_column_name
            # Use DROP_INCOMPLETE_FOLDS setting because alternate classification targets might not be well-split in the small validation set of the cross-validation folds that were designed to stratify disease.
            # In the cases of some classification targets, we might need to automatically drop folds that have only a single class in the metamodel training data (i.e. in the validation set).
            experiment_set_global_performance = experiment_set.summarize(
                remove_incomplete_strategy=model_evaluation.RemoveIncompleteStrategy.DROP_INCOMPLETE_FOLDS
            )
            experiment_set_global_performance.export_all_models(
                func_generate_classification_report_fname=lambda model_name: f"{results_output_prefix}.classification_report.test_set_performance.{model_name}.txt",
                func_generate_confusion_matrix_fname=lambda model_name: f"{results_output_prefix}.confusion_matrix.test_set_performance.{model_name}.png",
                dpi=300,
            )
            combined_stats = (
                experiment_set_global_performance.get_model_comparison_stats(sort=True)
            )
            combined_stats.to_csv(
                f"{results_output_prefix}.compare_model_scores.test_set_performance.tsv",
                sep="\t",
            )
            display(
                Markdown(
                    f"## {gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor} from {model_prefix} to {results_output_prefix}"
                )
            )
            print(metamodel_config)
            display(combined_stats)

            # Redo, but (potentially) override y_true to pass in e.g. disease with past exposures separated out (delinates past exposures on ground truth axis)
            # For cleaner confusion matrices
            # (But this changes global score metrics)
            experiment_set.summarize(
                global_evaluation_column_name=target_obs_column.value.confusion_matrix_expanded_column_name,
                remove_incomplete_strategy=model_evaluation.RemoveIncompleteStrategy.DROP_INCOMPLETE_FOLDS,
            ).export_all_models(
                func_generate_classification_report_fname=lambda model_name: f"{highres_results_output_prefix}.classification_report.test_set_performance.{model_name}.expanded_confusion_matrix.txt",
                func_generate_confusion_matrix_fname=lambda model_name: f"{highres_results_output_prefix}.confusion_matrix.test_set_performance.{model_name}.expanded_confusion_matrix.png",
                confusion_matrix_true_label="Patient of origin - expanded",
                dpi=300,
            )

            if target_obs_column == TargetObsColumnEnum.disease:
                # Redo, but (potentially) override y_true to pass in disease_subtype for ground truth axis
                # (But this changes global score metrics)
                experiment_set.summarize(
                    global_evaluation_column_name="disease_subtype",
                    remove_incomplete_strategy=model_evaluation.RemoveIncompleteStrategy.DROP_INCOMPLETE_FOLDS,
                ).export_all_models(
                    func_generate_classification_report_fname=lambda model_name: f"{highres_results_output_prefix}.classification_report.test_set_performance.{model_name}.expanded_confusion_matrix_disease_subtype.txt",
                    func_generate_confusion_matrix_fname=lambda model_name: f"{highres_results_output_prefix}.confusion_matrix.test_set_performance.{model_name}.expanded_confusion_matrix_disease_subtype.png",
                    confusion_matrix_true_label="Patient of origin - subtype",
                    dpi=300,
                )

                # Also resummarize by a combined variable of disease + ethnicity
                # But first, fillna on ethnicity column to change nans to "Unknown"
                experiment_set_modified_ethnicity_metadata_column = (
                    # Create a copy of the experiment_set, to not disturb original metadata dataframes
                    experiment_set.copy()
                )
                for (
                    model_single_fold_performance
                ) in (
                    experiment_set_modified_ethnicity_metadata_column.model_outputs.values()
                ):
                    # Modify every model_single_fold_performance's metadata: fillna on the ethnicity_condensed column
                    for df in [
                        model_single_fold_performance.test_metadata,
                        model_single_fold_performance.test_abstention_metadata,
                    ]:
                        if df is None or df.shape[0] == 0:
                            continue
                        df["ethnicity_condensed"].fillna("Unknown", inplace=True)
                experiment_set_modified_ethnicity_metadata_column.summarize(
                    global_evaluation_column_name=[
                        model_evaluation.Y_TRUE_VALUES,
                        "ethnicity_condensed",
                    ],
                    remove_incomplete_strategy=model_evaluation.RemoveIncompleteStrategy.DROP_INCOMPLETE_FOLDS,
                ).export_all_models(
                    func_generate_classification_report_fname=lambda model_name: f"{highres_results_output_prefix}.classification_report.test_set_performance.{model_name}.expanded_confusion_matrix_ethnicity_condensed.txt",
                    func_generate_confusion_matrix_fname=lambda model_name: f"{highres_results_output_prefix}.confusion_matrix.test_set_performance.{model_name}.expanded_confusion_matrix_ethnicity_condensed.png",
                    confusion_matrix_true_label="Patient of origin - ancestry",
                    dpi=300,
                )

                # Also resummarize by a combined variable of disease + age_group_pediatric
                # But first, create this column, because it may not be set on older metamodel runs
                # (TODO: Remove redundant column creation - should be available on new runs. But keep the intelligent fillna behavior.)
                experiment_set_modified_metadata_age_pediatric_column = (
                    # Create a copy of the experiment_set, to not disturb original metadata dataframes
                    experiment_set.copy()
                )
                for (
                    model_single_fold_performance
                ) in (
                    experiment_set_modified_metadata_age_pediatric_column.model_outputs.values()
                ):
                    # Modify every model_single_fold_performance's metadata: create age_group_pediatric column
                    for df in [
                        model_single_fold_performance.test_metadata,
                        model_single_fold_performance.test_abstention_metadata,
                    ]:
                        if df is None or df.shape[0] == 0:
                            continue
                        df.loc[df["age"] < 18, "age_group_pediatric"] = "under 18"
                        df.loc[df["age"] >= 18, "age_group_pediatric"] = "18+"

                        # Fill NaNs intelligently:
                        # We know we have very few children cohorts and they are clearly indicated in the study name.
                        # If study name indicates that this is a pediatric cohort, set to "under 18". Otherwise set to 18+.
                        slice_children = df["study_name"].str.contains(
                            "pediatric|children", regex=True, case=False
                        )
                        df.loc[slice_children, "age_group_pediatric"] = df.loc[
                            slice_children, "age_group_pediatric"
                        ].fillna("under 18")
                        df["age_group_pediatric"].fillna("18+", inplace=True)
                experiment_set_modified_metadata_age_pediatric_column.summarize(
                    global_evaluation_column_name=[
                        model_evaluation.Y_TRUE_VALUES,
                        "age_group_pediatric",
                    ],
                    remove_incomplete_strategy=model_evaluation.RemoveIncompleteStrategy.DROP_INCOMPLETE_FOLDS,
                ).export_all_models(
                    func_generate_classification_report_fname=lambda model_name: f"{highres_results_output_prefix}.classification_report.test_set_performance.{model_name}.expanded_confusion_matrix_age_group_pediatric.txt",
                    func_generate_confusion_matrix_fname=lambda model_name: f"{highres_results_output_prefix}.confusion_matrix.test_set_performance.{model_name}.expanded_confusion_matrix_age_group_pediatric.png",
                    confusion_matrix_true_label="Patient of origin - pediatric vs adult",
                    dpi=300,
                )

            for (
                model_name,
                model_global_performance,
            ) in experiment_set_global_performance.model_global_performances.items():
                # review classification for each specimen
                individual_classifications = model_global_performance.get_all_entries()
                individual_classifications.to_csv(
                    f"{highres_results_output_prefix}.classification_raw_per_specimen.test_set_performance.{model_name}.with_abstention.tsv",
                    sep="\t",
                    index=None,
                )

                # filter to mistakes (including abstentions)
                mistakes = individual_classifications[
                    individual_classifications["y_true"]
                    != individual_classifications["y_pred"]
                ]
                mistakes.to_csv(
                    f"{highres_results_output_prefix}.classification_errors.test_set_performance.{model_name}.with_abstention.tsv",
                    sep="\t",
                    index=None,
                )

                # filter further to abstentions
                abstentions = individual_classifications[
                    individual_classifications["y_pred"]
                    == model_global_performance.abstain_label
                ]
                abstentions.to_csv(
                    f"{highres_results_output_prefix}.classification_abstentions.test_set_performance.{model_name}.with_abstention.tsv",
                    sep="\t",
                    index=None,
                )

                # label correct/incorrect
                individual_classifications["classification_success"] = "Correct"
                individual_classifications.loc[
                    individual_classifications["y_true"]
                    != individual_classifications["y_pred"],
                    "classification_success",
                ] = "Incorrect"

                # Plot difference between top two predicted probabilities, p1 - p2,
                # and difference in logits (log odds) of the top two classes, log(p1/(1-p1)) - log(p2/(1-p2)),
                # to account for the fact that these are probability distributions that sum to 1.
                # (That's the natural log, i.e. log base e.)
                # Alternative considered: difference in log probabilities of top two classes, i.e. log(p1) - log(p2), but that won't distinguish cases like p1=0.5, p2=0.25 from p1=0.4, p2=0.2.
                # difference_between_top_two_predicted_probas was already generated, but we can create the rest ourselves here.
                # TODO: consider other metrics from https://robertmunro.com/uncertainty_sampling_example.html?
                p1, p2 = (
                    individual_classifications["max_predicted_proba"],
                    individual_classifications["second_highest_predicted_proba"],
                )
                epsilon = 1e-8  # avoid log(0) if p=0 or p=1
                individual_classifications[
                    "difference_between_logits_of_top_two_classes"
                ] = (np.log(p1 + epsilon) - np.log(1 - p1 + epsilon)) - (
                    np.log(p2 + epsilon) - np.log(1 - p2 + epsilon)
                )
                for metric, label in [
                    (
                        "difference_between_top_two_predicted_probas",
                        "Difference between\ntop two predicted probabilities",
                    ),
                    (
                        "difference_between_logits_of_top_two_classes",
                        "Difference between log odds\nof top two predicted classes",
                    ),
                ]:
                    fig = plt.figure(figsize=(3, 5))
                    sns.boxplot(
                        data=individual_classifications,
                        x="classification_success",
                        y=metric,
                        order=["Incorrect", "Correct"],
                        palette=sns.color_palette("Paired"),
                    )
                    plt.title(f"Blending metamodel {model_name}")
                    plt.xlabel("Specimen classification")
                    plt.ylabel(label)
                    sns.despine()
                    genetools.plots.savefig(
                        fig,
                        f"{highres_results_output_prefix}.errors_versus_{metric}.test_set_performance.{model_name}.with_abstention.vertical.png",
                        dpi=300,
                    )
                    plt.close(fig)

                try:
                    # Try to load global fold classifier for analysis, too.
                    # It wasn't included in the ExperimentSet, because no .metadata_joblib was generated, since the global fold does not have a test set.
                    # Note that this will only process global fold classifiers for models that were trained for at least one cross validation fold.
                    global_fold_classifier = BlendingMetamodel.from_disk(
                        fold_id=-1,
                        metamodel_name=model_name,
                        gene_locus=gene_locus,
                        target_obs_column=target_obs_column,
                        base_model_train_fold_name=base_model_train_fold_name,
                        metamodel_fold_label_train=metamodel_fold_label_train,
                        metamodel_flavor=metamodel_flavor,
                    )
                except FileNotFoundError as err:
                    logger.warning(
                        f"No global fold classifier found for {model_name}: {err}"
                    )
                    global_fold_classifier = None

                analyze_feature_importances(
                    model_name=model_name,
                    model_global_performance=model_global_performance,
                    gene_locus=gene_locus,
                    target_obs_column=target_obs_column,
                    metamodel_flavor=metamodel_flavor,
                    highres_results_output_prefix=highres_results_output_prefix,
                    global_fold_classifier=global_fold_classifier,
                )

                # Plot additional model diagnostics for models with internal cross validation over a range of hyperparameters
                if model_name in ["lasso_cv", "ridge_cv", "elasticnet_cv"]:

                    def _get_classifiers():
                        # load classifier from disk.
                        for (
                            fold_id,
                            per_fold_performance,
                        ) in model_global_performance.per_fold_outputs.items():
                            yield (fold_id, per_fold_performance.classifier)
                        if global_fold_classifier is not None:
                            yield (-1, global_fold_classifier)

                    for fold_id, clf in _get_classifiers():
                        if isinstance(clf, BlendingMetamodel):
                            # Unwrap if it's a BlendingMetamodel
                            clf = clf._inner

                        # it's probably a Pipeline - unwrap it
                        clf = model_evaluation._get_final_estimator_if_pipeline(clf)

                        if not isinstance(clf, GlmnetLogitNetWrapper):
                            # it should be a GlmnetLogitNetWrapper
                            raise ValueError(
                                f"Expected {model_name} for fold {fold_id} to be of type GlmnetLogitNetWrapper, got {type(clf)}"
                            )

                        # TODO: store the CvScorer enum object in the classifier so we can just use its .name
                        # In internal/nested cross validation, we optimize MCC for metamodel, but AUC for base models. See discussion in core code
                        fig = clf.plot_cross_validation_curve(scorer_name="MCC")
                        genetools.plots.savefig(
                            fig,
                            f"{highres_results_output_prefix}.internal_cross_validation_hyperparameter_diagnostics.{model_name}.fold_{fold_id}.png",
                            dpi=300,
                        )
                        plt.close(fig)

        except Exception as err:
            logger.exception(
                f"{gene_locus}, {target_obs_column}, metamodel flavor {metamodel_flavor}, config {metamodel_config} failed with error: {err}"
            )

In [5]:
# Individual gene locus
# Run the analysis once per (gene locus, classification target) pair,
# taking both lists from the project config.
for gene_locus in config.gene_loci_used:
    print(gene_locus)
    # Validate the entry before any analysis is launched for it.
    # NOTE(review): presumably raises when given a composite locus - confirm.
    GeneLocus.validate_single_value(gene_locus)
    for target_obs_column in config.classification_targets:
        run_analysis(
            target_obs_column=target_obs_column,
            gene_locus=gene_locus,
        )

GeneLocus.BCR


## GeneLocus.BCR, TargetObsColumnEnum.disease, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.964 +/- 0.005 (in 3 folds),0.969 +/- 0.006 (in 3 folds),0.963 +/- 0.006 (in 3 folds),0.968 +/- 0.007 (in 3 folds),0.855 +/- 0.009 (in 3 folds),0.787 +/- 0.014 (in 3 folds),0.855,0.787,0.835 +/- 0.023 (in 3 folds),0.763 +/- 0.032 (in 3 folds),...,0.976 +/- 0.000 (in 1 folds),0.835,0.762,0.023,Unknown,469,11,480,0.022917,False
lasso_multiclass,0.960 +/- 0.006 (in 3 folds),0.966 +/- 0.007 (in 3 folds),0.959 +/- 0.008 (in 3 folds),0.965 +/- 0.008 (in 3 folds),0.846 +/- 0.009 (in 3 folds),0.778 +/- 0.017 (in 3 folds),0.846,0.778,0.827 +/- 0.034 (in 3 folds),0.754 +/- 0.046 (in 3 folds),...,0.974 +/- 0.000 (in 1 folds),0.827,0.753,0.023,Unknown,469,11,480,0.022917,False
rf_multiclass,0.959 +/- 0.009 (in 3 folds),0.963 +/- 0.010 (in 3 folds),0.954 +/- 0.014 (in 3 folds),0.960 +/- 0.014 (in 3 folds),0.850 +/- 0.013 (in 3 folds),0.781 +/- 0.020 (in 3 folds),0.851,0.78,0.831 +/- 0.035 (in 3 folds),0.757 +/- 0.047 (in 3 folds),...,0.973 +/- 0.000 (in 1 folds),0.831,0.755,0.023,Unknown,469,11,480,0.022917,False
elasticnet_cv,0.957 +/- 0.008 (in 3 folds),0.962 +/- 0.007 (in 3 folds),0.958 +/- 0.009 (in 3 folds),0.964 +/- 0.008 (in 3 folds),0.821 +/- 0.024 (in 3 folds),0.740 +/- 0.031 (in 3 folds),0.821,0.739,0.802 +/- 0.001 (in 3 folds),0.715 +/- 0.004 (in 3 folds),...,0.974 +/- 0.000 (in 1 folds),0.802,0.713,0.023,Unknown,469,11,480,0.022917,False
xgboost,0.953 +/- 0.005 (in 3 folds),0.956 +/- 0.007 (in 3 folds),0.951 +/- 0.009 (in 3 folds),0.955 +/- 0.010 (in 3 folds),0.831 +/- 0.014 (in 3 folds),0.753 +/- 0.023 (in 3 folds),0.832,0.752,0.812 +/- 0.032 (in 3 folds),0.730 +/- 0.044 (in 3 folds),...,0.967 +/- 0.000 (in 1 folds),0.812,0.728,0.023,Unknown,469,11,480,0.022917,False
lasso_cv,0.949 +/- 0.005 (in 3 folds),0.954 +/- 0.003 (in 3 folds),0.954 +/- 0.007 (in 3 folds),0.959 +/- 0.007 (in 3 folds),0.819 +/- 0.016 (in 3 folds),0.735 +/- 0.020 (in 3 folds),0.819,0.734,0.800 +/- 0.010 (in 3 folds),0.710 +/- 0.014 (in 3 folds),...,0.967 +/- 0.000 (in 1 folds),0.8,0.709,0.023,Unknown,469,11,480,0.022917,False
ridge_cv,0.948 +/- 0.005 (in 3 folds),0.952 +/- 0.004 (in 3 folds),0.951 +/- 0.006 (in 3 folds),0.957 +/- 0.006 (in 3 folds),0.821 +/- 0.021 (in 3 folds),0.740 +/- 0.025 (in 3 folds),0.821,0.739,0.802 +/- 0.016 (in 3 folds),0.715 +/- 0.024 (in 3 folds),...,0.962 +/- 0.000 (in 1 folds),0.802,0.713,0.023,Unknown,469,11,480,0.022917,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.465 +/- 0.010 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.465,0.0,0.454 +/- 0.007 (in 3 folds),0.024 +/- 0.023 (in 3 folds),...,0.500 +/- 0.000 (in 1 folds),0.454,0.025,0.023,Unknown,469,11,480,0.022917,True
dummy_stratified,0.496 +/- 0.011 (in 3 folds),0.499 +/- 0.008 (in 3 folds),0.503 +/- 0.001 (in 3 folds),0.506 +/- 0.001 (in 3 folds),0.320 +/- 0.018 (in 3 folds),-0.012 +/- 0.025 (in 3 folds),0.32,-0.013,0.313 +/- 0.024 (in 3 folds),-0.008 +/- 0.022 (in 3 folds),...,0.505 +/- 0.000 (in 1 folds),0.312,-0.009,0.023,Unknown,469,11,480,0.022917,False




## GeneLocus.BCR, TargetObsColumnEnum.disease, metamodel flavor isotype_counts_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease/isotype_counts_only/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease/isotype_counts_only/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'isotype_counts': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f64c6cfb580>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.684 +/- 0.020 (in 3 folds),0.641 +/- 0.021 (in 3 folds),0.685 +/- 0.026 (in 3 folds),0.650 +/- 0.025 (in 3 folds),0.494 +/- 0.017 (in 3 folds),0.190 +/- 0.038 (in 3 folds),0.494,0.181,480,0,480,0.0,True
lasso_cv,0.676 +/- 0.023 (in 3 folds),0.633 +/- 0.023 (in 3 folds),0.681 +/- 0.032 (in 3 folds),0.644 +/- 0.031 (in 3 folds),0.508 +/- 0.005 (in 3 folds),0.225 +/- 0.020 (in 3 folds),0.508,0.215,480,0,480,0.0,False
ridge_cv,0.675 +/- 0.022 (in 3 folds),0.633 +/- 0.022 (in 3 folds),0.672 +/- 0.018 (in 3 folds),0.636 +/- 0.016 (in 3 folds),0.494 +/- 0.017 (in 3 folds),0.194 +/- 0.032 (in 3 folds),0.494,0.183,480,0,480,0.0,True
linearsvm_ovr,0.674 +/- 0.020 (in 3 folds),0.629 +/- 0.021 (in 3 folds),0.666 +/- 0.020 (in 3 folds),0.629 +/- 0.019 (in 3 folds),0.477 +/- 0.016 (in 3 folds),0.201 +/- 0.029 (in 3 folds),0.477,0.194,480,0,480,0.0,False
rf_multiclass,0.673 +/- 0.030 (in 3 folds),0.637 +/- 0.034 (in 3 folds),0.648 +/- 0.029 (in 3 folds),0.626 +/- 0.029 (in 3 folds),0.515 +/- 0.015 (in 3 folds),0.261 +/- 0.020 (in 3 folds),0.515,0.259,480,0,480,0.0,False
lasso_multiclass,0.668 +/- 0.020 (in 3 folds),0.622 +/- 0.019 (in 3 folds),0.655 +/- 0.014 (in 3 folds),0.618 +/- 0.012 (in 3 folds),0.471 +/- 0.042 (in 3 folds),0.227 +/- 0.049 (in 3 folds),0.471,0.223,480,0,480,0.0,False
xgboost,0.644 +/- 0.020 (in 3 folds),0.612 +/- 0.017 (in 3 folds),0.629 +/- 0.019 (in 3 folds),0.611 +/- 0.017 (in 3 folds),0.484 +/- 0.020 (in 3 folds),0.219 +/- 0.029 (in 3 folds),0.483,0.218,480,0,480,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.460 +/- 0.006 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.46,0.0,480,0,480,0.0,True
dummy_stratified,0.495 +/- 0.033 (in 3 folds),0.496 +/- 0.031 (in 3 folds),0.504 +/- 0.016 (in 3 folds),0.505 +/- 0.016 (in 3 folds),0.317 +/- 0.053 (in 3 folds),-0.010 +/- 0.071 (in 3 folds),0.317,-0.012,480,0,480,0.0,False


       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHD-M',
       'isotype_counts:isotype_proportion:IGHG'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHD-M',
       'isotype_counts:isotype_proportion:IGHG'],
      dtype='object')
       'isotype_

## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.959 +/- 0.009 (in 3 folds),0.963 +/- 0.011 (in 3 folds),0.957 +/- 0.010 (in 3 folds),0.962 +/- 0.013 (in 3 folds),0.833 +/- 0.008 (in 3 folds),0.758 +/- 0.020 (in 3 folds),0.833,0.758,0.821 +/- 0.014 (in 3 folds),0.743 +/- 0.026 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.821,0.744,0.014,Unknown,414,6,420,0.014286,False
lasso_multiclass,0.959 +/- 0.008 (in 3 folds),0.965 +/- 0.009 (in 3 folds),0.957 +/- 0.011 (in 3 folds),0.962 +/- 0.012 (in 3 folds),0.804 +/- 0.014 (in 3 folds),0.720 +/- 0.028 (in 3 folds),0.804,0.721,0.793 +/- 0.019 (in 3 folds),0.706 +/- 0.034 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.793,0.707,0.014,Unknown,414,6,420,0.014286,False
lasso_cv,0.956 +/- 0.009 (in 3 folds),0.961 +/- 0.010 (in 3 folds),0.954 +/- 0.013 (in 3 folds),0.960 +/- 0.013 (in 3 folds),0.845 +/- 0.005 (in 3 folds),0.772 +/- 0.014 (in 3 folds),0.845,0.772,0.833 +/- 0.011 (in 3 folds),0.756 +/- 0.021 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.833,0.756,0.014,Unknown,414,6,420,0.014286,False
elasticnet_cv,0.955 +/- 0.012 (in 3 folds),0.959 +/- 0.013 (in 3 folds),0.956 +/- 0.012 (in 3 folds),0.961 +/- 0.013 (in 3 folds),0.843 +/- 0.003 (in 3 folds),0.769 +/- 0.006 (in 3 folds),0.843,0.768,0.831 +/- 0.003 (in 3 folds),0.753 +/- 0.013 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.831,0.753,0.014,Unknown,414,6,420,0.014286,False
rf_multiclass,0.953 +/- 0.010 (in 3 folds),0.957 +/- 0.011 (in 3 folds),0.950 +/- 0.013 (in 3 folds),0.955 +/- 0.013 (in 3 folds),0.836 +/- 0.022 (in 3 folds),0.758 +/- 0.039 (in 3 folds),0.836,0.758,0.824 +/- 0.027 (in 3 folds),0.743 +/- 0.045 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.824,0.743,0.014,Unknown,414,6,420,0.014286,False
ridge_cv,0.949 +/- 0.011 (in 3 folds),0.951 +/- 0.014 (in 3 folds),0.951 +/- 0.013 (in 3 folds),0.956 +/- 0.015 (in 3 folds),0.845 +/- 0.014 (in 3 folds),0.774 +/- 0.017 (in 3 folds),0.845,0.774,0.833 +/- 0.008 (in 3 folds),0.758 +/- 0.012 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.833,0.758,0.014,Unknown,414,6,420,0.014286,False
xgboost,0.949 +/- 0.007 (in 3 folds),0.951 +/- 0.010 (in 3 folds),0.948 +/- 0.014 (in 3 folds),0.951 +/- 0.015 (in 3 folds),0.828 +/- 0.037 (in 3 folds),0.747 +/- 0.063 (in 3 folds),0.829,0.748,0.817 +/- 0.043 (in 3 folds),0.733 +/- 0.069 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.817,0.733,0.014,Unknown,414,6,420,0.014286,False
dummy_stratified,0.504 +/- 0.023 (in 3 folds),0.503 +/- 0.030 (in 3 folds),0.506 +/- 0.013 (in 3 folds),0.507 +/- 0.017 (in 3 folds),0.336 +/- 0.031 (in 3 folds),0.007 +/- 0.043 (in 3 folds),0.336,0.007,0.331 +/- 0.030 (in 3 folds),0.010 +/- 0.041 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.331,0.009,0.014,Unknown,414,6,420,0.014286,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.459 +/- 0.035 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.459,0.0,0.452 +/- 0.032 (in 3 folds),0.029 +/- 0.032 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.452,0.03,0.014,Unknown,414,6,420,0.014286,True




## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/with_demographics_columns/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/with_demographics_columns/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.965 +/- 0.005 (in 3 folds),0.971 +/- 0.003 (in 3 folds),0.963 +/- 0.009 (in 3 folds),0.971 +/- 0.006 (in 3 folds),0.809 +/- 0.030 (in 3 folds),0.720 +/- 0.031 (in 3 folds),0.809,0.718,0.798 +/- 0.024 (in 3 folds),0.706 +/- 0.024 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.798,0.704,0.014,Unknown,414,6,420,0.014286,False
elasticnet_cv,0.965 +/- 0.004 (in 3 folds),0.971 +/- 0.003 (in 3 folds),0.966 +/- 0.007 (in 3 folds),0.972 +/- 0.006 (in 3 folds),0.826 +/- 0.058 (in 3 folds),0.749 +/- 0.070 (in 3 folds),0.826,0.746,0.814 +/- 0.053 (in 3 folds),0.734 +/- 0.064 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.814,0.731,0.014,Unknown,414,6,420,0.014286,False
xgboost,0.959 +/- 0.013 (in 3 folds),0.963 +/- 0.013 (in 3 folds),0.957 +/- 0.016 (in 3 folds),0.962 +/- 0.016 (in 3 folds),0.836 +/- 0.014 (in 3 folds),0.762 +/- 0.024 (in 3 folds),0.836,0.76,0.824 +/- 0.014 (in 3 folds),0.746 +/- 0.025 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.824,0.744,0.014,Unknown,414,6,420,0.014286,False
lasso_cv,0.956 +/- 0.006 (in 3 folds),0.961 +/- 0.007 (in 3 folds),0.958 +/- 0.010 (in 3 folds),0.963 +/- 0.010 (in 3 folds),0.833 +/- 0.020 (in 3 folds),0.755 +/- 0.028 (in 3 folds),0.833,0.755,0.821 +/- 0.020 (in 3 folds),0.739 +/- 0.028 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.821,0.74,0.014,Unknown,414,6,420,0.014286,False
ridge_cv,0.955 +/- 0.006 (in 3 folds),0.962 +/- 0.003 (in 3 folds),0.956 +/- 0.007 (in 3 folds),0.962 +/- 0.005 (in 3 folds),0.831 +/- 0.038 (in 3 folds),0.753 +/- 0.060 (in 3 folds),0.831,0.752,0.819 +/- 0.040 (in 3 folds),0.739 +/- 0.063 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.819,0.737,0.014,Unknown,414,6,420,0.014286,False
lasso_multiclass,0.953 +/- 0.013 (in 3 folds),0.960 +/- 0.009 (in 3 folds),0.955 +/- 0.010 (in 3 folds),0.961 +/- 0.007 (in 3 folds),0.819 +/- 0.033 (in 3 folds),0.747 +/- 0.041 (in 3 folds),0.819,0.747,0.807 +/- 0.032 (in 3 folds),0.732 +/- 0.041 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.807,0.732,0.014,Unknown,414,6,420,0.014286,False
linearsvm_ovr,0.912 +/- 0.007 (in 3 folds),0.922 +/- 0.006 (in 3 folds),0.925 +/- 0.011 (in 3 folds),0.935 +/- 0.006 (in 3 folds),0.795 +/- 0.030 (in 3 folds),0.704 +/- 0.035 (in 3 folds),0.795,0.702,0.783 +/- 0.028 (in 3 folds),0.689 +/- 0.031 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.783,0.688,0.014,Unknown,414,6,420,0.014286,False
dummy_stratified,0.504 +/- 0.023 (in 3 folds),0.503 +/- 0.030 (in 3 folds),0.506 +/- 0.013 (in 3 folds),0.507 +/- 0.017 (in 3 folds),0.336 +/- 0.031 (in 3 folds),0.007 +/- 0.043 (in 3 folds),0.336,0.007,0.331 +/- 0.030 (in 3 folds),0.010 +/- 0.041 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.331,0.009,0.014,Unknown,414,6,420,0.014286,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.459 +/- 0.035 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.459,0.0,0.452 +/- 0.032 (in 3 folds),0.029 +/- 0.032 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.452,0.03,0.014,Unknown,414,6,420,0.014286,True




## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_regressed_out/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_regressed_out/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.929 +/- 0.014 (in 3 folds),0.933 +/- 0.016 (in 3 folds),0.930 +/- 0.013 (in 3 folds),0.937 +/- 0.014 (in 3 folds),0.802 +/- 0.014 (in 3 folds),0.710 +/- 0.016 (in 3 folds),0.802,0.707,0.791 +/- 0.010 (in 3 folds),0.696 +/- 0.015 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.79,0.693,0.014,Unknown,414,6,420,0.014286,False
xgboost,0.920 +/- 0.017 (in 3 folds),0.923 +/- 0.018 (in 3 folds),0.918 +/- 0.024 (in 3 folds),0.923 +/- 0.025 (in 3 folds),0.773 +/- 0.009 (in 3 folds),0.669 +/- 0.011 (in 3 folds),0.773,0.665,0.762 +/- 0.009 (in 3 folds),0.656 +/- 0.015 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.762,0.652,0.014,Unknown,414,6,420,0.014286,False
lasso_multiclass,0.879 +/- 0.036 (in 3 folds),0.881 +/- 0.042 (in 3 folds),0.893 +/- 0.034 (in 3 folds),0.895 +/- 0.041 (in 3 folds),0.729 +/- 0.034 (in 3 folds),0.625 +/- 0.045 (in 3 folds),0.729,0.624,0.719 +/- 0.038 (in 3 folds),0.614 +/- 0.049 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.719,0.612,0.014,Unknown,414,6,420,0.014286,False
linearsvm_ovr,0.876 +/- 0.035 (in 3 folds),0.877 +/- 0.042 (in 3 folds),0.892 +/- 0.035 (in 3 folds),0.892 +/- 0.043 (in 3 folds),0.749 +/- 0.028 (in 3 folds),0.642 +/- 0.043 (in 3 folds),0.749,0.639,0.738 +/- 0.030 (in 3 folds),0.630 +/- 0.045 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.738,0.626,0.014,Unknown,414,6,420,0.014286,False
lasso_cv,0.864 +/- 0.032 (in 3 folds),0.865 +/- 0.036 (in 3 folds),0.894 +/- 0.028 (in 3 folds),0.898 +/- 0.030 (in 3 folds),0.773 +/- 0.042 (in 3 folds),0.667 +/- 0.044 (in 3 folds),0.773,0.662,0.762 +/- 0.037 (in 3 folds),0.653 +/- 0.037 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.762,0.649,0.014,Unknown,414,6,420,0.014286,False
elasticnet_cv,0.862 +/- 0.033 (in 3 folds),0.862 +/- 0.038 (in 3 folds),0.895 +/- 0.026 (in 3 folds),0.898 +/- 0.029 (in 3 folds),0.771 +/- 0.048 (in 3 folds),0.666 +/- 0.049 (in 3 folds),0.771,0.66,0.759 +/- 0.042 (in 3 folds),0.651 +/- 0.041 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.76,0.646,0.014,Unknown,414,6,420,0.014286,False
ridge_cv,0.861 +/- 0.037 (in 3 folds),0.859 +/- 0.046 (in 3 folds),0.893 +/- 0.025 (in 3 folds),0.896 +/- 0.031 (in 3 folds),0.749 +/- 0.034 (in 3 folds),0.630 +/- 0.047 (in 3 folds),0.749,0.627,0.738 +/- 0.029 (in 3 folds),0.616 +/- 0.045 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.738,0.614,0.014,Unknown,414,6,420,0.014286,False
dummy_stratified,0.504 +/- 0.023 (in 3 folds),0.503 +/- 0.030 (in 3 folds),0.506 +/- 0.013 (in 3 folds),0.507 +/- 0.017 (in 3 folds),0.336 +/- 0.031 (in 3 folds),0.007 +/- 0.043 (in 3 folds),0.336,0.007,0.331 +/- 0.030 (in 3 folds),0.010 +/- 0.041 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.331,0.009,0.014,Unknown,414,6,420,0.014286,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.459 +/- 0.035 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.459,0.0,0.452 +/- 0.032 (in 3 folds),0.029 +/- 0.032 (in 3 folds),0.014 +/- 0.007 (in 3 folds),0.452,0.03,0.014,Unknown,414,6,420,0.014286,True




## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f61eb8aab50>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.814 +/- 0.033 (in 3 folds),0.834 +/- 0.036 (in 3 folds),0.806 +/- 0.029 (in 3 folds),0.828 +/- 0.032 (in 3 folds),0.593 +/- 0.022 (in 3 folds),0.405 +/- 0.030 (in 3 folds),0.593,0.404,420,0,420,0.0,False
ridge_cv,0.812 +/- 0.030 (in 3 folds),0.832 +/- 0.036 (in 3 folds),0.798 +/- 0.028 (in 3 folds),0.824 +/- 0.034 (in 3 folds),0.569 +/- 0.023 (in 3 folds),0.348 +/- 0.054 (in 3 folds),0.569,0.349,420,0,420,0.0,False
lasso_multiclass,0.811 +/- 0.024 (in 3 folds),0.836 +/- 0.029 (in 3 folds),0.798 +/- 0.018 (in 3 folds),0.827 +/- 0.024 (in 3 folds),0.579 +/- 0.019 (in 3 folds),0.454 +/- 0.015 (in 3 folds),0.579,0.451,420,0,420,0.0,False
elasticnet_cv,0.809 +/- 0.035 (in 3 folds),0.828 +/- 0.040 (in 3 folds),0.797 +/- 0.032 (in 3 folds),0.822 +/- 0.038 (in 3 folds),0.571 +/- 0.051 (in 3 folds),0.364 +/- 0.056 (in 3 folds),0.571,0.362,420,0,420,0.0,False
linearsvm_ovr,0.809 +/- 0.028 (in 3 folds),0.831 +/- 0.034 (in 3 folds),0.798 +/- 0.023 (in 3 folds),0.825 +/- 0.030 (in 3 folds),0.583 +/- 0.042 (in 3 folds),0.441 +/- 0.034 (in 3 folds),0.583,0.438,420,0,420,0.0,False
xgboost,0.803 +/- 0.042 (in 3 folds),0.824 +/- 0.046 (in 3 folds),0.806 +/- 0.033 (in 3 folds),0.830 +/- 0.035 (in 3 folds),0.574 +/- 0.023 (in 3 folds),0.372 +/- 0.021 (in 3 folds),0.574,0.37,420,0,420,0.0,False
lasso_cv,0.797 +/- 0.041 (in 3 folds),0.815 +/- 0.045 (in 3 folds),0.784 +/- 0.029 (in 3 folds),0.806 +/- 0.035 (in 3 folds),0.557 +/- 0.039 (in 3 folds),0.346 +/- 0.049 (in 3 folds),0.557,0.342,420,0,420,0.0,False
dummy_stratified,0.523 +/- 0.035 (in 3 folds),0.523 +/- 0.034 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.359 +/- 0.056 (in 3 folds),0.045 +/- 0.072 (in 3 folds),0.36,0.044,420,0,420,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.455 +/- 0.033 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.455,0.0,420,0,420,0.0,True




## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only_age/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only_age/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f61eb8aa730>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.670 +/- 0.025 (in 3 folds),0.687 +/- 0.027 (in 3 folds),0.660 +/- 0.022 (in 3 folds),0.677 +/- 0.024 (in 3 folds),0.419 +/- 0.012 (in 3 folds),0.190 +/- 0.011 (in 3 folds),0.419,0.187,420,0,420,0.0,False
xgboost,0.667 +/- 0.015 (in 3 folds),0.683 +/- 0.019 (in 3 folds),0.654 +/- 0.006 (in 3 folds),0.670 +/- 0.011 (in 3 folds),0.429 +/- 0.020 (in 3 folds),0.142 +/- 0.022 (in 3 folds),0.429,0.14,420,0,420,0.0,False
lasso_cv,0.638 +/- 0.020 (in 3 folds),0.653 +/- 0.023 (in 3 folds),0.649 +/- 0.028 (in 3 folds),0.666 +/- 0.031 (in 3 folds),0.476 +/- 0.054 (in 3 folds),0.123 +/- 0.104 (in 3 folds),0.476,0.127,420,0,420,0.0,True
elasticnet_cv,0.628 +/- 0.020 (in 3 folds),0.643 +/- 0.028 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.657 +/- 0.031 (in 3 folds),0.469 +/- 0.047 (in 3 folds),0.090 +/- 0.090 (in 3 folds),0.469,0.101,420,0,420,0.0,True
linearsvm_ovr,0.628 +/- 0.020 (in 3 folds),0.643 +/- 0.028 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.657 +/- 0.031 (in 3 folds),0.407 +/- 0.009 (in 3 folds),0.117 +/- 0.038 (in 3 folds),0.407,0.112,420,0,420,0.0,True
ridge_cv,0.628 +/- 0.020 (in 3 folds),0.643 +/- 0.028 (in 3 folds),0.639 +/- 0.024 (in 3 folds),0.657 +/- 0.031 (in 3 folds),0.459 +/- 0.036 (in 3 folds),0.094 +/- 0.051 (in 3 folds),0.46,0.084,420,0,420,0.0,True
lasso_multiclass,0.627 +/- 0.029 (in 3 folds),0.647 +/- 0.034 (in 3 folds),0.636 +/- 0.031 (in 3 folds),0.658 +/- 0.035 (in 3 folds),0.283 +/- 0.010 (in 3 folds),0.107 +/- 0.039 (in 3 folds),0.283,0.102,420,0,420,0.0,False
dummy_stratified,0.523 +/- 0.035 (in 3 folds),0.523 +/- 0.034 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.359 +/- 0.056 (in 3 folds),0.045 +/- 0.072 (in 3 folds),0.36,0.044,420,0,420,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.455 +/- 0.033 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.455,0.0,420,0,420,0.0,True




## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only_sex/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only_sex/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f61eb8aa790>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.574 +/- 0.022 (in 3 folds),0.573 +/- 0.019 (in 3 folds),0.550 +/- 0.011 (in 3 folds),0.551 +/- 0.008 (in 3 folds),0.357 +/- 0.079 (in 3 folds),0.149 +/- 0.029 (in 3 folds),0.357,0.152,420,0,420,0.0,True
lasso_multiclass,0.574 +/- 0.022 (in 3 folds),0.573 +/- 0.019 (in 3 folds),0.550 +/- 0.011 (in 3 folds),0.551 +/- 0.008 (in 3 folds),0.357 +/- 0.079 (in 3 folds),0.149 +/- 0.029 (in 3 folds),0.357,0.152,420,0,420,0.0,True
linearsvm_ovr,0.563 +/- 0.025 (in 3 folds),0.552 +/- 0.029 (in 3 folds),0.541 +/- 0.016 (in 3 folds),0.536 +/- 0.017 (in 3 folds),0.407 +/- 0.009 (in 3 folds),0.110 +/- 0.095 (in 3 folds),0.407,0.088,420,0,420,0.0,True
xgboost,0.563 +/- 0.025 (in 3 folds),0.552 +/- 0.029 (in 3 folds),0.541 +/- 0.016 (in 3 folds),0.536 +/- 0.017 (in 3 folds),0.407 +/- 0.009 (in 3 folds),0.110 +/- 0.095 (in 3 folds),0.407,0.088,420,0,420,0.0,True
lasso_cv,0.543 +/- 0.040 (in 3 folds),0.544 +/- 0.042 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.532 +/- 0.029 (in 3 folds),0.407 +/- 0.009 (in 3 folds),0.110 +/- 0.095 (in 3 folds),0.407,0.088,420,0,420,0.0,True
elasticnet_cv,0.543 +/- 0.040 (in 3 folds),0.544 +/- 0.042 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.532 +/- 0.029 (in 3 folds),0.407 +/- 0.009 (in 3 folds),0.110 +/- 0.095 (in 3 folds),0.407,0.088,420,0,420,0.0,True
ridge_cv,0.543 +/- 0.040 (in 3 folds),0.544 +/- 0.042 (in 3 folds),0.530 +/- 0.027 (in 3 folds),0.532 +/- 0.029 (in 3 folds),0.407 +/- 0.009 (in 3 folds),0.110 +/- 0.095 (in 3 folds),0.407,0.088,420,0,420,0.0,True
dummy_stratified,0.523 +/- 0.035 (in 3 folds),0.523 +/- 0.034 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.359 +/- 0.056 (in 3 folds),0.045 +/- 0.072 (in 3 folds),0.36,0.044,420,0,420,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.455 +/- 0.033 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.455,0.0,420,0,420,0.0,True




## GeneLocus.BCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only_ethnicity_condensed/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/disease_all_demographics_present/demographics_only_ethnicity_condensed/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f61eb8aa100>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.751 +/- 0.014 (in 3 folds),0.775 +/- 0.015 (in 3 folds),0.724 +/- 0.015 (in 3 folds),0.748 +/- 0.018 (in 3 folds),0.526 +/- 0.102 (in 3 folds),0.381 +/- 0.102 (in 3 folds),0.526,0.36,420,0,420,0.0,False
rf_multiclass,0.750 +/- 0.015 (in 3 folds),0.771 +/- 0.016 (in 3 folds),0.724 +/- 0.014 (in 3 folds),0.748 +/- 0.016 (in 3 folds),0.526 +/- 0.102 (in 3 folds),0.381 +/- 0.102 (in 3 folds),0.526,0.36,420,0,420,0.0,False
xgboost,0.748 +/- 0.022 (in 3 folds),0.766 +/- 0.027 (in 3 folds),0.725 +/- 0.017 (in 3 folds),0.747 +/- 0.022 (in 3 folds),0.595 +/- 0.067 (in 3 folds),0.425 +/- 0.088 (in 3 folds),0.595,0.421,420,0,420,0.0,False
ridge_cv,0.748 +/- 0.017 (in 3 folds),0.766 +/- 0.021 (in 3 folds),0.724 +/- 0.014 (in 3 folds),0.746 +/- 0.019 (in 3 folds),0.557 +/- 0.064 (in 3 folds),0.346 +/- 0.129 (in 3 folds),0.557,0.334,420,0,420,0.0,False
elasticnet_cv,0.747 +/- 0.022 (in 3 folds),0.767 +/- 0.027 (in 3 folds),0.724 +/- 0.017 (in 3 folds),0.747 +/- 0.022 (in 3 folds),0.557 +/- 0.064 (in 3 folds),0.346 +/- 0.129 (in 3 folds),0.557,0.334,420,0,420,0.0,False
linearsvm_ovr,0.741 +/- 0.027 (in 3 folds),0.759 +/- 0.030 (in 3 folds),0.721 +/- 0.018 (in 3 folds),0.743 +/- 0.021 (in 3 folds),0.595 +/- 0.067 (in 3 folds),0.444 +/- 0.056 (in 3 folds),0.595,0.441,420,0,420,0.0,True
lasso_cv,0.739 +/- 0.029 (in 3 folds),0.759 +/- 0.034 (in 3 folds),0.722 +/- 0.026 (in 3 folds),0.745 +/- 0.031 (in 3 folds),0.552 +/- 0.069 (in 3 folds),0.337 +/- 0.132 (in 3 folds),0.552,0.327,420,0,420,0.0,True
dummy_stratified,0.523 +/- 0.035 (in 3 folds),0.523 +/- 0.034 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.359 +/- 0.056 (in 3 folds),0.045 +/- 0.072 (in 3 folds),0.36,0.044,420,0,420,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.455 +/- 0.033 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.455,0.0,420,0,420,0.0,True




## GeneLocus.BCR, TargetObsColumnEnum.covid_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/covid_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/covid_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.996 +/- 0.006 (in 3 folds),0.996 +/- 0.006 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.953 +/- 0.032 (in 3 folds),0.859 +/- 0.096 (in 3 folds),0.954,0.864,0.940 +/- 0.043 (in 3 folds),0.820 +/- 0.131 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.94,0.824,0.014,Unknown,280,4,284,0.014085,False
lasso_cv,0.996 +/- 0.005 (in 3 folds),0.996 +/- 0.005 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.957 +/- 0.029 (in 3 folds),0.871 +/- 0.088 (in 3 folds),0.957,0.874,0.943 +/- 0.040 (in 3 folds),0.831 +/- 0.123 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.944,0.835,0.014,Unknown,280,4,284,0.014085,False
linearsvm_ovr,0.995 +/- 0.006 (in 3 folds),0.995 +/- 0.006 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.964 +/- 0.017 (in 3 folds),0.899 +/- 0.050 (in 3 folds),0.964,0.899,0.950 +/- 0.023 (in 3 folds),0.866 +/- 0.062 (in 3 folds),...,0.999 +/- 0.000 (in 1 folds),0.951,0.866,0.014,Unknown,280,4,284,0.014085,False
lasso_multiclass,0.994 +/- 0.008 (in 3 folds),0.994 +/- 0.008 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.964 +/- 0.017 (in 3 folds),0.899 +/- 0.050 (in 3 folds),0.964,0.899,0.950 +/- 0.023 (in 3 folds),0.866 +/- 0.062 (in 3 folds),...,0.999 +/- 0.000 (in 1 folds),0.951,0.866,0.014,Unknown,280,4,284,0.014085,False
ridge_cv,0.993 +/- 0.009 (in 3 folds),0.993 +/- 0.009 (in 3 folds),0.998 +/- 0.003 (in 3 folds),0.998 +/- 0.003 (in 3 folds),0.953 +/- 0.032 (in 3 folds),0.859 +/- 0.096 (in 3 folds),0.954,0.864,0.940 +/- 0.043 (in 3 folds),0.820 +/- 0.131 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.94,0.824,0.014,Unknown,280,4,284,0.014085,False
xgboost,0.992 +/- 0.007 (in 3 folds),0.992 +/- 0.007 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.957 +/- 0.020 (in 3 folds),0.872 +/- 0.062 (in 3 folds),0.957,0.873,0.943 +/- 0.028 (in 3 folds),0.835 +/- 0.082 (in 3 folds),...,0.998 +/- 0.000 (in 1 folds),0.944,0.837,0.014,Unknown,280,4,284,0.014085,False
rf_multiclass,0.991 +/- 0.009 (in 3 folds),0.991 +/- 0.009 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.957 +/- 0.020 (in 3 folds),0.873 +/- 0.061 (in 3 folds),0.957,0.873,0.943 +/- 0.028 (in 3 folds),0.835 +/- 0.083 (in 3 folds),...,0.999 +/- 0.000 (in 1 folds),0.944,0.836,0.014,Unknown,280,4,284,0.014085,False
dummy_stratified,0.529 +/- 0.028 (in 3 folds),0.529 +/- 0.028 (in 3 folds),0.789 +/- 0.016 (in 3 folds),0.789 +/- 0.016 (in 3 folds),0.696 +/- 0.012 (in 3 folds),0.060 +/- 0.056 (in 3 folds),0.696,0.06,0.687 +/- 0.009 (in 3 folds),0.058 +/- 0.046 (in 3 folds),...,0.774 +/- 0.000 (in 1 folds),0.687,0.058,0.014,Unknown,280,4,284,0.014085,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.779 +/- 0.007 (in 3 folds),0.779 +/- 0.007 (in 3 folds),0.779 +/- 0.007 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.779,0.0,0.768 +/- 0.004 (in 3 folds),0.004 +/- 0.043 (in 3 folds),...,0.771 +/- 0.000 (in 1 folds),0.768,0.004,0.014,Unknown,280,4,284,0.014085,True




## GeneLocus.BCR, TargetObsColumnEnum.hiv_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/hiv_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/hiv_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.987 +/- 0.007 (in 3 folds),0.987 +/- 0.007 (in 3 folds),0.994 +/- 0.003 (in 3 folds),0.994 +/- 0.003 (in 3 folds),0.945 +/- 0.016 (in 3 folds),0.875 +/- 0.035 (in 3 folds),0.946,0.873,0.928 +/- 0.016 (in 3 folds),0.833 +/- 0.038 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.928,0.832,0.019,Unknown,313,6,319,0.018809,False
linearsvm_ovr,0.987 +/- 0.005 (in 3 folds),0.987 +/- 0.005 (in 3 folds),0.994 +/- 0.002 (in 3 folds),0.994 +/- 0.002 (in 3 folds),0.952 +/- 0.001 (in 3 folds),0.890 +/- 0.001 (in 3 folds),0.952,0.89,0.934 +/- 0.001 (in 3 folds),0.853 +/- 0.002 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.934,0.853,0.019,Unknown,313,6,319,0.018809,False
ridge_cv,0.985 +/- 0.009 (in 3 folds),0.985 +/- 0.009 (in 3 folds),0.993 +/- 0.004 (in 3 folds),0.993 +/- 0.004 (in 3 folds),0.949 +/- 0.023 (in 3 folds),0.880 +/- 0.055 (in 3 folds),0.949,0.88,0.931 +/- 0.023 (in 3 folds),0.840 +/- 0.056 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.931,0.84,0.019,Unknown,313,6,319,0.018809,False
lasso_multiclass,0.985 +/- 0.006 (in 3 folds),0.985 +/- 0.006 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.949 +/- 0.005 (in 3 folds),0.884 +/- 0.011 (in 3 folds),0.949,0.883,0.931 +/- 0.005 (in 3 folds),0.847 +/- 0.010 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.931,0.847,0.019,Unknown,313,6,319,0.018809,False
lasso_cv,0.983 +/- 0.004 (in 3 folds),0.983 +/- 0.004 (in 3 folds),0.992 +/- 0.002 (in 3 folds),0.992 +/- 0.002 (in 3 folds),0.952 +/- 0.018 (in 3 folds),0.889 +/- 0.040 (in 3 folds),0.952,0.888,0.934 +/- 0.018 (in 3 folds),0.848 +/- 0.043 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.934,0.847,0.019,Unknown,313,6,319,0.018809,False
rf_multiclass,0.982 +/- 0.010 (in 3 folds),0.982 +/- 0.010 (in 3 folds),0.992 +/- 0.005 (in 3 folds),0.992 +/- 0.005 (in 3 folds),0.943 +/- 0.009 (in 3 folds),0.868 +/- 0.019 (in 3 folds),0.942,0.866,0.925 +/- 0.009 (in 3 folds),0.831 +/- 0.016 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.925,0.83,0.019,Unknown,313,6,319,0.018809,False
xgboost,0.969 +/- 0.016 (in 3 folds),0.969 +/- 0.016 (in 3 folds),0.978 +/- 0.015 (in 3 folds),0.978 +/- 0.015 (in 3 folds),0.930 +/- 0.012 (in 3 folds),0.840 +/- 0.023 (in 3 folds),0.93,0.839,0.912 +/- 0.011 (in 3 folds),0.805 +/- 0.021 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.912,0.804,0.019,Unknown,313,6,319,0.018809,False
dummy_stratified,0.511 +/- 0.065 (in 3 folds),0.511 +/- 0.065 (in 3 folds),0.692 +/- 0.030 (in 3 folds),0.692 +/- 0.030 (in 3 folds),0.606 +/- 0.051 (in 3 folds),0.021 +/- 0.141 (in 3 folds),0.607,0.024,0.595 +/- 0.050 (in 3 folds),0.013 +/- 0.135 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.596,0.016,0.019,Unknown,313,6,319,0.018809,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.687 +/- 0.002 (in 3 folds),0.687 +/- 0.002 (in 3 folds),0.687 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.687,0.0,0.674 +/- 0.002 (in 3 folds),-0.046 +/- 0.001 (in 3 folds),0.019 +/- 0.000 (in 3 folds),0.674,-0.046,0.019,Unknown,313,6,319,0.018809,True




## GeneLocus.BCR, TargetObsColumnEnum.lupus_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/lupus_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/lupus_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.928 +/- 0.005 (in 3 folds),0.928 +/- 0.005 (in 3 folds),0.867 +/- 0.023 (in 3 folds),0.867 +/- 0.023 (in 3 folds),0.865 +/- 0.017 (in 3 folds),0.673 +/- 0.059 (in 3 folds),0.865,0.672,0.846 +/- 0.037 (in 3 folds),0.643 +/- 0.089 (in 3 folds),...,0.890 +/- 0.000 (in 1 folds),0.846,0.64,0.022,Unknown,312,7,319,0.021944,False
lasso_cv,0.925 +/- 0.017 (in 3 folds),0.925 +/- 0.017 (in 3 folds),0.876 +/- 0.009 (in 3 folds),0.876 +/- 0.009 (in 3 folds),0.879 +/- 0.026 (in 3 folds),0.705 +/- 0.066 (in 3 folds),0.878,0.701,0.859 +/- 0.028 (in 3 folds),0.670 +/- 0.070 (in 3 folds),...,0.884 +/- 0.000 (in 1 folds),0.859,0.666,0.022,Unknown,312,7,319,0.021944,False
linearsvm_ovr,0.925 +/- 0.017 (in 3 folds),0.925 +/- 0.017 (in 3 folds),0.877 +/- 0.009 (in 3 folds),0.877 +/- 0.009 (in 3 folds),0.856 +/- 0.015 (in 3 folds),0.672 +/- 0.020 (in 3 folds),0.856,0.671,0.837 +/- 0.021 (in 3 folds),0.642 +/- 0.034 (in 3 folds),...,0.887 +/- 0.000 (in 1 folds),0.837,0.64,0.022,Unknown,312,7,319,0.021944,False
elasticnet_cv,0.924 +/- 0.023 (in 3 folds),0.924 +/- 0.023 (in 3 folds),0.877 +/- 0.009 (in 3 folds),0.877 +/- 0.009 (in 3 folds),0.875 +/- 0.015 (in 3 folds),0.697 +/- 0.040 (in 3 folds),0.875,0.694,0.856 +/- 0.021 (in 3 folds),0.662 +/- 0.055 (in 3 folds),...,0.883 +/- 0.000 (in 1 folds),0.856,0.657,0.022,Unknown,312,7,319,0.021944,False
lasso_multiclass,0.923 +/- 0.017 (in 3 folds),0.923 +/- 0.017 (in 3 folds),0.873 +/- 0.011 (in 3 folds),0.873 +/- 0.011 (in 3 folds),0.849 +/- 0.017 (in 3 folds),0.657 +/- 0.024 (in 3 folds),0.849,0.656,0.830 +/- 0.027 (in 3 folds),0.628 +/- 0.046 (in 3 folds),...,0.885 +/- 0.000 (in 1 folds),0.831,0.626,0.022,Unknown,312,7,319,0.021944,False
xgboost,0.916 +/- 0.012 (in 3 folds),0.916 +/- 0.012 (in 3 folds),0.841 +/- 0.045 (in 3 folds),0.841 +/- 0.045 (in 3 folds),0.855 +/- 0.017 (in 3 folds),0.654 +/- 0.064 (in 3 folds),0.856,0.653,0.836 +/- 0.038 (in 3 folds),0.626 +/- 0.091 (in 3 folds),...,0.892 +/- 0.000 (in 1 folds),0.837,0.622,0.022,Unknown,312,7,319,0.021944,False
ridge_cv,0.910 +/- 0.023 (in 3 folds),0.910 +/- 0.023 (in 3 folds),0.866 +/- 0.007 (in 3 folds),0.866 +/- 0.007 (in 3 folds),0.862 +/- 0.025 (in 3 folds),0.660 +/- 0.077 (in 3 folds),0.862,0.662,0.843 +/- 0.040 (in 3 folds),0.629 +/- 0.101 (in 3 folds),...,0.874 +/- 0.000 (in 1 folds),0.843,0.625,0.022,Unknown,312,7,319,0.021944,False
dummy_stratified,0.527 +/- 0.041 (in 3 folds),0.527 +/- 0.041 (in 3 folds),0.317 +/- 0.033 (in 3 folds),0.317 +/- 0.033 (in 3 folds),0.619 +/- 0.047 (in 3 folds),0.059 +/- 0.089 (in 3 folds),0.619,0.057,0.605 +/- 0.051 (in 3 folds),0.064 +/- 0.087 (in 3 folds),...,0.355 +/- 0.000 (in 1 folds),0.605,0.061,0.022,Unknown,312,7,319,0.021944,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.301 +/- 0.015 (in 3 folds),0.301 +/- 0.015 (in 3 folds),0.699 +/- 0.015 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.699,0.0,0.683 +/- 0.003 (in 3 folds),0.034 +/- 0.037 (in 3 folds),...,0.315 +/- 0.000 (in 1 folds),0.683,0.043,0.022,Unknown,312,7,319,0.021944,True




## GeneLocus.BCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/ethnicity_condensed_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/ethnicity_condensed_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.746 +/- 0.059 (in 3 folds),0.753 +/- 0.063 (in 3 folds),0.761 +/- 0.030 (in 3 folds),0.775 +/- 0.035 (in 3 folds),0.634 +/- 0.068 (in 3 folds),0.377 +/- 0.121 (in 3 folds),0.632,0.335,0.604 +/- 0.065 (in 3 folds),0.345 +/- 0.101 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.602,0.311,0.047,Unknown,182,9,191,0.04712,True
ridge_cv,0.736 +/- 0.043 (in 3 folds),0.735 +/- 0.062 (in 3 folds),0.745 +/- 0.015 (in 3 folds),0.748 +/- 0.035 (in 3 folds),0.719 +/- 0.066 (in 3 folds),0.498 +/- 0.090 (in 3 folds),0.72,0.487,0.685 +/- 0.056 (in 3 folds),0.446 +/- 0.074 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.686,0.436,0.047,Unknown,182,9,191,0.04712,True
linearsvm_ovr,0.721 +/- 0.043 (in 3 folds),0.721 +/- 0.057 (in 3 folds),0.722 +/- 0.031 (in 3 folds),0.722 +/- 0.057 (in 3 folds),0.540 +/- 0.042 (in 3 folds),0.322 +/- 0.036 (in 3 folds),0.538,0.32,0.515 +/- 0.042 (in 3 folds),0.302 +/- 0.037 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.513,0.301,0.047,Unknown,182,9,191,0.04712,False
rf_multiclass,0.716 +/- 0.081 (in 3 folds),0.699 +/- 0.083 (in 3 folds),0.734 +/- 0.076 (in 3 folds),0.717 +/- 0.088 (in 3 folds),0.705 +/- 0.042 (in 3 folds),0.501 +/- 0.114 (in 3 folds),0.703,0.481,0.673 +/- 0.049 (in 3 folds),0.464 +/- 0.105 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.67,0.446,0.047,Unknown,182,9,191,0.04712,False
xgboost,0.695 +/- 0.070 (in 3 folds),0.677 +/- 0.075 (in 3 folds),0.721 +/- 0.056 (in 3 folds),0.709 +/- 0.063 (in 3 folds),0.640 +/- 0.090 (in 3 folds),0.402 +/- 0.117 (in 3 folds),0.637,0.396,0.611 +/- 0.088 (in 3 folds),0.375 +/- 0.112 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.607,0.369,0.047,Unknown,182,9,191,0.04712,False
lasso_cv,0.690 +/- 0.022 (in 3 folds),0.694 +/- 0.031 (in 3 folds),0.715 +/- 0.020 (in 3 folds),0.711 +/- 0.042 (in 3 folds),0.714 +/- 0.089 (in 3 folds),0.490 +/- 0.116 (in 3 folds),0.714,0.475,0.680 +/- 0.078 (in 3 folds),0.441 +/- 0.087 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.681,0.435,0.047,Unknown,182,9,191,0.04712,True
lasso_multiclass,0.686 +/- 0.086 (in 3 folds),0.682 +/- 0.118 (in 3 folds),0.711 +/- 0.030 (in 3 folds),0.708 +/- 0.059 (in 3 folds),0.508 +/- 0.079 (in 3 folds),0.348 +/- 0.047 (in 3 folds),0.511,0.353,0.484 +/- 0.069 (in 3 folds),0.324 +/- 0.039 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.487,0.331,0.047,Unknown,182,9,191,0.04712,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.574 +/- 0.085 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.577,0.0,0.547 +/- 0.073 (in 3 folds),0.027 +/- 0.023 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.55,0.026,0.047,Unknown,182,9,191,0.04712,True
dummy_stratified,0.496 +/- 0.018 (in 3 folds),0.509 +/- 0.026 (in 3 folds),0.512 +/- 0.013 (in 3 folds),0.513 +/- 0.013 (in 3 folds),0.385 +/- 0.032 (in 3 folds),0.036 +/- 0.056 (in 3 folds),0.385,0.023,0.367 +/- 0.029 (in 3 folds),0.035 +/- 0.052 (in 3 folds),0.047 +/- 0.013 (in 3 folds),0.366,0.025,0.047,Unknown,182,9,191,0.04712,False




## GeneLocus.BCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/age_group_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/age_group_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.664 +/- 0.015 (in 3 folds),0.661 +/- 0.026 (in 3 folds),0.697 +/- 0.016 (in 3 folds),0.692 +/- 0.033 (in 3 folds),0.339 +/- 0.084 (in 3 folds),0.202 +/- 0.124 (in 3 folds),0.354,0.227,0.295 +/- 0.140 (in 3 folds),0.185 +/- 0.127 (in 3 folds),...,0.656 +/- 0.000 (in 1 folds),0.293,0.181,0.173,Unknown,158,33,191,0.172775,False
lasso_multiclass,0.635 +/- 0.039 (in 3 folds),0.638 +/- 0.031 (in 3 folds),0.678 +/- 0.015 (in 3 folds),0.680 +/- 0.034 (in 3 folds),0.278 +/- 0.035 (in 3 folds),0.139 +/- 0.062 (in 3 folds),0.285,0.153,0.236 +/- 0.091 (in 3 folds),0.130 +/- 0.069 (in 3 folds),...,0.642 +/- 0.000 (in 1 folds),0.236,0.125,0.173,Unknown,158,33,191,0.172775,False
lasso_cv,0.631 +/- 0.031 (in 3 folds),0.627 +/- 0.013 (in 3 folds),0.663 +/- 0.015 (in 3 folds),0.661 +/- 0.013 (in 3 folds),0.292 +/- 0.094 (in 3 folds),0.165 +/- 0.162 (in 3 folds),0.31,0.166,0.257 +/- 0.137 (in 3 folds),0.152 +/- 0.133 (in 3 folds),...,0.659 +/- 0.000 (in 1 folds),0.257,0.131,0.173,Unknown,158,33,191,0.172775,True
elasticnet_cv,0.630 +/- 0.039 (in 3 folds),0.625 +/- 0.016 (in 3 folds),0.667 +/- 0.020 (in 3 folds),0.665 +/- 0.004 (in 3 folds),0.285 +/- 0.064 (in 3 folds),0.154 +/- 0.148 (in 3 folds),0.297,0.147,0.247 +/- 0.114 (in 3 folds),0.147 +/- 0.128 (in 3 folds),...,0.662 +/- 0.000 (in 1 folds),0.246,0.114,0.173,Unknown,158,33,191,0.172775,True
linearsvm_ovr,0.615 +/- 0.029 (in 3 folds),0.622 +/- 0.022 (in 3 folds),0.674 +/- 0.013 (in 3 folds),0.684 +/- 0.036 (in 3 folds),0.292 +/- 0.032 (in 3 folds),0.150 +/- 0.041 (in 3 folds),0.297,0.167,0.246 +/- 0.089 (in 3 folds),0.137 +/- 0.059 (in 3 folds),...,0.646 +/- 0.000 (in 1 folds),0.246,0.136,0.173,Unknown,158,33,191,0.172775,False
ridge_cv,0.607 +/- 0.095 (in 3 folds),0.604 +/- 0.095 (in 3 folds),0.622 +/- 0.109 (in 3 folds),0.614 +/- 0.103 (in 3 folds),0.286 +/- 0.076 (in 3 folds),0.122 +/- 0.121 (in 3 folds),0.297,0.151,0.245 +/- 0.120 (in 3 folds),0.127 +/- 0.108 (in 3 folds),...,0.641 +/- 0.000 (in 1 folds),0.246,0.119,0.173,Unknown,158,33,191,0.172775,True
xgboost,0.600 +/- 0.035 (in 3 folds),0.591 +/- 0.012 (in 3 folds),0.664 +/- 0.035 (in 3 folds),0.659 +/- 0.042 (in 3 folds),0.272 +/- 0.050 (in 3 folds),0.117 +/- 0.097 (in 3 folds),0.278,0.141,0.233 +/- 0.097 (in 3 folds),0.112 +/- 0.083 (in 3 folds),...,0.611 +/- 0.000 (in 1 folds),0.23,0.113,0.173,Unknown,158,33,191,0.172775,False
dummy_stratified,0.505 +/- 0.039 (in 3 folds),0.509 +/- 0.039 (in 3 folds),0.529 +/- 0.027 (in 3 folds),0.531 +/- 0.027 (in 3 folds),0.171 +/- 0.070 (in 3 folds),0.001 +/- 0.074 (in 3 folds),0.158,-0.006,0.132 +/- 0.026 (in 3 folds),-0.006 +/- 0.066 (in 3 folds),...,0.503 +/- 0.000 (in 1 folds),0.131,-0.005,0.173,Unknown,158,33,191,0.172775,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.212 +/- 0.029 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.209,0.023,0.173 +/- 0.036 (in 3 folds),0.021 +/- 0.019 (in 3 folds),...,0.500 +/- 0.000 (in 1 folds),0.173,0.018,0.173,Unknown,158,33,191,0.172775,True




## GeneLocus.BCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/age_group_binary_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/age_group_binary_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.657 +/- 0.051 (in 3 folds),0.657 +/- 0.051 (in 3 folds),0.770 +/- 0.084 (in 3 folds),0.770 +/- 0.084 (in 3 folds),0.615 +/- 0.008 (in 3 folds),0.215 +/- 0.028 (in 3 folds),0.614,0.221,0.537 +/- 0.058 (in 3 folds),0.174 +/- 0.048 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.534,0.175,0.131,Unknown,166,25,191,0.13089,False
lasso_multiclass,0.654 +/- 0.044 (in 3 folds),0.654 +/- 0.044 (in 3 folds),0.765 +/- 0.083 (in 3 folds),0.765 +/- 0.083 (in 3 folds),0.627 +/- 0.032 (in 3 folds),0.240 +/- 0.059 (in 3 folds),0.627,0.246,0.546 +/- 0.035 (in 3 folds),0.190 +/- 0.033 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.545,0.194,0.131,Unknown,166,25,191,0.13089,False
rf_multiclass,0.616 +/- 0.117 (in 3 folds),0.616 +/- 0.117 (in 3 folds),0.722 +/- 0.141 (in 3 folds),0.722 +/- 0.141 (in 3 folds),0.633 +/- 0.077 (in 3 folds),0.171 +/- 0.194 (in 3 folds),0.633,0.181,0.550 +/- 0.049 (in 3 folds),0.119 +/- 0.153 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.55,0.121,0.131,Unknown,166,25,191,0.13089,False
xgboost,0.538 +/- 0.128 (in 3 folds),0.538 +/- 0.128 (in 3 folds),0.632 +/- 0.094 (in 3 folds),0.632 +/- 0.094 (in 3 folds),0.608 +/- 0.079 (in 3 folds),0.154 +/- 0.154 (in 3 folds),0.608,0.148,0.529 +/- 0.056 (in 3 folds),0.108 +/- 0.112 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.529,0.103,0.131,Unknown,166,25,191,0.13089,False
dummy_stratified,0.535 +/- 0.058 (in 3 folds),0.535 +/- 0.058 (in 3 folds),0.634 +/- 0.072 (in 3 folds),0.634 +/- 0.072 (in 3 folds),0.568 +/- 0.083 (in 3 folds),0.077 +/- 0.123 (in 3 folds),0.566,0.074,0.494 +/- 0.066 (in 3 folds),0.050 +/- 0.079 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.492,0.049,0.131,Unknown,166,25,191,0.13089,False
elasticnet_cv,0.514 +/- 0.024 (in 3 folds),0.514 +/- 0.024 (in 3 folds),0.640 +/- 0.088 (in 3 folds),0.640 +/- 0.088 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.614,0.0,0.534 +/- 0.004 (in 3 folds),-0.046 +/- 0.013 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.534,-0.051,0.131,Unknown,166,25,191,0.13089,True
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.614,0.0,0.534 +/- 0.004 (in 3 folds),-0.046 +/- 0.013 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.534,-0.051,0.131,Unknown,166,25,191,0.13089,True
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.614,0.0,0.534 +/- 0.004 (in 3 folds),-0.046 +/- 0.013 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.534,-0.051,0.131,Unknown,166,25,191,0.13089,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.616 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.614,0.0,0.534 +/- 0.004 (in 3 folds),-0.046 +/- 0.013 (in 3 folds),0.126 +/- 0.096 (in 3 folds),0.534,-0.051,0.131,Unknown,166,25,191,0.13089,True


2023-01-14 01:14:16,873 - malid.external.model_evaluation - INFO - Removing ('rf_multiclass', 0) because fold 0 is incomplete.
2023-01-14 01:14:16,875 - malid.external.model_evaluation - INFO - Removing ('dummy_stratified', 0) because fold 0 is incomplete.
2023-01-14 01:14:16,877 - malid.external.model_evaluation - INFO - Removing ('dummy_most_frequent', 0) because fold 0 is incomplete.
2023-01-14 01:14:16,878 - malid.external.model_evaluation - INFO - Removing ('xgboost', 0) because fold 0 is incomplete.


## GeneLocus.BCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/age_group_pediatric_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/age_group_pediatric_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.951 +/- 0.063 (in 2 folds),0.951 +/- 0.063 (in 2 folds),0.906 +/- 0.100 (in 2 folds),0.906 +/- 0.100 (in 2 folds),0.835 +/- 0.102 (in 2 folds),0.614 +/- 0.166 (in 2 folds),0.824,0.584,0.672 +/- 0.001 (in 2 folds),0.411 +/- 0.017 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.672,0.41,0.184,Unknown,102,23,125,0.184,False
lasso_multiclass,0.948 +/- 0.061 (in 2 folds),0.948 +/- 0.061 (in 2 folds),0.891 +/- 0.081 (in 2 folds),0.891 +/- 0.081 (in 2 folds),0.843 +/- 0.090 (in 2 folds),0.626 +/- 0.149 (in 2 folds),0.833,0.599,0.679 +/- 0.010 (in 2 folds),0.420 +/- 0.005 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.68,0.419,0.184,Unknown,102,23,125,0.184,False
rf_multiclass,0.926 +/- 0.102 (in 2 folds),0.926 +/- 0.102 (in 2 folds),0.905 +/- 0.101 (in 2 folds),0.905 +/- 0.101 (in 2 folds),0.946 +/- 0.043 (in 2 folds),0.829 +/- 0.119 (in 2 folds),0.941,0.797,0.765 +/- 0.058 (in 2 folds),0.491 +/- 0.009 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.768,0.475,0.184,Unknown,102,23,125,0.184,False
xgboost,0.922 +/- 0.095 (in 2 folds),0.922 +/- 0.095 (in 2 folds),0.883 +/- 0.068 (in 2 folds),0.883 +/- 0.068 (in 2 folds),0.940 +/- 0.013 (in 2 folds),0.791 +/- 0.084 (in 2 folds),0.941,0.8,0.763 +/- 0.103 (in 2 folds),0.496 +/- 0.148 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.768,0.493,0.184,Unknown,102,23,125,0.184,False
lasso_cv,0.745 +/- 0.347 (in 2 folds),0.745 +/- 0.347 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.878 +/- 0.139 (in 2 folds),0.457 +/- 0.646 (in 2 folds),0.863,0.468,0.705 +/- 0.027 (in 2 folds),0.223 +/- 0.369 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.704,0.216,0.184,Unknown,102,23,125,0.184,False
elasticnet_cv,0.745 +/- 0.347 (in 2 folds),0.745 +/- 0.347 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.584 +/- 0.515 (in 2 folds),0.820 +/- 0.057 (in 2 folds),0.000 +/- 0.000 (in 2 folds),0.814,0.0,0.662 +/- 0.034 (in 2 folds),-0.001 +/- 0.053 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.664,-0.006,0.184,Unknown,102,23,125,0.184,True
ridge_cv,0.500 +/- 0.000 (in 2 folds),0.500 +/- 0.000 (in 2 folds),0.180 +/- 0.057 (in 2 folds),0.180 +/- 0.057 (in 2 folds),0.820 +/- 0.057 (in 2 folds),0.000 +/- 0.000 (in 2 folds),0.814,0.0,0.662 +/- 0.034 (in 2 folds),-0.001 +/- 0.053 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.664,-0.006,0.184,Unknown,102,23,125,0.184,True
dummy_most_frequent,0.500 +/- 0.000 (in 2 folds),0.500 +/- 0.000 (in 2 folds),0.180 +/- 0.057 (in 2 folds),0.180 +/- 0.057 (in 2 folds),0.820 +/- 0.057 (in 2 folds),0.000 +/- 0.000 (in 2 folds),0.814,0.0,0.662 +/- 0.034 (in 2 folds),-0.001 +/- 0.053 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.664,-0.006,0.184,Unknown,102,23,125,0.184,True
dummy_stratified,0.486 +/- 0.003 (in 2 folds),0.486 +/- 0.003 (in 2 folds),0.177 +/- 0.056 (in 2 folds),0.177 +/- 0.056 (in 2 folds),0.716 +/- 0.006 (in 2 folds),-0.032 +/- 0.017 (in 2 folds),0.716,-0.044,0.581 +/- 0.065 (in 2 folds),-0.021 +/- 0.043 (in 2 folds),0.189 +/- 0.098 (in 2 folds),0.584,-0.03,0.184,Unknown,102,23,125,0.184,False


2023-01-14 01:14:18,260 - malid.external.model_evaluation - INFO - Removing ('rf_multiclass', 0) because fold 0 is incomplete.
2023-01-14 01:14:18,261 - malid.external.model_evaluation - INFO - Removing ('dummy_stratified', 0) because fold 0 is incomplete.
2023-01-14 01:14:18,262 - malid.external.model_evaluation - INFO - Removing ('dummy_most_frequent', 0) because fold 0 is incomplete.
2023-01-14 01:14:18,263 - malid.external.model_evaluation - INFO - Removing ('xgboost', 0) because fold 0 is incomplete.


## GeneLocus.BCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR/sex_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR/sex_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.560 +/- 0.101 (in 3 folds),0.560 +/- 0.101 (in 3 folds),0.565 +/- 0.138 (in 3 folds),0.565 +/- 0.138 (in 3 folds),0.498 +/- 0.050 (in 3 folds),0.041 +/- 0.173 (in 3 folds),0.495,-0.013,0.483 +/- 0.032 (in 3 folds),0.026 +/- 0.147 (in 3 folds),...,0.534 +/- 0.000 (in 1 folds),0.482,-0.013,0.026,Unknown,186,5,191,0.026178,False
linearsvm_ovr,0.514 +/- 0.049 (in 3 folds),0.514 +/- 0.049 (in 3 folds),0.525 +/- 0.120 (in 3 folds),0.525 +/- 0.120 (in 3 folds),0.505 +/- 0.016 (in 3 folds),0.023 +/- 0.025 (in 3 folds),0.505,0.013,0.491 +/- 0.025 (in 3 folds),0.022 +/- 0.025 (in 3 folds),...,0.421 +/- 0.000 (in 1 folds),0.492,0.012,0.026,Unknown,186,5,191,0.026178,False
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.487 +/- 0.059 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.489,-0.032,0.474 +/- 0.067 (in 3 folds),-0.023 +/- 0.029 (in 3 folds),...,0.463 +/- 0.000 (in 1 folds),0.476,-0.031,0.026,Unknown,186,5,191,0.026178,False
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.487 +/- 0.059 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.489,-0.032,0.474 +/- 0.067 (in 3 folds),-0.023 +/- 0.029 (in 3 folds),...,0.463 +/- 0.000 (in 1 folds),0.476,-0.031,0.026,Unknown,186,5,191,0.026178,False
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.487 +/- 0.059 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.489,-0.032,0.474 +/- 0.067 (in 3 folds),-0.023 +/- 0.029 (in 3 folds),...,0.463 +/- 0.000 (in 1 folds),0.476,-0.031,0.026,Unknown,186,5,191,0.026178,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.488 +/- 0.059 (in 3 folds),0.487 +/- 0.059 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.489,-0.032,0.474 +/- 0.067 (in 3 folds),-0.023 +/- 0.029 (in 3 folds),...,0.463 +/- 0.000 (in 1 folds),0.476,-0.031,0.026,Unknown,186,5,191,0.026178,False
lasso_multiclass,0.496 +/- 0.059 (in 3 folds),0.496 +/- 0.059 (in 3 folds),0.514 +/- 0.133 (in 3 folds),0.514 +/- 0.133 (in 3 folds),0.505 +/- 0.016 (in 3 folds),0.022 +/- 0.027 (in 3 folds),0.505,0.012,0.491 +/- 0.025 (in 3 folds),0.021 +/- 0.027 (in 3 folds),...,0.412 +/- 0.000 (in 1 folds),0.492,0.011,0.026,Unknown,186,5,191,0.026178,False
xgboost,0.494 +/- 0.107 (in 3 folds),0.494 +/- 0.107 (in 3 folds),0.519 +/- 0.153 (in 3 folds),0.519 +/- 0.153 (in 3 folds),0.481 +/- 0.033 (in 3 folds),-0.013 +/- 0.101 (in 3 folds),0.478,-0.046,0.467 +/- 0.015 (in 3 folds),-0.019 +/- 0.090 (in 3 folds),...,0.462 +/- 0.000 (in 1 folds),0.466,-0.044,0.026,Unknown,186,5,191,0.026178,False
dummy_stratified,0.491 +/- 0.127 (in 3 folds),0.491 +/- 0.127 (in 3 folds),0.498 +/- 0.134 (in 3 folds),0.498 +/- 0.134 (in 3 folds),0.482 +/- 0.113 (in 3 folds),-0.008 +/- 0.272 (in 3 folds),0.473,-0.056,0.465 +/- 0.090 (in 3 folds),-0.023 +/- 0.244 (in 3 folds),...,0.425 +/- 0.000 (in 1 folds),0.461,-0.054,0.026,Unknown,186,5,191,0.026178,False




GeneLocus.TCR


## GeneLocus.TCR, TargetObsColumnEnum.disease, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.956 +/- 0.001 (in 3 folds),0.960 +/- 0.004 (in 3 folds),0.935 +/- 0.002 (in 3 folds),0.943 +/- 0.008 (in 3 folds),0.785 +/- 0.032 (in 3 folds),0.681 +/- 0.048 (in 3 folds),0.785,0.679,0.783 +/- 0.029 (in 3 folds),0.679 +/- 0.044 (in 3 folds),...,0.939 +/- 0.003 (in 2 folds),0.783,0.677,0.002,Unknown,413,1,414,0.002415,False
elasticnet_cv,0.952 +/- 0.001 (in 3 folds),0.958 +/- 0.004 (in 3 folds),0.936 +/- 0.003 (in 3 folds),0.944 +/- 0.008 (in 3 folds),0.797 +/- 0.019 (in 3 folds),0.701 +/- 0.028 (in 3 folds),0.797,0.699,0.795 +/- 0.016 (in 3 folds),0.699 +/- 0.024 (in 3 folds),...,0.941 +/- 0.007 (in 2 folds),0.795,0.697,0.002,Unknown,413,1,414,0.002415,False
lasso_multiclass,0.949 +/- 0.008 (in 3 folds),0.953 +/- 0.013 (in 3 folds),0.942 +/- 0.009 (in 3 folds),0.947 +/- 0.014 (in 3 folds),0.828 +/- 0.034 (in 3 folds),0.759 +/- 0.042 (in 3 folds),0.828,0.757,0.826 +/- 0.036 (in 3 folds),0.757 +/- 0.045 (in 3 folds),...,0.941 +/- 0.011 (in 2 folds),0.826,0.755,0.002,Unknown,413,1,414,0.002415,False
lasso_cv,0.947 +/- 0.008 (in 3 folds),0.951 +/- 0.013 (in 3 folds),0.934 +/- 0.011 (in 3 folds),0.941 +/- 0.015 (in 3 folds),0.772 +/- 0.040 (in 3 folds),0.664 +/- 0.066 (in 3 folds),0.772,0.661,0.770 +/- 0.037 (in 3 folds),0.662 +/- 0.063 (in 3 folds),...,0.935 +/- 0.015 (in 2 folds),0.771,0.659,0.002,Unknown,413,1,414,0.002415,False
rf_multiclass,0.947 +/- 0.006 (in 3 folds),0.951 +/- 0.006 (in 3 folds),0.939 +/- 0.007 (in 3 folds),0.945 +/- 0.004 (in 3 folds),0.775 +/- 0.033 (in 3 folds),0.669 +/- 0.055 (in 3 folds),0.775,0.667,0.773 +/- 0.035 (in 3 folds),0.667 +/- 0.056 (in 3 folds),...,0.944 +/- 0.006 (in 2 folds),0.773,0.665,0.002,Unknown,413,1,414,0.002415,False
xgboost,0.944 +/- 0.009 (in 3 folds),0.944 +/- 0.014 (in 3 folds),0.940 +/- 0.010 (in 3 folds),0.942 +/- 0.017 (in 3 folds),0.775 +/- 0.028 (in 3 folds),0.672 +/- 0.048 (in 3 folds),0.775,0.669,0.773 +/- 0.029 (in 3 folds),0.670 +/- 0.048 (in 3 folds),...,0.936 +/- 0.018 (in 2 folds),0.773,0.667,0.002,Unknown,413,1,414,0.002415,False
linearsvm_ovr,0.944 +/- 0.001 (in 3 folds),0.947 +/- 0.005 (in 3 folds),0.941 +/- 0.005 (in 3 folds),0.946 +/- 0.009 (in 3 folds),0.819 +/- 0.030 (in 3 folds),0.741 +/- 0.038 (in 3 folds),0.818,0.739,0.817 +/- 0.032 (in 3 folds),0.738 +/- 0.041 (in 3 folds),...,0.941 +/- 0.007 (in 2 folds),0.816,0.736,0.002,Unknown,413,1,414,0.002415,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.470 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.47,0.0,0.469 +/- 0.002 (in 3 folds),0.011 +/- 0.020 (in 3 folds),...,0.500 +/- 0.000 (in 2 folds),0.469,0.02,0.002,Unknown,413,1,414,0.002415,True
dummy_stratified,0.494 +/- 0.024 (in 3 folds),0.491 +/- 0.027 (in 3 folds),0.504 +/- 0.009 (in 3 folds),0.504 +/- 0.010 (in 3 folds),0.332 +/- 0.031 (in 3 folds),-0.005 +/- 0.047 (in 3 folds),0.332,-0.006,0.331 +/- 0.032 (in 3 folds),-0.005 +/- 0.046 (in 3 folds),...,0.506 +/- 0.012 (in 2 folds),0.331,-0.005,0.002,Unknown,413,1,414,0.002415,False




## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.956 +/- 0.003 (in 3 folds),0.963 +/- 0.001 (in 3 folds),0.945 +/- 0.006 (in 3 folds),0.955 +/- 0.003 (in 3 folds),0.809 +/- 0.011 (in 3 folds),0.720 +/- 0.010 (in 3 folds),0.809,0.719,0.804 +/- 0.011 (in 3 folds),0.715 +/- 0.005 (in 3 folds),...,0.955 +/- 0.005 (in 2 folds),0.804,0.713,0.006,Unknown,356,2,358,0.005587,False
elasticnet_cv,0.955 +/- 0.003 (in 3 folds),0.962 +/- 0.003 (in 3 folds),0.939 +/- 0.004 (in 3 folds),0.951 +/- 0.003 (in 3 folds),0.803 +/- 0.024 (in 3 folds),0.711 +/- 0.038 (in 3 folds),0.803,0.709,0.799 +/- 0.016 (in 3 folds),0.705 +/- 0.028 (in 3 folds),...,0.952 +/- 0.003 (in 2 folds),0.799,0.703,0.006,Unknown,356,2,358,0.005587,False
ridge_cv,0.954 +/- 0.005 (in 3 folds),0.961 +/- 0.005 (in 3 folds),0.937 +/- 0.005 (in 3 folds),0.949 +/- 0.005 (in 3 folds),0.803 +/- 0.035 (in 3 folds),0.710 +/- 0.043 (in 3 folds),0.803,0.709,0.799 +/- 0.032 (in 3 folds),0.705 +/- 0.039 (in 3 folds),...,0.952 +/- 0.001 (in 2 folds),0.799,0.704,0.006,Unknown,356,2,358,0.005587,False
lasso_multiclass,0.953 +/- 0.003 (in 3 folds),0.962 +/- 0.003 (in 3 folds),0.945 +/- 0.004 (in 3 folds),0.957 +/- 0.003 (in 3 folds),0.803 +/- 0.031 (in 3 folds),0.726 +/- 0.035 (in 3 folds),0.803,0.723,0.799 +/- 0.032 (in 3 folds),0.721 +/- 0.033 (in 3 folds),...,0.955 +/- 0.001 (in 2 folds),0.799,0.717,0.006,Unknown,356,2,358,0.005587,False
linearsvm_ovr,0.948 +/- 0.004 (in 3 folds),0.955 +/- 0.004 (in 3 folds),0.947 +/- 0.007 (in 3 folds),0.956 +/- 0.005 (in 3 folds),0.786 +/- 0.026 (in 3 folds),0.695 +/- 0.024 (in 3 folds),0.787,0.692,0.782 +/- 0.031 (in 3 folds),0.690 +/- 0.029 (in 3 folds),...,0.956 +/- 0.007 (in 2 folds),0.782,0.687,0.006,Unknown,356,2,358,0.005587,False
rf_multiclass,0.945 +/- 0.003 (in 3 folds),0.950 +/- 0.002 (in 3 folds),0.942 +/- 0.004 (in 3 folds),0.948 +/- 0.002 (in 3 folds),0.803 +/- 0.023 (in 3 folds),0.713 +/- 0.033 (in 3 folds),0.803,0.711,0.799 +/- 0.030 (in 3 folds),0.708 +/- 0.041 (in 3 folds),...,0.949 +/- 0.001 (in 2 folds),0.799,0.706,0.006,Unknown,356,2,358,0.005587,False
xgboost,0.942 +/- 0.002 (in 3 folds),0.943 +/- 0.002 (in 3 folds),0.940 +/- 0.001 (in 3 folds),0.943 +/- 0.001 (in 3 folds),0.778 +/- 0.012 (in 3 folds),0.678 +/- 0.006 (in 3 folds),0.778,0.675,0.774 +/- 0.017 (in 3 folds),0.674 +/- 0.015 (in 3 folds),...,0.943 +/- 0.001 (in 2 folds),0.774,0.67,0.006,Unknown,356,2,358,0.005587,False
dummy_stratified,0.536 +/- 0.004 (in 3 folds),0.530 +/- 0.015 (in 3 folds),0.524 +/- 0.004 (in 3 folds),0.523 +/- 0.008 (in 3 folds),0.385 +/- 0.018 (in 3 folds),0.078 +/- 0.011 (in 3 folds),0.385,0.078,0.383 +/- 0.016 (in 3 folds),0.078 +/- 0.012 (in 3 folds),...,0.523 +/- 0.011 (in 2 folds),0.383,0.078,0.006,Unknown,356,2,358,0.005587,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.035 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.463,0.0,0.461 +/- 0.034 (in 3 folds),0.017 +/- 0.030 (in 3 folds),...,0.500 +/- 0.000 (in 2 folds),0.461,0.03,0.006,Unknown,356,2,358,0.005587,True




## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/with_demographics_columns/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/with_demographics_columns/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.958 +/- 0.002 (in 3 folds),0.965 +/- 0.001 (in 3 folds),0.943 +/- 0.002 (in 3 folds),0.954 +/- 0.002 (in 3 folds),0.823 +/- 0.002 (in 3 folds),0.744 +/- 0.016 (in 3 folds),0.823,0.743,0.818 +/- 0.009 (in 3 folds),0.738 +/- 0.022 (in 3 folds),...,0.955 +/- 0.000 (in 2 folds),0.818,0.737,0.006,Unknown,356,2,358,0.005587,False
ridge_cv,0.957 +/- 0.007 (in 3 folds),0.963 +/- 0.011 (in 3 folds),0.943 +/- 0.008 (in 3 folds),0.951 +/- 0.014 (in 3 folds),0.806 +/- 0.013 (in 3 folds),0.716 +/- 0.032 (in 3 folds),0.806,0.716,0.802 +/- 0.011 (in 3 folds),0.711 +/- 0.031 (in 3 folds),...,0.949 +/- 0.019 (in 2 folds),0.802,0.71,0.006,Unknown,356,2,358,0.005587,False
lasso_cv,0.955 +/- 0.004 (in 3 folds),0.961 +/- 0.007 (in 3 folds),0.941 +/- 0.007 (in 3 folds),0.951 +/- 0.009 (in 3 folds),0.798 +/- 0.048 (in 3 folds),0.703 +/- 0.078 (in 3 folds),0.798,0.703,0.794 +/- 0.054 (in 3 folds),0.698 +/- 0.086 (in 3 folds),...,0.956 +/- 0.002 (in 2 folds),0.793,0.697,0.006,Unknown,356,2,358,0.005587,False
rf_multiclass,0.953 +/- 0.009 (in 3 folds),0.958 +/- 0.008 (in 3 folds),0.946 +/- 0.009 (in 3 folds),0.954 +/- 0.007 (in 3 folds),0.806 +/- 0.007 (in 3 folds),0.717 +/- 0.028 (in 3 folds),0.806,0.715,0.802 +/- 0.011 (in 3 folds),0.712 +/- 0.031 (in 3 folds),...,0.950 +/- 0.003 (in 2 folds),0.802,0.71,0.006,Unknown,356,2,358,0.005587,False
xgboost,0.944 +/- 0.002 (in 3 folds),0.946 +/- 0.002 (in 3 folds),0.944 +/- 0.007 (in 3 folds),0.948 +/- 0.009 (in 3 folds),0.778 +/- 0.015 (in 3 folds),0.674 +/- 0.006 (in 3 folds),0.778,0.672,0.774 +/- 0.016 (in 3 folds),0.669 +/- 0.011 (in 3 folds),...,0.953 +/- 0.003 (in 2 folds),0.774,0.667,0.006,Unknown,356,2,358,0.005587,False
lasso_multiclass,0.930 +/- 0.024 (in 3 folds),0.935 +/- 0.027 (in 3 folds),0.921 +/- 0.024 (in 3 folds),0.929 +/- 0.027 (in 3 folds),0.809 +/- 0.019 (in 3 folds),0.734 +/- 0.027 (in 3 folds),0.809,0.733,0.805 +/- 0.026 (in 3 folds),0.728 +/- 0.037 (in 3 folds),...,0.944 +/- 0.016 (in 2 folds),0.804,0.727,0.006,Unknown,356,2,358,0.005587,False
linearsvm_ovr,0.888 +/- 0.031 (in 3 folds),0.890 +/- 0.027 (in 3 folds),0.890 +/- 0.018 (in 3 folds),0.898 +/- 0.010 (in 3 folds),0.756 +/- 0.037 (in 3 folds),0.644 +/- 0.074 (in 3 folds),0.756,0.643,0.752 +/- 0.039 (in 3 folds),0.639 +/- 0.075 (in 3 folds),...,0.898 +/- 0.014 (in 2 folds),0.751,0.638,0.006,Unknown,356,2,358,0.005587,False
dummy_stratified,0.536 +/- 0.004 (in 3 folds),0.530 +/- 0.015 (in 3 folds),0.524 +/- 0.004 (in 3 folds),0.523 +/- 0.008 (in 3 folds),0.385 +/- 0.018 (in 3 folds),0.078 +/- 0.011 (in 3 folds),0.385,0.078,0.383 +/- 0.016 (in 3 folds),0.078 +/- 0.012 (in 3 folds),...,0.523 +/- 0.011 (in 2 folds),0.383,0.078,0.006,Unknown,356,2,358,0.005587,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.035 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.463,0.0,0.461 +/- 0.034 (in 3 folds),0.017 +/- 0.030 (in 3 folds),...,0.500 +/- 0.000 (in 2 folds),0.461,0.03,0.006,Unknown,356,2,358,0.005587,True




## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_regressed_out/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_regressed_out/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.922 +/- 0.017 (in 3 folds),0.924 +/- 0.021 (in 3 folds),0.916 +/- 0.026 (in 3 folds),0.917 +/- 0.034 (in 3 folds),0.730 +/- 0.027 (in 3 folds),0.604 +/- 0.027 (in 3 folds),0.73,0.597,0.726 +/- 0.029 (in 3 folds),0.599 +/- 0.027 (in 3 folds),...,0.936 +/- 0.003 (in 2 folds),0.726,0.593,0.006,Unknown,356,2,358,0.005587,False
xgboost,0.902 +/- 0.003 (in 3 folds),0.899 +/- 0.004 (in 3 folds),0.902 +/- 0.006 (in 3 folds),0.902 +/- 0.015 (in 3 folds),0.724 +/- 0.037 (in 3 folds),0.595 +/- 0.039 (in 3 folds),0.725,0.589,0.721 +/- 0.040 (in 3 folds),0.590 +/- 0.043 (in 3 folds),...,0.908 +/- 0.013 (in 2 folds),0.721,0.584,0.006,Unknown,356,2,358,0.005587,False
lasso_multiclass,0.870 +/- 0.034 (in 3 folds),0.875 +/- 0.048 (in 3 folds),0.863 +/- 0.044 (in 3 folds),0.870 +/- 0.057 (in 3 folds),0.688 +/- 0.014 (in 3 folds),0.559 +/- 0.024 (in 3 folds),0.688,0.556,0.684 +/- 0.016 (in 3 folds),0.555 +/- 0.024 (in 3 folds),...,0.902 +/- 0.003 (in 2 folds),0.684,0.551,0.006,Unknown,356,2,358,0.005587,False
linearsvm_ovr,0.859 +/- 0.024 (in 3 folds),0.864 +/- 0.036 (in 3 folds),0.860 +/- 0.029 (in 3 folds),0.866 +/- 0.043 (in 3 folds),0.691 +/- 0.033 (in 3 folds),0.561 +/- 0.040 (in 3 folds),0.691,0.556,0.687 +/- 0.040 (in 3 folds),0.557 +/- 0.046 (in 3 folds),...,0.890 +/- 0.013 (in 2 folds),0.687,0.552,0.006,Unknown,356,2,358,0.005587,False
ridge_cv,0.835 +/- 0.014 (in 3 folds),0.844 +/- 0.024 (in 3 folds),0.840 +/- 0.032 (in 3 folds),0.850 +/- 0.043 (in 3 folds),0.637 +/- 0.090 (in 3 folds),0.452 +/- 0.173 (in 3 folds),0.638,0.462,0.634 +/- 0.095 (in 3 folds),0.452 +/- 0.173 (in 3 folds),...,0.875 +/- 0.007 (in 2 folds),0.634,0.459,0.006,Unknown,356,2,358,0.005587,False
lasso_cv,0.832 +/- 0.010 (in 3 folds),0.841 +/- 0.019 (in 3 folds),0.844 +/- 0.033 (in 3 folds),0.854 +/- 0.045 (in 3 folds),0.691 +/- 0.026 (in 3 folds),0.546 +/- 0.041 (in 3 folds),0.691,0.544,0.687 +/- 0.032 (in 3 folds),0.543 +/- 0.047 (in 3 folds),...,0.880 +/- 0.008 (in 2 folds),0.687,0.541,0.006,Unknown,356,2,358,0.005587,False
elasticnet_cv,0.823 +/- 0.026 (in 3 folds),0.831 +/- 0.036 (in 3 folds),0.826 +/- 0.060 (in 3 folds),0.839 +/- 0.067 (in 3 folds),0.694 +/- 0.007 (in 3 folds),0.551 +/- 0.018 (in 3 folds),0.694,0.546,0.690 +/- 0.013 (in 3 folds),0.547 +/- 0.022 (in 3 folds),...,0.877 +/- 0.011 (in 2 folds),0.69,0.543,0.006,Unknown,356,2,358,0.005587,False
dummy_stratified,0.536 +/- 0.004 (in 3 folds),0.530 +/- 0.015 (in 3 folds),0.524 +/- 0.004 (in 3 folds),0.523 +/- 0.008 (in 3 folds),0.385 +/- 0.018 (in 3 folds),0.078 +/- 0.011 (in 3 folds),0.385,0.078,0.383 +/- 0.016 (in 3 folds),0.078 +/- 0.012 (in 3 folds),...,0.523 +/- 0.011 (in 2 folds),0.383,0.078,0.006,Unknown,356,2,358,0.005587,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.463 +/- 0.035 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.463,0.0,0.461 +/- 0.034 (in 3 folds),0.017 +/- 0.030 (in 3 folds),...,0.500 +/- 0.000 (in 2 folds),0.461,0.03,0.006,Unknown,356,2,358,0.005587,True




## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f6173f1a220>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.858 +/- 0.031 (in 3 folds),0.872 +/- 0.037 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.865 +/- 0.032 (in 3 folds),0.626 +/- 0.043 (in 3 folds),0.433 +/- 0.065 (in 3 folds),0.626,0.429,358,0,358,0.0,False
elasticnet_cv,0.856 +/- 0.033 (in 3 folds),0.870 +/- 0.037 (in 3 folds),0.851 +/- 0.023 (in 3 folds),0.869 +/- 0.030 (in 3 folds),0.692 +/- 0.059 (in 3 folds),0.554 +/- 0.083 (in 3 folds),0.693,0.552,358,0,358,0.0,False
lasso_multiclass,0.856 +/- 0.026 (in 3 folds),0.873 +/- 0.029 (in 3 folds),0.850 +/- 0.012 (in 3 folds),0.870 +/- 0.020 (in 3 folds),0.651 +/- 0.037 (in 3 folds),0.527 +/- 0.031 (in 3 folds),0.651,0.525,358,0,358,0.0,False
rf_multiclass,0.853 +/- 0.035 (in 3 folds),0.869 +/- 0.037 (in 3 folds),0.843 +/- 0.037 (in 3 folds),0.862 +/- 0.036 (in 3 folds),0.670 +/- 0.026 (in 3 folds),0.508 +/- 0.039 (in 3 folds),0.67,0.508,358,0,358,0.0,False
linearsvm_ovr,0.853 +/- 0.023 (in 3 folds),0.868 +/- 0.029 (in 3 folds),0.848 +/- 0.012 (in 3 folds),0.866 +/- 0.020 (in 3 folds),0.656 +/- 0.033 (in 3 folds),0.522 +/- 0.034 (in 3 folds),0.656,0.522,358,0,358,0.0,False
lasso_cv,0.845 +/- 0.032 (in 3 folds),0.860 +/- 0.038 (in 3 folds),0.843 +/- 0.022 (in 3 folds),0.861 +/- 0.030 (in 3 folds),0.653 +/- 0.055 (in 3 folds),0.489 +/- 0.072 (in 3 folds),0.654,0.485,358,0,358,0.0,False
xgboost,0.843 +/- 0.049 (in 3 folds),0.860 +/- 0.047 (in 3 folds),0.848 +/- 0.041 (in 3 folds),0.867 +/- 0.038 (in 3 folds),0.662 +/- 0.048 (in 3 folds),0.497 +/- 0.068 (in 3 folds),0.662,0.496,358,0,358,0.0,False
dummy_stratified,0.530 +/- 0.013 (in 3 folds),0.526 +/- 0.016 (in 3 folds),0.522 +/- 0.007 (in 3 folds),0.523 +/- 0.009 (in 3 folds),0.374 +/- 0.017 (in 3 folds),0.065 +/- 0.025 (in 3 folds),0.374,0.064,358,0,358,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True




## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only_age/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only_age/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f6173fbea60>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.699 +/- 0.040 (in 3 folds),0.719 +/- 0.041 (in 3 folds),0.682 +/- 0.040 (in 3 folds),0.702 +/- 0.041 (in 3 folds),0.464 +/- 0.034 (in 3 folds),0.255 +/- 0.050 (in 3 folds),0.464,0.251,358,0,358,0.0,False
xgboost,0.697 +/- 0.032 (in 3 folds),0.715 +/- 0.033 (in 3 folds),0.691 +/- 0.030 (in 3 folds),0.710 +/- 0.026 (in 3 folds),0.466 +/- 0.037 (in 3 folds),0.200 +/- 0.035 (in 3 folds),0.466,0.199,358,0,358,0.0,False
lasso_multiclass,0.682 +/- 0.067 (in 3 folds),0.707 +/- 0.069 (in 3 folds),0.687 +/- 0.059 (in 3 folds),0.715 +/- 0.065 (in 3 folds),0.338 +/- 0.013 (in 3 folds),0.199 +/- 0.060 (in 3 folds),0.338,0.193,358,0,358,0.0,False
linearsvm_ovr,0.663 +/- 0.028 (in 3 folds),0.681 +/- 0.033 (in 3 folds),0.678 +/- 0.025 (in 3 folds),0.700 +/- 0.034 (in 3 folds),0.441 +/- 0.045 (in 3 folds),0.145 +/- 0.063 (in 3 folds),0.441,0.144,358,0,358,0.0,True
elasticnet_cv,0.659 +/- 0.007 (in 3 folds),0.676 +/- 0.011 (in 3 folds),0.679 +/- 0.021 (in 3 folds),0.699 +/- 0.029 (in 3 folds),0.472 +/- 0.010 (in 3 folds),0.092 +/- 0.093 (in 3 folds),0.472,0.11,358,0,358,0.0,True
lasso_cv,0.647 +/- 0.044 (in 3 folds),0.665 +/- 0.045 (in 3 folds),0.671 +/- 0.049 (in 3 folds),0.692 +/- 0.052 (in 3 folds),0.472 +/- 0.010 (in 3 folds),0.092 +/- 0.093 (in 3 folds),0.472,0.11,358,0,358,0.0,True
ridge_cv,0.640 +/- 0.039 (in 3 folds),0.657 +/- 0.045 (in 3 folds),0.659 +/- 0.043 (in 3 folds),0.681 +/- 0.051 (in 3 folds),0.480 +/- 0.024 (in 3 folds),0.109 +/- 0.097 (in 3 folds),0.48,0.133,358,0,358,0.0,True
dummy_stratified,0.530 +/- 0.013 (in 3 folds),0.526 +/- 0.016 (in 3 folds),0.522 +/- 0.007 (in 3 folds),0.523 +/- 0.009 (in 3 folds),0.374 +/- 0.017 (in 3 folds),0.065 +/- 0.025 (in 3 folds),0.374,0.064,358,0,358,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True




## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only_sex/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only_sex/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f6173f64ac0>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.579 +/- 0.023 (in 3 folds),0.573 +/- 0.026 (in 3 folds),0.547 +/- 0.013 (in 3 folds),0.545 +/- 0.013 (in 3 folds),0.332 +/- 0.090 (in 3 folds),0.118 +/- 0.066 (in 3 folds),0.332,0.11,358,0,358,0.0,False
linearsvm_ovr,0.573 +/- 0.019 (in 3 folds),0.560 +/- 0.024 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.537 +/- 0.015 (in 3 folds),0.397 +/- 0.025 (in 3 folds),0.104 +/- 0.091 (in 3 folds),0.397,0.089,358,0,358,0.0,True
xgboost,0.573 +/- 0.019 (in 3 folds),0.560 +/- 0.024 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.537 +/- 0.015 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
lasso_multiclass,0.561 +/- 0.030 (in 3 folds),0.556 +/- 0.028 (in 3 folds),0.540 +/- 0.016 (in 3 folds),0.538 +/- 0.014 (in 3 folds),0.332 +/- 0.090 (in 3 folds),0.118 +/- 0.066 (in 3 folds),0.332,0.11,358,0,358,0.0,False
ridge_cv,0.530 +/- 0.052 (in 3 folds),0.529 +/- 0.051 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
dummy_stratified,0.530 +/- 0.013 (in 3 folds),0.526 +/- 0.016 (in 3 folds),0.522 +/- 0.007 (in 3 folds),0.523 +/- 0.009 (in 3 folds),0.374 +/- 0.017 (in 3 folds),0.065 +/- 0.025 (in 3 folds),0.374,0.064,358,0,358,0.0,False
lasso_cv,0.512 +/- 0.020 (in 3 folds),0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.510 +/- 0.017 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
elasticnet_cv,0.512 +/- 0.020 (in 3 folds),0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.510 +/- 0.017 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True




## GeneLocus.TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only_ethnicity_condensed/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/disease_all_demographics_present/demographics_only_ethnicity_condensed/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f6173f0f910>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.792 +/- 0.029 (in 3 folds),0.800 +/- 0.032 (in 3 folds),0.770 +/- 0.021 (in 3 folds),0.783 +/- 0.026 (in 3 folds),0.659 +/- 0.079 (in 3 folds),0.499 +/- 0.114 (in 3 folds),0.659,0.495,358,0,358,0.0,True
xgboost,0.790 +/- 0.032 (in 3 folds),0.794 +/- 0.036 (in 3 folds),0.769 +/- 0.021 (in 3 folds),0.779 +/- 0.025 (in 3 folds),0.664 +/- 0.069 (in 3 folds),0.510 +/- 0.095 (in 3 folds),0.665,0.504,358,0,358,0.0,False
rf_multiclass,0.785 +/- 0.017 (in 3 folds),0.794 +/- 0.022 (in 3 folds),0.766 +/- 0.014 (in 3 folds),0.778 +/- 0.021 (in 3 folds),0.562 +/- 0.097 (in 3 folds),0.423 +/- 0.096 (in 3 folds),0.561,0.414,358,0,358,0.0,False
elasticnet_cv,0.780 +/- 0.025 (in 3 folds),0.791 +/- 0.029 (in 3 folds),0.767 +/- 0.021 (in 3 folds),0.781 +/- 0.026 (in 3 folds),0.659 +/- 0.079 (in 3 folds),0.499 +/- 0.114 (in 3 folds),0.659,0.495,358,0,358,0.0,True
linearsvm_ovr,0.775 +/- 0.023 (in 3 folds),0.782 +/- 0.023 (in 3 folds),0.761 +/- 0.014 (in 3 folds),0.772 +/- 0.017 (in 3 folds),0.678 +/- 0.045 (in 3 folds),0.534 +/- 0.054 (in 3 folds),0.679,0.533,358,0,358,0.0,True
lasso_cv,0.771 +/- 0.055 (in 3 folds),0.783 +/- 0.060 (in 3 folds),0.750 +/- 0.053 (in 3 folds),0.764 +/- 0.057 (in 3 folds),0.659 +/- 0.079 (in 3 folds),0.499 +/- 0.114 (in 3 folds),0.659,0.495,358,0,358,0.0,True
lasso_multiclass,0.759 +/- 0.023 (in 3 folds),0.763 +/- 0.046 (in 3 folds),0.749 +/- 0.016 (in 3 folds),0.758 +/- 0.030 (in 3 folds),0.556 +/- 0.087 (in 3 folds),0.421 +/- 0.081 (in 3 folds),0.556,0.406,358,0,358,0.0,False
dummy_stratified,0.530 +/- 0.013 (in 3 folds),0.526 +/- 0.016 (in 3 folds),0.522 +/- 0.007 (in 3 folds),0.523 +/- 0.009 (in 3 folds),0.374 +/- 0.017 (in 3 folds),0.065 +/- 0.025 (in 3 folds),0.374,0.064,358,0,358,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True




## GeneLocus.TCR, TargetObsColumnEnum.covid_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/covid_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/covid_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.992 +/- 0.006 (in 3 folds),0.992 +/- 0.006 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.944 +/- 0.018 (in 3 folds),0.851 +/- 0.052 (in 3 folds),0.944,0.851,252,0,252,0.0,False
lasso_multiclass,0.992 +/- 0.005 (in 3 folds),0.992 +/- 0.005 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.944 +/- 0.018 (in 3 folds),0.851 +/- 0.052 (in 3 folds),0.944,0.851,252,0,252,0.0,False
ridge_cv,0.992 +/- 0.004 (in 3 folds),0.992 +/- 0.004 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.913 +/- 0.031 (in 3 folds),0.747 +/- 0.098 (in 3 folds),0.913,0.742,252,0,252,0.0,False
elasticnet_cv,0.992 +/- 0.003 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.998 +/- 0.001 (in 3 folds),0.932 +/- 0.049 (in 3 folds),0.804 +/- 0.151 (in 3 folds),0.933,0.804,252,0,252,0.0,False
lasso_cv,0.988 +/- 0.009 (in 3 folds),0.988 +/- 0.009 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.936 +/- 0.037 (in 3 folds),0.813 +/- 0.115 (in 3 folds),0.937,0.816,252,0,252,0.0,False
rf_multiclass,0.986 +/- 0.009 (in 3 folds),0.986 +/- 0.009 (in 3 folds),0.996 +/- 0.002 (in 3 folds),0.996 +/- 0.002 (in 3 folds),0.944 +/- 0.014 (in 3 folds),0.843 +/- 0.047 (in 3 folds),0.944,0.843,252,0,252,0.0,False
xgboost,0.981 +/- 0.012 (in 3 folds),0.981 +/- 0.012 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.994 +/- 0.004 (in 3 folds),0.932 +/- 0.019 (in 3 folds),0.812 +/- 0.050 (in 3 folds),0.933,0.811,252,0,252,0.0,False
dummy_stratified,0.503 +/- 0.043 (in 3 folds),0.503 +/- 0.043 (in 3 folds),0.771 +/- 0.011 (in 3 folds),0.771 +/- 0.011 (in 3 folds),0.662 +/- 0.039 (in 3 folds),0.008 +/- 0.092 (in 3 folds),0.663,0.006,252,0,252,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.770 +/- 0.005 (in 3 folds),0.770 +/- 0.005 (in 3 folds),0.770 +/- 0.005 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.77,0.0,252,0,252,0.0,True




## GeneLocus.TCR, TargetObsColumnEnum.hiv_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/hiv_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/hiv_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.937 +/- 0.006 (in 3 folds),0.937 +/- 0.006 (in 3 folds),0.973 +/- 0.004 (in 3 folds),0.973 +/- 0.004 (in 3 folds),0.881 +/- 0.040 (in 3 folds),0.749 +/- 0.086 (in 3 folds),0.882,0.749,0.867 +/- 0.056 (in 3 folds),0.726 +/- 0.114 (in 3 folds),...,0.974 +/- 0.006 (in 2 folds),0.866,0.723,0.017,Unknown,287,5,292,0.017123,False
lasso_cv,0.933 +/- 0.011 (in 3 folds),0.933 +/- 0.011 (in 3 folds),0.972 +/- 0.006 (in 3 folds),0.972 +/- 0.006 (in 3 folds),0.822 +/- 0.028 (in 3 folds),0.584 +/- 0.072 (in 3 folds),0.822,0.585,0.808 +/- 0.034 (in 3 folds),0.564 +/- 0.077 (in 3 folds),...,0.972 +/- 0.008 (in 2 folds),0.808,0.563,0.017,Unknown,287,5,292,0.017123,False
lasso_multiclass,0.933 +/- 0.010 (in 3 folds),0.933 +/- 0.010 (in 3 folds),0.973 +/- 0.005 (in 3 folds),0.973 +/- 0.005 (in 3 folds),0.868 +/- 0.043 (in 3 folds),0.721 +/- 0.088 (in 3 folds),0.868,0.718,0.853 +/- 0.057 (in 3 folds),0.700 +/- 0.116 (in 3 folds),...,0.974 +/- 0.007 (in 2 folds),0.853,0.693,0.017,Unknown,287,5,292,0.017123,False
elasticnet_cv,0.929 +/- 0.017 (in 3 folds),0.929 +/- 0.017 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.972 +/- 0.007 (in 3 folds),0.812 +/- 0.028 (in 3 folds),0.567 +/- 0.077 (in 3 folds),0.812,0.56,0.798 +/- 0.034 (in 3 folds),0.548 +/- 0.082 (in 3 folds),...,0.974 +/- 0.008 (in 2 folds),0.798,0.54,0.017,Unknown,287,5,292,0.017123,False
rf_multiclass,0.925 +/- 0.031 (in 3 folds),0.925 +/- 0.031 (in 3 folds),0.966 +/- 0.016 (in 3 folds),0.966 +/- 0.016 (in 3 folds),0.833 +/- 0.050 (in 3 folds),0.638 +/- 0.087 (in 3 folds),0.833,0.627,0.819 +/- 0.058 (in 3 folds),0.618 +/- 0.107 (in 3 folds),...,0.965 +/- 0.022 (in 2 folds),0.818,0.605,0.017,Unknown,287,5,292,0.017123,False
ridge_cv,0.924 +/- 0.025 (in 3 folds),0.924 +/- 0.025 (in 3 folds),0.970 +/- 0.010 (in 3 folds),0.970 +/- 0.010 (in 3 folds),0.819 +/- 0.024 (in 3 folds),0.583 +/- 0.054 (in 3 folds),0.819,0.579,0.805 +/- 0.035 (in 3 folds),0.564 +/- 0.065 (in 3 folds),...,0.973 +/- 0.012 (in 2 folds),0.805,0.558,0.017,Unknown,287,5,292,0.017123,False
xgboost,0.922 +/- 0.032 (in 3 folds),0.922 +/- 0.032 (in 3 folds),0.957 +/- 0.029 (in 3 folds),0.957 +/- 0.029 (in 3 folds),0.850 +/- 0.041 (in 3 folds),0.669 +/- 0.089 (in 3 folds),0.85,0.669,0.836 +/- 0.053 (in 3 folds),0.649 +/- 0.106 (in 3 folds),...,0.972 +/- 0.014 (in 2 folds),0.836,0.645,0.017,Unknown,287,5,292,0.017123,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.669 +/- 0.007 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.669,0.0,0.658 +/- 0.013 (in 3 folds),0.022 +/- 0.037 (in 3 folds),...,0.665 +/- 0.002 (in 2 folds),0.658,0.037,0.017,Unknown,287,5,292,0.017123,True
dummy_stratified,0.449 +/- 0.024 (in 3 folds),0.449 +/- 0.024 (in 3 folds),0.648 +/- 0.013 (in 3 folds),0.648 +/- 0.013 (in 3 folds),0.533 +/- 0.020 (in 3 folds),-0.109 +/- 0.052 (in 3 folds),0.533,-0.108,0.524 +/- 0.015 (in 3 folds),-0.101 +/- 0.058 (in 3 folds),...,0.642 +/- 0.010 (in 2 folds),0.524,-0.099,0.017,Unknown,287,5,292,0.017123,False




## GeneLocus.TCR, TargetObsColumnEnum.lupus_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/lupus_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/lupus_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.973 +/- 0.012 (in 3 folds),0.973 +/- 0.012 (in 3 folds),0.940 +/- 0.017 (in 3 folds),0.940 +/- 0.017 (in 3 folds),0.905 +/- 0.025 (in 3 folds),0.744 +/- 0.076 (in 3 folds),0.905,0.739,0.887 +/- 0.036 (in 3 folds),0.693 +/- 0.114 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.888,0.693,0.019,Unknown,253,5,258,0.01938,False
lasso_multiclass,0.972 +/- 0.011 (in 3 folds),0.972 +/- 0.011 (in 3 folds),0.939 +/- 0.016 (in 3 folds),0.939 +/- 0.016 (in 3 folds),0.905 +/- 0.019 (in 3 folds),0.769 +/- 0.021 (in 3 folds),0.905,0.765,0.888 +/- 0.017 (in 3 folds),0.734 +/- 0.022 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.888,0.731,0.019,Unknown,253,5,258,0.01938,False
lasso_cv,0.971 +/- 0.016 (in 3 folds),0.971 +/- 0.016 (in 3 folds),0.933 +/- 0.030 (in 3 folds),0.933 +/- 0.030 (in 3 folds),0.901 +/- 0.042 (in 3 folds),0.731 +/- 0.129 (in 3 folds),0.901,0.727,0.883 +/- 0.043 (in 3 folds),0.682 +/- 0.140 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.884,0.681,0.019,Unknown,253,5,258,0.01938,False
ridge_cv,0.971 +/- 0.014 (in 3 folds),0.971 +/- 0.014 (in 3 folds),0.932 +/- 0.030 (in 3 folds),0.932 +/- 0.030 (in 3 folds),0.889 +/- 0.044 (in 3 folds),0.689 +/- 0.133 (in 3 folds),0.889,0.695,0.872 +/- 0.054 (in 3 folds),0.638 +/- 0.164 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.872,0.643,0.019,Unknown,253,5,258,0.01938,False
linearsvm_ovr,0.971 +/- 0.012 (in 3 folds),0.971 +/- 0.012 (in 3 folds),0.940 +/- 0.015 (in 3 folds),0.940 +/- 0.015 (in 3 folds),0.898 +/- 0.032 (in 3 folds),0.754 +/- 0.041 (in 3 folds),0.897,0.745,0.880 +/- 0.028 (in 3 folds),0.718 +/- 0.036 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.88,0.712,0.019,Unknown,253,5,258,0.01938,False
rf_multiclass,0.959 +/- 0.023 (in 3 folds),0.959 +/- 0.023 (in 3 folds),0.916 +/- 0.035 (in 3 folds),0.916 +/- 0.035 (in 3 folds),0.913 +/- 0.029 (in 3 folds),0.763 +/- 0.079 (in 3 folds),0.913,0.764,0.895 +/- 0.040 (in 3 folds),0.724 +/- 0.101 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.895,0.723,0.019,Unknown,253,5,258,0.01938,False
xgboost,0.948 +/- 0.034 (in 3 folds),0.948 +/- 0.034 (in 3 folds),0.912 +/- 0.040 (in 3 folds),0.912 +/- 0.040 (in 3 folds),0.917 +/- 0.025 (in 3 folds),0.773 +/- 0.070 (in 3 folds),0.917,0.773,0.899 +/- 0.035 (in 3 folds),0.730 +/- 0.096 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.899,0.73,0.019,Unknown,253,5,258,0.01938,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.253 +/- 0.003 (in 3 folds),0.253 +/- 0.003 (in 3 folds),0.747 +/- 0.003 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.747,0.0,0.733 +/- 0.010 (in 3 folds),-0.039 +/- 0.013 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.733,-0.04,0.019,Unknown,253,5,258,0.01938,True
dummy_stratified,0.462 +/- 0.048 (in 3 folds),0.462 +/- 0.048 (in 3 folds),0.247 +/- 0.011 (in 3 folds),0.247 +/- 0.011 (in 3 folds),0.613 +/- 0.035 (in 3 folds),-0.080 +/- 0.102 (in 3 folds),0.613,-0.081,0.601 +/- 0.027 (in 3 folds),-0.086 +/- 0.094 (in 3 folds),0.019 +/- 0.013 (in 3 folds),0.601,-0.085,0.019,Unknown,253,5,258,0.01938,False




## GeneLocus.TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/ethnicity_condensed_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/ethnicity_condensed_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.715 +/- 0.041 (in 3 folds),0.744 +/- 0.028 (in 3 folds),0.734 +/- 0.017 (in 3 folds),0.759 +/- 0.014 (in 3 folds),0.749 +/- 0.044 (in 3 folds),0.567 +/- 0.043 (in 3 folds),0.75,0.57,0.744 +/- 0.050 (in 3 folds),0.559 +/- 0.055 (in 3 folds),...,0.767 +/- 0.003 (in 2 folds),0.745,0.562,0.006,Unknown,164,1,165,0.006061,True
elasticnet_cv,0.710 +/- 0.014 (in 3 folds),0.733 +/- 0.018 (in 3 folds),0.739 +/- 0.033 (in 3 folds),0.756 +/- 0.050 (in 3 folds),0.743 +/- 0.035 (in 3 folds),0.549 +/- 0.049 (in 3 folds),0.744,0.553,0.739 +/- 0.042 (in 3 folds),0.541 +/- 0.056 (in 3 folds),...,0.777 +/- 0.047 (in 2 folds),0.739,0.545,0.006,Unknown,164,1,165,0.006061,True
lasso_cv,0.699 +/- 0.017 (in 3 folds),0.721 +/- 0.027 (in 3 folds),0.727 +/- 0.041 (in 3 folds),0.738 +/- 0.059 (in 3 folds),0.731 +/- 0.028 (in 3 folds),0.517 +/- 0.041 (in 3 folds),0.732,0.52,0.726 +/- 0.034 (in 3 folds),0.510 +/- 0.042 (in 3 folds),...,0.761 +/- 0.063 (in 2 folds),0.727,0.513,0.006,Unknown,164,1,165,0.006061,True
lasso_multiclass,0.694 +/- 0.033 (in 3 folds),0.722 +/- 0.032 (in 3 folds),0.728 +/- 0.017 (in 3 folds),0.752 +/- 0.019 (in 3 folds),0.466 +/- 0.070 (in 3 folds),0.282 +/- 0.109 (in 3 folds),0.463,0.278,0.463 +/- 0.074 (in 3 folds),0.281 +/- 0.110 (in 3 folds),...,0.763 +/- 0.006 (in 2 folds),0.461,0.276,0.006,Unknown,164,1,165,0.006061,False
linearsvm_ovr,0.687 +/- 0.013 (in 3 folds),0.719 +/- 0.018 (in 3 folds),0.726 +/- 0.012 (in 3 folds),0.751 +/- 0.021 (in 3 folds),0.510 +/- 0.079 (in 3 folds),0.274 +/- 0.072 (in 3 folds),0.506,0.255,0.507 +/- 0.082 (in 3 folds),0.273 +/- 0.074 (in 3 folds),...,0.759 +/- 0.021 (in 2 folds),0.503,0.253,0.006,Unknown,164,1,165,0.006061,False
rf_multiclass,0.668 +/- 0.013 (in 3 folds),0.674 +/- 0.027 (in 3 folds),0.700 +/- 0.014 (in 3 folds),0.692 +/- 0.020 (in 3 folds),0.660 +/- 0.071 (in 3 folds),0.414 +/- 0.122 (in 3 folds),0.659,0.393,0.656 +/- 0.077 (in 3 folds),0.413 +/- 0.123 (in 3 folds),...,0.692 +/- 0.028 (in 2 folds),0.655,0.39,0.006,Unknown,164,1,165,0.006061,True
xgboost,0.630 +/- 0.034 (in 3 folds),0.615 +/- 0.060 (in 3 folds),0.696 +/- 0.015 (in 3 folds),0.680 +/- 0.023 (in 3 folds),0.532 +/- 0.085 (in 3 folds),0.268 +/- 0.104 (in 3 folds),0.53,0.256,0.529 +/- 0.090 (in 3 folds),0.268 +/- 0.104 (in 3 folds),...,0.671 +/- 0.022 (in 2 folds),0.527,0.254,0.006,Unknown,164,1,165,0.006061,False
dummy_stratified,0.502 +/- 0.028 (in 3 folds),0.516 +/- 0.020 (in 3 folds),0.520 +/- 0.016 (in 3 folds),0.530 +/- 0.028 (in 3 folds),0.354 +/- 0.071 (in 3 folds),-0.032 +/- 0.116 (in 3 folds),0.354,-0.044,0.353 +/- 0.074 (in 3 folds),-0.028 +/- 0.108 (in 3 folds),...,0.516 +/- 0.019 (in 2 folds),0.352,-0.04,0.006,Unknown,164,1,165,0.006061,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.587 +/- 0.082 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.591,0.0,0.584 +/- 0.083 (in 3 folds),0.023 +/- 0.039 (in 3 folds),...,0.500 +/- 0.000 (in 2 folds),0.588,0.043,0.006,Unknown,164,1,165,0.006061,True




## GeneLocus.TCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/age_group_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/age_group_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.704 +/- 0.060 (in 3 folds),0.683 +/- 0.054 (in 3 folds),0.736 +/- 0.043 (in 3 folds),0.717 +/- 0.041 (in 3 folds),0.495 +/- 0.143 (in 3 folds),0.440 +/- 0.118 (in 3 folds),0.491,0.408,165,0,165,0.0,True
xgboost,0.696 +/- 0.074 (in 3 folds),0.680 +/- 0.082 (in 3 folds),0.721 +/- 0.064 (in 3 folds),0.708 +/- 0.070 (in 3 folds),0.415 +/- 0.081 (in 3 folds),0.300 +/- 0.100 (in 3 folds),0.412,0.289,165,0,165,0.0,True
elasticnet_cv,0.694 +/- 0.043 (in 3 folds),0.674 +/- 0.031 (in 3 folds),0.731 +/- 0.033 (in 3 folds),0.713 +/- 0.027 (in 3 folds),0.442 +/- 0.094 (in 3 folds),0.379 +/- 0.076 (in 3 folds),0.442,0.335,165,0,165,0.0,True
lasso_multiclass,0.689 +/- 0.049 (in 3 folds),0.666 +/- 0.046 (in 3 folds),0.719 +/- 0.027 (in 3 folds),0.700 +/- 0.021 (in 3 folds),0.436 +/- 0.024 (in 3 folds),0.325 +/- 0.031 (in 3 folds),0.436,0.324,165,0,165,0.0,False
ridge_cv,0.673 +/- 0.036 (in 3 folds),0.652 +/- 0.042 (in 3 folds),0.714 +/- 0.025 (in 3 folds),0.696 +/- 0.029 (in 3 folds),0.449 +/- 0.080 (in 3 folds),0.349 +/- 0.102 (in 3 folds),0.448,0.337,165,0,165,0.0,True
rf_multiclass,0.659 +/- 0.030 (in 3 folds),0.640 +/- 0.019 (in 3 folds),0.704 +/- 0.025 (in 3 folds),0.688 +/- 0.026 (in 3 folds),0.427 +/- 0.111 (in 3 folds),0.311 +/- 0.113 (in 3 folds),0.424,0.305,165,0,165,0.0,True
linearsvm_ovr,0.654 +/- 0.030 (in 3 folds),0.633 +/- 0.027 (in 3 folds),0.689 +/- 0.017 (in 3 folds),0.672 +/- 0.008 (in 3 folds),0.372 +/- 0.079 (in 3 folds),0.243 +/- 0.088 (in 3 folds),0.37,0.237,165,0,165,0.0,False
dummy_stratified,0.513 +/- 0.035 (in 3 folds),0.515 +/- 0.030 (in 3 folds),0.528 +/- 0.018 (in 3 folds),0.529 +/- 0.016 (in 3 folds),0.185 +/- 0.075 (in 3 folds),0.023 +/- 0.080 (in 3 folds),0.188,0.027,165,0,165,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.206 +/- 0.013 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.206,0.014,165,0,165,0.0,True




## GeneLocus.TCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/age_group_binary_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/age_group_binary_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,ROC-AUC (weighted OvO) per fold with abstention,ROC-AUC (macro OvO) per fold with abstention,...,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.777 +/- 0.049 (in 3 folds),0.777 +/- 0.049 (in 3 folds),0.896 +/- 0.009 (in 3 folds),0.896 +/- 0.009 (in 3 folds),0.701 +/- 0.075 (in 3 folds),0.420 +/- 0.164 (in 3 folds),0.698,0.417,0.735 +/- 0.000 (in 1 folds),0.735 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.685,0.403,0.018,Unknown,162,3,165,0.018182,False
linearsvm_ovr,0.755 +/- 0.055 (in 3 folds),0.755 +/- 0.055 (in 3 folds),0.886 +/- 0.009 (in 3 folds),0.886 +/- 0.009 (in 3 folds),0.689 +/- 0.078 (in 3 folds),0.387 +/- 0.173 (in 3 folds),0.685,0.384,0.721 +/- 0.000 (in 1 folds),0.721 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.673,0.371,0.018,Unknown,162,3,165,0.018182,False
xgboost,0.727 +/- 0.052 (in 3 folds),0.727 +/- 0.052 (in 3 folds),0.851 +/- 0.039 (in 3 folds),0.851 +/- 0.039 (in 3 folds),0.640 +/- 0.042 (in 3 folds),0.139 +/- 0.038 (in 3 folds),0.642,0.147,0.714 +/- 0.000 (in 1 folds),0.714 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.63,0.139,0.018,Unknown,162,3,165,0.018182,False
rf_multiclass,0.725 +/- 0.036 (in 3 folds),0.725 +/- 0.036 (in 3 folds),0.863 +/- 0.017 (in 3 folds),0.863 +/- 0.017 (in 3 folds),0.659 +/- 0.052 (in 3 folds),0.206 +/- 0.122 (in 3 folds),0.66,0.213,0.692 +/- 0.000 (in 1 folds),0.692 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.648,0.203,0.018,Unknown,162,3,165,0.018182,False
elasticnet_cv,0.687 +/- 0.168 (in 3 folds),0.687 +/- 0.168 (in 3 folds),0.817 +/- 0.130 (in 3 folds),0.817 +/- 0.130 (in 3 folds),0.700 +/- 0.048 (in 3 folds),0.227 +/- 0.255 (in 3 folds),0.698,0.284,0.735 +/- 0.000 (in 1 folds),0.735 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.685,0.268,0.018,Unknown,162,3,165,0.018182,False
lasso_cv,0.678 +/- 0.162 (in 3 folds),0.678 +/- 0.162 (in 3 folds),0.810 +/- 0.125 (in 3 folds),0.810 +/- 0.125 (in 3 folds),0.693 +/- 0.036 (in 3 folds),0.176 +/- 0.245 (in 3 folds),0.691,0.256,0.716 +/- 0.000 (in 1 folds),0.716 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.679,0.238,0.018,Unknown,162,3,165,0.018182,False
ridge_cv,0.678 +/- 0.157 (in 3 folds),0.678 +/- 0.157 (in 3 folds),0.813 +/- 0.126 (in 3 folds),0.813 +/- 0.126 (in 3 folds),0.662 +/- 0.035 (in 3 folds),0.111 +/- 0.218 (in 3 folds),0.66,0.171,0.738 +/- 0.000 (in 1 folds),0.738 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.648,0.16,0.018,Unknown,162,3,165,0.018182,False
dummy_stratified,0.527 +/- 0.103 (in 3 folds),0.527 +/- 0.103 (in 3 folds),0.660 +/- 0.093 (in 3 folds),0.660 +/- 0.093 (in 3 folds),0.568 +/- 0.134 (in 3 folds),0.059 +/- 0.212 (in 3 folds),0.574,0.07,0.630 +/- 0.000 (in 1 folds),0.630 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.564,0.067,0.018,Unknown,162,3,165,0.018182,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.646 +/- 0.047 (in 3 folds),0.646 +/- 0.047 (in 3 folds),0.646 +/- 0.047 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.648,0.0,0.500 +/- 0.000 (in 1 folds),0.500 +/- 0.000 (in 1 folds),...,0.027 +/- 0.010 (in 2 folds),0.636,-0.003,0.018,Unknown,162,3,165,0.018182,True




## GeneLocus.TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/age_group_pediatric_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/age_group_pediatric_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.994 +/- 0.011 (in 3 folds),0.994 +/- 0.011 (in 3 folds),0.988 +/- 0.020 (in 3 folds),0.988 +/- 0.020 (in 3 folds),0.970 +/- 0.027 (in 3 folds),0.906 +/- 0.081 (in 3 folds),0.969,0.899,0.954 +/- 0.043 (in 3 folds),0.863 +/- 0.119 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.952,0.849,0.018,Unknown,162,3,165,0.018182,False
rf_multiclass,0.986 +/- 0.023 (in 3 folds),0.986 +/- 0.023 (in 3 folds),0.979 +/- 0.037 (in 3 folds),0.979 +/- 0.037 (in 3 folds),0.982 +/- 0.018 (in 3 folds),0.946 +/- 0.049 (in 3 folds),0.981,0.938,0.965 +/- 0.028 (in 3 folds),0.896 +/- 0.058 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.964,0.883,0.018,Unknown,162,3,165,0.018182,False
lasso_cv,0.984 +/- 0.028 (in 3 folds),0.984 +/- 0.028 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.969 +/- 0.027 (in 3 folds),0.906 +/- 0.082 (in 3 folds),0.969,0.895,0.952 +/- 0.034 (in 3 folds),0.856 +/- 0.074 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.952,0.84,0.018,Unknown,162,3,165,0.018182,False
lasso_multiclass,0.984 +/- 0.027 (in 3 folds),0.984 +/- 0.027 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.982 +/- 0.018 (in 3 folds),0.944 +/- 0.050 (in 3 folds),0.981,0.94,0.965 +/- 0.034 (in 3 folds),0.898 +/- 0.090 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.964,0.888,0.018,Unknown,162,3,165,0.018182,False
xgboost,0.982 +/- 0.029 (in 3 folds),0.982 +/- 0.029 (in 3 folds),0.977 +/- 0.031 (in 3 folds),0.977 +/- 0.031 (in 3 folds),0.982 +/- 0.018 (in 3 folds),0.946 +/- 0.049 (in 3 folds),0.981,0.938,0.965 +/- 0.028 (in 3 folds),0.896 +/- 0.058 (in 3 folds),...,0.989 +/- 0.000 (in 1 folds),0.964,0.883,0.018,Unknown,162,3,165,0.018182,False
elasticnet_cv,0.978 +/- 0.037 (in 3 folds),0.978 +/- 0.037 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.981 +/- 0.033 (in 3 folds),0.969 +/- 0.027 (in 3 folds),0.906 +/- 0.082 (in 3 folds),0.969,0.895,0.952 +/- 0.034 (in 3 folds),0.856 +/- 0.074 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.952,0.84,0.018,Unknown,162,3,165,0.018182,False
ridge_cv,0.978 +/- 0.037 (in 3 folds),0.978 +/- 0.037 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.979 +/- 0.036 (in 3 folds),0.976 +/- 0.027 (in 3 folds),0.929 +/- 0.072 (in 3 folds),0.975,0.917,0.959 +/- 0.038 (in 3 folds),0.879 +/- 0.087 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.958,0.862,0.018,Unknown,162,3,165,0.018182,False
dummy_stratified,0.503 +/- 0.075 (in 3 folds),0.503 +/- 0.075 (in 3 folds),0.199 +/- 0.073 (in 3 folds),0.199 +/- 0.073 (in 3 folds),0.709 +/- 0.025 (in 3 folds),0.014 +/- 0.150 (in 3 folds),0.71,0.026,0.696 +/- 0.017 (in 3 folds),0.005 +/- 0.138 (in 3 folds),...,0.197 +/- 0.000 (in 1 folds),0.697,0.018,0.018,Unknown,162,3,165,0.018182,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.184 +/- 0.059 (in 3 folds),0.184 +/- 0.059 (in 3 folds),0.816 +/- 0.059 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.815,0.0,0.802 +/- 0.066 (in 3 folds),-0.026 +/- 0.026 (in 3 folds),...,0.180 +/- 0.000 (in 1 folds),0.8,-0.032,0.018,Unknown,162,3,165,0.018182,True




## GeneLocus.TCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/TCR/sex_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/TCR/sex_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.TCR: 2>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_TCRB',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.562 +/- 0.037 (in 3 folds),0.562 +/- 0.037 (in 3 folds),0.646 +/- 0.128 (in 3 folds),0.646 +/- 0.128 (in 3 folds),0.546 +/- 0.055 (in 3 folds),0.122 +/- 0.172 (in 3 folds),0.545,0.082,0.520 +/- 0.080 (in 3 folds),0.128 +/- 0.158 (in 3 folds),...,0.771 +/- 0.000 (in 1 folds),0.515,0.076,0.055,Unknown,156,9,165,0.054545,False
dummy_stratified,0.532 +/- 0.072 (in 3 folds),0.532 +/- 0.072 (in 3 folds),0.578 +/- 0.096 (in 3 folds),0.578 +/- 0.096 (in 3 folds),0.527 +/- 0.054 (in 3 folds),0.062 +/- 0.146 (in 3 folds),0.526,0.036,0.501 +/- 0.078 (in 3 folds),0.073 +/- 0.133 (in 3 folds),...,0.683 +/- 0.000 (in 1 folds),0.497,0.037,0.055,Unknown,156,9,165,0.054545,False
xgboost,0.516 +/- 0.075 (in 3 folds),0.516 +/- 0.075 (in 3 folds),0.603 +/- 0.129 (in 3 folds),0.603 +/- 0.129 (in 3 folds),0.525 +/- 0.052 (in 3 folds),0.079 +/- 0.098 (in 3 folds),0.526,0.036,0.498 +/- 0.056 (in 3 folds),0.087 +/- 0.085 (in 3 folds),...,0.702 +/- 0.000 (in 1 folds),0.497,0.037,0.055,Unknown,156,9,165,0.054545,False
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.447 +/- 0.065 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.449,-0.07,0.422 +/- 0.039 (in 3 folds),-0.023 +/- 0.091 (in 3 folds),...,0.620 +/- 0.000 (in 1 folds),0.424,-0.073,0.055,Unknown,156,9,165,0.054545,False
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.447 +/- 0.065 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.449,-0.07,0.422 +/- 0.039 (in 3 folds),-0.023 +/- 0.091 (in 3 folds),...,0.620 +/- 0.000 (in 1 folds),0.424,-0.073,0.055,Unknown,156,9,165,0.054545,False
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.447 +/- 0.065 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.449,-0.07,0.422 +/- 0.039 (in 3 folds),-0.023 +/- 0.091 (in 3 folds),...,0.620 +/- 0.000 (in 1 folds),0.424,-0.073,0.055,Unknown,156,9,165,0.054545,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.559 +/- 0.056 (in 3 folds),0.447 +/- 0.065 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.449,-0.07,0.422 +/- 0.039 (in 3 folds),-0.023 +/- 0.091 (in 3 folds),...,0.620 +/- 0.000 (in 1 folds),0.424,-0.073,0.055,Unknown,156,9,165,0.054545,False
lasso_multiclass,0.487 +/- 0.053 (in 3 folds),0.487 +/- 0.053 (in 3 folds),0.545 +/- 0.063 (in 3 folds),0.545 +/- 0.063 (in 3 folds),0.507 +/- 0.080 (in 3 folds),-0.006 +/- 0.148 (in 3 folds),0.506,-0.002,0.481 +/- 0.084 (in 3 folds),-0.003 +/- 0.132 (in 3 folds),...,0.615 +/- 0.000 (in 1 folds),0.479,0.002,0.055,Unknown,156,9,165,0.054545,False
linearsvm_ovr,0.482 +/- 0.043 (in 3 folds),0.482 +/- 0.043 (in 3 folds),0.553 +/- 0.050 (in 3 folds),0.553 +/- 0.050 (in 3 folds),0.520 +/- 0.045 (in 3 folds),0.021 +/- 0.069 (in 3 folds),0.519,0.024,0.494 +/- 0.059 (in 3 folds),0.021 +/- 0.064 (in 3 folds),...,0.610 +/- 0.000 (in 1 folds),0.491,0.026,0.055,Unknown,156,9,165,0.054545,False




In [6]:
# Combined pass: hand all configured gene loci to the analysis at once, so they
# are evaluated together in a single metamodel. Skipped unless more than one
# locus is configured.
# NOTE(review): presumably len() counts the individual loci held in the combined
# GeneLocus value - confirm against the GeneLocus definition.
if len(config.gene_loci_used) > 1:
    # Echo which loci are being combined.
    print(config.gene_loci_used)
    # One analysis run per classification target.
    for target_obs_column in config.classification_targets:
        run_analysis(
            target_obs_column=target_obs_column,
            gene_locus=config.gene_loci_used,
        )

GeneLocus.BCR|TCR


## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.983 +/- 0.005 (in 3 folds),0.985 +/- 0.005 (in 3 folds),0.980 +/- 0.006 (in 3 folds),0.982 +/- 0.005 (in 3 folds),0.894 +/- 0.028 (in 3 folds),0.847 +/- 0.038 (in 3 folds),0.894,0.846,0.879 +/- 0.048 (in 3 folds),0.829 +/- 0.061 (in 3 folds),...,0.987 +/- 0.000 (in 1 folds),0.879,0.826,0.017,Unknown,407,7,414,0.016908,False
elasticnet_cv,0.982 +/- 0.005 (in 3 folds),0.983 +/- 0.004 (in 3 folds),0.979 +/- 0.006 (in 3 folds),0.981 +/- 0.005 (in 3 folds),0.899 +/- 0.024 (in 3 folds),0.850 +/- 0.036 (in 3 folds),0.899,0.851,0.884 +/- 0.032 (in 3 folds),0.830 +/- 0.046 (in 3 folds),...,0.983 +/- 0.000 (in 1 folds),0.884,0.83,0.017,Unknown,407,7,414,0.016908,False
ridge_cv,0.982 +/- 0.005 (in 3 folds),0.983 +/- 0.005 (in 3 folds),0.976 +/- 0.008 (in 3 folds),0.979 +/- 0.006 (in 3 folds),0.892 +/- 0.038 (in 3 folds),0.840 +/- 0.057 (in 3 folds),0.892,0.84,0.877 +/- 0.045 (in 3 folds),0.820 +/- 0.066 (in 3 folds),...,0.985 +/- 0.000 (in 1 folds),0.877,0.819,0.017,Unknown,407,7,414,0.016908,False
rf_multiclass,0.981 +/- 0.013 (in 3 folds),0.981 +/- 0.014 (in 3 folds),0.976 +/- 0.016 (in 3 folds),0.978 +/- 0.015 (in 3 folds),0.901 +/- 0.027 (in 3 folds),0.855 +/- 0.041 (in 3 folds),0.902,0.854,0.886 +/- 0.036 (in 3 folds),0.835 +/- 0.052 (in 3 folds),...,0.988 +/- 0.000 (in 1 folds),0.886,0.833,0.017,Unknown,407,7,414,0.016908,False
linearsvm_ovr,0.980 +/- 0.003 (in 3 folds),0.982 +/- 0.001 (in 3 folds),0.977 +/- 0.005 (in 3 folds),0.980 +/- 0.003 (in 3 folds),0.899 +/- 0.004 (in 3 folds),0.854 +/- 0.004 (in 3 folds),0.899,0.852,0.884 +/- 0.024 (in 3 folds),0.835 +/- 0.029 (in 3 folds),...,0.983 +/- 0.000 (in 1 folds),0.884,0.832,0.017,Unknown,407,7,414,0.016908,False
lasso_cv,0.976 +/- 0.010 (in 3 folds),0.978 +/- 0.009 (in 3 folds),0.975 +/- 0.007 (in 3 folds),0.978 +/- 0.007 (in 3 folds),0.897 +/- 0.028 (in 3 folds),0.847 +/- 0.041 (in 3 folds),0.897,0.847,0.881 +/- 0.034 (in 3 folds),0.827 +/- 0.050 (in 3 folds),...,0.981 +/- 0.000 (in 1 folds),0.882,0.826,0.017,Unknown,407,7,414,0.016908,False
xgboost,0.973 +/- 0.008 (in 3 folds),0.971 +/- 0.009 (in 3 folds),0.971 +/- 0.008 (in 3 folds),0.971 +/- 0.009 (in 3 folds),0.889 +/- 0.036 (in 3 folds),0.839 +/- 0.051 (in 3 folds),0.889,0.837,0.874 +/- 0.054 (in 3 folds),0.820 +/- 0.072 (in 3 folds),...,0.976 +/- 0.000 (in 1 folds),0.874,0.817,0.017,Unknown,407,7,414,0.016908,False
dummy_stratified,0.516 +/- 0.029 (in 3 folds),0.514 +/- 0.026 (in 3 folds),0.516 +/- 0.018 (in 3 folds),0.516 +/- 0.019 (in 3 folds),0.359 +/- 0.042 (in 3 folds),0.034 +/- 0.064 (in 3 folds),0.359,0.034,0.352 +/- 0.038 (in 3 folds),0.035 +/- 0.062 (in 3 folds),...,0.534 +/- 0.000 (in 1 folds),0.353,0.036,0.017,Unknown,407,7,414,0.016908,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.472 +/- 0.004 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.472,0.0,0.464 +/- 0.009 (in 3 folds),0.020 +/- 0.018 (in 3 folds),...,0.500 +/- 0.000 (in 1 folds),0.464,0.021,0.017,Unknown,407,7,414,0.016908,True




## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease, metamodel flavor isotype_counts_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease/isotype_counts_only/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease/isotype_counts_only/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'isotype_counts': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f6120801a00>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.707 +/- 0.008 (in 3 folds),0.660 +/- 0.007 (in 3 folds),0.713 +/- 0.017 (in 3 folds),0.673 +/- 0.017 (in 3 folds),0.544 +/- 0.039 (in 3 folds),0.260 +/- 0.080 (in 3 folds),0.543,0.261,414,0,414,0.0,True
lasso_multiclass,0.704 +/- 0.023 (in 3 folds),0.664 +/- 0.019 (in 3 folds),0.697 +/- 0.014 (in 3 folds),0.669 +/- 0.010 (in 3 folds),0.500 +/- 0.046 (in 3 folds),0.273 +/- 0.051 (in 3 folds),0.5,0.268,414,0,414,0.0,False
rf_multiclass,0.702 +/- 0.019 (in 3 folds),0.666 +/- 0.018 (in 3 folds),0.684 +/- 0.017 (in 3 folds),0.661 +/- 0.015 (in 3 folds),0.548 +/- 0.008 (in 3 folds),0.302 +/- 0.021 (in 3 folds),0.548,0.302,414,0,414,0.0,False
elasticnet_cv,0.702 +/- 0.003 (in 3 folds),0.656 +/- 0.003 (in 3 folds),0.703 +/- 0.017 (in 3 folds),0.666 +/- 0.018 (in 3 folds),0.539 +/- 0.043 (in 3 folds),0.248 +/- 0.090 (in 3 folds),0.539,0.252,414,0,414,0.0,True
linearsvm_ovr,0.695 +/- 0.020 (in 3 folds),0.647 +/- 0.015 (in 3 folds),0.686 +/- 0.015 (in 3 folds),0.650 +/- 0.012 (in 3 folds),0.503 +/- 0.046 (in 3 folds),0.241 +/- 0.068 (in 3 folds),0.502,0.234,414,0,414,0.0,False
ridge_cv,0.691 +/- 0.022 (in 3 folds),0.646 +/- 0.023 (in 3 folds),0.683 +/- 0.023 (in 3 folds),0.649 +/- 0.018 (in 3 folds),0.522 +/- 0.059 (in 3 folds),0.212 +/- 0.126 (in 3 folds),0.522,0.217,414,0,414,0.0,True
xgboost,0.667 +/- 0.013 (in 3 folds),0.637 +/- 0.012 (in 3 folds),0.662 +/- 0.017 (in 3 folds),0.643 +/- 0.019 (in 3 folds),0.512 +/- 0.040 (in 3 folds),0.255 +/- 0.068 (in 3 folds),0.512,0.256,414,0,414,0.0,False
dummy_stratified,0.512 +/- 0.029 (in 3 folds),0.514 +/- 0.027 (in 3 folds),0.513 +/- 0.016 (in 3 folds),0.515 +/- 0.016 (in 3 folds),0.348 +/- 0.040 (in 3 folds),0.018 +/- 0.061 (in 3 folds),0.348,0.019,414,0,414,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.469 +/- 0.002 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.469,0.0,414,0,414,0.0,True


       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHA',
       'isotype_counts:isotype_proportion:IGHD-M'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHD-M',
       'isotype_counts:isotype_proportion:IGHG'],
      dtype='object')
       'isotype_counts:isotype_proportion:IGHD-M',
       'isotype_counts:isotype_proportion:IGHG'],
      dtype='object')
       'isotype_

## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.983 +/- 0.009 (in 3 folds),0.986 +/- 0.007 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.985 +/- 0.007 (in 3 folds),0.889 +/- 0.015 (in 3 folds),0.839 +/- 0.018 (in 3 folds),0.889,0.837,0.874 +/- 0.022 (in 3 folds),0.819 +/- 0.028 (in 3 folds),...,0.990 +/- 0.000 (in 1 folds),0.874,0.817,0.017,Unknown,352,6,358,0.01676,False
elasticnet_cv,0.982 +/- 0.005 (in 3 folds),0.985 +/- 0.004 (in 3 folds),0.981 +/- 0.004 (in 3 folds),0.984 +/- 0.004 (in 3 folds),0.906 +/- 0.018 (in 3 folds),0.862 +/- 0.028 (in 3 folds),0.906,0.862,0.891 +/- 0.033 (in 3 folds),0.842 +/- 0.048 (in 3 folds),...,0.988 +/- 0.000 (in 1 folds),0.891,0.841,0.017,Unknown,352,6,358,0.01676,False
lasso_multiclass,0.981 +/- 0.007 (in 3 folds),0.985 +/- 0.006 (in 3 folds),0.981 +/- 0.006 (in 3 folds),0.984 +/- 0.005 (in 3 folds),0.889 +/- 0.024 (in 3 folds),0.845 +/- 0.026 (in 3 folds),0.889,0.842,0.874 +/- 0.038 (in 3 folds),0.825 +/- 0.045 (in 3 folds),...,0.990 +/- 0.000 (in 1 folds),0.874,0.823,0.017,Unknown,352,6,358,0.01676,False
lasso_cv,0.980 +/- 0.006 (in 3 folds),0.983 +/- 0.005 (in 3 folds),0.980 +/- 0.007 (in 3 folds),0.983 +/- 0.007 (in 3 folds),0.872 +/- 0.029 (in 3 folds),0.810 +/- 0.049 (in 3 folds),0.872,0.811,0.858 +/- 0.032 (in 3 folds),0.791 +/- 0.054 (in 3 folds),...,0.989 +/- 0.000 (in 1 folds),0.858,0.791,0.017,Unknown,352,6,358,0.01676,False
ridge_cv,0.979 +/- 0.004 (in 3 folds),0.982 +/- 0.003 (in 3 folds),0.976 +/- 0.005 (in 3 folds),0.980 +/- 0.003 (in 3 folds),0.918 +/- 0.012 (in 3 folds),0.881 +/- 0.022 (in 3 folds),0.918,0.879,0.902 +/- 0.012 (in 3 folds),0.860 +/- 0.021 (in 3 folds),...,0.983 +/- 0.000 (in 1 folds),0.902,0.858,0.017,Unknown,352,6,358,0.01676,False
xgboost,0.973 +/- 0.008 (in 3 folds),0.972 +/- 0.010 (in 3 folds),0.973 +/- 0.009 (in 3 folds),0.975 +/- 0.008 (in 3 folds),0.889 +/- 0.001 (in 3 folds),0.839 +/- 0.005 (in 3 folds),0.889,0.837,0.874 +/- 0.016 (in 3 folds),0.819 +/- 0.023 (in 3 folds),...,0.979 +/- 0.000 (in 1 folds),0.874,0.817,0.017,Unknown,352,6,358,0.01676,False
linearsvm_ovr,0.966 +/- 0.011 (in 3 folds),0.970 +/- 0.013 (in 3 folds),0.968 +/- 0.006 (in 3 folds),0.973 +/- 0.007 (in 3 folds),0.878 +/- 0.027 (in 3 folds),0.826 +/- 0.031 (in 3 folds),0.878,0.822,0.863 +/- 0.034 (in 3 folds),0.807 +/- 0.041 (in 3 folds),...,0.968 +/- 0.000 (in 1 folds),0.863,0.802,0.017,Unknown,352,6,358,0.01676,False
dummy_stratified,0.514 +/- 0.021 (in 3 folds),0.516 +/- 0.018 (in 3 folds),0.512 +/- 0.009 (in 3 folds),0.514 +/- 0.010 (in 3 folds),0.353 +/- 0.046 (in 3 folds),0.029 +/- 0.051 (in 3 folds),0.352,0.028,0.346 +/- 0.039 (in 3 folds),0.030 +/- 0.051 (in 3 folds),...,0.504 +/- 0.000 (in 1 folds),0.346,0.03,0.017,Unknown,352,6,358,0.01676,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.466 +/- 0.039 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.466,0.0,0.458 +/- 0.034 (in 3 folds),0.030 +/- 0.028 (in 3 folds),...,0.500 +/- 0.000 (in 1 folds),0.458,0.033,0.017,Unknown,352,6,358,0.01676,True




## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor with_demographics_columns from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/with_demographics_columns/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/with_demographics_columns/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.984 +/- 0.004 (in 3 folds),0.986 +/- 0.003 (in 3 folds),0.983 +/- 0.005 (in 3 folds),0.986 +/- 0.005 (in 3 folds),0.895 +/- 0.006 (in 3 folds),0.845 +/- 0.013 (in 3 folds),0.895,0.845,0.880 +/- 0.020 (in 3 folds),0.825 +/- 0.032 (in 3 folds),...,0.989 +/- 0.000 (in 1 folds),0.88,0.824,0.017,Unknown,352,6,358,0.01676,False
rf_multiclass,0.980 +/- 0.007 (in 3 folds),0.983 +/- 0.005 (in 3 folds),0.978 +/- 0.006 (in 3 folds),0.982 +/- 0.004 (in 3 folds),0.881 +/- 0.038 (in 3 folds),0.828 +/- 0.046 (in 3 folds),0.881,0.826,0.866 +/- 0.024 (in 3 folds),0.807 +/- 0.026 (in 3 folds),...,0.982 +/- 0.000 (in 1 folds),0.866,0.807,0.017,Unknown,352,6,358,0.01676,False
lasso_cv,0.978 +/- 0.007 (in 3 folds),0.979 +/- 0.007 (in 3 folds),0.974 +/- 0.011 (in 3 folds),0.977 +/- 0.011 (in 3 folds),0.849 +/- 0.028 (in 3 folds),0.776 +/- 0.049 (in 3 folds),0.849,0.778,0.835 +/- 0.042 (in 3 folds),0.758 +/- 0.065 (in 3 folds),...,0.988 +/- 0.000 (in 1 folds),0.835,0.759,0.017,Unknown,352,6,358,0.01676,False
xgboost,0.977 +/- 0.008 (in 3 folds),0.977 +/- 0.007 (in 3 folds),0.975 +/- 0.008 (in 3 folds),0.976 +/- 0.006 (in 3 folds),0.892 +/- 0.021 (in 3 folds),0.842 +/- 0.027 (in 3 folds),0.892,0.841,0.877 +/- 0.021 (in 3 folds),0.822 +/- 0.027 (in 3 folds),...,0.983 +/- 0.000 (in 1 folds),0.877,0.821,0.017,Unknown,352,6,358,0.01676,False
lasso_multiclass,0.975 +/- 0.008 (in 3 folds),0.976 +/- 0.010 (in 3 folds),0.969 +/- 0.011 (in 3 folds),0.972 +/- 0.012 (in 3 folds),0.880 +/- 0.032 (in 3 folds),0.828 +/- 0.043 (in 3 folds),0.881,0.828,0.866 +/- 0.043 (in 3 folds),0.810 +/- 0.057 (in 3 folds),...,0.965 +/- 0.000 (in 1 folds),0.866,0.809,0.017,Unknown,352,6,358,0.01676,False
ridge_cv,0.975 +/- 0.006 (in 3 folds),0.976 +/- 0.007 (in 3 folds),0.970 +/- 0.008 (in 3 folds),0.972 +/- 0.010 (in 3 folds),0.869 +/- 0.019 (in 3 folds),0.809 +/- 0.019 (in 3 folds),0.869,0.807,0.855 +/- 0.010 (in 3 folds),0.789 +/- 0.009 (in 3 folds),...,0.960 +/- 0.000 (in 1 folds),0.855,0.788,0.017,Unknown,352,6,358,0.01676,False
linearsvm_ovr,0.941 +/- 0.012 (in 3 folds),0.943 +/- 0.014 (in 3 folds),0.945 +/- 0.011 (in 3 folds),0.948 +/- 0.011 (in 3 folds),0.835 +/- 0.049 (in 3 folds),0.759 +/- 0.071 (in 3 folds),0.835,0.757,0.822 +/- 0.062 (in 3 folds),0.743 +/- 0.086 (in 3 folds),...,0.944 +/- 0.000 (in 1 folds),0.821,0.74,0.017,Unknown,352,6,358,0.01676,False
dummy_stratified,0.514 +/- 0.021 (in 3 folds),0.516 +/- 0.018 (in 3 folds),0.512 +/- 0.009 (in 3 folds),0.514 +/- 0.010 (in 3 folds),0.353 +/- 0.046 (in 3 folds),0.029 +/- 0.051 (in 3 folds),0.352,0.028,0.346 +/- 0.039 (in 3 folds),0.030 +/- 0.051 (in 3 folds),...,0.504 +/- 0.000 (in 1 folds),0.346,0.03,0.017,Unknown,352,6,358,0.01676,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.466 +/- 0.039 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.466,0.0,0.458 +/- 0.034 (in 3 folds),0.030 +/- 0.028 (in 3 folds),...,0.500 +/- 0.000 (in 1 folds),0.458,0.033,0.017,Unknown,352,6,358,0.01676,True




## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_regressed_out from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_regressed_out/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_regressed_out/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.969 +/- 0.016 (in 3 folds),0.969 +/- 0.016 (in 3 folds),0.963 +/- 0.020 (in 3 folds),0.965 +/- 0.018 (in 3 folds),0.855 +/- 0.018 (in 3 folds),0.788 +/- 0.033 (in 3 folds),0.855,0.785,0.841 +/- 0.032 (in 3 folds),0.770 +/- 0.050 (in 3 folds),...,0.984 +/- 0.000 (in 1 folds),0.841,0.766,0.017,Unknown,352,6,358,0.01676,False
xgboost,0.951 +/- 0.024 (in 3 folds),0.951 +/- 0.023 (in 3 folds),0.946 +/- 0.029 (in 3 folds),0.948 +/- 0.027 (in 3 folds),0.801 +/- 0.038 (in 3 folds),0.707 +/- 0.068 (in 3 folds),0.801,0.705,0.788 +/- 0.046 (in 3 folds),0.691 +/- 0.075 (in 3 folds),...,0.979 +/- 0.000 (in 1 folds),0.788,0.689,0.017,Unknown,352,6,358,0.01676,False
lasso_multiclass,0.912 +/- 0.035 (in 3 folds),0.915 +/- 0.040 (in 3 folds),0.915 +/- 0.042 (in 3 folds),0.918 +/- 0.047 (in 3 folds),0.747 +/- 0.059 (in 3 folds),0.645 +/- 0.079 (in 3 folds),0.747,0.642,0.735 +/- 0.070 (in 3 folds),0.632 +/- 0.091 (in 3 folds),...,0.955 +/- 0.000 (in 1 folds),0.735,0.627,0.017,Unknown,352,6,358,0.01676,False
lasso_cv,0.894 +/- 0.020 (in 3 folds),0.898 +/- 0.025 (in 3 folds),0.911 +/- 0.024 (in 3 folds),0.915 +/- 0.032 (in 3 folds),0.747 +/- 0.058 (in 3 folds),0.621 +/- 0.100 (in 3 folds),0.747,0.62,0.735 +/- 0.068 (in 3 folds),0.609 +/- 0.111 (in 3 folds),...,0.940 +/- 0.000 (in 1 folds),0.735,0.606,0.017,Unknown,352,6,358,0.01676,False
linearsvm_ovr,0.893 +/- 0.058 (in 3 folds),0.896 +/- 0.062 (in 3 folds),0.894 +/- 0.065 (in 3 folds),0.898 +/- 0.070 (in 3 folds),0.744 +/- 0.095 (in 3 folds),0.633 +/- 0.133 (in 3 folds),0.744,0.63,0.732 +/- 0.102 (in 3 folds),0.620 +/- 0.139 (in 3 folds),...,0.937 +/- 0.000 (in 1 folds),0.732,0.615,0.017,Unknown,352,6,358,0.01676,False
elasticnet_cv,0.893 +/- 0.028 (in 3 folds),0.896 +/- 0.034 (in 3 folds),0.913 +/- 0.029 (in 3 folds),0.918 +/- 0.035 (in 3 folds),0.761 +/- 0.048 (in 3 folds),0.647 +/- 0.078 (in 3 folds),0.761,0.643,0.749 +/- 0.057 (in 3 folds),0.634 +/- 0.089 (in 3 folds),...,0.940 +/- 0.000 (in 1 folds),0.749,0.628,0.017,Unknown,352,6,358,0.01676,False
ridge_cv,0.882 +/- 0.025 (in 3 folds),0.885 +/- 0.031 (in 3 folds),0.895 +/- 0.027 (in 3 folds),0.901 +/- 0.033 (in 3 folds),0.756 +/- 0.026 (in 3 folds),0.641 +/- 0.060 (in 3 folds),0.756,0.638,0.743 +/- 0.033 (in 3 folds),0.626 +/- 0.068 (in 3 folds),...,0.926 +/- 0.000 (in 1 folds),0.743,0.622,0.017,Unknown,352,6,358,0.01676,False
dummy_stratified,0.514 +/- 0.021 (in 3 folds),0.516 +/- 0.018 (in 3 folds),0.512 +/- 0.009 (in 3 folds),0.514 +/- 0.010 (in 3 folds),0.353 +/- 0.046 (in 3 folds),0.029 +/- 0.051 (in 3 folds),0.352,0.028,0.346 +/- 0.039 (in 3 folds),0.030 +/- 0.051 (in 3 folds),...,0.504 +/- 0.000 (in 1 folds),0.346,0.03,0.017,Unknown,352,6,358,0.01676,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.466 +/- 0.039 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.466,0.0,0.458 +/- 0.034 (in 3 folds),0.030 +/- 0.028 (in 3 folds),...,0.500 +/- 0.000 (in 1 folds),0.458,0.033,0.017,Unknown,352,6,358,0.01676,True




## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f5fea469f10>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.858 +/- 0.031 (in 3 folds),0.872 +/- 0.037 (in 3 folds),0.848 +/- 0.024 (in 3 folds),0.865 +/- 0.032 (in 3 folds),0.626 +/- 0.043 (in 3 folds),0.433 +/- 0.065 (in 3 folds),0.626,0.429,358,0,358,0.0,False
elasticnet_cv,0.856 +/- 0.033 (in 3 folds),0.870 +/- 0.037 (in 3 folds),0.851 +/- 0.023 (in 3 folds),0.869 +/- 0.030 (in 3 folds),0.692 +/- 0.059 (in 3 folds),0.554 +/- 0.083 (in 3 folds),0.693,0.552,358,0,358,0.0,False
rf_multiclass,0.856 +/- 0.031 (in 3 folds),0.872 +/- 0.034 (in 3 folds),0.845 +/- 0.033 (in 3 folds),0.864 +/- 0.034 (in 3 folds),0.665 +/- 0.033 (in 3 folds),0.499 +/- 0.059 (in 3 folds),0.665,0.5,358,0,358,0.0,False
linearsvm_ovr,0.853 +/- 0.023 (in 3 folds),0.868 +/- 0.029 (in 3 folds),0.848 +/- 0.012 (in 3 folds),0.866 +/- 0.020 (in 3 folds),0.656 +/- 0.033 (in 3 folds),0.522 +/- 0.034 (in 3 folds),0.656,0.522,358,0,358,0.0,False
lasso_multiclass,0.851 +/- 0.031 (in 3 folds),0.870 +/- 0.033 (in 3 folds),0.845 +/- 0.021 (in 3 folds),0.867 +/- 0.025 (in 3 folds),0.642 +/- 0.049 (in 3 folds),0.522 +/- 0.039 (in 3 folds),0.642,0.521,358,0,358,0.0,False
lasso_cv,0.845 +/- 0.032 (in 3 folds),0.860 +/- 0.038 (in 3 folds),0.843 +/- 0.022 (in 3 folds),0.861 +/- 0.030 (in 3 folds),0.653 +/- 0.055 (in 3 folds),0.489 +/- 0.072 (in 3 folds),0.654,0.485,358,0,358,0.0,False
xgboost,0.843 +/- 0.049 (in 3 folds),0.860 +/- 0.047 (in 3 folds),0.848 +/- 0.041 (in 3 folds),0.867 +/- 0.038 (in 3 folds),0.662 +/- 0.048 (in 3 folds),0.497 +/- 0.068 (in 3 folds),0.662,0.496,358,0,358,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
dummy_stratified,0.486 +/- 0.036 (in 3 folds),0.493 +/- 0.034 (in 3 folds),0.506 +/- 0.011 (in 3 folds),0.510 +/- 0.010 (in 3 folds),0.310 +/- 0.050 (in 3 folds),-0.032 +/- 0.082 (in 3 folds),0.31,-0.033,358,0,358,0.0,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_age from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only_age/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only_age/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f5fea469b80>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.704 +/- 0.036 (in 3 folds),0.724 +/- 0.036 (in 3 folds),0.686 +/- 0.036 (in 3 folds),0.708 +/- 0.036 (in 3 folds),0.467 +/- 0.022 (in 3 folds),0.255 +/- 0.028 (in 3 folds),0.466,0.25,358,0,358,0.0,False
xgboost,0.697 +/- 0.032 (in 3 folds),0.715 +/- 0.033 (in 3 folds),0.691 +/- 0.030 (in 3 folds),0.710 +/- 0.026 (in 3 folds),0.466 +/- 0.037 (in 3 folds),0.200 +/- 0.035 (in 3 folds),0.466,0.199,358,0,358,0.0,False
lasso_multiclass,0.681 +/- 0.067 (in 3 folds),0.707 +/- 0.070 (in 3 folds),0.687 +/- 0.059 (in 3 folds),0.715 +/- 0.065 (in 3 folds),0.338 +/- 0.013 (in 3 folds),0.199 +/- 0.060 (in 3 folds),0.338,0.193,358,0,358,0.0,False
linearsvm_ovr,0.663 +/- 0.028 (in 3 folds),0.681 +/- 0.033 (in 3 folds),0.678 +/- 0.025 (in 3 folds),0.700 +/- 0.034 (in 3 folds),0.441 +/- 0.045 (in 3 folds),0.145 +/- 0.063 (in 3 folds),0.441,0.144,358,0,358,0.0,True
elasticnet_cv,0.659 +/- 0.007 (in 3 folds),0.676 +/- 0.011 (in 3 folds),0.679 +/- 0.021 (in 3 folds),0.699 +/- 0.029 (in 3 folds),0.472 +/- 0.010 (in 3 folds),0.092 +/- 0.093 (in 3 folds),0.472,0.11,358,0,358,0.0,True
lasso_cv,0.647 +/- 0.044 (in 3 folds),0.665 +/- 0.045 (in 3 folds),0.671 +/- 0.049 (in 3 folds),0.692 +/- 0.052 (in 3 folds),0.472 +/- 0.010 (in 3 folds),0.092 +/- 0.093 (in 3 folds),0.472,0.11,358,0,358,0.0,True
ridge_cv,0.640 +/- 0.039 (in 3 folds),0.657 +/- 0.045 (in 3 folds),0.659 +/- 0.043 (in 3 folds),0.681 +/- 0.051 (in 3 folds),0.480 +/- 0.024 (in 3 folds),0.109 +/- 0.097 (in 3 folds),0.48,0.133,358,0,358,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
dummy_stratified,0.486 +/- 0.036 (in 3 folds),0.493 +/- 0.034 (in 3 folds),0.506 +/- 0.011 (in 3 folds),0.510 +/- 0.010 (in 3 folds),0.310 +/- 0.050 (in 3 folds),-0.032 +/- 0.082 (in 3 folds),0.31,-0.033,358,0,358,0.0,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_sex from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only_sex/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only_sex/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f5fea469d00>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.579 +/- 0.023 (in 3 folds),0.573 +/- 0.026 (in 3 folds),0.547 +/- 0.013 (in 3 folds),0.545 +/- 0.013 (in 3 folds),0.332 +/- 0.090 (in 3 folds),0.118 +/- 0.066 (in 3 folds),0.332,0.11,358,0,358,0.0,False
linearsvm_ovr,0.573 +/- 0.019 (in 3 folds),0.560 +/- 0.024 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.537 +/- 0.015 (in 3 folds),0.397 +/- 0.025 (in 3 folds),0.104 +/- 0.091 (in 3 folds),0.397,0.089,358,0,358,0.0,True
xgboost,0.573 +/- 0.019 (in 3 folds),0.560 +/- 0.024 (in 3 folds),0.543 +/- 0.013 (in 3 folds),0.537 +/- 0.015 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
lasso_multiclass,0.561 +/- 0.030 (in 3 folds),0.556 +/- 0.028 (in 3 folds),0.540 +/- 0.016 (in 3 folds),0.538 +/- 0.014 (in 3 folds),0.332 +/- 0.090 (in 3 folds),0.118 +/- 0.066 (in 3 folds),0.332,0.11,358,0,358,0.0,False
ridge_cv,0.530 +/- 0.052 (in 3 folds),0.529 +/- 0.051 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.517 +/- 0.029 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
lasso_cv,0.512 +/- 0.020 (in 3 folds),0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.510 +/- 0.017 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
elasticnet_cv,0.512 +/- 0.020 (in 3 folds),0.512 +/- 0.020 (in 3 folds),0.509 +/- 0.016 (in 3 folds),0.510 +/- 0.017 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
dummy_stratified,0.486 +/- 0.036 (in 3 folds),0.493 +/- 0.034 (in 3 folds),0.506 +/- 0.011 (in 3 folds),0.510 +/- 0.010 (in 3 folds),0.310 +/- 0.050 (in 3 folds),-0.032 +/- 0.082 (in 3 folds),0.31,-0.033,358,0,358,0.0,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.disease_all_demographics_present, metamodel flavor demographics_only_ethnicity_condensed from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only_ethnicity_condensed/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/disease_all_demographics_present/demographics_only_ethnicity_condensed/train_smaller_applied_to_validation_model

MetamodelConfig(submodels=None, extra_metadata_featurizers={'demographics': <malid.trained_model_wrappers.blending_metamodel.DemographicsFeaturizer object at 0x7f5fea469a60>}, interaction_terms=None, regress_out_featurizers=None, regress_out_pipeline=None, sample_weight_strategy=<SampleWeightStrategy.ISOTYPE_USAGE: 3>)


Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
ridge_cv,0.792 +/- 0.029 (in 3 folds),0.800 +/- 0.032 (in 3 folds),0.770 +/- 0.021 (in 3 folds),0.783 +/- 0.026 (in 3 folds),0.659 +/- 0.079 (in 3 folds),0.499 +/- 0.114 (in 3 folds),0.659,0.495,358,0,358,0.0,True
xgboost,0.790 +/- 0.032 (in 3 folds),0.794 +/- 0.036 (in 3 folds),0.769 +/- 0.021 (in 3 folds),0.779 +/- 0.025 (in 3 folds),0.664 +/- 0.069 (in 3 folds),0.510 +/- 0.095 (in 3 folds),0.665,0.504,358,0,358,0.0,False
rf_multiclass,0.785 +/- 0.017 (in 3 folds),0.794 +/- 0.022 (in 3 folds),0.766 +/- 0.014 (in 3 folds),0.778 +/- 0.021 (in 3 folds),0.564 +/- 0.097 (in 3 folds),0.433 +/- 0.097 (in 3 folds),0.564,0.42,358,0,358,0.0,False
elasticnet_cv,0.780 +/- 0.025 (in 3 folds),0.791 +/- 0.029 (in 3 folds),0.767 +/- 0.021 (in 3 folds),0.781 +/- 0.026 (in 3 folds),0.659 +/- 0.079 (in 3 folds),0.499 +/- 0.114 (in 3 folds),0.659,0.495,358,0,358,0.0,True
linearsvm_ovr,0.775 +/- 0.023 (in 3 folds),0.782 +/- 0.023 (in 3 folds),0.761 +/- 0.014 (in 3 folds),0.772 +/- 0.017 (in 3 folds),0.678 +/- 0.045 (in 3 folds),0.534 +/- 0.054 (in 3 folds),0.679,0.533,358,0,358,0.0,True
lasso_cv,0.771 +/- 0.055 (in 3 folds),0.783 +/- 0.060 (in 3 folds),0.750 +/- 0.053 (in 3 folds),0.764 +/- 0.057 (in 3 folds),0.659 +/- 0.079 (in 3 folds),0.499 +/- 0.114 (in 3 folds),0.659,0.495,358,0,358,0.0,True
lasso_multiclass,0.759 +/- 0.023 (in 3 folds),0.763 +/- 0.046 (in 3 folds),0.749 +/- 0.016 (in 3 folds),0.758 +/- 0.030 (in 3 folds),0.639 +/- 0.063 (in 3 folds),0.498 +/- 0.056 (in 3 folds),0.64,0.483,358,0,358,0.0,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.461 +/- 0.034 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.461,0.0,358,0,358,0.0,True
dummy_stratified,0.486 +/- 0.036 (in 3 folds),0.493 +/- 0.034 (in 3 folds),0.506 +/- 0.011 (in 3 folds),0.510 +/- 0.010 (in 3 folds),0.310 +/- 0.050 (in 3 folds),-0.032 +/- 0.082 (in 3 folds),0.31,-0.033,358,0,358,0.0,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.covid_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/covid_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/covid_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.999 +/- 0.001 (in 3 folds),0.999 +/- 0.001 (in 3 folds),1.000 +/- 0.000 (in 3 folds),1.000 +/- 0.000 (in 3 folds),0.963 +/- 0.045 (in 3 folds),0.891 +/- 0.134 (in 3 folds),0.964,0.896,0.948 +/- 0.054 (in 3 folds),0.852 +/- 0.160 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.948,0.854,0.016,Unknown,248,4,252,0.015873,False
ridge_cv,0.999 +/- 0.001 (in 3 folds),0.999 +/- 0.001 (in 3 folds),1.000 +/- 0.000 (in 3 folds),1.000 +/- 0.000 (in 3 folds),0.959 +/- 0.051 (in 3 folds),0.878 +/- 0.156 (in 3 folds),0.96,0.885,0.944 +/- 0.061 (in 3 folds),0.837 +/- 0.180 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.944,0.842,0.016,Unknown,248,4,252,0.015873,False
lasso_cv,0.998 +/- 0.002 (in 3 folds),0.998 +/- 0.002 (in 3 folds),1.000 +/- 0.000 (in 3 folds),1.000 +/- 0.000 (in 3 folds),0.971 +/- 0.031 (in 3 folds),0.916 +/- 0.092 (in 3 folds),0.972,0.919,0.956 +/- 0.042 (in 3 folds),0.877 +/- 0.121 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.956,0.878,0.016,Unknown,248,4,252,0.015873,False
lasso_multiclass,0.996 +/- 0.005 (in 3 folds),0.996 +/- 0.005 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.999 +/- 0.001 (in 3 folds),0.971 +/- 0.019 (in 3 folds),0.920 +/- 0.057 (in 3 folds),0.972,0.922,0.956 +/- 0.030 (in 3 folds),0.885 +/- 0.083 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.956,0.884,0.016,Unknown,248,4,252,0.015873,False
linearsvm_ovr,0.995 +/- 0.008 (in 3 folds),0.995 +/- 0.008 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.999 +/- 0.002 (in 3 folds),0.971 +/- 0.031 (in 3 folds),0.918 +/- 0.092 (in 3 folds),0.972,0.921,0.956 +/- 0.042 (in 3 folds),0.883 +/- 0.117 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.956,0.882,0.016,Unknown,248,4,252,0.015873,False
rf_multiclass,0.995 +/- 0.007 (in 3 folds),0.995 +/- 0.007 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.998 +/- 0.002 (in 3 folds),0.967 +/- 0.028 (in 3 folds),0.906 +/- 0.081 (in 3 folds),0.968,0.908,0.952 +/- 0.041 (in 3 folds),0.868 +/- 0.115 (in 3 folds),...,1.000 +/- 0.000 (in 1 folds),0.952,0.867,0.016,Unknown,248,4,252,0.015873,False
xgboost,0.990 +/- 0.008 (in 3 folds),0.990 +/- 0.008 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.997 +/- 0.003 (in 3 folds),0.955 +/- 0.026 (in 3 folds),0.870 +/- 0.081 (in 3 folds),0.956,0.873,0.940 +/- 0.036 (in 3 folds),0.833 +/- 0.103 (in 3 folds),...,0.999 +/- 0.000 (in 1 folds),0.94,0.834,0.016,Unknown,248,4,252,0.015873,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.770 +/- 0.007 (in 3 folds),0.770 +/- 0.007 (in 3 folds),0.770 +/- 0.007 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.77,0.0,0.758 +/- 0.007 (in 3 folds),0.003 +/- 0.047 (in 3 folds),...,0.765 +/- 0.000 (in 1 folds),0.758,0.003,0.016,Unknown,248,4,252,0.015873,True
dummy_stratified,0.472 +/- 0.049 (in 3 folds),0.472 +/- 0.049 (in 3 folds),0.761 +/- 0.019 (in 3 folds),0.761 +/- 0.019 (in 3 folds),0.641 +/- 0.027 (in 3 folds),-0.062 +/- 0.107 (in 3 folds),0.641,-0.06,0.631 +/- 0.032 (in 3 folds),-0.058 +/- 0.109 (in 3 folds),...,0.765 +/- 0.000 (in 1 folds),0.631,-0.056,0.016,Unknown,248,4,252,0.015873,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.hiv_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/hiv_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/hiv_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.989 +/- 0.005 (in 3 folds),0.989 +/- 0.005 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.957 +/- 0.001 (in 3 folds),0.906 +/- 0.002 (in 3 folds),0.957,0.905,0.925 +/- 0.023 (in 3 folds),0.844 +/- 0.041 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.925,0.842,0.034,Unknown,282,10,292,0.034247,False
elasticnet_cv,0.989 +/- 0.005 (in 3 folds),0.989 +/- 0.005 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.995 +/- 0.002 (in 3 folds),0.937 +/- 0.027 (in 3 folds),0.860 +/- 0.058 (in 3 folds),0.936,0.856,0.904 +/- 0.005 (in 3 folds),0.794 +/- 0.020 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.904,0.792,0.034,Unknown,282,10,292,0.034247,False
ridge_cv,0.987 +/- 0.007 (in 3 folds),0.987 +/- 0.007 (in 3 folds),0.994 +/- 0.002 (in 3 folds),0.994 +/- 0.002 (in 3 folds),0.961 +/- 0.012 (in 3 folds),0.913 +/- 0.027 (in 3 folds),0.961,0.913,0.928 +/- 0.020 (in 3 folds),0.850 +/- 0.037 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.928,0.848,0.034,Unknown,282,10,292,0.034247,False
linearsvm_ovr,0.987 +/- 0.004 (in 3 folds),0.987 +/- 0.004 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.950 +/- 0.006 (in 3 folds),0.891 +/- 0.014 (in 3 folds),0.95,0.89,0.918 +/- 0.027 (in 3 folds),0.830 +/- 0.047 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.918,0.828,0.034,Unknown,282,10,292,0.034247,False
lasso_cv,0.986 +/- 0.005 (in 3 folds),0.986 +/- 0.005 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.993 +/- 0.003 (in 3 folds),0.940 +/- 0.026 (in 3 folds),0.866 +/- 0.059 (in 3 folds),0.94,0.864,0.907 +/- 0.011 (in 3 folds),0.801 +/- 0.029 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.908,0.8,0.034,Unknown,282,10,292,0.034247,False
rf_multiclass,0.982 +/- 0.009 (in 3 folds),0.982 +/- 0.009 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.992 +/- 0.003 (in 3 folds),0.958 +/- 0.010 (in 3 folds),0.906 +/- 0.021 (in 3 folds),0.957,0.905,0.925 +/- 0.015 (in 3 folds),0.843 +/- 0.027 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.925,0.842,0.034,Unknown,282,10,292,0.034247,False
xgboost,0.978 +/- 0.006 (in 3 folds),0.978 +/- 0.006 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.991 +/- 0.002 (in 3 folds),0.943 +/- 0.012 (in 3 folds),0.878 +/- 0.029 (in 3 folds),0.943,0.877,0.911 +/- 0.031 (in 3 folds),0.819 +/- 0.057 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.911,0.817,0.034,Unknown,282,10,292,0.034247,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.663 +/- 0.009 (in 3 folds),0.663 +/- 0.009 (in 3 folds),0.663 +/- 0.009 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.663,0.0,0.640 +/- 0.007 (in 3 folds),-0.020 +/- 0.055 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.64,-0.007,0.034,Unknown,282,10,292,0.034247,True
dummy_stratified,0.492 +/- 0.039 (in 3 folds),0.492 +/- 0.039 (in 3 folds),0.660 +/- 0.010 (in 3 folds),0.660 +/- 0.010 (in 3 folds),0.563 +/- 0.037 (in 3 folds),-0.016 +/- 0.082 (in 3 folds),0.564,-0.016,0.545 +/- 0.046 (in 3 folds),-0.014 +/- 0.067 (in 3 folds),0.034 +/- 0.023 (in 3 folds),0.545,-0.016,0.034,Unknown,282,10,292,0.034247,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.lupus_vs_healthy, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/lupus_vs_healthy/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/lupus_vs_healthy/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
elasticnet_cv,0.982 +/- 0.015 (in 3 folds),0.982 +/- 0.015 (in 3 folds),0.958 +/- 0.033 (in 3 folds),0.958 +/- 0.033 (in 3 folds),0.920 +/- 0.028 (in 3 folds),0.786 +/- 0.088 (in 3 folds),0.92,0.778,0.891 +/- 0.025 (in 3 folds),0.714 +/- 0.093 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.891,0.71,0.031,Unknown,250,8,258,0.031008,False
lasso_multiclass,0.980 +/- 0.015 (in 3 folds),0.980 +/- 0.015 (in 3 folds),0.953 +/- 0.033 (in 3 folds),0.953 +/- 0.033 (in 3 folds),0.916 +/- 0.021 (in 3 folds),0.779 +/- 0.072 (in 3 folds),0.916,0.779,0.887 +/- 0.019 (in 3 folds),0.720 +/- 0.068 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.888,0.721,0.031,Unknown,250,8,258,0.031008,False
ridge_cv,0.979 +/- 0.017 (in 3 folds),0.979 +/- 0.017 (in 3 folds),0.953 +/- 0.035 (in 3 folds),0.953 +/- 0.035 (in 3 folds),0.920 +/- 0.014 (in 3 folds),0.781 +/- 0.045 (in 3 folds),0.92,0.782,0.891 +/- 0.008 (in 3 folds),0.706 +/- 0.027 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.891,0.706,0.031,Unknown,250,8,258,0.031008,False
lasso_cv,0.975 +/- 0.020 (in 3 folds),0.975 +/- 0.020 (in 3 folds),0.946 +/- 0.042 (in 3 folds),0.946 +/- 0.042 (in 3 folds),0.932 +/- 0.019 (in 3 folds),0.817 +/- 0.057 (in 3 folds),0.932,0.814,0.903 +/- 0.014 (in 3 folds),0.749 +/- 0.052 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.903,0.749,0.031,Unknown,250,8,258,0.031008,False
rf_multiclass,0.974 +/- 0.026 (in 3 folds),0.974 +/- 0.026 (in 3 folds),0.950 +/- 0.039 (in 3 folds),0.950 +/- 0.039 (in 3 folds),0.932 +/- 0.028 (in 3 folds),0.813 +/- 0.079 (in 3 folds),0.932,0.813,0.903 +/- 0.025 (in 3 folds),0.741 +/- 0.075 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.903,0.741,0.031,Unknown,250,8,258,0.031008,False
linearsvm_ovr,0.974 +/- 0.020 (in 3 folds),0.974 +/- 0.020 (in 3 folds),0.941 +/- 0.042 (in 3 folds),0.941 +/- 0.042 (in 3 folds),0.912 +/- 0.014 (in 3 folds),0.773 +/- 0.057 (in 3 folds),0.912,0.773,0.884 +/- 0.012 (in 3 folds),0.717 +/- 0.052 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.884,0.717,0.031,Unknown,250,8,258,0.031008,False
xgboost,0.969 +/- 0.032 (in 3 folds),0.969 +/- 0.032 (in 3 folds),0.937 +/- 0.053 (in 3 folds),0.937 +/- 0.053 (in 3 folds),0.932 +/- 0.039 (in 3 folds),0.812 +/- 0.110 (in 3 folds),0.932,0.813,0.903 +/- 0.034 (in 3 folds),0.744 +/- 0.100 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.903,0.746,0.031,Unknown,250,8,258,0.031008,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.248 +/- 0.006 (in 3 folds),0.248 +/- 0.006 (in 3 folds),0.752 +/- 0.006 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.752,0.0,0.729 +/- 0.011 (in 3 folds),0.003 +/- 0.051 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.729,0.0,0.031,Unknown,250,8,258,0.031008,True
dummy_stratified,0.464 +/- 0.052 (in 3 folds),0.464 +/- 0.052 (in 3 folds),0.243 +/- 0.007 (in 3 folds),0.243 +/- 0.007 (in 3 folds),0.616 +/- 0.041 (in 3 folds),-0.076 +/- 0.109 (in 3 folds),0.616,-0.076,0.597 +/- 0.044 (in 3 folds),-0.068 +/- 0.105 (in 3 folds),0.031 +/- 0.006 (in 3 folds),0.597,-0.07,0.031,Unknown,250,8,258,0.031008,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.ethnicity_condensed_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/ethnicity_condensed_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/ethnicity_condensed_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
linearsvm_ovr,0.752 +/- 0.028 (in 3 folds),0.766 +/- 0.041 (in 3 folds),0.745 +/- 0.029 (in 3 folds),0.763 +/- 0.047 (in 3 folds),0.539 +/- 0.031 (in 3 folds),0.326 +/- 0.045 (in 3 folds),0.538,0.324,0.517 +/- 0.040 (in 3 folds),0.308 +/- 0.050 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.515,0.305,0.042,Unknown,158,7,165,0.042424,False
ridge_cv,0.734 +/- 0.055 (in 3 folds),0.758 +/- 0.063 (in 3 folds),0.743 +/- 0.016 (in 3 folds),0.763 +/- 0.028 (in 3 folds),0.644 +/- 0.046 (in 3 folds),0.371 +/- 0.007 (in 3 folds),0.646,0.331,0.617 +/- 0.039 (in 3 folds),0.335 +/- 0.013 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.618,0.309,0.042,Unknown,158,7,165,0.042424,True
rf_multiclass,0.732 +/- 0.064 (in 3 folds),0.729 +/- 0.082 (in 3 folds),0.754 +/- 0.057 (in 3 folds),0.754 +/- 0.071 (in 3 folds),0.692 +/- 0.055 (in 3 folds),0.468 +/- 0.144 (in 3 folds),0.69,0.434,0.664 +/- 0.066 (in 3 folds),0.436 +/- 0.135 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.661,0.405,0.042,Unknown,158,7,165,0.042424,True
elasticnet_cv,0.727 +/- 0.080 (in 3 folds),0.746 +/- 0.092 (in 3 folds),0.738 +/- 0.030 (in 3 folds),0.750 +/- 0.050 (in 3 folds),0.626 +/- 0.087 (in 3 folds),0.385 +/- 0.123 (in 3 folds),0.627,0.331,0.600 +/- 0.088 (in 3 folds),0.352 +/- 0.109 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.6,0.311,0.042,Unknown,158,7,165,0.042424,False
lasso_multiclass,0.721 +/- 0.080 (in 3 folds),0.730 +/- 0.098 (in 3 folds),0.731 +/- 0.028 (in 3 folds),0.741 +/- 0.055 (in 3 folds),0.583 +/- 0.047 (in 3 folds),0.394 +/- 0.024 (in 3 folds),0.582,0.387,0.559 +/- 0.052 (in 3 folds),0.369 +/- 0.025 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.558,0.364,0.042,Unknown,158,7,165,0.042424,False
xgboost,0.712 +/- 0.062 (in 3 folds),0.684 +/- 0.073 (in 3 folds),0.735 +/- 0.068 (in 3 folds),0.722 +/- 0.070 (in 3 folds),0.611 +/- 0.089 (in 3 folds),0.377 +/- 0.159 (in 3 folds),0.608,0.372,0.587 +/- 0.098 (in 3 folds),0.358 +/- 0.157 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.582,0.35,0.042,Unknown,158,7,165,0.042424,False
lasso_cv,0.691 +/- 0.088 (in 3 folds),0.714 +/- 0.088 (in 3 folds),0.712 +/- 0.041 (in 3 folds),0.723 +/- 0.060 (in 3 folds),0.654 +/- 0.107 (in 3 folds),0.389 +/- 0.199 (in 3 folds),0.658,0.332,0.626 +/- 0.091 (in 3 folds),0.324 +/- 0.218 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.63,0.305,0.042,Unknown,158,7,165,0.042424,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.591 +/- 0.096 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.595,0.0,0.566 +/- 0.085 (in 3 folds),0.022 +/- 0.074 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.57,0.031,0.042,Unknown,158,7,165,0.042424,True
dummy_stratified,0.487 +/- 0.036 (in 3 folds),0.499 +/- 0.045 (in 3 folds),0.512 +/- 0.020 (in 3 folds),0.522 +/- 0.032 (in 3 folds),0.333 +/- 0.101 (in 3 folds),-0.056 +/- 0.125 (in 3 folds),0.329,-0.079,0.320 +/- 0.104 (in 3 folds),-0.050 +/- 0.110 (in 3 folds),0.041 +/- 0.019 (in 3 folds),0.315,-0.07,0.042,Unknown,158,7,165,0.042424,False




## GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/age_group_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/age_group_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,...,au-PRC (macro OvO) per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.696 +/- 0.026 (in 3 folds),0.686 +/- 0.031 (in 3 folds),0.734 +/- 0.036 (in 3 folds),0.726 +/- 0.050 (in 3 folds),0.430 +/- 0.089 (in 3 folds),0.307 +/- 0.107 (in 3 folds),0.449,0.334,0.369 +/- 0.171 (in 3 folds),0.268 +/- 0.155 (in 3 folds),...,0.668 +/- 0.000 (in 1 folds),0.37,0.269,0.176,Unknown,136,29,165,0.175758,True
lasso_cv,0.687 +/- 0.041 (in 3 folds),0.678 +/- 0.028 (in 3 folds),0.736 +/- 0.024 (in 3 folds),0.726 +/- 0.033 (in 3 folds),0.363 +/- 0.141 (in 3 folds),0.275 +/- 0.189 (in 3 folds),0.39,0.264,0.316 +/- 0.194 (in 3 folds),0.253 +/- 0.212 (in 3 folds),...,0.688 +/- 0.000 (in 1 folds),0.321,0.212,0.176,Unknown,136,29,165,0.175758,True
ridge_cv,0.683 +/- 0.039 (in 3 folds),0.677 +/- 0.033 (in 3 folds),0.734 +/- 0.026 (in 3 folds),0.729 +/- 0.045 (in 3 folds),0.349 +/- 0.153 (in 3 folds),0.238 +/- 0.191 (in 3 folds),0.375,0.239,0.303 +/- 0.199 (in 3 folds),0.210 +/- 0.217 (in 3 folds),...,0.677 +/- 0.000 (in 1 folds),0.309,0.187,0.176,Unknown,136,29,165,0.175758,True
xgboost,0.681 +/- 0.052 (in 3 folds),0.670 +/- 0.065 (in 3 folds),0.728 +/- 0.046 (in 3 folds),0.723 +/- 0.064 (in 3 folds),0.425 +/- 0.052 (in 3 folds),0.299 +/- 0.066 (in 3 folds),0.434,0.317,0.359 +/- 0.141 (in 3 folds),0.257 +/- 0.114 (in 3 folds),...,0.651 +/- 0.000 (in 1 folds),0.358,0.258,0.176,Unknown,136,29,165,0.175758,False
elasticnet_cv,0.666 +/- 0.058 (in 3 folds),0.656 +/- 0.032 (in 3 folds),0.727 +/- 0.028 (in 3 folds),0.722 +/- 0.025 (in 3 folds),0.313 +/- 0.184 (in 3 folds),0.169 +/- 0.263 (in 3 folds),0.346,0.2,0.278 +/- 0.220 (in 3 folds),0.175 +/- 0.258 (in 3 folds),...,0.701 +/- 0.000 (in 1 folds),0.285,0.157,0.176,Unknown,136,29,165,0.175758,True
linearsvm_ovr,0.662 +/- 0.020 (in 3 folds),0.656 +/- 0.011 (in 3 folds),0.707 +/- 0.012 (in 3 folds),0.701 +/- 0.036 (in 3 folds),0.375 +/- 0.071 (in 3 folds),0.241 +/- 0.089 (in 3 folds),0.39,0.266,0.320 +/- 0.144 (in 3 folds),0.214 +/- 0.123 (in 3 folds),...,0.663 +/- 0.000 (in 1 folds),0.321,0.214,0.176,Unknown,136,29,165,0.175758,True
lasso_multiclass,0.659 +/- 0.016 (in 3 folds),0.651 +/- 0.013 (in 3 folds),0.702 +/- 0.015 (in 3 folds),0.694 +/- 0.041 (in 3 folds),0.327 +/- 0.054 (in 3 folds),0.196 +/- 0.083 (in 3 folds),0.338,0.214,0.279 +/- 0.120 (in 3 folds),0.176 +/- 0.100 (in 3 folds),...,0.653 +/- 0.000 (in 1 folds),0.279,0.173,0.176,Unknown,136,29,165,0.175758,False
dummy_stratified,0.544 +/- 0.008 (in 3 folds),0.546 +/- 0.012 (in 3 folds),0.546 +/- 0.008 (in 3 folds),0.549 +/- 0.010 (in 3 folds),0.245 +/- 0.028 (in 3 folds),0.084 +/- 0.016 (in 3 folds),0.243,0.093,0.199 +/- 0.056 (in 3 folds),0.074 +/- 0.018 (in 3 folds),...,0.558 +/- 0.000 (in 1 folds),0.2,0.077,0.176,Unknown,136,29,165,0.175758,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.211 +/- 0.010 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.213,0.015,0.176 +/- 0.060 (in 3 folds),0.014 +/- 0.022 (in 3 folds),...,0.500 +/- 0.000 (in 1 folds),0.176,0.017,0.176,Unknown,136,29,165,0.175758,True




## GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_binary_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/age_group_binary_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/age_group_binary_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_multiclass,0.748 +/- 0.071 (in 3 folds),0.748 +/- 0.071 (in 3 folds),0.859 +/- 0.041 (in 3 folds),0.859 +/- 0.041 (in 3 folds),0.678 +/- 0.050 (in 3 folds),0.347 +/- 0.129 (in 3 folds),0.678,0.352,0.592 +/- 0.095 (in 3 folds),0.285 +/- 0.154 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.588,0.279,0.133,Unknown,143,22,165,0.133333,False
rf_multiclass,0.719 +/- 0.106 (in 3 folds),0.719 +/- 0.106 (in 3 folds),0.822 +/- 0.088 (in 3 folds),0.822 +/- 0.088 (in 3 folds),0.671 +/- 0.018 (in 3 folds),0.267 +/- 0.077 (in 3 folds),0.671,0.276,0.585 +/- 0.066 (in 3 folds),0.186 +/- 0.119 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.582,0.186,0.133,Unknown,143,22,165,0.133333,False
linearsvm_ovr,0.700 +/- 0.053 (in 3 folds),0.700 +/- 0.053 (in 3 folds),0.833 +/- 0.018 (in 3 folds),0.833 +/- 0.018 (in 3 folds),0.629 +/- 0.051 (in 3 folds),0.230 +/- 0.137 (in 3 folds),0.629,0.237,0.551 +/- 0.095 (in 3 folds),0.187 +/- 0.152 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.545,0.182,0.133,Unknown,143,22,165,0.133333,False
xgboost,0.663 +/- 0.106 (in 3 folds),0.663 +/- 0.106 (in 3 folds),0.790 +/- 0.080 (in 3 folds),0.790 +/- 0.080 (in 3 folds),0.615 +/- 0.056 (in 3 folds),0.129 +/- 0.120 (in 3 folds),0.615,0.138,0.533 +/- 0.030 (in 3 folds),0.065 +/- 0.101 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.533,0.075,0.133,Unknown,143,22,165,0.133333,False
lasso_cv,0.661 +/- 0.156 (in 3 folds),0.661 +/- 0.156 (in 3 folds),0.778 +/- 0.132 (in 3 folds),0.778 +/- 0.132 (in 3 folds),0.664 +/- 0.056 (in 3 folds),0.184 +/- 0.234 (in 3 folds),0.664,0.243,0.581 +/- 0.103 (in 3 folds),0.114 +/- 0.260 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.576,0.136,0.133,Unknown,143,22,165,0.133333,False
elasticnet_cv,0.612 +/- 0.195 (in 3 folds),0.612 +/- 0.195 (in 3 folds),0.721 +/- 0.154 (in 3 folds),0.721 +/- 0.154 (in 3 folds),0.650 +/- 0.032 (in 3 folds),0.118 +/- 0.204 (in 3 folds),0.65,0.199,0.568 +/- 0.080 (in 3 folds),0.048 +/- 0.237 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.564,0.083,0.133,Unknown,143,22,165,0.133333,False
ridge_cv,0.593 +/- 0.162 (in 3 folds),0.593 +/- 0.162 (in 3 folds),0.714 +/- 0.143 (in 3 folds),0.714 +/- 0.143 (in 3 folds),0.643 +/- 0.020 (in 3 folds),0.100 +/- 0.173 (in 3 folds),0.643,0.175,0.561 +/- 0.068 (in 3 folds),0.029 +/- 0.205 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.558,0.058,0.133,Unknown,143,22,165,0.133333,False
dummy_stratified,0.542 +/- 0.050 (in 3 folds),0.542 +/- 0.050 (in 3 folds),0.637 +/- 0.049 (in 3 folds),0.637 +/- 0.049 (in 3 folds),0.560 +/- 0.066 (in 3 folds),0.085 +/- 0.102 (in 3 folds),0.559,0.085,0.485 +/- 0.030 (in 3 folds),0.052 +/- 0.063 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.485,0.059,0.133,Unknown,143,22,165,0.133333,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.615 +/- 0.028 (in 3 folds),0.615 +/- 0.028 (in 3 folds),0.615 +/- 0.028 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.615,0.0,0.534 +/- 0.022 (in 3 folds),-0.087 +/- 0.022 (in 3 folds),0.129 +/- 0.078 (in 3 folds),0.533,-0.088,0.133,Unknown,143,22,165,0.133333,True


2023-01-14 01:31:36,640 - malid.external.model_evaluation - INFO - Removing ('rf_multiclass', 0) because fold 0 is incomplete.
2023-01-14 01:31:36,641 - malid.external.model_evaluation - INFO - Removing ('dummy_stratified', 0) because fold 0 is incomplete.
2023-01-14 01:31:36,643 - malid.external.model_evaluation - INFO - Removing ('dummy_most_frequent', 0) because fold 0 is incomplete.
2023-01-14 01:31:36,644 - malid.external.model_evaluation - INFO - Removing ('xgboost', 0) because fold 0 is incomplete.


## GeneLocus.BCR|TCR, TargetObsColumnEnum.age_group_pediatric_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/age_group_pediatric_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/age_group_pediatric_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
lasso_cv,0.990 +/- 0.015 (in 2 folds),0.990 +/- 0.015 (in 2 folds),0.972 +/- 0.040 (in 2 folds),0.972 +/- 0.040 (in 2 folds),0.878 +/- 0.099 (in 2 folds),0.610 +/- 0.257 (in 2 folds),0.867,0.561,0.716 +/- 0.006 (in 2 folds),0.300 +/- 0.136 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.716,0.283,0.174,Unknown,90,19,109,0.174312,False
lasso_multiclass,0.989 +/- 0.015 (in 2 folds),0.989 +/- 0.015 (in 2 folds),0.976 +/- 0.034 (in 2 folds),0.976 +/- 0.034 (in 2 folds),0.952 +/- 0.068 (in 2 folds),0.869 +/- 0.185 (in 2 folds),0.944,0.83,0.778 +/- 0.026 (in 2 folds),0.527 +/- 0.028 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.78,0.514,0.174,Unknown,90,19,109,0.174312,False
xgboost,0.978 +/- 0.031 (in 2 folds),0.978 +/- 0.031 (in 2 folds),0.971 +/- 0.041 (in 2 folds),0.971 +/- 0.041 (in 2 folds),0.977 +/- 0.005 (in 2 folds),0.924 +/- 0.035 (in 2 folds),0.978,0.933,0.802 +/- 0.088 (in 2 folds),0.593 +/- 0.171 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.807,0.605,0.174,Unknown,90,19,109,0.174312,False
linearsvm_ovr,0.975 +/- 0.035 (in 2 folds),0.975 +/- 0.035 (in 2 folds),0.975 +/- 0.036 (in 2 folds),0.975 +/- 0.036 (in 2 folds),0.948 +/- 0.036 (in 2 folds),0.846 +/- 0.075 (in 2 folds),0.944,0.83,0.777 +/- 0.052 (in 2 folds),0.516 +/- 0.062 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.78,0.514,0.174,Unknown,90,19,109,0.174312,False
rf_multiclass,0.969 +/- 0.025 (in 2 folds),0.969 +/- 0.025 (in 2 folds),0.940 +/- 0.006 (in 2 folds),0.940 +/- 0.006 (in 2 folds),0.968 +/- 0.009 (in 2 folds),0.898 +/- 0.001 (in 2 folds),0.967,0.898,0.794 +/- 0.076 (in 2 folds),0.578 +/- 0.150 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.798,0.59,0.174,Unknown,90,19,109,0.174312,False
ridge_cv,0.967 +/- 0.046 (in 2 folds),0.967 +/- 0.046 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.948 +/- 0.036 (in 2 folds),0.846 +/- 0.075 (in 2 folds),0.944,0.83,0.777 +/- 0.052 (in 2 folds),0.516 +/- 0.062 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.78,0.514,0.174,Unknown,90,19,109,0.174312,False
elasticnet_cv,0.966 +/- 0.047 (in 2 folds),0.966 +/- 0.047 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.972 +/- 0.039 (in 2 folds),0.948 +/- 0.036 (in 2 folds),0.846 +/- 0.075 (in 2 folds),0.944,0.83,0.777 +/- 0.052 (in 2 folds),0.516 +/- 0.062 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.78,0.514,0.174,Unknown,90,19,109,0.174312,False
dummy_most_frequent,0.500 +/- 0.000 (in 2 folds),0.500 +/- 0.000 (in 2 folds),0.204 +/- 0.065 (in 2 folds),0.204 +/- 0.065 (in 2 folds),0.796 +/- 0.065 (in 2 folds),0.000 +/- 0.000 (in 2 folds),0.789,0.0,0.651 +/- 0.015 (in 2 folds),0.005 +/- 0.065 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.651,-0.0,0.174,Unknown,90,19,109,0.174312,True
dummy_stratified,0.492 +/- 0.048 (in 2 folds),0.492 +/- 0.048 (in 2 folds),0.206 +/- 0.080 (in 2 folds),0.206 +/- 0.080 (in 2 folds),0.685 +/- 0.038 (in 2 folds),-0.001 +/- 0.100 (in 2 folds),0.689,-0.012,0.564 +/- 0.090 (in 2 folds),0.002 +/- 0.030 (in 2 folds),0.179 +/- 0.086 (in 2 folds),0.569,-0.008,0.174,Unknown,90,19,109,0.174312,False


2023-01-14 01:31:38,067 - malid.external.model_evaluation - INFO - Removing ('rf_multiclass', 0) because fold 0 is incomplete.
2023-01-14 01:31:38,068 - malid.external.model_evaluation - INFO - Removing ('dummy_stratified', 0) because fold 0 is incomplete.
2023-01-14 01:31:38,068 - malid.external.model_evaluation - INFO - Removing ('dummy_most_frequent', 0) because fold 0 is incomplete.
2023-01-14 01:31:38,069 - malid.external.model_evaluation - INFO - Removing ('xgboost', 0) because fold 0 is incomplete.


## GeneLocus.BCR|TCR, TargetObsColumnEnum.sex_healthy_only, metamodel flavor default from /users/maximz/code/boyd-immune-repertoire-classification/data/data_v_20221224/embedded/unirep_fine_tuned/blending_metamodel/BCR_TCR/sex_healthy_only/default/train_smaller_applied_to_validation_model to /users/maximz/code/boyd-immune-repertoire-classification/out/unirep_fine_tuned/blending_metamodel/BCR_TCR/sex_healthy_only/default/train_smaller_applied_to_validation_model

MetamodelConfig(submodels={<GeneLocus.BCR: 1>: {'repertoire_stats': RepertoireClassifier: Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('log1p-scale-PCA_IGHG',
                                                  Pipeline(steps=[('log1p',
                                                                   FunctionTransformer(feature_names_out='one-to-one',
                                                                                       func=<ufunc 'log1p'>,
                                                                                       validate=True)),
                                                                  ('scale',
                                                                   StandardScaler()),
                                                                  ('pca',
                                                                   PCA(n_components=15,
      

Unnamed: 0,ROC-AUC (weighted OvO) per fold,ROC-AUC (macro OvO) per fold,au-PRC (weighted OvO) per fold,au-PRC (macro OvO) per fold,Accuracy per fold,MCC per fold,Accuracy global,MCC global,Accuracy per fold with abstention,MCC per fold with abstention,Unknown/abstention proportion per fold with abstention,Accuracy global with abstention,MCC global with abstention,Unknown/abstention proportion global with abstention,Abstention label global with abstention,sample_size,n_abstentions,sample_size including abstentions,abstention_rate,missing_classes
rf_multiclass,0.546 +/- 0.086 (in 3 folds),0.546 +/- 0.086 (in 3 folds),0.586 +/- 0.118 (in 3 folds),0.586 +/- 0.118 (in 3 folds),0.507 +/- 0.060 (in 3 folds),0.049 +/- 0.081 (in 3 folds),0.51,0.006,0.471 +/- 0.059 (in 3 folds),0.054 +/- 0.062 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.473,0.009,0.073,Unknown,153,12,165,0.072727,False
xgboost,0.542 +/- 0.090 (in 3 folds),0.542 +/- 0.090 (in 3 folds),0.595 +/- 0.115 (in 3 folds),0.595 +/- 0.115 (in 3 folds),0.508 +/- 0.040 (in 3 folds),0.048 +/- 0.033 (in 3 folds),0.51,0.006,0.472 +/- 0.041 (in 3 folds),0.053 +/- 0.027 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.473,0.009,0.073,Unknown,153,12,165,0.072727,False
dummy_stratified,0.506 +/- 0.071 (in 3 folds),0.506 +/- 0.071 (in 3 folds),0.563 +/- 0.092 (in 3 folds),0.563 +/- 0.092 (in 3 folds),0.499 +/- 0.051 (in 3 folds),0.008 +/- 0.146 (in 3 folds),0.497,-0.021,0.464 +/- 0.057 (in 3 folds),0.018 +/- 0.120 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.461,-0.014,0.073,Unknown,153,12,165,0.072727,False
lasso_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.448 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.451,-0.068,0.415 +/- 0.050 (in 3 folds),-0.019 +/- 0.092 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.418,-0.067,0.073,Unknown,153,12,165,0.072727,False
elasticnet_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.448 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.451,-0.068,0.415 +/- 0.050 (in 3 folds),-0.019 +/- 0.092 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.418,-0.067,0.073,Unknown,153,12,165,0.072727,False
ridge_cv,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.448 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.451,-0.068,0.415 +/- 0.050 (in 3 folds),-0.019 +/- 0.092 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.418,-0.067,0.073,Unknown,153,12,165,0.072727,False
dummy_most_frequent,0.500 +/- 0.000 (in 3 folds),0.500 +/- 0.000 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.558 +/- 0.055 (in 3 folds),0.448 +/- 0.063 (in 3 folds),0.000 +/- 0.000 (in 3 folds),0.451,-0.068,0.415 +/- 0.050 (in 3 folds),-0.019 +/- 0.092 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.418,-0.067,0.073,Unknown,153,12,165,0.072727,False
linearsvm_ovr,0.471 +/- 0.041 (in 3 folds),0.471 +/- 0.041 (in 3 folds),0.564 +/- 0.061 (in 3 folds),0.564 +/- 0.061 (in 3 folds),0.511 +/- 0.041 (in 3 folds),0.025 +/- 0.096 (in 3 folds),0.51,0.015,0.475 +/- 0.040 (in 3 folds),0.025 +/- 0.084 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.473,0.014,0.073,Unknown,153,12,165,0.072727,False
lasso_multiclass,0.457 +/- 0.055 (in 3 folds),0.457 +/- 0.055 (in 3 folds),0.545 +/- 0.083 (in 3 folds),0.545 +/- 0.083 (in 3 folds),0.474 +/- 0.089 (in 3 folds),-0.064 +/- 0.172 (in 3 folds),0.471,-0.074,0.439 +/- 0.081 (in 3 folds),-0.054 +/- 0.153 (in 3 folds),0.072 +/- 0.026 (in 3 folds),0.436,-0.06,0.073,Unknown,153,12,165,0.072727,False


