In [29]:
import numpy as np

import pandas as pd

from tabulate import tabulate

from pathlib import Path

from mlhand import config

import json

from IPython.display import display, Markdown, Latex, HTML

# Statistics Loading Function

In [3]:
# Read the single-modality names, one per line, into a plain Python list.
# The intermediate frame is chained rather than bound to `_`, because
# assigning to `_` shadows IPython's "last output" variable for the rest of
# the session.
unimodalities = (
    pd.read_csv("../resources/unimodality.txt", header=None, names=["modality"])
    ["modality"]
    .tolist()
)

In [4]:
def alias(col_name):
    """Return a shortened display label for a modality column name.

    The first of ``component_amplitudes``, ``full_correlation`` or
    ``partial_correlation`` found in ``col_name`` is abbreviated to ``CA``,
    ``FC`` or ``PC`` respectively. Otherwise, any name containing
    ``controlled`` collapses to the literal ``"controlled"``. Names matching
    none of these are returned unchanged.
    """
    # Order matters: only the first matching pattern is abbreviated.
    abbreviations = (
        ("component_amplitudes", "CA"),
        ("full_correlation", "FC"),
        ("partial_correlation", "PC"),
    )
    for long_form, short_form in abbreviations:
        if long_form in col_name:
            return col_name.replace(long_form, short_form)

    return "controlled" if "controlled" in col_name else col_name

In [5]:
# Read the multimodal combinations, one per line. Each line is itself a
# comma-separated list of modality names, so ";" is passed as the separator
# to keep the whole line in a single column.
# The intermediate frame is chained rather than bound to `_`, because
# assigning to `_` shadows IPython's "last output" variable for the rest of
# the session.
multimodalities = (
    pd.read_csv("../resources/multimodality.txt", header=None, names=["modality"], sep=";")
    ["modality"]
    .tolist()
)

In [6]:
# Display the multimodal feature-set combinations read from multimodality.txt.
multimodalities

['structure,diffusion,rfmri_component_amplitudes_100',
 'structure,diffusion,rfmri_full_correlation_100',
 'structure,diffusion,rfmri_partial_correlation_100',
 'structure,diffusion,rfmri_component_amplitudes_100,rfmri_full_correlation_100',
 'structure,diffusion,rfmri_component_amplitudes_100,rfmri_partial_correlation_100',
 'controlled_variables,structure,diffusion,rfmri_component_amplitudes_100,rfmri_partial_correlation_100']

In [35]:
def load_data(artifact_dir, feature_sets, model_type="logistic", assert_same_random_seed_commit=True):
    """Build summary-table rows of AUC statistics for a set of trained models.

    For each entry of ``feature_sets`` the function looks for
    ``{artifact_dir}/{model_type}--{feature_set}/statistics.json`` and
    summarises its ``"aucs"`` list as mean±std, min and max.

    Relies on the notebook-level names ``unimodalities``, ``alias`` and
    ``config.NUM_FEATURES``.

    Parameters
    ----------
    artifact_dir : str or path-like
        Directory holding one sub-directory per trained model.
    feature_sets : iterable of str
        Feature-set names. Multimodal sets may separate their modalities
        with "," (as in the resource file) or "-" (as in directory names).
    model_type : str
        Prefix of the per-model sub-directory name.
    assert_same_random_seed_commit : bool
        When true, every loaded model's ``config.json`` must report the same
        ``"commit"`` and ``"random_seed_value"`` as the first one loaded.

    Returns
    -------
    (rows, headers) : tuple of lists
        ``rows`` holds one list per feature set that had a
        ``statistics.json``; feature sets without one are skipped silently.
        ``headers`` holds the matching column labels.

    Raises
    ------
    AssertionError
        If ``assert_same_random_seed_commit`` is true and the models disagree
        on commit or random seed.
    KeyError
        If a modality is missing from ``config.NUM_FEATURES``, or a JSON file
        lacks an expected key.
    """
    headers = [alias(name) for name in unimodalities] \
        + ["Total Features", "auroc:mean±std", "min", "max"]

    print(f"Statistics from {artifact_dir}")

    rows = []

    commit = None
    random_seed = None
    for feature_set in feature_sets:
        # Multimodal sets are comma-separated in the resource file but
        # dash-separated in the artifact directory names.
        feature_set = feature_set.replace(",", "-")
        modalities = feature_set.split("-")

        # Match whole modality names. A substring test against the joined
        # string would tick a column whenever one modality name happens to be
        # contained in another.
        modality_cols = ["✓" if name in modalities else "" for name in unimodalities]
        total_features = np.sum([config.NUM_FEATURES[name] for name in modalities])

        artifact_path = Path(artifact_dir) / f"{model_type}--{feature_set}"
        statistics_file = artifact_path / "statistics.json"

        # Best effort: not every feature set has been trained in every run.
        if not statistics_file.is_file():
            continue

        with open(statistics_file, "r", encoding="utf-8") as fh:
            statistics = json.load(fh)

        aucs = np.array(statistics["aucs"])
        statistics_cols = [
            f"{np.mean(aucs):4.4f}±{np.std(aucs):05.4f}",
            f"{np.min(aucs):4.4f}",
            f"{np.max(aucs):4.4f}",
        ]

        rows.append([*modality_cols, total_features, *statistics_cols])

        if assert_same_random_seed_commit:
            with open(artifact_path / "config.json", "r", encoding="utf-8") as fh:
                model_config = json.load(fh)

            if commit is None:
                # The first model loaded becomes the reference for the rest.
                commit = model_config["commit"]
                random_seed = model_config["random_seed_value"]
            else:
                assert commit == model_config["commit"], (
                    f"{artifact_path}: commit {model_config['commit']!r} "
                    f"differs from reference {commit!r}"
                )
                assert random_seed == model_config["random_seed_value"], (
                    f"{artifact_path}: random seed {model_config['random_seed_value']!r} "
                    f"differs from reference {random_seed!r}"
                )

    return rows, headers

# Smoke-test the loader on the quick-run artifacts. The trailing semicolon
# suppresses the cell output without rebinding IPython's `_` variable.
load_data("../artifacts/dev-all-unimodality-quick-run", unimodalities);

Statistics from ../artifacts/dev-all-unimodality-quick-run


# Plot Tables

In [36]:
def plot_table(table, headers, fmt="html"):
    """Render ``table`` with ``tabulate``, centring every column.

    Parameters
    ----------
    table : rows of cell values, passed straight to ``tabulate``.
    headers : sequence of column labels; one "center" alignment is
        generated per header.
    fmt : any ``tabulate`` ``tablefmt`` name.

    Returns
    -------
    An ``IPython.display.HTML`` object when ``fmt == "html"``. For every
    other format the rendered text is printed and ``None`` is returned.
    """
    column_alignment = ["center"] * len(headers)
    rendered = tabulate(table, headers=headers, tablefmt=fmt, colalign=column_alignment)

    if fmt != "html":
        print(rendered)
        return None

    return HTML(rendered)
    
# Small hand-written table used to check how plot_table renders.
dummy_headers = ["Planet", "R (km)", "mass (x 10^29 kg)"]

dummy_data = [
    ["Sun", 696000, 1989100000],
    ["Earth", 6371, 5973.6],
    ["Moon", 1737, 73.5],
    ["Mars", 3390, 641.85],
]

plot_table(table=dummy_data, headers=dummy_headers, fmt="html")

Planet,R (km),mass (x 10^29 kg)
Sun,696000,1989100000.0
Earth,6371,5973.6
Moon,1737,73.5
Mars,3390,641.85


In [37]:
# Summarise the runs under ../artifacts/tmp. Commit and random-seed
# consistency is deliberately not enforced for this directory.
dev_data, dev_headers = load_data(
    artifact_dir="../artifacts/tmp",
    feature_sets=[*unimodalities, *multimodalities],
    assert_same_random_seed_commit=False,
)

plot_table(table=dev_data, headers=dev_headers)

Statistics from ../artifacts/tmp


controlled,structure,diffusion,rfmri_CA_25,rfmri_FC_25,rfmri_PC_25,rfmri_CA_100,rfmri_FC_100,rfmri_PC_100,Total Features,auroc:mean±std,min,max
✓,,,,,,,,,22,0.5502±0.0798,0.3895,0.6867
,✓,,,,,,,,198,0.4552±0.0903,0.2978,0.6105
,,✓,,,,,,,432,0.4882±0.0936,0.3553,0.6386
,,,✓,,,,,,21,0.5382±0.1010,0.359,0.7389
,,,,✓,,,,,210,0.6045±0.0837,0.4589,0.7375
,,,,,✓,,,,210,0.6230±0.0635,0.5177,0.7179
,,,,,,✓,,,55,0.5537±0.1105,0.3602,0.7144
,,,,,,,✓,,1485,0.6311±0.0901,0.4505,0.7424
,,,,,,,,✓,1485,0.6008±0.1486,0.3541,0.8217


# Plot Official Artifacts

2021-06-26-paper-submission-logistic

In [38]:
# Paper-submission results: every loaded model must report the same commit
# and random seed, otherwise load_data raises an AssertionError.
plot_table(
    *load_data(
        artifact_dir="../artifacts/2021-06-26-paper-submission-logistic",
        feature_sets=[*unimodalities, *multimodalities],
        assert_same_random_seed_commit=True,
    )
)

Statistics from ../artifacts/2021-06-26-paper-submission-logistic


controlled,structure,diffusion,rfmri_CA_25,rfmri_FC_25,rfmri_PC_25,rfmri_CA_100,rfmri_FC_100,rfmri_PC_100,Total Features,auroc:mean±std,min,max
✓,,,,,,,,,22,0.5529±0.0137,0.5203,0.5709
,✓,,,,,,,,198,0.5542±0.0100,0.5352,0.5679
,,✓,,,,,,,432,0.5747±0.0126,0.5553,0.5985
,,,✓,,,,,,21,0.5779±0.0138,0.5601,0.603
,,,,✓,,,,,210,0.6572±0.0124,0.6264,0.6743
,,,,,✓,,,,210,0.6657±0.0092,0.643,0.6769
,,,,,,✓,,,55,0.5904±0.0154,0.5633,0.6197
,,,,,,,✓,,1485,0.6958±0.0169,0.6672,0.7239
,,,,,,,,✓,1485,0.7217±0.0118,0.703,0.7342
,✓,✓,,,,✓,,,685,0.6110±0.0169,0.5829,0.6481


In [39]:
# Important: before re-enabling this cell, make sure the metric column header is labelled correctly (load_data hard-codes "auroc:mean±std").

# plot_table(
#     *load_data(
#         "../artifacts/2021-07-25-models-with-auprg-objective",
#         unimodalities + multimodalities,
#         assert_same_random_seed_commit=True
#     )
# )

## Models with Area Under the Precision-Recall-Gain Curve (AUPRG)

In [41]:
# Important: before re-enabling this cell, make sure the metric column header is labelled correctly (load_data hard-codes "auroc:mean±std").

# dev_data, dev_headers = load_data(
#     "../artifacts/2021-07-25-models-with-auprg-objective",
#     unimodalities + multimodalities,
#     assert_same_random_seed_commit=False
# )

# plot_table(dev_data,  dev_headers)

## Models with AUROC After Removing Withdrawn Participants

In [42]:

# Summarise the 2021-09-05 runs. Commit and random-seed consistency is not
# enforced for this directory.
dev_data, dev_headers = load_data(
    artifact_dir="../artifacts/2021-09-05-participants-withdraw-and-auroc",
    feature_sets=[*unimodalities, *multimodalities],
    assert_same_random_seed_commit=False,
)

plot_table(table=dev_data, headers=dev_headers)

Statistics from ../artifacts/2021-09-05-participants-withdraw-and-auroc


controlled,structure,diffusion,rfmri_CA_25,rfmri_FC_25,rfmri_PC_25,rfmri_CA_100,rfmri_FC_100,rfmri_PC_100,Total Features,auroc:mean±std,min,max
✓,,,,,,,,,22,0.5525±0.0120,0.5223,0.5681
,✓,,,,,,,,198,0.5539±0.0075,0.5432,0.5642
,,✓,,,,,,,432,0.5755±0.0144,0.549,0.5982
,,,✓,,,,,,21,0.5775±0.0134,0.5551,0.598
,,,,✓,,,,,210,0.6556±0.0137,0.6223,0.6732
,,,,,✓,,,,210,0.6655±0.0099,0.6423,0.6793
,,,,,,✓,,,55,0.5900±0.0164,0.5595,0.6184
,,,,,,,✓,,1485,0.7001±0.0152,0.6808,0.7294
,,,,,,,,✓,1485,0.7243±0.0158,0.6944,0.7437
,✓,✓,,,,✓,,,685,0.6086±0.0161,0.5813,0.64
