In [52]:
import numpy as np

import pandas as pd

from tabulate import tabulate

from pathlib import Path

from mlhand import config

import json

from IPython.display import display, Markdown, Latex, HTML

from matplotlib import pyplot as  plt

# Statistics Loading Function

In [53]:
# Read the header-less list of model names into a single "model" column.
# A descriptive name is used instead of `_`, because assigning to `_` in
# IPython overrides its "last output" shortcut.
ic_model_frame = pd.read_csv(
    "./resources/rfmri_ics_partial_correlation_100.txt",
    header=None,
    names=["model"],
    sep=";",
)
ic_models = ic_model_frame["model"].tolist()

In [54]:
# Sanity check: show the first five model names parsed from the list file.
ic_models[:5]

['rfmri_ic1_partial_correlation_100',
 'rfmri_ic2_partial_correlation_100',
 'rfmri_ic3_partial_correlation_100',
 'rfmri_ic4_partial_correlation_100',
 'rfmri_ic5_partial_correlation_100']

In [55]:
"abc".upper()

'ABC'

In [171]:
def plot_table(table, headers, fmt="html"):
    """Format ``table`` with ``tabulate``, centring every column.

    Parameters
    ----------
    table
        Row data, handed to ``tabulate`` unchanged.
    headers
        Column titles; one ``"center"`` alignment entry is generated per title.
    fmt
        ``tablefmt`` name forwarded to ``tabulate`` (default ``"html"``).

    Returns
    -------
    An ``HTML`` display object wrapping the rendered text when ``fmt`` is
    ``"html"``. For any other format the text is printed and ``None`` is
    returned.
    """
    alignment = ["center" for _ in range(len(headers))]
    rendered = tabulate(table, headers=headers, tablefmt=fmt, colalign=alignment)

    if fmt != "html":
        # Plain-text formats are written straight to stdout.
        print(rendered)
        return None

    return HTML(rendered)
    
def alias(feature_set_name):
    """Return the short upper-case label shown for a feature set in tables.

    The name is split on ``"_"``. When the second token is the negation
    marker ``"not"``, the label is that marker joined to the following token;
    otherwise it is the second token alone.

    Examples
    --------
    ``"rfmri_ic1_partial_correlation_100"``      -> ``"IC1"``
    ``"rfmri_not_ic26_partial_correlation_100"`` -> ``"NOT_IC26"``

    Raises
    ------
    IndexError
        If the name has too few ``"_"``-separated tokens.
    """
    slugs = feature_set_name.split("_")
    # Compare the token itself instead of searching the whole name, so an
    # unrelated substring (e.g. "annotated") is not mistaken for the marker.
    if slugs[1] == "not":
        return f"{slugs[1]}_{slugs[2]}".upper()
    return slugs[1].upper()

def load_data(artifact_dir, models, model_type="logistic", assert_same_random_seed_commit=True, sorting_factor=-1):
    """Collect per-model AUC statistics from an artifact directory and rank them.

    For every entry of ``models`` the function looks for
    ``{artifact_dir}/{model_type}--{feature_set}/statistics.json`` and reads
    its ``"aucs"`` list. Feature sets without that file are skipped silently.

    Parameters
    ----------
    artifact_dir
        Directory that contains one sub-directory per trained model.
    models
        Iterable of feature-set names to look up.
    model_type
        Prefix of the per-model sub-directory name.
    assert_same_random_seed_commit
        When true, every loaded run's ``config.json`` must report the same
        ``"commit"`` and ``"random_seed_value"`` as the first loaded run, and
        every run must contain the same number of AUC values.
    sorting_factor
        Multiplier applied to the mean AUC to build the sort key: ``-1``
        (default) puts the highest mean first, ``1`` the lowest.

    Returns
    -------
    tuple
        ``(table_rows, headers, rows)``. ``table_rows`` holds display tuples
        ``(alias, "mean±std", min, max)``; ``rows`` holds the matching dicts
        with keys ``name``, ``auc_mean``, ``auc_std``, ``auc_max``,
        ``auc_min``, ``n`` and ``aucs``. Both are in ranked order.

    Raises
    ------
    AssertionError
        If the consistency check is enabled and a run disagrees with the
        first loaded run.
    """
    headers = [
        "rfMRI Partial Correlation 100 Features Related to", "auroc:mean±std", "min", "max"
    ]

    print(f"Statistics from {artifact_dir}")

    # (commit, random seed, number of AUC values) of the first loaded run.
    reference = None
    rows = []
    for feature_set in models:
        artifact_path = Path(f"{artifact_dir}/{model_type}--{feature_set}")
        statistics_file = artifact_path / "statistics.json"

        if not statistics_file.is_file():
            continue

        with open(statistics_file, "r") as fh:
            aucs = np.array(json.load(fh)["aucs"])

        n_values = int(aucs.shape[0])
        rows.append(dict(
            name=feature_set,
            auc_mean=float(np.mean(aucs)),
            auc_std=float(np.std(aucs)),
            auc_max=float(np.max(aucs)),
            auc_min=float(np.min(aucs)),
            n=n_values,
            aucs=aucs.tolist(),
        ))

        if assert_same_random_seed_commit:
            with open(artifact_path / "config.json", "r") as fh:
                model_config = json.load(fh)
            current = (model_config["commit"], model_config["random_seed_value"], n_values)
            if reference is None:
                reference = current
            else:
                assert reference[0] == current[0], (
                    f"{feature_set}: commit {current[0]!r} differs from {reference[0]!r}"
                )
                assert reference[1] == current[1], (
                    f"{feature_set}: random seed {current[1]!r} differs from {reference[1]!r}"
                )
                assert reference[2] == current[2], (
                    f"{feature_set}: {current[2]} AUC values, expected {reference[2]}"
                )

    # sorted() is stable, so runs with equal means keep the order of `models`.
    sorted_rows = sorted(rows, key=lambda row: sorting_factor * row["auc_mean"])

    sorted_rows_for_table = [
        (
            alias(row["name"]),
            f"{row['auc_mean']:4.4f}±{row['auc_std']:05.4f}",
            f"{row['auc_min']:4.4f}",
            f"{row['auc_max']:4.4f}",
        )
        for row in sorted_rows
    ]

    return sorted_rows_for_table, headers, sorted_rows

def ano():
    """Load the statistics of the development run and return them as a table.

    Reads ``../artifacts/dev-2021-06-27-ic-pc100-features`` for every name in
    ``ic_models`` and passes the ranked rows to ``plot_table``.

    Returns
    -------
    Whatever ``plot_table`` returns. Returning it (rather than discarding it)
    lets a notebook cell that ends with ``ano()`` actually display the table.
    """
    rows, headers, _full_rows = load_data("../artifacts/dev-2021-06-27-ic-pc100-features", ic_models)

    return plot_table(rows, headers)
ano()

Statistics from ../artifacts/dev-2021-06-27-ic-pc100-features


# Plot Official Artifacts

The tables in this section are loaded from `../artifacts/2021-06-27-ic-ranking-based-on-partial-correlation-100-features`.

In [172]:
# Load the official run. load_data's default sorting_factor (-1) ranks the
# feature sets from highest to lowest mean AUC.
artifact_dir = "../artifacts/2021-06-27-ic-ranking-based-on-partial-correlation-100-features"
sorted_rows_for_table, headers, sorted_rows = load_data(
    artifact_dir=artifact_dir, models=ic_models, assert_same_random_seed_commit=True
)

# Only the first ten ranked rows are rendered.
plot_table(table=sorted_rows_for_table[:10], headers=headers)

Statistics from ../artifacts/2021-06-27-ic-ranking-based-on-partial-correlation-100-features


rfMRI Partial Correlation 100 Features Related to,auroc:mean±std,min,max
IC33,0.6504±0.0116,0.6344,0.665
IC2,0.6446±0.0138,0.6178,0.6637
IC26,0.6438±0.0194,0.6096,0.6699
IC12,0.6327±0.0104,0.6141,0.6584
IC28,0.6318±0.0125,0.6072,0.6533
IC5,0.6200±0.0134,0.5968,0.6455
IC18,0.6144±0.0110,0.5952,0.6374
IC11,0.6137±0.0184,0.5837,0.632
IC9,0.6037±0.0172,0.576,0.6354
IC29,0.6025±0.0097,0.5858,0.6173


In [176]:
def save_ic_statistics(rows, source_dir=None, output_path="../statistics/ic_performace_statistics.json"):
    """Write ranked statistics and their source directory to a JSON file.

    Parameters
    ----------
    rows
        JSON-serialisable statistics (e.g. the third value returned by
        ``load_data``); stored under the ``"statistics"`` key.
    source_dir
        Value stored under the ``"artifact_dir"`` key. When omitted, the
        notebook-level ``artifact_dir`` variable is read at call time, which
        is what the function always did before this parameter existed.
        Passing it explicitly avoids depending on which cell ran last.
    output_path
        Destination file. The default keeps the original file name
        byte-for-byte (including its spelling), so that anything already
        reading that file keeps working.

    Raises
    ------
    NameError
        If ``source_dir`` is omitted and no ``artifact_dir`` global exists.
    """
    if source_dir is None:
        source_dir = artifact_dir

    output_path = Path(output_path)
    # Create the target directory on first use instead of failing.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    payload = dict(artifact_dir=source_dir, statistics=rows)
    with open(output_path, "w") as fh:
        json.dump(payload, fh, indent=4, sort_keys=True)
# save_ic_statistics()

TypeError: save_ic_statistics() missing 1 required positional argument: 'rows'

# Virtual Lesion

In [174]:
# Read the header-less list of model names into a single "model" column.
# A descriptive name is used instead of `_`, because assigning to `_` in
# IPython overrides its "last output" shortcut.
not_ic_model_frame = pd.read_csv(
    "./resources/rfmri_not_ics_partial_correlation_100.txt",
    header=None,
    names=["model"],
    sep=";",
)
not_ic_models = not_ic_model_frame["model"].tolist()

In [178]:
# sorting_factor=1 makes load_data sort by ascending mean AUC, so the
# lowest-scoring feature sets come first.
artifact_dir = "../artifacts/2021-07-24-ic-ranking-by-virtual-lesion"
sorted_rows_for_table, headers, sorted_rows = load_data(
    artifact_dir=artifact_dir,
    models=not_ic_models,
    sorting_factor=1,
    assert_same_random_seed_commit=True,
)

# Only the first ten ranked rows are rendered.
plot_table(table=sorted_rows_for_table[:10], headers=headers)

Statistics from ../artifacts/2021-07-24-ic-ranking-by-virtual-lesion


rfMRI Partial Correlation 100 Features Related to,auroc:mean±std,min,max
NOT_IC26,0.7112±0.0130,0.6918,0.7331
NOT_IC12,0.7117±0.0130,0.6927,0.7391
NOT_IC2,0.7142±0.0125,0.6956,0.734
NOT_IC21,0.7155±0.0131,0.6874,0.735
NOT_IC20,0.7155±0.0148,0.6832,0.7342
NOT_IC18,0.7162±0.0136,0.6881,0.7367
NOT_IC33,0.7162±0.0177,0.6763,0.7387
NOT_IC31,0.7167±0.0141,0.6817,0.7347
NOT_IC5,0.7169±0.0093,0.6999,0.7312
NOT_IC34,0.7178±0.0160,0.6816,0.7386


In [167]:
# Important remark:
# - load_data() hard-codes the "auroc:mean±std" column header; relabel that
#   header before enabling this cell, whose artifact directory is named "auprg".

# artifact_dir = "../artifacts/2021-07-27-ic-ranking-by-auprg-permute-retrain"
# sorted_rows_for_table, headers, sorted_rows = load_data(
#     artifact_dir,
#     not_ic_models,
#     assert_same_random_seed_commit=True,
#     sorting_factor=1
# )

# plot_table(sorted_rows_for_table[:10], headers)

In [168]:
# Important remark:
# - load_data() hard-codes the "auroc:mean±std" column header; relabel that
#   header before enabling this cell, whose artifact directory is named "auprg".


# artifact_dir = "../artifacts/2021-07-29-ic-ranking-by-auprg"
# sorted_rows_for_table, headers, sorted_rows = load_data(
#     artifact_dir,
#     not_ic_models,
#     assert_same_random_seed_commit=True,
#     sorting_factor=1
# )

# plot_table(sorted_rows_for_table[:10], headers)

## Models Evaluated with AUROC after Removing Withdrawn Participants

In [180]:
# sorting_factor=1 makes load_data sort by ascending mean AUC, so the
# lowest-scoring feature sets come first.
artifact_dir = "../artifacts/2021-09-05-participants-withdraw-and-auroc"
sorted_rows_for_table, headers, sorted_rows = load_data(
    artifact_dir=artifact_dir,
    models=not_ic_models,
    sorting_factor=1,
    assert_same_random_seed_commit=True,
)

# Persist the full ranked statistics. save_ic_statistics reads the
# `artifact_dir` assigned above when recording where they came from.
save_ic_statistics(sorted_rows)

# Only the first ten ranked rows are rendered.
plot_table(table=sorted_rows_for_table[:10], headers=headers)

Statistics from ../artifacts/2021-09-05-participants-withdraw-and-auroc


rfMRI Partial Correlation 100 Features Related to,auroc:mean±std,min,max
NOT_IC18,0.7089±0.0135,0.6899,0.7361
NOT_IC12,0.7093±0.0092,0.6915,0.7201
NOT_IC26,0.7102±0.0196,0.6712,0.7371
NOT_IC20,0.7124±0.0157,0.6873,0.741
NOT_IC9,0.7134±0.0119,0.6879,0.7274
NOT_IC33,0.7136±0.0131,0.6905,0.7406
NOT_IC2,0.7140±0.0132,0.6892,0.7333
NOT_IC32,0.7140±0.0159,0.6835,0.7354
NOT_IC11,0.7151±0.0146,0.6901,0.7367
NOT_IC39,0.7156±0.0120,0.6962,0.7306
