Produce an overview table of the motif, anti-motif, and pmotif detection results for one dataset

In [1]:
# p-motif definition config
# Effect size: the absolute mean correlation coefficient must exceed this value
# (see get_table_cell)
CORR_COEF_THRESHOLD = 0.3
# MWU significance: GLOBAL_ALPHA is divided by the number of usable graphs to obtain
# the per-test alpha; SIG_THRESH is compared against the share of p-values that lie
# above that per-test alpha (see get_table_cell)
GLOBAL_ALPHA = 0.01
SIG_THRESH = 0.8

# Motif definition: z-score above this value -> motif, below its negative -> anti-motif
Z_SCORE_THRESHOLD = 2.5

In [2]:
from os import listdir
from pathlib import Path

# Earlier results directory. NOTE: this value is overwritten by the assignment below
# and is therefore never used when the cell runs top to bottom.
BASE_PATH = Path("/home/timgarrels/Projects/masterthesis/copied_results_data/prelim_analysis/_analysis_out")

# Added for the analysis of the enigma dataset (this is the value that takes effect)
BASE_PATH = Path("/home/timgarrels/pmotif_publication/enigma/out/analyisi_out")

# Every entry directly below BASE_PATH is treated as one dataset
datasets = sorted(listdir(BASE_PATH))
datasets

['enigma_schaefer_200_gtrie.edgelist']

In [3]:
# Select the dataset to analyse. Only the last assignment takes effect; the first
# one is overwritten immediately.
DATASET = "human_brain_development_cutoff_0.772.edgelist"
# DATASET = 'human_cancer_cutoff_0.935.edgelist'
DATASET = "enigma_schaefer_200_gtrie.edgelist"
# Fail early if the selected dataset has no entry below BASE_PATH
assert DATASET in datasets

In [4]:
# Maps dataset names to the label that is inserted into the table captions.
# The first two labels are LaTeX macros (presumably defined in the target document — verify).
dataset_label_lookup = {
    "human_brain_development_cutoff_0.772.edgelist": r'\brain',
    'human_cancer_cutoff_0.935.edgelist': r'\cancer',
    'enigma_schaefer_200_gtrie.edgelist': "Structural Brain Connectivity Network"
}

In [5]:
from pmotif_lib.graphlet_representation import graphlet_classes_from_size, graphlet_class_to_name, get_graphlet_size_from_class

# Human-readable names of all graphlet classes of size 4
graphlet_classes = [
    graphlet_class_to_name(graphlet_class)
    for graphlet_class in graphlet_classes_from_size(4)
]
graphlet_classes

['4-Dash',
 'Fork',
 'Spoon',
 'Square',
 'Crossed Square',
 'Double Crossed Square']

In [6]:
# Show the raw class identifiers of the size-4 graphlets (the strings that
# graphlet_class_to_name translates into names)
graphlet_classes_from_size(4)

['0110 1001 1000 0100',
 '0111 1000 1000 1000',
 '0111 1010 1100 1000',
 '0110 1001 1001 0110',
 '0111 1011 1100 1100',
 '0111 1011 1101 1110']

In [7]:
def load_artifacts_obj(dataset: str, graphlet_size: int) -> None:
    """Print the entries of a dataset's artifact directory for one graphlet size.

    Lists ``BASE_PATH / dataset / "artifacts" / str(graphlet_size)`` and prints the
    entry names. Despite its name, the function does not load or return anything.

    Args:
        dataset: Name of the dataset directory below ``BASE_PATH``.
        graphlet_size: Graphlet size whose artifact directory is listed.

    Raises:
        FileNotFoundError: If the artifact directory does not exist.
    """
    print(listdir(BASE_PATH / dataset / "artifacts" / str(graphlet_size)))

# Inspect the artifact layout of the first dataset for graphlets of size 3
load_artifacts_obj(datasets[0], 3)

['report.html', 'local', 'global', 'meta.json']


In [8]:
import json


def get_table_cell(dataset: str, graphlet_class: str, p_metric_name: str):
    """Collect the classification results of one graphlet class for the overview table.

    Reads two JSON artifacts below
    ``BASE_PATH / dataset / "artifacts" / <graphlet size> / "global"``:

    * ``<class name>_frequency_split.json`` provides the z-score used for the
      motif / anti-motif decision.
    * ``<p_metric_name>/<class name>_pairwise.json`` provides the pairwise
      comparison results used for the p-motif decision.

    Args:
        dataset: Name of the dataset directory below ``BASE_PATH``.
        graphlet_class: Graphlet class identifier as returned by
            ``graphlet_classes_from_size``.
        p_metric_name: Name of the positional metric directory to read.

    Returns:
        A dict with the keys ``"z-score"``, ``"is_motif"`` and ``"is_antimotif"``.
        If the pairwise artifact reports no error, the dict additionally contains
        ``"pmotif"``, ``"mwu"``, ``"corr_coef"``, ``"r_value"`` and
        ``"corr_coef_value"``.

    Raises:
        FileNotFoundError: If one of the two artifacts does not exist.
        KeyError: If an expected key is missing from an artifact.
        ZeroDivisionError: If the pairwise artifact reports ``"real_total"`` as 0.
    """
    graphlet_size = get_graphlet_size_from_class(graphlet_class)
    graphlet_class_name = graphlet_class_to_name(graphlet_class)
    base_path = BASE_PATH / dataset / "artifacts" / str(graphlet_size) / "global"

    with open(base_path / f"{graphlet_class_name}_frequency_split.json", "r") as f:
        z_score = json.load(f)["z-score"]

    # Frequency-based classification; independent of the positional metric
    frequency_result = {
        "z-score": z_score,
        "is_motif": z_score > Z_SCORE_THRESHOLD,
        "is_antimotif": z_score < -Z_SCORE_THRESHOLD,
    }

    with open(base_path / p_metric_name / f"{graphlet_class_name}_pairwise.json", "r") as f:
        pairwise_data = json.load(f)

    if pairwise_data["error"] is not None:
        # No pairwise results available: report only the frequency-based part.
        # The z-score is included as well, since it has already been computed.
        return frequency_result

    mean_corr_coef = pairwise_data["mean_corr_coef"]
    corr_coef_relevant = abs(mean_corr_coef) > CORR_COEF_THRESHOLD

    usable_graphs = pairwise_data["real_total"]
    # Per-test alpha (presumably a Bonferroni-style correction over the usable graphs)
    local_alpha = GLOBAL_ALPHA / usable_graphs

    # NOTE(review): this counts the p-values ABOVE the per-test alpha, and "mwu" is
    # set when their share is below SIG_THRESH. Confirm that this direction is the
    # intended definition of MWU relevance.
    p_values_above_alpha = [p for p in pairwise_data["p-values"] if p > local_alpha]
    share_above_alpha = len(p_values_above_alpha) / usable_graphs
    mwu_relevant = share_above_alpha < SIG_THRESH

    # TODO: P-value significance

    return {
        **frequency_result,
        "pmotif": mwu_relevant and corr_coef_relevant,
        "mwu": mwu_relevant,
        "corr_coef": corr_coef_relevant,
        # Sign of the mean correlation coefficient (zero counts as positive)
        "r_value": -1 if mean_corr_coef < 0 else 1,
        "corr_coef_value": mean_corr_coef,
    }

# Example: result for one size-4 graphlet class of the first dataset with the "degree" metric
get_table_cell(datasets[0], "0111 1011 1101 1110", "degree")

{'z-score': 386.447559033853,
 'is_motif': True,
 'is_antimotif': False,
 'pmotif': False,
 'mwu': True,
 'corr_coef': False,
 'r_value': 1,
 'corr_coef_value': 0.28602598622638703}

In [9]:
import pandas as pd

def create_row(pmetric: str, dataset: str, flag=None):
    """Build a one-row DataFrame with one column per graphlet class of size 3 and 4.

    Args:
        pmetric: Name of the positional metric passed on to ``get_table_cell``.
        dataset: Name of the dataset; also used as the index label of the row.
        flag: Key of the ``get_table_cell`` result to put into each cell. If falsy,
            the complete result dict is stored in the cell.

    Returns:
        A DataFrame with a single row, indexed by ``"name"`` (the dataset), whose
        columns are the graphlet class identifiers. Cells that could not be
        computed contain a numeric sentinel (0.6 or 0.3, see below).
    """
    # Numeric sentinels for cells without a result (presumably numeric so that the
    # styled tables can still be rendered — verify).
    not_processed = 0.6  # "Failed to process"
    class_missing = 0.3

    row = {"name": dataset}
    for gc in graphlet_classes_from_size(3) + graphlet_classes_from_size(4):
        try:
            cell = get_table_cell(dataset, gc, pmetric)
            row[gc] = cell[flag] if flag else cell
        except FileNotFoundError:
            # Dataset was not processed
            row[gc] = not_processed
        except KeyError:
            # Graphlet class missing. This also triggers when `flag` is absent from
            # the result, e.g. for the reduced dict get_table_cell returns when the
            # pairwise artifact reports an error.
            row[gc] = class_missing

    return pd.DataFrame([row]).set_index("name")

# Preview: complete result dicts (no flag) of the first dataset for the "degree" metric
df = create_row("degree", datasets[0])
df.style.background_gradient(cmap='Blues', axis=None)

Unnamed: 0_level_0,011 101 110,011 100 100,0110 1001 1000 0100,0111 1000 1000 1000,0111 1010 1100 1000,0110 1001 1001 0110,0111 1011 1100 1100,0111 1011 1101 1110
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
enigma_schaefer_200_gtrie.edgelist,"{'z-score': 131.4063801844388, 'is_motif': True, 'is_antimotif': False, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': 1, 'corr_coef_value': 0.11110230063672788}","{'z-score': -131.4063801844388, 'is_motif': False, 'is_antimotif': True, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.06984397352066143}","{'z-score': -119.86683659058964, 'is_motif': False, 'is_antimotif': True, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.12371767495280399}","{'z-score': -82.05447861426522, 'is_motif': False, 'is_antimotif': True, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.16907829985679151}","{'z-score': 44.19056784177688, 'is_motif': True, 'is_antimotif': False, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.0034134205930217913}","{'z-score': -49.67207660660679, 'is_motif': False, 'is_antimotif': True, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.10485772855822902}","{'z-score': 107.10205839761323, 'is_motif': True, 'is_antimotif': False, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': 1, 'corr_coef_value': 0.11670445769064197}","{'z-score': 386.447559033853, 'is_motif': True, 'is_antimotif': False, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': 1, 'corr_coef_value': 0.28602598622638703}"


In [10]:
def get_dataframe(dataset, flag="mwu"):
    """Assemble the overview table of one dataset.

    The table has one "Motif" row, one "Anti-Motif" row and one row per positional
    metric. Columns are the graphlet class names (sizes 3 and 4).

    Args:
        dataset: Name of the dataset directory below ``BASE_PATH``.
        flag: Key of the ``get_table_cell`` result shown in the positional-metric
            rows (e.g. ``"mwu"``, ``"corr_coef"``, ``"pmotif"``).

    Returns:
        A DataFrame with a two-level index ``("Detection", r"\pmetric")``. Boolean
        cells are converted to 1 / 0.
    """
    metric_name_lookup = {
        'graph module participation ratio': "MPR",
        'degree': "Degree",
        'min normalized anchor hop distance': "HD (min)",
        'max normalized anchor hop distance': "HD (max)",
        'mean normalized anchor hop distance': "HD (mean)",
    }

    def labelled_row(metric, row_flag, label):
        """Create one table row and index it by its display label."""
        row = create_row(metric, dataset, flag=row_flag)
        row[r"\pmetric"] = label
        return row.set_index(r"\pmetric")

    metric_rows = [
        labelled_row(metric, flag, metric_label)
        for metric, metric_label in metric_name_lookup.items()
    ]

    # The motif / anti-motif flags come from the frequency artifact, but
    # get_table_cell still reads the pairwise artifact of some metric. Use the last
    # metric explicitly instead of relying on a loop variable that leaked.
    reference_metric = list(metric_name_lookup)[-1]
    motif_row = labelled_row(reference_metric, "is_motif", "Network Motif")
    antimotif_row = labelled_row(reference_metric, "is_antimotif", "Network Anti-Motif")

    df = pd.concat([motif_row, antimotif_row] + metric_rows)

    df = df.replace(True, 1).replace(False, 0)
    df = df.rename(graphlet_class_to_name, axis=1)

    # Add the outer "Detection" index level; everything after the first two rows
    # belongs to the positional-metric group, however many metrics there are.
    p1 = pd.concat([df.iloc[[0]]], keys=['Motif'], names=["Detection"])
    p2 = pd.concat([df.iloc[[1]]], keys=['Anti-Motif'], names=["Detection"])
    p3 = pd.concat([df.iloc[2:]], keys=[r"\pmotif"], names=["Detection"])

    df = pd.concat([p1, p2, p3])
    df = df.rename(index={'Network Motif': '-', "Network Anti-Motif": "-"})

    return df

In [11]:
def turn_styler_to_latex(styler):
    """Print a LaTeX version of a styled table, including its colour definitions.

    The output of ``styler.to_latex()`` is post-processed as follows:

    * every ``#rrggbb`` colour is printed up front as a ``\\definecolor`` command
      (sorted, so the output is reproducible),
    * ``\\background-color#rrggbb`` / ``\\color#rrggbb`` are rewritten to
      ``\\cellcolor{rrggbb}`` / ``\\color{rrggbb}``,
    * a vertical rule is inserted after the two index columns,
    * ``\\hline`` is added after the header row and after the Motif / Anti-Motif rows,
    * the tabular is wrapped in an ``adjustbox`` limited to ``\\textwidth``.

    Args:
        styler: Object providing ``to_latex()`` that returns the LaTeX source as a
            string (e.g. a pandas ``Styler``).

    Returns:
        None. The colour definitions, an empty line and the LaTeX source are
        printed to stdout.
    """
    import re  # local import: the notebook has no top-level import of `re`

    text = styler.to_latex()

    # Extract colours. Only '#' followed by six hex digits counts, so a literal '#'
    # elsewhere in the text (e.g. in the caption) does not yield a bogus definition.
    colors = sorted(set(re.findall(r"#([0-9a-fA-F]{6})", text)))
    print("\n".join(r"\definecolor{" + c + "}{HTML}{" + c + "}" for c in colors))
    print()

    # Replace commands
    for color in colors:
        text = text.replace(r"\background-color#" + color, r"\cellcolor{" + color + "}")
        text = text.replace(r"\color#" + color, r"\color{" + color + "}")

    # Add vertical lines
    text = text.replace(r"begin{tabular}{llr", "begin{tabular}{ll|r")

    # Add horizontal lines. "Motif & -" also matches the "Anti-Motif & -" row.
    rule = "\n" + r"\hline" + "\n"
    ruled_lines = []
    for line in text.split("\n"):
        if (r"Detection & \pmetric" in line) or ("Motif & -" in line):
            line += rule
        ruled_lines.append(line)
    text = "\n".join(ruled_lines)

    # Add scaling and centering
    text = text.replace(r"\begin{tabular}", r"\begin{adjustbox}{max width=\textwidth}" + "\n" + r"\begin{tabular}")
    text = text.replace(r"\end{tabular}", r"\end{tabular}" + "\n" + r"\end{adjustbox}")

    print(text)

In [12]:

# MWU overview of the selected dataset, rendered as a green heat map
df = get_dataframe(DATASET, "mwu")

styler = (
    pd.io.formats.style.Styler(df)
    .background_gradient(cmap='Greens', axis=None)
    .set_caption(f"{dataset_label_lookup[DATASET]}, MWU")
)

styler

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,1,0,0,0,1,0,1,1
Anti-Motif,-,0,1,1,1,0,1,0,0
\pmotif,MPR,1,1,1,1,1,1,1,1
\pmotif,Degree,1,1,1,1,1,1,1,1
\pmotif,HD (min),0,1,1,1,1,1,1,1
\pmotif,HD (max),1,1,1,1,1,1,1,1
\pmotif,HD (mean),1,1,1,1,1,1,1,0


In [13]:

# Effect-size (correlation coefficient) overview of the selected dataset
df = get_dataframe(DATASET, "corr_coef")

styler = (
    pd.io.formats.style.Styler(df)
    .background_gradient(cmap='Greens', axis=None)
    .set_caption(f"{dataset_label_lookup[DATASET]}, corr_coef")
)

styler

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,1,0,0,0,1,0,1,1
Anti-Motif,-,0,1,1,1,0,1,0,0
\pmotif,MPR,0,0,0,0,0,0,0,0
\pmotif,Degree,0,0,0,0,0,0,0,0
\pmotif,HD (min),0,0,0,0,0,0,0,0
\pmotif,HD (max),0,1,1,1,1,1,1,1
\pmotif,HD (mean),0,1,1,1,0,0,0,0


In [14]:

# Combined p-motif overview of the selected dataset; this styler is the one exported to LaTeX
df = get_dataframe(DATASET, "pmotif")

styler = (
    pd.io.formats.style.Styler(df)
    .background_gradient(cmap='Greens', axis=None)
    .set_caption(r"Overview over the motif and \pmotif classifications in the " + f"{dataset_label_lookup[DATASET]} Dataset")
)


def brief_graphlet_name(name):
    """Return the abbreviated column header for the two long graphlet names.

    "Crossed Square" becomes "CSquare" and "Double Crossed Square" becomes
    "DCSquare"; every other value is returned unchanged.
    """
    abbreviations = (
        ("Crossed Square", "CSquare"),
        ("Double Crossed Square", "DCSquare"),
    )
    for full_name, short_name in abbreviations:
        if name == full_name:
            return short_name
    return name

# Shorten the long graphlet names in the column header (axis=1) so the table stays narrow
styler.format_index(brief_graphlet_name, axis=1)
styler

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,CSquare,DCSquare
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,1,0,0,0,1,0,1,1
Anti-Motif,-,0,1,1,1,0,1,0,0
\pmotif,MPR,0,0,0,0,0,0,0,0
\pmotif,Degree,0,0,0,0,0,0,0,0
\pmotif,HD (min),0,0,0,0,0,0,0,0
\pmotif,HD (max),0,1,1,1,1,1,1,1
\pmotif,HD (mean),0,1,1,1,0,0,0,0


In [15]:
# Print the LaTeX source of the table styled in the previous cell
turn_styler_to_latex(styler)

\definecolor{f1f1f1}{HTML}{f1f1f1}
\definecolor{f7fcf5}{HTML}{f7fcf5}
\definecolor{00441b}{HTML}{00441b}
\definecolor{000000}{HTML}{000000}

\begin{table}
\caption{Overview over the motif and \pmotif classifications in the Structural Brain Connectivity Network Dataset}
\begin{adjustbox}{max width=\textwidth}
\begin{tabular}{ll|rrrrrrrr}
 &  & Triangle & 3-Dash & 4-Dash & Fork & Spoon & Square & CSquare & DCSquare \\
Detection & \pmetric &  &  &  &  &  &  &  &  \\
\hline

Motif & - & \cellcolor{00441b} \color{f1f1f1} 1 & \cellcolor{f7fcf5} \color{000000} 0 & \cellcolor{f7fcf5} \color{000000} 0 & \cellcolor{f7fcf5} \color{000000} 0 & \cellcolor{00441b} \color{f1f1f1} 1 & \cellcolor{f7fcf5} \color{000000} 0 & \cellcolor{00441b} \color{f1f1f1} 1 & \cellcolor{00441b} \color{f1f1f1} 1 \\
\hline

Anti-Motif & - & \cellcolor{f7fcf5} \color{000000} 0 & \cellcolor{00441b} \color{f1f1f1} 1 & \cellcolor{00441b} \color{f1f1f1} 1 & \cellcolor{00441b} \color{f1f1f1} 1 & \cellcolor{f7fcf5} \color{0000

human brain: 35/40 significant mwu, 21/40 effect size significant, 21 pmotifs

In [16]:
# Print the full result dict of every size-4 graphlet class for the
# "graph module participation ratio" metric of the selected dataset
for gc in graphlet_classes_from_size(4):
    print(get_table_cell(DATASET, gc, "graph module participation ratio"))

{'z-score': -119.86683659058964, 'is_motif': False, 'is_antimotif': True, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.14614979583132018}
{'z-score': -82.05447861426522, 'is_motif': False, 'is_antimotif': True, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.14255740220336796}
{'z-score': 44.19056784177688, 'is_motif': True, 'is_antimotif': False, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': -1, 'corr_coef_value': -0.0433544884314667}
{'z-score': -49.67207660660679, 'is_motif': False, 'is_antimotif': True, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': 1, 'corr_coef_value': 0.04029926843284813}
{'z-score': 107.10205839761323, 'is_motif': True, 'is_antimotif': False, 'pmotif': False, 'mwu': True, 'corr_coef': False, 'r_value': 1, 'corr_coef_value': 0.05609013699274258}
{'z-score': 386.447559033853, 'is_motif': True, 'is_antimotif': False, 'pmotif': False, 'mwu': True, 'corr_coe

In [17]:
"""Analyse with r sign!"""
# Compare the sign of the mean correlation coefficient ("r_value") between two datasets.
# NOTE(review): BASE_PATH was re-pointed to the enigma results earlier in this notebook,
# and the recorded outputs of get_dataframe(brain, "r_value") and
# get_dataframe(cancer, "r_value") consist solely of 0.6, which is create_row's fallback
# for FileNotFoundError. The all-equal result therefore looks like an artefact of missing
# data rather than a finding — confirm before relying on it.
brain = "human_brain_development_cutoff_0.772.edgelist"
cancer = 'human_cancer_cutoff_0.935.edgelist'

same_df = get_dataframe(brain, "r_value") == get_dataframe(cancer, "r_value")

same_df.replace(True, 1).replace(False, 0).style.background_gradient(axis=None)

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,1,1,1,1,1,1,1,1
Anti-Motif,-,1,1,1,1,1,1,1,1
\pmotif,MPR,1,1,1,1,1,1,1,1
\pmotif,Degree,1,1,1,1,1,1,1,1
\pmotif,HD (min),1,1,1,1,1,1,1,1
\pmotif,HD (max),1,1,1,1,1,1,1,1
\pmotif,HD (mean),1,1,1,1,1,1,1,1


In [18]:
# Sign table of the brain dataset. A table consisting only of 0.6 means create_row
# hit its FileNotFoundError fallback for every cell.
get_dataframe(brain, "r_value")

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
Anti-Motif,-,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,MPR,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,Degree,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,HD (min),0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,HD (max),0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,HD (mean),0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6


In [19]:
# Sign table of the cancer dataset. A table consisting only of 0.6 means create_row
# hit its FileNotFoundError fallback for every cell.
get_dataframe(cancer, "r_value")

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
Anti-Motif,-,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,MPR,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,Degree,0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,HD (min),0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,HD (max),0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6
\pmotif,HD (mean),0.6,0.6,0.6,0.6,0.6,0.6,0.6,0.6


In [20]:
# Difference of the absolute mean correlation coefficients (brain minus cancer).
# NOTE(review): if both tables contain only create_row's fallback values, every
# difference is 0.0 and carries no information — verify that the artifacts were found.
corr_coef_diff_df = (get_dataframe(brain, "corr_coef_value").apply(abs) - get_dataframe(cancer, "corr_coef_value").apply(abs))
corr_coef_diff_df.style.background_gradient("bwr", axis=None)

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Anti-Motif,-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\pmotif,MPR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\pmotif,Degree,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\pmotif,HD (min),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\pmotif,HD (max),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
\pmotif,HD (mean),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Hard-coded values whose origin is not shown in this notebook (presumably copied by
# hand from an earlier run of the correlation-coefficient difference table — verify).
differences = [0.195721,
               0.204417, 0.101640, 0.123024, 0.160043, 0.194265, 0.264475,
               0.237215, 0.272027, 0.210343, 0.183526, 0.235685, 0.268464]
from statistics import mean
# Arithmetic mean of the listed differences
mean(differences)

0.20391115384615385

In [22]:
# Show the (Detection, \pmetric) index tuples that can be used with .loc
corr_coef_diff_df.index

MultiIndex([(     'Motif',         '-'),
            ('Anti-Motif',         '-'),
            (   '\pmotif',       'MPR'),
            (   '\pmotif',    'Degree'),
            (   '\pmotif',  'HD (min)'),
            (   '\pmotif',  'HD (max)'),
            (   '\pmotif', 'HD (mean)')],
           names=['Detection', '\pmetric'])

In [23]:
# Mean difference across all graphlet classes for the MPR row. The raw string avoids
# the invalid escape sequence "\p"; selecting every column first was redundant.
corr_coef_diff_df.loc[(r'\pmotif', 'MPR')].mean()

0.0