Produces a table overview of all instances of a random graph configuration, and a summary across all five instances.

In [1]:
# Pmotif Definition Config
# Effect size: the absolute mean correlation coefficient must exceed this value
CORR_COEF_THRESHOLD = 0.3
# MWU significance: global alpha, divided by the number of usable graphs to get the per-test alpha
GLOBAL_ALPHA = 0.01
# MWU significance: the share of p-values above the per-test alpha must stay below this value
SIG_THRESH = 0.8

# Motif Definition Detection
# A frequency z-score above this value marks a motif, below its negative an anti-motif
Z_SCORE_THRESHOLD = 2.5

In [2]:
from os import listdir
from pathlib import Path
from collections import defaultdict

# Root directory of the analysis output; every entry in it is one analysed dataset
BASE_PATH = Path("/home/timgarrels/Projects/masterthesis/copied_results_data/prelim_analysis/_analysis_out")
datasets = sorted(listdir(BASE_PATH))

# Generated (random) datasets start with a number, e.g. "0_barabasi_albert_graph_m_2"
random_datasets = [name for name in datasets if name[0].isnumeric()]

# Group the random datasets by everything that follows the leading "<number>_"
random_groups = defaultdict(list)
for dataset_name in random_datasets:
    _, _, configuration = dataset_name.partition("_")
    random_groups[configuration].append(dataset_name)
list(random_groups.keys())

['barabasi_albert_graph_m_1',
 'barabasi_albert_graph_m_2',
 'barabasi_albert_graph_m_3',
 'ferdos_renyi_graph_m_2000',
 'scale_free_graph_a_28_b_7_g_02',
 'scale_free_graph_a_35_b_3_g_35',
 'scale_free_graph_a_65_b_1_g_25']

In [3]:
# Caption label for each random graph group. The values are LaTeX snippets that rely on
# custom macros (\barabasi, \erdos, \scale) — presumably defined in the target document; verify.
group_label_lookup = {
    'barabasi_albert_graph_m_1': r"\barabasi $m_1$",
    'barabasi_albert_graph_m_2': r"\barabasi $m_2$",
    'barabasi_albert_graph_m_3': r"\barabasi $m_3$",
    'ferdos_renyi_graph_m_2000': r"\erdos",
    'scale_free_graph_a_28_b_7_g_02': r"\scale{28}",
    'scale_free_graph_a_35_b_3_g_35': r"\scale{35}",
    'scale_free_graph_a_65_b_1_g_25': r"\scale{65}",
}

In [4]:
# Shorthand names for the random graph groups collected in `random_groups`
GROUP1 = "barabasi_albert_graph_m_1"
GROUP2 = "barabasi_albert_graph_m_2"
GROUP3 = "barabasi_albert_graph_m_3"
GROUP4 = "ferdos_renyi_graph_m_2000"
GROUP5 = "scale_free_graph_a_28_b_7_g_02"
GROUP6 = "scale_free_graph_a_65_b_1_g_25"
GROUP7 = "scale_free_graph_a_35_b_3_g_35"

# Group analysed by the following cells; change this to inspect another configuration
GROUP = GROUP2
# Fail early if no dataset of the selected group exists in the analysis output
assert GROUP in random_groups

In [5]:
from pmotif_lib.graphlet_representation import graphlet_classes_from_size, graphlet_class_to_name, get_graphlet_size_from_class

# Readable names of all graphlet classes of size 4
graphlet_classes = [
    graphlet_class_to_name(graphlet_class)
    for graphlet_class in graphlet_classes_from_size(4)
]
graphlet_classes

['4-Dash',
 'Fork',
 'Spoon',
 'Square',
 'Crossed Square',
 'Double Crossed Square']

In [6]:
# Raw size-4 graphlet class identifiers, i.e. the values graphlet_class_to_name turns into names
graphlet_classes_from_size(4)

['0110 1001 1000 0100',
 '0111 1000 1000 1000',
 '0111 1010 1100 1000',
 '0110 1001 1001 0110',
 '0111 1011 1100 1100',
 '0111 1011 1101 1110']

In [7]:
def load_artifacts_obj(dataset: str, graphlet_size: int):
    """Print the artifact entries stored for one dataset and graphlet size.

    Exploration helper: it lists `BASE_PATH / dataset / "artifacts" / <graphlet_size>`
    and prints the entry names. Nothing is loaded or returned.

    Raises:
        FileNotFoundError: If the artifact directory does not exist.
    """
    print(listdir(BASE_PATH / dataset / "artifacts" / str(graphlet_size)))

load_artifacts_obj(datasets[0], 3)

['report.html', 'local', 'global', 'meta.json']


In [8]:
import json


def get_table_cell(dataset: str, graphlet_class: str, p_metric_name: str):
    """Evaluate one (dataset, graphlet class, p-metric) combination for the overview table.

    Reads `<class name>_frequency_split.json` and
    `<p_metric_name>/<class name>_pairwise.json` from the dataset's `global`
    artifact directory.

    Returns:
        A dict with the boolean flags `is_motif` and `is_antimotif`. Unless the
        pairwise result reports an error, it also contains `pmotif`, `mwu` and
        `corr_coef`.

    Raises:
        FileNotFoundError: If one of the two JSON files does not exist.
        KeyError: If an expected key is missing in the JSON data.
    """
    size = get_graphlet_size_from_class(graphlet_class)
    class_name = graphlet_class_to_name(graphlet_class)
    global_dir = BASE_PATH / dataset / "artifacts" / str(size) / "global"

    # Frequency based detection: compare the z-score against the threshold
    with open(global_dir / f"{class_name}_frequency_split.json", "r") as frequency_file:
        z_score = json.load(frequency_file)["z-score"]
    cell = {
        "is_motif": z_score > Z_SCORE_THRESHOLD,
        "is_antimotif": z_score < -Z_SCORE_THRESHOLD,
    }

    with open(global_dir / p_metric_name / f"{class_name}_pairwise.json", "r") as pairwise_file:
        pairwise = json.load(pairwise_file)

    if pairwise["error"] is not None:
        # No pairwise statistics available, so only the motif flags are reported
        return cell

    # Effect size criterion
    corr_coef_relevant = abs(pairwise["mean_corr_coef"]) > CORR_COEF_THRESHOLD

    # MWU criterion: count the p-values that lie ABOVE the per-test alpha; the
    # criterion holds while their share of the usable graphs stays below SIG_THRESH.
    graph_count = pairwise["real_total"]
    local_alpha = GLOBAL_ALPHA / graph_count
    above_alpha_count = sum(1 for p_value in pairwise["p-values"] if p_value > local_alpha)
    mwu_relevant = above_alpha_count / graph_count < SIG_THRESH

    # TODO: P-value significance

    cell["pmotif"] = mwu_relevant and corr_coef_relevant
    cell["mwu"] = mwu_relevant
    cell["corr_coef"] = corr_coef_relevant
    return cell

In [9]:
import pandas as pd

def create_row(pmetric: str, dataset: str, flag=None):
    """Collect the results of one p-metric on one dataset in a single-row dataframe.

    Args:
        pmetric: Name of the p-metric (sub-directory of the `global` artifacts).
        dataset: Name of the dataset; becomes the row index (`name`).
        flag: Key of the `get_table_cell` result to store per graphlet class.
            If falsy, the whole result dict is stored.

    Returns:
        A dataframe with one row, indexed by `name`, and one column per
        graphlet class of size 3 and 4.
    """
    # Sentinel values for cells without a result. They are neither 0 nor 1.
    not_processed = 0.6  # artifact file missing: dataset was not processed
    not_available = 0.3  # KeyError: graphlet class missing or `flag` not in the result

    # The row label does not depend on the graphlet class, so set it once up front
    row = {"name": dataset}
    for gc in graphlet_classes_from_size(3) + graphlet_classes_from_size(4):
        try:
            cell = get_table_cell(dataset, gc, pmetric)
            row[gc] = cell[flag] if flag else cell
        except FileNotFoundError:
            row[gc] = not_processed
        except KeyError:
            row[gc] = not_available

    return pd.DataFrame([row]).set_index("name")

In [10]:
def create_dataframe(group: str):
    """Build the detailed result table for all datasets of one random graph group.

    For every dataset in `random_groups[group]` the table holds one row for
    network motif detection, one for anti-motif detection and one per p-metric.
    Booleans are converted to 1 / 0 and the columns are renamed to readable
    graphlet names.

    Returns:
        A dataframe indexed by (dataset name, p-metric label).
    """
    metric_name_lookup = {
        'graph module participation ratio': "MPR",
        'degree': "Degree",
        'min normalized anchor hop distance': "HD (min)",
        'max normalized anchor hop distance': "HD (max)",
        'mean normalized anchor hop distance': "HD (mean)",
    }
    # create_row needs some p-metric even for the motif / anti-motif rows. This used to
    # be the loop variable left over from the metric loop, i.e. the last metric; keep
    # that choice, but make it explicit.
    reference_metric = list(metric_name_lookup)[-1]

    def labelled_row(metric, dataset, flag, label):
        # One table row for `dataset`, indexed by its p-metric label
        row = create_row(metric, dataset, flag=flag)
        row[r"\pmetric"] = label
        return row.set_index(r"\pmetric")

    dfs = []
    for d in random_groups[group]:
        rows = [
            labelled_row(reference_metric, d, "is_motif", "Network Motif"),
            labelled_row(reference_metric, d, "is_antimotif", "Network Anti-Motif"),
        ]
        rows += [
            labelled_row(metric, d, "pmotif", metric_label)
            for metric, metric_label in metric_name_lookup.items()
        ]
        dfs.append(pd.concat(rows))

    df = pd.concat(dfs, keys=random_groups[group])
    df = df.replace(True, 1).replace(False, 0)
    df = df.rename(graphlet_class_to_name, axis=1)
    return df

In [11]:
def turn_styler_to_latex(styler):
    """Print `styler` as a LaTeX table, preceded by the color definitions it needs.

    The output of `styler.to_latex()` is post-processed: CSS-style color commands
    become `\\cellcolor` / `\\color`, the table gets `[htb]` placement, a vertical
    line after the second column, `\\hline`s after the header and the motif rows,
    and an `adjustbox` wrapper around the tabular.

    Args:
        styler: Object with a `to_latex()` method that returns the table source.

    Returns:
        None. The color definitions and the table are printed.
    """
    text = styler.to_latex()

    # Colors appear as "#rrggbb". Accept only six hex digits, so a stray "#"
    # (e.g. in a caption) does not produce a bogus definition, and sort the
    # result so the printed order is stable between runs.
    hex_digits = set("0123456789abcdefABCDEF")
    colors = sorted({
        fragment[:6]
        for fragment in text.split("#")[1:]
        if len(fragment) >= 6 and set(fragment[:6]) <= hex_digits
    })
    print("\n".join(r"\definecolor{" + c + "}{HTML}{" + c + "}" for c in colors))
    print()

    # Add table placement
    text = text.replace(r"\begin{table}", r"\begin{table}[htb]")

    # Replace the CSS-style commands with LaTeX commands
    for c in colors:
        text = text.replace(r"\background-color#" + c, r"\cellcolor{" + c + "}")
        text = text.replace(r"\color#" + c, r"\color{" + c + "}")

    # Add a vertical line after the two index columns
    text = text.replace(r"begin{tabular}{lll", "begin{tabular}{ll|l")

    # Add horizontal lines after the header row and the motif / anti-motif rows
    hline_markers = (r"Detection & \pmetric", "Motif & -", "Anti-Motif & -")
    hline = "\n" + r"\hline" + "\n"
    text = "\n".join(
        line + hline if any(marker in line for marker in hline_markers) else line
        for line in text.split("\n")
    )

    # Add scaling and centering
    text = text.replace(r"\begin{tabular}", r"\begin{adjustbox}{max width=\textwidth}" + "\n" + r"\begin{tabular}")
    text = text.replace(r"\end{tabular}", r"\end{tabular}" + "\n" + r"\end{adjustbox}")

    print(text)

In [12]:
from pandas.io.formats import style as pd_style

# Detailed table of the selected group: one block of rows per graph instance
df = create_dataframe(GROUP)

# Optional column subsets for individual groups (disabled)
# df = df[["3-Dash", "4-Dash", "Fork"]]  # m1
# df = df[['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square', 'Crossed Square',]]  # m2
# df = df[['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square']]  # erdos

# Color the cells by value and use the group name as caption
styler = pd_style.Styler(df)
styler.background_gradient(cmap='Greens', axis=None)
styler.set_caption(f"{GROUP}")

styler

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Unnamed: 0_level_1,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0_barabasi_albert_graph_m_2,Network Motif,0,1,0,1,0,0,0,0.0
0_barabasi_albert_graph_m_2,Network Anti-Motif,1,0,1,0,1,1,0,0.0
0_barabasi_albert_graph_m_2,MPR,0,0,0,0,0,0,0,0.3
0_barabasi_albert_graph_m_2,Degree,0,0,0,0,0,0,0,0.3
0_barabasi_albert_graph_m_2,HD (min),0,0,0,0,0,0,0,0.3
0_barabasi_albert_graph_m_2,HD (max),1,0,0,1,1,0,1,0.3
0_barabasi_albert_graph_m_2,HD (mean),1,1,1,1,1,1,0,0.3
1_barabasi_albert_graph_m_2,Network Motif,0,1,0,1,0,0,0,0.0
1_barabasi_albert_graph_m_2,Network Anti-Motif,1,0,0,0,1,1,0,0.0
1_barabasi_albert_graph_m_2,MPR,0,1,0,1,0,0,0,0.3


In [13]:
def brief_graphlet_name(name):
    """Return the abbreviated table header for long graphlet names, else `name` itself."""
    abbreviations = (
        ("Crossed Square", "CSquare"),
        ("Double Crossed Square", "DCSquare"),
    )
    for long_name, short_name in abbreviations:
        if name == long_name:
            return short_name
    return name

In [14]:
# ORIGINAL IDEA
# Per detection row and graphlet class: sum over the graph instances, divided by five
detection_share = df.groupby(r"\pmetric", sort=False).agg(lambda x: sum(x) / 5)

# Add an outer "Detection" index level; entries are (label, row positions)
sections = (
    ('Motif', [0]),
    ('Anti-Motif', [1]),
    (r"\pmotif", [2, 3, 4, 5, 6]),
)
sum_df = pd.concat([
    pd.concat([detection_share.iloc[positions]], keys=[label], names=["Detection"])
    for label, positions in sections
])
sum_df = sum_df.rename(index={'Network Motif': '-', "Network Anti-Motif": "-"})

# Column subsets shown for some groups; all other groups keep every column
columns_by_group = {
    "barabasi_albert_graph_m_1": ["3-Dash", "4-Dash", "Fork"],
    "barabasi_albert_graph_m_2": ['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square', 'Crossed Square'],
    "ferdos_renyi_graph_m_2000": ['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square'],
    "scale_free_graph_a_65_b_1_g_25": ['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square', 'Crossed Square'],
}
if GROUP in columns_by_group:
    sum_df = sum_df[columns_by_group[GROUP]]

styler = pd_style.Styler(sum_df)
styler.background_gradient(cmap='Greens', axis=None, vmin=0, vmax=1)

prefix = r"Summary of the motif and \pmotif detection results of the five graphs generated by the "
styler.set_caption(prefix + f"{group_label_lookup[GROUP]} model.")
# Show the share as "<count> / 5"
styler.format(formatter=lambda x: f"{int(x * 5)} / 5")
styler.format_index(brief_graphlet_name, axis=1)

styler

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,CSquare
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Motif,-,0 / 5,3 / 5,0 / 5,2 / 5,0 / 5,0 / 5,0 / 5
Anti-Motif,-,3 / 5,0 / 5,1 / 5,0 / 5,2 / 5,5 / 5,0 / 5
\pmotif,MPR,0 / 5,2 / 5,0 / 5,2 / 5,0 / 5,0 / 5,1 / 5
\pmotif,Degree,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,3 / 5
\pmotif,HD (min),0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5
\pmotif,HD (max),2 / 5,0 / 5,1 / 5,3 / 5,2 / 5,1 / 5,3 / 5
\pmotif,HD (mean),2 / 5,2 / 5,2 / 5,3 / 5,2 / 5,3 / 5,2 / 5


# Visualization

In [15]:
# Testing visualization Possibilities
def get_sum_df(group):
    """Collect, per detection row and graphlet class, the values of all graphs of `group`.

    Every cell of the returned frame is the list of per-graph values taken from
    `create_dataframe(group)`. The rows get an additional outer index level
    named "Detection".
    """
    per_metric = create_dataframe(group).groupby(r"\pmetric", sort=False).agg(list)

    # Entries are (outer index label, row positions in `per_metric`)
    sections = (
        ('Motif', [0]),
        ('Anti-Motif', [1]),
        (r"\pmotif", [2, 3, 4, 5, 6]),
    )
    labelled_parts = [
        pd.concat([per_metric.iloc[positions]], keys=[label], names=["Detection"])
        for label, positions in sections
    ]

    # The motif rows have no p-metric, so their inner index label becomes "-"
    return pd.concat(labelled_parts).rename(
        index={'Network Motif': '-', "Network Anti-Motif": "-"}
    )

In [16]:
def add_text_styler(group, filter_cols=None):
    """Create a Styler for the summary table of `group` with "<hits> / <graphs>" cells.

    Args:
        group: Key of the random graph group; selects both data and caption label.
        filter_cols: Optional list of graphlet columns to keep.

    Returns:
        A Styler whose cells read "<hits> / <valid graphs>", or "-" if no graph
        yielded a 0 / 1 result. The column headers are abbreviated.
    """
    sum_df = get_sum_df(group)
    if filter_cols:
        sum_df = sum_df[filter_cols]

    text_styler = pd_style.Styler(sum_df)
    prefix = r"Summary of the motif and \pmotif detection results of the five graphs generated by the "
    # Use the `group` argument, not the notebook-global GROUP, so caption and data always match
    text_styler.set_caption(prefix + f"{group_label_lookup[group]} model.")

    def text_format(l):
        # Only 0 / 1 entries are detection results; other values are placeholders
        valids = [e for e in l if e == 1 or e == 0]
        if len(valids) == 0:
            return "-"

        denominator = str(len(valids))
        if len(valids) != 5:
            # Fewer than five graphs contributed: highlight the deviating denominator
            denominator = r"\textbf{" + denominator + "}"

        return f"{int(sum(valids))} / {denominator}"

    text_styler.format(formatter=text_format)
    text_styler.format_index(brief_graphlet_name, axis=1)
    return text_styler

In [17]:
"""Adds the color styler"""
from matplotlib import colormaps, colors
import numpy as np

def fraction_creation(l):
    """Return the share of ones among the 0 / 1 entries of `l`, or None if there are none.

    Entries that are neither 0 nor 1 are ignored.
    """
    binary_entries = [entry for entry in l if entry in (1, 0)]
    if not binary_entries:
        return None
    hits = int(sum(binary_entries))
    return hits / len(binary_entries)

def background_color(s, cmap='PuBu', low=0, high=0, filter_cols=None):
    """Compute a `background-color` CSS declaration for every cell of a summary table.

    Intended for `Styler.apply(background_color, axis=None, ...)`.

    Args:
        s: Dataframe whose cells are lists of per-graph results (see `get_sum_df`).
        cmap: Name of the matplotlib colormap.
        low: Amount by which the color scale is extended below 0.
        high: Amount by which the color scale is extended above 1.
        filter_cols: Optional column subset to restrict `s` to.

    Returns:
        A numpy array of CSS strings with the shape of the (filtered) table.
        Cells without a fraction are colored light gray.
    """
    # Work on the data that is being styled rather than re-reading the results
    # for the notebook-global GROUP, which may not match the styled table.
    frame = s[filter_cols] if filter_cols else s
    fractions = frame.applymap(fraction_creation).fillna(np.nan)

    # Fractions lie in [0, 1] by construction, so the color scale is fixed to that
    # range instead of being derived from an unrelated global dataframe.
    norm = colors.Normalize(0 - low, 1 + high)
    normed = norm(fractions.to_numpy(dtype=float))

    def cell_css(value):
        if np.isnan(value):
            hex_value = "#d3d3d3"
        else:
            hex_value = colors.rgb2hex(colormaps[cmap](value))
        return f'background-color: {hex_value}'

    return np.array([[cell_css(value) for value in row] for row in normed])

def text_color(s, filter_cols=None):
    """Compute a `color` CSS declaration for every cell of a summary table.

    Intended for `Styler.apply(text_color, axis=None, ...)`.

    Args:
        s: Dataframe whose cells are lists of per-graph results (see `get_sum_df`).
        filter_cols: Optional column subset to restrict `s` to.

    Returns:
        A numpy array of CSS strings with the shape of the (filtered) table:
        white text for fractions above 0.5, black text otherwise.
    """
    # Work on the data that is being styled rather than re-reading the results
    # for the notebook-global GROUP, which may not match the styled table.
    frame = s[filter_cols] if filter_cols else s
    fractions = frame.applymap(fraction_creation).fillna(np.nan).to_numpy(dtype=float)

    def get_color(x):
        if np.isnan(x):
            # No fraction available: keep black text
            return "color: #000000"
        if x > 0.5:
            return "color: #ffffff"
        return "color: #000000"

    # Named `css` so the imported matplotlib `colors` module is not shadowed
    css = np.array([
        [get_color(x) for x in row]
        for row in fractions
    ])
    return css

In [18]:
# Select the group to summarize
GROUP = GROUP2
# None keeps every graphlet column; a list of column names restricts the table
filter_cols = None
# filter_cols = ["Triangle", "3-Dash"]
text_styler = add_text_styler(GROUP, filter_cols=filter_cols)

# Add the cell background colors and the matching text colors
text_styler.apply(background_color, cmap='Greens', axis=None, filter_cols=filter_cols)
text_styler.apply(text_color, axis=None, filter_cols=filter_cols)

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,CSquare,DCSquare
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Motif,-,0 / 5,3 / 5,0 / 5,2 / 5,0 / 5,0 / 5,0 / 5,0 / 5
Anti-Motif,-,3 / 5,0 / 5,1 / 5,0 / 5,2 / 5,5 / 5,0 / 5,0 / 5
\pmotif,MPR,0 / 5,2 / 5,0 / 5,2 / 5,0 / 5,0 / 5,1 / 5,-
\pmotif,Degree,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,3 / 5,-
\pmotif,HD (min),0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,0 / 5,-
\pmotif,HD (max),2 / 5,0 / 5,1 / 5,3 / 5,2 / 5,1 / 5,3 / 5,-
\pmotif,HD (mean),2 / 5,2 / 5,2 / 5,3 / 5,2 / 5,3 / 5,2 / 5,-


In [19]:
# Print the color definitions and the LaTeX source of the styled summary table
turn_styler_to_latex(text_styler)

\definecolor{f7fcf5}{HTML}{f7fcf5}
\definecolor{4bb062}{HTML}{4bb062}
\definecolor{00441b}{HTML}{00441b}
\definecolor{ffffff}{HTML}{ffffff}
\definecolor{d3eecd}{HTML}{d3eecd}
\definecolor{000000}{HTML}{000000}
\definecolor{98d594}{HTML}{98d594}
\definecolor{d3d3d3}{HTML}{d3d3d3}

\begin{table}[htb]
\caption{Summary of the motif and \pmotif detection results of the five graphs generated by the \barabasi $m_2$ model.}
\begin{adjustbox}{max width=\textwidth}
\begin{tabular}{ll|llllllll}
 &  & Triangle & 3-Dash & 4-Dash & Fork & Spoon & Square & CSquare & DCSquare \\
Detection & \pmetric &  &  &  &  &  &  &  &  \\
\hline

Motif & - & \cellcolor{f7fcf5} \color{000000} 0 / 5 & \cellcolor{4bb062} \color{ffffff} 3 / 5 & \cellcolor{f7fcf5} \color{000000} 0 / 5 & \cellcolor{98d594} \color{000000} 2 / 5 & \cellcolor{f7fcf5} \color{000000} 0 / 5 & \cellcolor{f7fcf5} \color{000000} 0 / 5 & \cellcolor{f7fcf5} \color{000000} 0 / 5 & \cellcolor{f7fcf5} \color{000000} 0 / 5 \\
\hline

Anti-Motif & - & 