Produces a tabel overview over all instances of a random graph configuration, and a summary of all five! 

In [1]:
# Pmotif Defintion Config
# Effect Size
CORR_COEF_THRESHOLD = 0.3
# MWU significance
GLOBAL_ALPHA = 0.01
SIG_THRESH = 0.8

# Motif Definition Detection
Z_SCORE_THRESHOLD = 2.5

In [2]:
from os import listdir
from pathlib import Path
from collections import defaultdict

BASE_PATH = Path("/home/timgarrels/masterthesis/copied_results_data/prelim_analysis/_analysis_out")
datasets = sorted(listdir(BASE_PATH))

random_datasets = [d for d in datasets if d[0].isnumeric()]
random_datasets

random_groups = defaultdict(list)
for r in random_datasets:
    number, *group = r.split("_")
    group = "_".join(group)
    random_groups[group].append(r)
list(random_groups.keys())

['barabasi_albert_graph_m_1',
 'barabasi_albert_graph_m_2',
 'barabasi_albert_graph_m_3',
 'ferdos_renyi_graph_m_2000',
 'scale_free_graph_a_28_b_7_g_02',
 'scale_free_graph_a_35_b_3_g_35',
 'scale_free_graph_a_65_b_1_g_25']

In [3]:
group_label_lookup = {
    'barabasi_albert_graph_m_1': r"\barabasi $m_1$",
    'barabasi_albert_graph_m_2': r"\barabasi $m_2$",
    'barabasi_albert_graph_m_3': r"\barabasi $m_3$",
    'ferdos_renyi_graph_m_2000': r"\erdos",
    'scale_free_graph_a_28_b_7_g_02': r"\scale{28}",
    'scale_free_graph_a_35_b_3_g_35': r"\scale{35}",
    'scale_free_graph_a_65_b_1_g_25': r"\scale{65}",
}

In [4]:
GROUP1 = "barabasi_albert_graph_m_1"
GROUP2 = "barabasi_albert_graph_m_2"
GROUP3 = "barabasi_albert_graph_m_3"
GROUP4 = "ferdos_renyi_graph_m_2000"
GROUP5 = "scale_free_graph_a_28_b_7_g_02"
GROUP6 = "scale_free_graph_a_65_b_1_g_25"
GROUP7 = "scale_free_graph_a_35_b_3_g_35"

GROUP = GROUP1
assert GROUP in random_groups

In [5]:
from pmotif_lib.graphlet_representation import graphlet_classes_from_size, graphlet_class_to_name, get_graphlet_size_from_class

graphlet_classes = list(map(graphlet_class_to_name, graphlet_classes_from_size(4)))
graphlet_classes

['4-Dash',
 'Fork',
 'Spoon',
 'Square',
 'Crossed Square',
 'Double Crossed Square']

In [6]:
graphlet_classes_from_size(4)

['0110 1001 1000 0100',
 '0111 1000 1000 1000',
 '0111 1010 1100 1000',
 '0110 1001 1001 0110',
 '0111 1011 1100 1100',
 '0111 1011 1101 1110']

In [7]:
def load_artifacts_obj(dataset: str, graphlet_size: int):
    print(listdir(BASE_PATH / dataset / "artifacts" / str(graphlet_size)))
     
    motifs = []

load_artifacts_obj(datasets[0], 3)

['report.html', 'local', 'global', 'meta.json']


In [8]:
import json


def get_table_cell(dataset: str, graphlet_class: str, p_metric_name: str):
    """Produces the cell input for the overview table"""

    graphlet_size = get_graphlet_size_from_class(graphlet_class)
    graphlet_class_name = graphlet_class_to_name(graphlet_class)
    base_path = BASE_PATH / dataset / "artifacts" / str(graphlet_size) / "global"
        
    with open(base_path / f"{graphlet_class_name}_frequency_split.json", "r") as f:
        data = json.load(f)
        z_score = data["z-score"]
    is_motif = z_score > Z_SCORE_THRESHOLD
    is_antimotif = z_score < -Z_SCORE_THRESHOLD
    
    pmetric_dir = base_path / p_metric_name
    with open(pmetric_dir / f"{graphlet_class_name}_pairwise.json", "r") as f:
        pairwise_data = json.load(f)
        
    if pairwise_data["error"] is not None:
        return {"is_motif": is_motif, "is_antimotif": is_antimotif}

    corr_coef_relevant = abs(pairwise_data["mean_corr_coef"]) > CORR_COEF_THRESHOLD
    
    usable_graphs = pairwise_data["real_total"]
    local_alpha = GLOBAL_ALPHA / usable_graphs
    relevant_p_values = [p for p in pairwise_data["p-values"] if p > local_alpha]
    
    percentage_relevant = len(relevant_p_values) / usable_graphs
    mwu_relevant = percentage_relevant < SIG_THRESH
    
    # TODO: P-value significance
    
    
    return {"is_motif": is_motif, "is_antimotif": is_antimotif, "pmotif": mwu_relevant and corr_coef_relevant, "mwu": mwu_relevant, "corr_coef": corr_coef_relevant}

In [9]:
import pandas as pd

def create_row(pmetric: str, dataset: str, flag=None):
    """Collects the data of one pmetric and dataset in a dataframe"""
    graphlet_classes = graphlet_classes_from_size(3) + graphlet_classes_from_size(4)
    
    row = {}
    for gc in graphlet_classes:
        try:
            if flag:
                row[gc] = get_table_cell(dataset, gc, pmetric)[flag]
            else:
                row[gc] = get_table_cell(dataset, gc, pmetric)
        except FileNotFoundError:
            # Dataset was not processed
            row[gc] = 0.6 # "Failed to process"
        except KeyError:
            # Graphlet Class missing
            row[gc] = 0.3

        row["name"] = dataset
    
    df = pd.DataFrame([row])
    df.set_index("name", inplace=True)
    return df

In [10]:
def create_dataframe(group: str):
    metric_name_lookup = {
        'graph module participation ratio': "MPR",
        'degree': "Degree",
        'min normalized anchor hop distance': "HD (min)",
        'max normalized anchor hop distance': "HD (max)",
        'mean normalized anchor hop distance': "HD (mean)",
    }

    dfs = []
    for d in random_groups[group]:

        rows = []
        # Metric Rows
        for metric, metric_label in metric_name_lookup.items():
            row = create_row(metric, d, flag="pmotif")
            row[r"\pmetric"] = metric_label
            row.set_index(r"\pmetric", inplace=True)
            rows.append(row)

        # Motif Row
        motif_row = create_row(metric, d, flag="is_motif")
        motif_row[r"\pmetric"] = "Network Motif"
        motif_row.set_index(r"\pmetric", inplace=True)

        # Anti Motif Row
        antimotif_row = create_row(metric, d, flag="is_antimotif")
        antimotif_row[r"\pmetric"] = "Network Anti-Motif"
        antimotif_row.set_index(r"\pmetric", inplace=True)

        rows = [motif_row, antimotif_row] + rows

        df = pd.concat(rows)
        dfs.append(df)


    df = pd.concat(dfs, keys=random_groups[group])
    df = df.replace(True, 1).replace(False, 0)
    df = df.rename(graphlet_class_to_name, axis=1)
    return df

In [11]:
def turn_styler_to_latex(styler):
    text = styler.to_latex()
    # extract colors
    parts = text.split("#")
    colors = {p[:6] for p in parts[1:]}
    color_definitions = [
        r"\definecolor{" + c + "}{HTML}{" + c + "}"
        for c in colors
    ]
    print("\n".join(color_definitions))
    print()
    
    # Replace commands
    for c in colors:
        text = text.replace(r"\background-color#" + c, r"\cellcolor{" + c + "}")
        text = text.replace(r"\color#" + c, r"\color{" + c + "}")
        
    # Add vertical lines
    text = text.replace(r"begin{tabular}{llr", "begin{tabular}{ll|r")
    # Add horizontal lines
    lines = text.split("\n")
    addadge = "\n" + r"\hline" + "\n"
    
    ne_lines = []
    for l in lines:
        if (r"Detection & \pmetric" in l) or ("Motif & -" in l) or ("Anti-Motif & -" in l):
            l += addadge
        ne_lines.append(l)
    text = "\n".join(ne_lines)
    
    # Add scaling and centering
    text = text.replace(r"\begin{tabular}", r"\begin{adjustbox}{max width=\textwidth}" +"\n" + r"\begin{tabular}")
    text = text.replace(r"\end{tabular}", r"\end{tabular}" + "\n" + r"\end{adjustbox}")
    
    print(text)

In [12]:
from pandas.io.formats import style as pd_style

df = create_dataframe(GROUP)

# df = df[["3-Dash", "4-Dash", "Fork"]]  # m1
# df = df[['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square', 'Crossed Square',]]  # m2
# df = df[['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square']]  # erdos

styler = pd_style.Styler(df)
styler.background_gradient(cmap='Greens', axis=None)
styler.set_caption(f"{GROUP}")

styler

Unnamed: 0_level_0,Unnamed: 1_level_0,Triangle,3-Dash,4-Dash,Fork,Spoon,Square,Crossed Square,Double Crossed Square
Unnamed: 0_level_1,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0_barabasi_albert_graph_m_1,Network Motif,0.0,1,0,1,0.0,0.0,0.0,0.0
0_barabasi_albert_graph_m_1,Network Anti-Motif,1.0,0,0,0,1.0,1.0,0.0,0.0
0_barabasi_albert_graph_m_1,MPR,0.3,1,1,1,0.3,0.3,0.3,0.3
0_barabasi_albert_graph_m_1,Degree,0.3,0,0,0,0.3,0.3,0.3,0.3
0_barabasi_albert_graph_m_1,HD (min),0.3,1,0,0,0.3,0.3,0.3,0.3
0_barabasi_albert_graph_m_1,HD (max),0.3,0,1,1,0.3,0.3,0.3,0.3
0_barabasi_albert_graph_m_1,HD (mean),0.3,0,0,0,0.3,0.3,0.3,0.3
1_barabasi_albert_graph_m_1,Network Motif,0.0,1,0,1,0.0,0.0,0.0,0.0
1_barabasi_albert_graph_m_1,Network Anti-Motif,1.0,0,0,0,1.0,1.0,0.0,0.0
1_barabasi_albert_graph_m_1,MPR,0.3,1,1,1,0.3,0.3,0.3,0.3


In [13]:
def brief_graphlet_name(name):
    if name == "Crossed Square":
        return "CSquare"
    if name == "Double Crossed Square":
        return "DCSquare"
    return name

In [14]:
sum_df = df.groupby(r"\pmetric", sort=False).agg(lambda x: sum(x) / 5)

p1 = pd.concat([sum_df.iloc[[0]]], keys=['Motif'], names=["Detection"])
p2 = pd.concat([sum_df.iloc[[1]]], keys=['Anti-Motif'], names=["Detection"])
p3 = pd.concat([sum_df.iloc[[2, 3, 4, 5, 6]]], keys=[r"\pmotif"], names=["Detection"])

sum_df = pd.concat([p1, p2, p3])
sum_df = sum_df.rename(index={'Network Motif': '-', "Network Anti-Motif": "-"})

if 1:
    if GROUP == "barabasi_albert_graph_m_1":
        sum_df = sum_df[["3-Dash", "4-Dash", "Fork"]]
    elif GROUP == "barabasi_albert_graph_m_2":
        sum_df = sum_df[['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square', 'Crossed Square',]]
    elif GROUP == "ferdos_renyi_graph_m_2000":
        sum_df = sum_df[['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square']]
    elif GROUP == "scale_free_graph_a_65_b_1_g_25":
        sum_df = sum_df[['Triangle', '3-Dash', '4-Dash', 'Fork', 'Spoon', 'Square', 'Crossed Square',]]

styler = pd_style.Styler(sum_df)
styler.background_gradient(cmap='Greens', axis=None, vmin=0, vmax=1)

prefix = r"Summary of the motif and \pmotif detection results of the five graphs generated by the "
styler.set_caption(prefix+ f"{group_label_lookup[GROUP]} model.")
styler.format(formatter=lambda x: f"{int(x * 5)} / 5")
styler.format_index(brief_graphlet_name, axis=1)

styler

Unnamed: 0_level_0,Unnamed: 1_level_0,3-Dash,4-Dash,Fork
Detection,\pmetric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Motif,-,5 / 5,0 / 5,5 / 5
Anti-Motif,-,0 / 5,3 / 5,0 / 5
\pmotif,MPR,5 / 5,5 / 5,5 / 5
\pmotif,Degree,0 / 5,0 / 5,0 / 5
\pmotif,HD (min),2 / 5,1 / 5,1 / 5
\pmotif,HD (max),0 / 5,1 / 5,2 / 5
\pmotif,HD (mean),0 / 5,1 / 5,3 / 5


In [15]:
turn_styler_to_latex(styler)

\definecolor{f7fcf5}{HTML}{f7fcf5}
\definecolor{000000}{HTML}{000000}
\definecolor{4bb062}{HTML}{4bb062}
\definecolor{f1f1f1}{HTML}{f1f1f1}
\definecolor{d3eecd}{HTML}{d3eecd}
\definecolor{98d594}{HTML}{98d594}
\definecolor{00441b}{HTML}{00441b}

\begin{table}
\caption{Summary of the motif and \pmotif detection results of the five graphs generated by the \barabasi $m_1$ model.}
\begin{adjustbox}{max width=\textwidth}
\begin{tabular}{ll|rrr}
 &  & 3-Dash & 4-Dash & Fork \\
Detection & \pmetric &  &  &  \\
\hline

Motif & - & \cellcolor{00441b} \color{f1f1f1} 5 / 5 & \cellcolor{f7fcf5} \color{000000} 0 / 5 & \cellcolor{00441b} \color{f1f1f1} 5 / 5 \\
\hline

Anti-Motif & - & \cellcolor{f7fcf5} \color{000000} 0 / 5 & \cellcolor{4bb062} \color{f1f1f1} 3 / 5 & \cellcolor{f7fcf5} \color{000000} 0 / 5 \\
\hline

\multirow[c]{5}{*}{\pmotif} & MPR & \cellcolor{00441b} \color{f1f1f1} 5 / 5 & \cellcolor{00441b} \color{f1f1f1} 5 / 5 & \cellcolor{00441b} \color{f1f1f1} 5 / 5 \\
 & Degree & \cellcolo

# m_3Specifics

In [16]:
# Only 3-Graphlets
styler = pd_style.Styler(sum_df[["Triangle", "3-Dash"]])
styler.background_gradient(cmap='Greens', axis=None, vmin=0, vmax=1)

prefix = r"Summary of the motif and \pmotif detection results on $3$-graphlets of the five graphs generated by the "
styler.set_caption(prefix+ f"{group_label_lookup[GROUP]} model.")
styler.format(formatter=lambda x: f"{int(x * 5)} / 5")
styler.format_index(brief_graphlet_name, axis=1)

styler

KeyError: "['Triangle'] not in index"

In [None]:
turn_styler_to_latex(styler)

In [None]:
# m3 specialized view
df = create_dataframe(GROUP)
df = df.drop(index=[
    "1_barabasi_albert_graph_m_3",
    "2_barabasi_albert_graph_m_3",
    "3_barabasi_albert_graph_m_3",
])
df = df.drop(columns=["Triangle", "3-Dash"])

sum_df = df.groupby(r"\pmetric", sort=False).agg(lambda x: sum(x) / 2)

p1 = pd.concat([sum_df.iloc[[0]]], keys=['Motif'], names=["Detection"])
p2 = pd.concat([sum_df.iloc[[1]]], keys=['Anti-Motif'], names=["Detection"])
p3 = pd.concat([sum_df.iloc[[2, 3, 4, 5, 6]]], keys=[r"\pmotif"], names=["Detection"])

sum_df = pd.concat([p1, p2, p3])
sum_df = sum_df.rename(index={'Network Motif': '-', "Network Anti-Motif": "-"})

styler = pd_style.Styler(sum_df)
styler.background_gradient(cmap='Greens', axis=None, vmin=0, vmax=1)

prefix = r"Summary of the motif and \pmotif detection results on $4$-graphlets of two of the five graphs generated by the "
styler.set_caption(prefix+ f"{group_label_lookup[GROUP]} model.")
styler.format(formatter=lambda x: f"{int(x * 2)} / 2")
styler.format_index(brief_graphlet_name, axis=1)

styler

In [None]:
turn_styler_to_latex(styler)

# Barabasi Alber M3

Unable to collect data for k=4 for all but 1 version

3-Graphlets: Always Anti-Motifs
Only Pmotifs: Graph Module Participation for both (0_m3), max for Triangle (3_m3)

4-Graphlets, only for 0_m3: only pmotifs are graph module participation ratio, all except Double Crossed Square

# Erdos Renyi m2000

Usually does not contain crossed square and double crossed square instances.
All gaphlets are anti-motifs

The only pmotifs are with
- graph module participation ratio, once for 3-Dash, 4-Dash, Fork (1), and once for Spoon (4)
- degree with Crossed Square (0), mean with Crossed Square (0)

# Barabasi Albert M2
Motifs differ:
- 2 have as Motifs (3-Dash, Fork), rest anti-motif
- 2 have all anti-motif
- 1 has all anti-motif except 3-Dash

Pmotifs:
- None has a double crossed square
- graph module participation ratio: 3 with none, 1 with 3-Dash and Fork, 1 with 3-Dash, Fork and Crossed Square
- degree: 3 with only crossed square, 2 with none
- min: 5 with none
- max: 1 with none, 1 with Fork, Spoon, Crossed-Square, 1 with crossed square, 1 with triangle, 4-Dash, Fork, Square, 1 with Triangle, Fork, Spoon, Crossed Square
- mean: 1 with all except Crossed Square, 1 with all, 1 with only crossed square, 1 with Fork and Square, 1 with none

# Barabasi Albert M1
m1 is a tree, no instances of graphlets with circles possible (only 3-Dash, 4-Dash, Fork)
All five instances compared:

Same Motifs/Anti-Motifs (Motif: 3-Dash, Fork, Anti-Motif:  	4-Dash 	)

Pmotifs
- Graph MOdule Participation Ratio: All three (3-Dash 	4-Dash 	Fork)
- Degree: All three (3-Dash 	4-Dash 	Fork)
- Min: 3 with none, 1 with all, 1 with only 3-Dash pmotif
- Max: 3 with none, 1 with 4-Dash and Fork pmotif, 1 with Fork pmotif
- mean: 2 with none, 1 with 4-Dash and Fork, 2 with fork

