In [1]:
# Pmotif Definition Config
# Effect Size: get_table_cell requires abs(mean_corr_coef) to exceed this value
CORR_COEF_THRESHOLD = 0.3
# MWU significance
# Global alpha; get_table_cell divides it by the number of usable graphs to obtain the per-test alpha
GLOBAL_ALPHA = 0.01
# The share of p-values above the per-test alpha has to stay below this value
SIG_THRESH = 0.8

# Motif Definition Detection: a graphlet class is flagged as motif when abs(z-score) exceeds this value
Z_SCORE_THRESHOLD = 2.5

In [2]:
from os import listdir
from pathlib import Path

# Root directory of the analysis output; each entry name is used as a dataset identifier below.
BASE_PATH = Path("_analysis_out")
# Entry names (not full paths) in lexicographic order.
datasets = sorted(entry.name for entry in BASE_PATH.iterdir())
datasets

['0_barabasi_albert_graph_m_1',
 '0_barabasi_albert_graph_m_2',
 '0_barabasi_albert_graph_m_3',
 '0_ferdos_renyi_graph_m_2000',
 '1_barabasi_albert_graph_m_1',
 '1_barabasi_albert_graph_m_2',
 '1_barabasi_albert_graph_m_3',
 '1_ferdos_renyi_graph_m_2000',
 '2_barabasi_albert_graph_m_1',
 '2_barabasi_albert_graph_m_2',
 '2_barabasi_albert_graph_m_3',
 '2_ferdos_renyi_graph_m_2000',
 '3_barabasi_albert_graph_m_1',
 '3_barabasi_albert_graph_m_2',
 '3_barabasi_albert_graph_m_3',
 '3_ferdos_renyi_graph_m_2000',
 '4_barabasi_albert_graph_m_1',
 '4_barabasi_albert_graph_m_2',
 '4_barabasi_albert_graph_m_3',
 '4_ferdos_renyi_graph_m_2000',
 'human_brain_development_cutoff_0.772.edgelist',
 'human_cancer_cutoff_0.935.edgelist',
 'kaggle_so_tags.edgelist',
 'kaggle_star_wars.edgelist',
 'yeastInter_st.txt']

In [3]:
from pmotif_lib.graphlet_representation import graphlet_classes_from_size, graphlet_class_to_name, get_graphlet_size_from_class

# Display names of all graphlet classes of size 4.
graphlet_classes = [
    graphlet_class_to_name(class_id) for class_id in graphlet_classes_from_size(4)
]
graphlet_classes

['4-Dash',
 'Fork',
 'Spoon',
 'Square',
 'Crossed Square',
 'Double Crossed Square']

In [4]:
# Raw class identifiers for size-4 graphlets; these strings are what get_table_cell
# receives as `graphlet_class` further down.
graphlet_classes_from_size(4)

['0110 1001 1000 0100',
 '0111 1000 1000 1000',
 '0111 1010 1100 1000',
 '0110 1001 1001 0110',
 '0111 1011 1100 1100',
 '0111 1011 1101 1110']

In [5]:
def load_artifacts_obj(dataset: str, graphlet_size: int) -> None:
    """Print the entries of a dataset's artifact directory for one graphlet size.

    Exploratory helper: it only lists ``BASE_PATH / dataset / "artifacts" /
    str(graphlet_size)`` and prints the result; nothing is loaded or returned.

    Args:
        dataset: Name of the dataset directory inside ``BASE_PATH``.
        graphlet_size: Graphlet size whose artifact sub-directory is listed.

    Raises:
        FileNotFoundError: If the artifact directory does not exist.
    """
    artifact_dir = BASE_PATH / dataset / "artifacts" / str(graphlet_size)
    print(listdir(artifact_dir))

# Peek at the artifact folder of the first dataset for graphlet size 3.
load_artifacts_obj(dataset=datasets[0], graphlet_size=3)

['report.html', 'local', 'global', 'meta.json']


In [6]:
import json


def get_table_cell(dataset: str, graphlet_class: str, p_metric_name: str):
    """Evaluate one cell of the overview table.

    Reads two JSON artifacts below
    ``BASE_PATH / dataset / "artifacts" / <graphlet size> / "global"``:
    ``<class name>_frequency_split.json`` (key ``"z-score"``) and
    ``<p_metric_name>/<class name>_pairwise.json`` (keys ``"error"``,
    ``"mean_corr_coef"``, ``"real_total"``, ``"p-values"``).

    Args:
        dataset: Name of the dataset directory inside ``BASE_PATH``.
        graphlet_class: Graphlet class identifier; its size and display name
            are derived through the pmotif_lib helpers.
        p_metric_name: Name of the positional-metric sub-directory.

    Returns:
        ``{"motif": bool}`` when the pairwise artifact carries an error,
        otherwise a dict with the keys ``"motif"``, ``"pmotif"``, ``"mwu"``
        and ``"corr_coef"`` (all booleans).

    Raises:
        FileNotFoundError: If one of the two artifact files is missing.
        KeyError: If an expected key is absent from an artifact.
    """
    size = get_graphlet_size_from_class(graphlet_class)
    class_name = graphlet_class_to_name(graphlet_class)
    global_dir = BASE_PATH / dataset / "artifacts" / str(size) / "global"

    # Classic motif criterion: absolute z-score above the configured threshold.
    frequency_file = global_dir / f"{class_name}_frequency_split.json"
    with open(frequency_file, "r") as handle:
        z_score = json.load(handle)["z-score"]
    result = {"motif": abs(z_score) > Z_SCORE_THRESHOLD}

    pairwise_file = global_dir / p_metric_name / f"{class_name}_pairwise.json"
    with open(pairwise_file, "r") as handle:
        pairwise = json.load(handle)

    # Without usable pairwise statistics only the motif flag can be reported.
    if pairwise["error"] is not None:
        return result

    # Effect size criterion.
    effect_size_ok = abs(pairwise["mean_corr_coef"]) > CORR_COEF_THRESHOLD

    # Per-test alpha: the global alpha divided by the number of usable graphs.
    n_graphs = pairwise["real_total"]
    local_alpha = GLOBAL_ALPHA / n_graphs

    # NOTE(review): this counts the p-values ABOVE the per-test alpha, and the
    # cell passes when their share stays below SIG_THRESH - confirm that this
    # direction (rather than "share of p-values below alpha") is intended.
    n_above_alpha = sum(1 for p in pairwise["p-values"] if p > local_alpha)
    mwu_ok = n_above_alpha / n_graphs < SIG_THRESH

    # TODO: P-value significance

    result.update(
        pmotif=mwu_ok and effect_size_ok,
        mwu=mwu_ok,
        corr_coef=effect_size_ok,
    )
    return result

# Spot check: one graphlet class of the fourth-to-last dataset, using the "degree" metric.
get_table_cell(
    dataset=datasets[-4],
    graphlet_class="0111 1011 1101 1110",
    p_metric_name="degree",
)

{'motif': True, 'pmotif': True, 'mwu': True, 'corr_coef': True}

In [7]:
import pandas as pd

def create_row(pmetric: str, dataset: str, pmotif: bool=True):
    """Build the single-row overview frame of one dataset.

    One column is produced per graphlet class of size 3 and 4. Each cell holds
    1 or 0 for a fulfilled / unfulfilled flag, or a placeholder value when the
    flag could not be determined.

    Args:
        pmetric: Positional metric name forwarded to ``get_table_cell``.
        dataset: Dataset name; it becomes the row's index label.
        pmotif: Report the ``"pmotif"`` flag when true, otherwise ``"motif"``.

    Returns:
        A one-row ``DataFrame`` indexed by ``"name"``.
    """
    # Placeholder cell values, chosen to show up as distinct shades in the
    # gradient-styled table.
    not_processed = 0.6    # artifact file missing: dataset was not processed
    cell_unavailable = 0.3  # graphlet class missing, or the requested flag is absent

    flag = "pmotif" if pmotif else "motif"

    # The name is set once up front, so it exists even without graphlet classes.
    row = {"name": dataset}
    for gc in graphlet_classes_from_size(3) + graphlet_classes_from_size(4):
        try:
            # Store booleans as 0/1 right away so the frame is numeric.
            row[gc] = int(get_table_cell(dataset, gc, pmetric)[flag])
        except FileNotFoundError:
            row[gc] = not_processed
        except KeyError:
            # Also reached when get_table_cell returns only the "motif" flag
            # because the pairwise artifact reported an error.
            row[gc] = cell_unavailable

    return pd.DataFrame([row]).set_index("name")

# Single-row preview of the "degree" metric for one dataset, shaded over the whole frame.
df = create_row(pmetric="degree", dataset="human_cancer_cutoff_0.935.edgelist")
df.style.background_gradient(axis=None, cmap="Blues")

Unnamed: 0_level_0,011 101 110,011 100 100,0110 1001 1000 0100,0111 1000 1000 1000,0111 1010 1100 1000,0110 1001 1001 0110,0111 1011 1100 1100,0111 1011 1101 1110
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
human_cancer_cutoff_0.935.edgelist,0,0,0,0,0,0,0,1


In [8]:
import pandas as pd


def create_table(pmetric: str, pmotif: bool=True):
    """Stack the overview rows of all datasets into one table.

    Args:
        pmetric: Positional metric name forwarded to ``create_row``.
        pmotif: Forwarded to ``create_row``; selects the ``"pmotif"`` flag
            when true, otherwise the ``"motif"`` flag.

    Returns:
        A ``DataFrame`` with one row per entry of the module-level
        ``datasets`` list, in that order.
    """
    table = pd.concat([create_row(pmetric, d, pmotif) for d in datasets])
    # create_row already maps booleans to 0/1; the replacement is kept as a
    # safeguard in case a row still carries boolean cells.
    return table.replace(False, 0).replace(True, 1)

In [9]:
# Positional metrics for which overview tables can be built.
metrics = [
    "graph module participation ratio",
    "degree",
    "min normalized anchor hop distance",
    "max normalized anchor hop distance",
    "mean normalized anchor hop distance",
]

# Overview table for the metric at index 1 ("degree").
df = create_table(metrics[1])

# Shade over the whole frame (axis=None) and caption the table with the metric name.
styler = (
    df.style
    .background_gradient(cmap="Reds", axis=None)
    .set_caption(metrics[1])
)

styler

Unnamed: 0_level_0,011 101 110,011 100 100,0110 1001 1000 0100,0111 1000 1000 1000,0111 1010 1100 1000,0110 1001 1001 0110,0111 1011 1100 1100,0111 1011 1101 1110
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0_barabasi_albert_graph_m_1,0.3,0,0.0,0.0,0.3,0.3,0.3,0.3
0_barabasi_albert_graph_m_2,0.0,0,0.0,0.0,0.0,0.0,0.0,0.3
0_barabasi_albert_graph_m_3,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
0_ferdos_renyi_graph_m_2000,0.0,0,0.6,0.6,0.6,0.6,0.6,0.6
1_barabasi_albert_graph_m_1,0.3,0,0.0,0.0,0.3,0.3,0.3,0.3
1_barabasi_albert_graph_m_2,0.0,0,0.0,0.0,0.0,0.0,0.0,0.3
1_barabasi_albert_graph_m_3,0.0,0,0.6,0.6,0.6,0.6,0.6,0.6
1_ferdos_renyi_graph_m_2000,0.0,0,0.0,0.0,0.0,0.0,0.3,0.3
2_barabasi_albert_graph_m_1,0.3,0,0.0,0.0,0.3,0.3,0.3,0.3
2_barabasi_albert_graph_m_2,0.0,0,0.0,0.0,0.0,0.0,1.0,0.3


{'0_ferdos_renyi_graph_m_2000',
 '1_barabasi_albert_graph_m_1',
 '1_barabasi_albert_graph_m_2',
 '1_barabasi_albert_graph_m_3',
 '1_ferdos_renyi_graph_m_2000',
 '2_barabasi_albert_graph_m_1',
 '2_barabasi_albert_graph_m_2',
 '2_barabasi_albert_graph_m_3',
 '2_ferdos_renyi_graph_m_2000',
 '3_barabasi_albert_graph_m_1',
 '3_barabasi_albert_graph_m_2',
 '3_barabasi_albert_graph_m_3',
 '3_ferdos_renyi_graph_m_2000',
 '4_barabasi_albert_graph_m_1',
 '4_barabasi_albert_graph_m_2',
 '4_barabasi_albert_graph_m_3',
 '4_ferdos_renyi_graph_m_2000'}