In [None]:
# Loading of Env Vars to enable parameterized command line usage
import os

# Each setting can be overridden through an environment variable of the same
# name; the literal default applies when the variable is unset.
# Environment values always arrive as strings, so GRAPHLET_SIZE is converted
# explicitly to keep it an int whether or not it was overridden.
GRAPHLET_SIZE = int(os.environ.get('GRAPHLET_SIZE', 3))
DATASET = os.environ.get('DATASET', "yeastInter_st.txt")
EXPERIMENT_OUT = os.environ.get('EXPERIMENT_OUT', "yeastInter_st")
METRIC_NAME = os.environ.get('METRIC_NAME', "degree")

In [None]:
from pmotifs.analysis_utilities.metric_consolidation import metrics

# Names of all metrics offered by the imported registry.
potential_metrics = metrics.keys()
# Fail early and say which name was rejected and which ones are accepted,
# so a mistyped METRIC_NAME can be fixed without digging through the code.
assert METRIC_NAME in potential_metrics, (
    f"Unknown METRIC_NAME {METRIC_NAME!r}; expected one of {list(potential_metrics)}"
)

In [None]:
from pmotifs.analysis_utilities.loading import Result
from pmotifs.config import config

# Build both locations from the directories provided by the project config.
dataset_path = config.DATASET_DIRECTORY / DATASET
experiment_path = config.EXPERIMENT_OUT / EXPERIMENT_OUT

# Load the stored result for the requested graphlet size.
r = Result.load_result(dataset_path, experiment_path, GRAPHLET_SIZE)

# Short handles used by the remaining cells.
g = r.pmotif_graph
df = r.positional_metric_df

In [None]:
# Collect the distinct values found in the "graphlet_class" column.
graphlet_classes = set(df["graphlet_class"])
# Bare expression so the notebook displays the set.
graphlet_classes

In [None]:
from typing import Union

from pmotifs.GraphletPositionalMetrics import GraphletPositionalMetrics


def get_positional_metric(result: Result) -> Union[int, float]:
    """Evaluate the metric selected by ``METRIC_NAME`` on ``result``.

    The registry is imported inside the function on purpose: a later cell
    rebinds the module-level name ``metrics`` to a pandas Series, so a global
    lookup would break this wrapper whenever it is called after that cell has
    run (for example when cells are re-executed out of order).

    Args:
        result: The loaded result the metric is computed for.

    Returns:
        Whatever the registered metric callable returns for ``result``.
        NOTE(review): the annotation says ``Union[int, float]``, but the
        notebook stores the return value as a DataFrame column - confirm
        whether this is really a scalar.
    """
    from pmotifs.analysis_utilities.metric_consolidation import metrics as metric_registry

    return metric_registry[METRIC_NAME](result)

In [None]:
# Store the selected metric in a column named after METRIC_NAME (modifies df in place).
df[METRIC_NAME] = get_positional_metric(r)

# Graphlet Occurrence Choice

In [None]:
# Positional row index of the occurrence to analyse. Like the other notebook
# parameters it can be overridden through an environment variable; the default
# keeps the previously hard-coded row.
CHOICE_INDEX = int(os.environ.get('CHOICE_INDEX', 10110))
# A different DATASET may have fewer rows, so check the index before using it.
assert -len(df) <= CHOICE_INDEX < len(df), (
    f"CHOICE_INDEX {CHOICE_INDEX} is out of range for {len(df)} occurrences!"
)

CHOICE = df.iloc[CHOICE_INDEX]["nodes"]
# Bare expression so the notebook displays the chosen nodes.
CHOICE

In [None]:
# Boolean mask marking the rows whose "nodes" entry matches CHOICE.
is_choice = df["nodes"].isin([CHOICE])
chosen_occurrence = df.loc[is_choice]

# The analysis below works on a single selected row.
n_selected = len(chosen_occurrence)
assert n_selected == 1, "CHOICE does not specify exactly one row!"

# Analysis

In [None]:
# Bare expression so the notebook displays the selected row.
chosen_occurrence

In [None]:
# NOTE: this rebinds the name `metrics` (imported earlier as the metric
# registry) to the metric column; the Tukey cell below reads this Series.
metrics = df[METRIC_NAME].astype(float)
# chosen_occurrence was asserted to hold exactly one row; take that value
# explicitly, because calling float() on a one-element Series is deprecated
# in recent pandas versions.
pivot = float(chosen_occurrence[METRIC_NAME].iloc[0])

# Vectorised split of all values around the pivot.
below = metrics[metrics < pivot].tolist()
above = metrics[metrics > pivot].tolist()

total = df.shape[0]
# Everything that is neither below nor above counts as "same value"; this
# includes NaN entries, which compare False in both directions.
same_val_count = total - len(below) - len(above)

# Percentages are relative to the occurrences that differ from the pivot.
# Guard the case where no such occurrence exists to avoid dividing by zero.
comparable = total - same_val_count
if comparable > 0:
    below_percent = (len(below) / comparable) * 100
    above_percent = (len(above) / comparable) * 100
else:
    below_percent = above_percent = 0.0
print(
    f"{CHOICE} has " + 
    f"higher {METRIC_NAME} than {round(below_percent, 2)}% and " + 
    f"lower {METRIC_NAME} than {round(above_percent, 2)}% of occurrences!"
)

In [None]:
"""Tukey Method"""
from scipy.stats import iqr
from scipy.stats.mstats import mquantiles

# Quartiles of the metric values (mquantiles defaults to the 25%, 50% and 75%
# quantiles).
q1, q2, q3 = mquantiles(metrics)
# Derive the inter-quartile range from the very same quartiles.
# scipy.stats.iqr uses a different default quantile estimator than mquantiles,
# so mixing the two makes the fences inconsistent with q1 and q3.
inner_quantile_range = q3 - q1

# Tukey fences: values further than 1.5 * IQR outside the quartiles are outliers.
lower_outlier_cut = q1 - 1.5 * inner_quantile_range
upper_outlier_cut = q3 + 1.5 * inner_quantile_range

# Upper outliers lie ABOVE the upper fence, so the inequality reads x > fence.
print(f"Outliers: x < {lower_outlier_cut}, x > {upper_outlier_cut}")

# Which fence, if any, the chosen occurrence violates ("" means neither).
call = ""
if pivot < lower_outlier_cut:
    call = "lower"
elif pivot > upper_outlier_cut:
    call = "upper"

if call != "":
    print(f"{CHOICE}({pivot}) is an {call} outlier!")

In [None]:
import matplotlib.pyplot as plt

from pmotifs.graphlet_representation import graphlet_class_to_name


# Graphlet class of the chosen occurrence; the histogram is limited to it.
chosen_class = chosen_occurrence["graphlet_class"].values[0]
graphlet_class_df = df[df["graphlet_class"] == chosen_class]

fig, ax = plt.subplots(1, 1)

# One bin per distinct metric value within the class.
class_metric = graphlet_class_df[METRIC_NAME]
bars = class_metric.plot.hist(bins=len(set(class_metric)), ax=ax)

# Mark where the chosen occurrence's value falls in the distribution.
ax.axvline(pivot, label="CHOICE", color="tab:orange")

ax.legend()
ax.set_title(graphlet_class_to_name(chosen_class))

In [None]:
"""Prepare Graph Plotting"""
import networkx as nx

from pmotifs.analysis_utilities.plotting import get_kamada_kawai_layout


# Layout for g, passed as `pos` to the plotting helpers in the cells below
# (presumably node positions, going by the helper's name - TODO confirm).
pos = get_kamada_kawai_layout(g)
# Graph object handed to the plotting helpers below
# (presumably a networkx graph, given the name - TODO confirm).
nx_g = g.load_graph()

In [None]:
import matplotlib.pyplot as plt

from pmotifs.analysis_utilities.plotting import plot_graph_with_motif_highlight


fig, ax = plt.subplots(1, 1)

# NOTE(review): list() over the "nodes" column of the one-row frame yields a
# one-element list wrapping that row's nodes entry, whereas the zoomed plot
# further down passes the entry itself - confirm which shape the helper expects.
highlighted = list(chosen_occurrence["nodes"])
plot_graph_with_motif_highlight(nx_g, highlighted, pos, ax)

In [None]:
from pmotifs.analysis_utilities.plotting import get_zommed_graph, plot_graph_with_motif_highlight


fig, ax = plt.subplots(1, 1)

# Nodes entry of the single chosen row.
motif_nodes = chosen_occurrence["nodes"].values[0]
# Graph returned by the zoom helper for those nodes
# (presumably the motif's surroundings - helper semantics are not visible here).
zoomed_graph = get_zommed_graph(nx_g, motif_nodes)
plot_graph_with_motif_highlight(zoomed_graph, motif_nodes, pos, ax)