In [1]:
from pmotifs.analysis_utilities.loading import Result
from pmotifs.config import config

# Graphlet size handed to the result loader.
GRAPHLET_SIZE = 3
# Base name shared by the input file and its experiment output directory;
# kept in one place so the two paths below cannot drift apart.
DATASET_NAME = "yeastInter_st"

r = Result.load_result(
    config.DATASET_DIRECTORY / f"{DATASET_NAME}.txt",
    config.EXPERIMENT_OUT / DATASET_NAME,
    GRAPHLET_SIZE,
)

# Short aliases for the two parts of the result used throughout the notebook.
g = r.pmotif_graph
df = r.positional_metric_df

Loading graphlet metrics: 100%|███████| 13150/13150 [00:00<00:00, 159620.47it/s]
Loading anchor nodes: 100%|█████████████████| 42/42 [00:00<00:00, 405900.39it/s]
Loading anchor node shortest paths: 100%|████| 42/42 [00:00<00:00, 28303.47it/s]
Loading graph modules: 100%|████████████████| 27/27 [00:00<00:00, 176947.20it/s]


In [2]:
# Collect the distinct values of the graphlet_class column.
graphlet_classes = {graphlet_class for graphlet_class in df["graphlet_class"]}
graphlet_classes

{'011 100 100', '011 101 110'}

# Anchor Hop Distance

In [3]:
from statistics import mean

from pmotifs.analysis_utilities.loading import Result


def normalize_anchor_hop_distances(result: "Result") -> None:
    """Add a ``normalized_anchor_node_distances`` column to ``result.positional_metric_df``.

    Every entry of the ``anchor_node_distances`` column is indexed positionally
    against ``result.positional_metric_meta.anchor_nodes``: the i-th distance is
    divided by the mean of the shortest-path lengths recorded for the i-th
    anchor node in ``result.positional_metric_meta.anchor_node_shortest_paths``.

    Note: the normalizer is the *mean shortest-path length* of each anchor
    node. Closeness centrality is conventionally the reciprocal of that mean,
    so a value above 1 means "farther from this anchor than the average
    recorded path length".

    Args:
        result: Loaded result whose metric frame is extended in place.

    Returns:
        None. The new column is written onto ``result.positional_metric_df``.
    """
    anchor_nodes = result.positional_metric_meta.anchor_nodes
    shortest_paths = result.positional_metric_meta.anchor_node_shortest_paths

    # One normalizer per anchor node: the average of its recorded path lengths.
    mean_path_length = {
        anchor_node: mean(shortest_path_lookup.values())
        for anchor_node, shortest_path_lookup in shortest_paths.items()
    }

    def normalize_by_mean_path_length(distances):
        # distances[i] belongs to anchor_nodes[i]; keep the positional pairing.
        return [
            distances[i] / mean_path_length[anchor_node]
            for i, anchor_node in enumerate(anchor_nodes)
        ]

    # Operate on the frame of the *argument*, not on a notebook-global result,
    # so the function works for any Result and on a fresh kernel.
    metric_df = result.positional_metric_df
    metric_df["normalized_anchor_node_distances"] = metric_df[
        "anchor_node_distances"
    ].apply(normalize_by_mean_path_length)
    

In [4]:
# Adds the normalized_anchor_node_distances column to r.positional_metric_df in place.
normalize_anchor_hop_distances(r)

In [5]:
# Show the metric table, which now carries the normalized_anchor_node_distances column.
r.positional_metric_df

Unnamed: 0,graphlet_class,nodes,degree,anchor_node_distances,graph_module_participation,normalized_anchor_node_distances
0,011 100 100,"[1, 199, 352]",10,"[4, 3, 2, 1, 2, 5, 2, 3, 4, 4, 3, 2, 3, 2, 3, ...",[0],"[0.874793524942187, 0.6695886716115981, 0.5005..."
1,011 100 100,"[1, 199, 203]",8,"[4, 3, 3, 1, 2, 4, 1, 4, 4, 3, 4, 3, 3, 3, 3, ...","[0, 10]","[0.874793524942187, 0.6695886716115981, 0.7508..."
2,011 100 100,"[1, 199, 200]",9,"[3, 3, 3, 1, 1, 4, 1, 3, 4, 3, 3, 3, 3, 3, 3, ...","[0, 10]","[0.6560951437066402, 0.6695886716115981, 0.750..."
3,011 100 100,"[1, 199, 198]",11,"[3, 2, 2, 1, 2, 4, 1, 3, 3, 3, 3, 2, 3, 2, 3, ...",[0],"[0.6560951437066402, 0.4463924477410654, 0.500..."
4,011 100 100,"[1, 199, 197]",17,"[3, 2, 2, 0, 2, 4, 1, 3, 3, 3, 3, 2, 2, 2, 2, ...",[0],"[0.6560951437066402, 0.4463924477410654, 0.500..."
...,...,...,...,...,...,...
13145,011 100 100,"[673, 678, 676]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519..."
13146,011 100 100,"[673, 678, 675]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519..."
13147,011 100 100,"[673, 677, 676]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519..."
13148,011 100 100,"[673, 677, 675]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519..."


In [8]:
from statistics import mean
# Per-row mean of the normalized anchor distances (one value per table row).
r.positional_metric_df["normalized_anchor_node_distances"].map(mean)

0        0.682611
1        0.708885
2        0.682014
3        0.630578
4        0.562669
           ...   
13145    1.447486
13146    1.447486
13147    1.447486
13148    1.447486
13149    1.447486
Name: normalized_anchor_node_distances, Length: 13150, dtype: float64

In [9]:
# Per-row maximum of the normalized anchor distances.
r.positional_metric_df["normalized_anchor_node_distances"].map(max)

0        0.971387
1        1.032359
2        1.023184
3        0.965216
4        0.965216
           ...   
13145    1.780168
13146    1.780168
13147    1.780168
13148    1.780168
13149    1.780168
Name: normalized_anchor_node_distances, Length: 13150, dtype: float64

In [10]:
# Per-row minimum of the normalized anchor distances.
r.positional_metric_df["normalized_anchor_node_distances"].map(min)

0        0.253349
1        0.255204
2        0.211637
3        0.255204
4        0.000000
           ...   
13145    0.000000
13146    0.000000
13147    0.000000
13148    0.000000
13149    0.000000
Name: normalized_anchor_node_distances, Length: 13150, dtype: float64

# Graph Modules

In [17]:
# Number of graph modules recorded in the result's metadata; used as the
# denominator of the participation ratio computed in the next cell.
total_module_count = len(r.positional_metric_meta.graph_modules)

In [18]:
# Fraction of all graph modules that each row's graph_module_participation list covers.
r.positional_metric_df["ratio_graph_module_participation"] = (
    r.positional_metric_df["graph_module_participation"]
    .apply(lambda modules: len(modules) / total_module_count)
)

In [19]:
# Show the metric table again, now with the ratio_graph_module_participation column.
r.positional_metric_df

Unnamed: 0,graphlet_class,nodes,degree,anchor_node_distances,graph_module_participation,normalized_anchor_node_distances,ratio_graph_module_participation
0,011 100 100,"[1, 199, 352]",10,"[4, 3, 2, 1, 2, 5, 2, 3, 4, 4, 3, 2, 3, 2, 3, ...",[0],"[0.874793524942187, 0.6695886716115981, 0.5005...",0.037037
1,011 100 100,"[1, 199, 203]",8,"[4, 3, 3, 1, 2, 4, 1, 4, 4, 3, 4, 3, 3, 3, 3, ...","[0, 10]","[0.874793524942187, 0.6695886716115981, 0.7508...",0.074074
2,011 100 100,"[1, 199, 200]",9,"[3, 3, 3, 1, 1, 4, 1, 3, 4, 3, 3, 3, 3, 3, 3, ...","[0, 10]","[0.6560951437066402, 0.6695886716115981, 0.750...",0.074074
3,011 100 100,"[1, 199, 198]",11,"[3, 2, 2, 1, 2, 4, 1, 3, 3, 3, 3, 2, 3, 2, 3, ...",[0],"[0.6560951437066402, 0.4463924477410654, 0.500...",0.037037
4,011 100 100,"[1, 199, 197]",17,"[3, 2, 2, 0, 2, 4, 1, 3, 3, 3, 3, 2, 2, 2, 2, ...",[0],"[0.6560951437066402, 0.4463924477410654, 0.500...",0.037037
...,...,...,...,...,...,...,...
13145,011 100 100,"[673, 678, 676]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519...",0.037037
13146,011 100 100,"[673, 678, 675]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519...",0.037037
13147,011 100 100,"[673, 677, 676]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519...",0.037037
13148,011 100 100,"[673, 677, 675]",8,"[4, 5, 7, 5, 6, 8, 6, 6, 6, 8, 8, 7, 5, 6, 5, ...",[14],"[0.874793524942187, 1.1159811193526634, 1.7519...",0.037037
