In [2]:
import numpy as np
import networkx as nx
import yaml
import pandas as pd
import matplotlib.pyplot as plt
from statistics import mean
%matplotlib inline

### Initial Node Metric Calculations

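These five metrics (degree, clustering coefficient, betweenness centrality, closeness centrality and average shortest path length) were the first to be calculated for each node. They are computed over the entire graph, i.e. independently of any community structure.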

In [30]:
def calc_node_metrics(G):
    """Compute five whole-graph metrics for every node of ``G``.

    Parameters
    ----------
    G : networkx graph
        The graph to analyse. Node labels may be arbitrary hashable values;
        they do not have to be the integers ``0 .. N-1``.

    Returns
    -------
    dict
        Maps each metric name ('Degree', 'Clustering Coefficient',
        'Betweenness', 'Closeness', 'Shortest Path') to a dict keyed by node.

    Raises
    ------
    statistics.StatisticsError
        If some node cannot reach any other node, because the average
        shortest path length is then taken over an empty list.
    """
    node_degrees = dict(G.degree())
    node_clustering_coefficients = nx.clustering(G)
    node_betweenness = nx.betweenness_centrality(G)
    node_closeness = nx.closeness_centrality(G)

    node_av_shortest_paths = {}
    # Iterate over the graph's real node labels instead of range(N), so graphs
    # whose nodes are not labelled 0..N-1 are handled correctly.
    for node in G.nodes():
        shortest_paths = nx.shortest_path_length(G, source=node)
        # Drop the zero-length path from the node to itself by key, rather than
        # relying on it being the first entry of the returned dict.
        distances = [dist for target, dist in shortest_paths.items() if target != node]
        node_av_shortest_paths[node] = mean(distances)

    node_metrics = {
        'Degree': node_degrees,
        'Clustering Coefficient': node_clustering_coefficients,
        'Betweenness': node_betweenness,
        'Closeness': node_closeness,
        'Shortest Path': node_av_shortest_paths,
    }
    return node_metrics

In [35]:
# Compute the whole-graph node metrics for every stored graph and write them to
# a '*_node_features.yml' file next to the graph file.
# i fills the {0} placeholder and j the {1} placeholder of the path templates
# (presumably the mixing parameter mu = 0.i and the graph index -- confirm).
for i in [1, 2, 3, 4]:
    for j in [1, 2, 3, 4, 5]:
        with open('../lfr_graphs/mu_0_{0}/graph_0{1}_mu_0_{0}.yml'.format(i, j)) as f:
            # NOTE(review): yaml.Loader can construct arbitrary Python objects,
            # so only load files from a trusted source.
            graph_info = yaml.load(f, Loader=yaml.Loader)
        # G is a notebook-level name: after the loop it stays bound to the last
        # graph loaded.
        G = graph_info['G']
        node_metrics = calc_node_metrics(G)
        # NOTE(review): the later community-metrics cell reads this file from a
        # 'graph_0{1}/' sub-directory, whereas it is written directly under
        # 'mu_0_{0}/' here -- confirm the files were moved in between.
        with open('../lfr_graphs/mu_0_{0}/graph_0{1}_mu_0_{0}_node_features.yml'.format(i, j), 'w') as f:
            yaml.dump(node_metrics, f)

### Community node metrics

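These additional node metrics were added later and depend on community membership: each one is computed for a node relative to the community it belongs to in a given partition, and the values are then averaged over all partitions.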

In [108]:
def initialise_new_metrics(nodes=None):
    """Create the empty accumulator for the community-based node metrics.

    Parameters
    ----------
    nodes : iterable, optional
        Node labels to pre-create an entry for. Defaults to ``range(200)``,
        the size that was previously hard-coded.

    Returns
    -------
    dict
        Maps each metric name to a dict ``{node: []}``. Every list is a
        distinct object, so appending to one never affects another.
    """
    # Materialise once so a one-shot iterator can be reused for every metric.
    node_labels = list(range(200)) if nodes is None else list(nodes)

    metric_names = (
        'E In', 'E Out', 'E In Over E Out',
        'ODF', 'Expansion', 'Cut Ratio',
        'Conductance', 'Normalised Cut',
        'Triangle Participation',
    )
    new_metric_dict = {
        name: {node: [] for node in node_labels} for name in metric_names
    }
    return new_metric_dict

In [1]:
def calc_new_metrics(new_metrics, G, partitions):
    """Append the community-based metrics of every node, for every partition.

    Parameters
    ----------
    new_metrics : dict
        Accumulator mapping each metric name to a dict ``{node: list}``.
        It is updated in place; nodes without an entry get one on demand.
    G : networkx graph
        The graph the partitions refer to.
    partitions : iterable
        Each partition is an iterable of communities, and each community a
        sized collection of node labels of ``G``.

    Returns
    -------
    dict
        The same ``new_metrics`` object, with one value appended per metric
        for every (community, node) pair visited.

    Notes
    -----
    'E In Over E Out' falls back to ``e_in`` when ``e_out`` is zero. For
    'ODF', 'Cut Ratio', 'Conductance' and 'Normalised Cut', a zero denominator
    (degenerate cases such as an isolated node or a community spanning the
    whole graph) yields 0.0 instead of raising ``ZeroDivisionError``.
    """
    # Whole-graph quantities are the same for every community, so compute them
    # once. The degrees are taken from G itself rather than from a
    # notebook-level variable.
    node_degrees = dict(G.degree())
    N = G.number_of_nodes()
    m = G.number_of_edges()

    def ratio(numerator, denominator):
        # Division that maps a zero denominator to 0.0.
        return numerator / denominator if denominator else 0.0

    def record(metric, nod, value):
        # Tolerate nodes that were not pre-initialised in the accumulator.
        new_metrics[metric].setdefault(nod, []).append(value)

    for part in partitions:
        for comm in part:
            comm_subgraph = G.subgraph(comm)
            comm_degrees = dict(comm_subgraph.degree())
            w = len(comm)

            # For each node, collect the members of all triangles it belongs to.
            # enumerate_all_cliques yields cliques in order of increasing size,
            # so the enumeration can stop at the first clique larger than three.
            triangle_members = {}
            for clique in nx.enumerate_all_cliques(comm_subgraph):
                if len(clique) > 3:
                    break
                if len(clique) == 3:
                    for member in clique:
                        triangle_members.setdefault(member, set()).update(clique)

            for nod, e_in in comm_degrees.items():
                degree = node_degrees[nod]
                e_out = degree - e_in

                record('E In', nod, e_in)
                record('E Out', nod, e_out)

                # For e_in divided by e_out, if e_out is 0, just use e_in.
                record('E In Over E Out', nod, e_in / e_out if e_out else e_in)

                record('ODF', nod, ratio(e_out, degree))

                record('Expansion', nod, e_out / w)
                record('Cut Ratio', nod, ratio(e_out, N - w))

                ct = ratio(e_out, degree + e_in)
                record('Conductance', nod, ct)

                nc = ct + ratio(e_out, 2 * m - 2 * e_in + e_out)
                record('Normalised Cut', nod, nc)

                # Triangle participation: the number of distinct nodes in the
                # triangles containing this node (itself included), divided by
                # the community size.
                tp = len(triangle_members.get(nod, ())) / w
                record('Triangle Participation', nod, tp)

    return new_metrics

In [116]:
def average_metrics(new_metrics):
    """Return the per-node mean of every metric without modifying the input.

    Parameters
    ----------
    new_metrics : dict
        Maps each metric name to a dict ``{node: list of values}``.

    Returns
    -------
    dict
        A new nested dict of the same shape, with each list replaced by its
        mean.

    Raises
    ------
    statistics.StatisticsError
        If any node has an empty list of values.
    """
    # Build fresh inner dicts: a shallow copy of the outer dict would share the
    # inner ones, so the caller's lists would be overwritten by their means.
    return {
        met: {nod: mean(values) for nod, values in per_node.items()}
        for met, per_node in new_metrics.items()
    }

In [111]:
def new_node_metrics(node_metrics, partitions, graph=None):
    """Extend a graph's node metrics with the averaged community-based metrics.

    Parameters
    ----------
    node_metrics : dict
        Existing metrics, mapping metric name to a per-node dict. Not modified.
    partitions : iterable
        Partitions of the graph, passed through to ``calc_new_metrics``.
    graph : networkx graph, optional
        The graph that ``node_metrics`` and ``partitions`` belong to. When
        omitted, the notebook-level variable ``G`` is used, as before; pass it
        explicitly to avoid silently using a graph left over from another cell.

    Returns
    -------
    dict
        A copy of ``node_metrics`` with the community-based metrics added.
    """
    if graph is None:
        # Backward-compatible fallback to the notebook-level graph.
        graph = G
    new_metrics = initialise_new_metrics()
    new_metrics = calc_new_metrics(new_metrics, graph, partitions)
    new_metrics = average_metrics(new_metrics)
    updated_node_metrics = node_metrics.copy()
    updated_node_metrics.update(new_metrics)
    return updated_node_metrics

In [112]:
from tqdm import tqdm

In [117]:
# All (i, j) combinations that fill the {0} and {1} placeholders of the path
# templates below; flattened into one list so tqdm can show overall progress.
x = [(i,j) for i in [1,2,3,4] for j in [1,2,3,4,5]]

# For every graph: load its node features and partitions, add the
# community-based metrics, and overwrite the node-features file with the result.
# NOTE(review): no graph is loaded in this loop. new_node_metrics is called
# without one, so it uses whatever graph the notebook-level name G is bound to
# at this point -- confirm that is the graph matching each (i, j).
for i, j in tqdm(x):
    with open('../lfr_graphs/mu_0_{0}/graph_0{1}/graph_0{1}_mu_0_{0}_node_features.yml'.format(i, j)) as f:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects,
        # so only load files from a trusted source.
        node_metrics = yaml.load(f, Loader=yaml.Loader)
    with open('../lfr_graphs/mu_0_{0}/graph_0{1}/graph_0{1}_mu_0_{0}_partitions.yml'.format(i, j)) as f:
        partitions = yaml.load(f, Loader=yaml.Loader)
    node_metrics = new_node_metrics(node_metrics, partitions)
    # Written back to the same path that was read above.
    with open('../lfr_graphs/mu_0_{0}/graph_0{1}/graph_0{1}_mu_0_{0}_node_features.yml'.format(i, j), 'w') as f:
        yaml.dump(node_metrics, f)



  0%|          | 0/20 [00:00<?, ?it/s][A[A

  5%|▌         | 1/20 [00:26<08:19, 26.32s/it][A[A

 10%|█         | 2/20 [00:49<07:35, 25.32s/it][A[A

 15%|█▌        | 3/20 [01:12<06:58, 24.61s/it][A[A

 20%|██        | 4/20 [01:34<06:21, 23.82s/it][A[A

 25%|██▌       | 5/20 [01:56<05:50, 23.38s/it][A[A

 30%|███       | 6/20 [02:20<05:27, 23.42s/it][A[A

 35%|███▌      | 7/20 [02:41<04:55, 22.69s/it][A[A

 40%|████      | 8/20 [03:03<04:30, 22.58s/it][A[A

 45%|████▌     | 9/20 [03:27<04:11, 22.89s/it][A[A

 50%|█████     | 10/20 [03:52<03:56, 23.68s/it][A[A

 55%|█████▌    | 11/20 [04:13<03:26, 22.95s/it][A[A

 60%|██████    | 12/20 [04:34<02:58, 22.26s/it][A[A

 65%|██████▌   | 13/20 [04:54<02:32, 21.73s/it][A[A

 70%|███████   | 14/20 [05:15<02:09, 21.51s/it][A[A

 75%|███████▌  | 15/20 [05:36<01:46, 21.31s/it][A[A

 80%|████████  | 16/20 [05:58<01:25, 21.33s/it][A[A

 85%|████████▌ | 17/20 [06:18<01:03, 21.14s/it][A[A

 90%|█████████ | 18/20 [06