In [2]:
import numpy as np
import networkx as nx
import yaml
import pandas as pd

In [7]:
'''
Read the per-node metric tables (train and test splits) and stack them
into a single dataframe indexed by the first CSV column
'''

train_nodes, test_nodes = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('../node_x_train.csv', '../node_x_test.csv')
)

# Rows of the train split come first, followed by the test split
nodes = pd.concat([train_nodes, test_nodes])

In [23]:
'''
Create dictionary of node pairs with metrics from both individual nodes

Layout: node_pair_metrics[column][pair_name] = value, where every
single-node column appears twice, suffixed '_A' (lower-numbered node of
the pair) and '_B' (higher-numbered node of the pair)
'''
metrics_list = [metric + '_A' for metric in list(nodes.columns)] + [metric + '_B' for metric in list(nodes.columns)]

node_pair_metrics = {metric: {} for metric in metrics_list}

for mu in [1,2,3,4]:
    for g_num in [1,2,3,4,5]:
        # Pull each node's row out of the dataframe once per graph. Doing the
        # label-based .loc lookup inside the pair loop repeats it ~40k times
        # per graph for only 200 distinct rows.
        node_values = [
            np.array(nodes.loc['graph_{0}_{1}_node_{2}'.format(mu, g_num, node)])
            for node in range(200)
        ]
        for node1 in range(200):
            # Unordered pairs only: node2 always has the larger number
            for node2 in range(node1 + 1, 200):
                pair_values = np.concatenate([node_values[node1], node_values[node2]])
                pair_name = 'graph_{0}_{1}_nodes_{2}_{3}'.format(mu, g_num, node1, node2)
                # metrics_list and pair_values are ordered the same way:
                # all of node1's columns first, then all of node2's
                for metric, value in zip(metrics_list, pair_values):
                    node_pair_metrics[metric][pair_name] = value

In [35]:
'''
Turn the nested dictionary into the node pairs dataframe
(outer keys become columns, inner keys become the row index)
'''

node_pairs = pd.DataFrame(data=node_pair_metrics)

In [84]:
def calc_path_deg_metrics(G, shortest_path, pair_name, new_metrics):
    '''
    Record summary statistics of the node degrees along a path.

    G             -- graph object; only G.degree(nodes) is used
    shortest_path -- sequence of nodes forming the path
    pair_name     -- key under which the statistics are stored
    new_metrics   -- dict of dicts; the five '... Deg. on Path' entries
                     are updated in place

    Returns the same new_metrics object that was passed in.
    '''
    # Going through dict() keeps one degree per distinct node on the path
    path_degrees = [deg for deg in dict(G.degree(shortest_path)).values()]

    summaries = (
        ('Mean Deg. on Path', np.mean),
        ('Median Deg. on Path', np.median),
        ('Max Deg. on Path', np.max),
        ('Min Deg. on Path', np.min),
        ('Var of Deg. on Path', np.var),
    )
    for column, reducer in summaries:
        new_metrics[column][pair_name] = reducer(path_degrees)

    return new_metrics

In [86]:
'''
Calculate new metrics that relate to pairs of nodes

Layout: new_metrics[column][pair_name] = value, with pair names matching
'graph_{mu}_{g_num}_nodes_{node1}_{node2}' (node1 < node2)
'''

new_metrics = {'Shortest Path Length': {}, 'Triadic Closure': {}, 'Mean Deg. on Path': {}, 'Median Deg. on Path': {},
              'Max Deg. on Path': {}, 'Min Deg. on Path': {}, 'Var of Deg. on Path': {}}

for mu in [1,2,3,4]:
    for g_num in [1,2,3,4,5]:
        graph_yml = '../lfr_graphs/mu_0_{0}/graph_0{1}/graph_0{1}_mu_0_{0}.yml'.format(mu, g_num)
        with open(graph_yml) as f:
            # NOTE(review): yaml.Loader can construct arbitrary Python objects.
            # Presumably required to rebuild the stored graph object; only use
            # on trusted, locally generated files -- confirm.
            graph_info = yaml.load(f, Loader=yaml.Loader)
        G = graph_info['G']
        # All-pairs shortest paths, indexed as paths[source][target]. The
        # dict() wrapper keeps this indexable on NetworkX releases where the
        # no-source/no-target form yields (node, paths) pairs, not a dict.
        paths = dict(nx.shortest_path(G))
        for node1 in range(200):
            for node2 in range(node1 + 1, 200):
                pair_name = 'graph_{0}_{1}_nodes_{2}_{3}'.format(mu, g_num, node1, node2)
                # NOTE(review): raises KeyError if node2 is unreachable from
                # node1 -- assumes every graph is connected; confirm.
                shortest_path = paths[node1][node2]
                # Path is a node list, so its edge count is one less
                path_len = len(shortest_path) - 1
                new_metrics['Shortest Path Length'][pair_name] = path_len
                if path_len == 1:
                    # Directly linked pair: count the neighbours they share.
                    # No sort needed just to count.
                    new_metrics['Triadic Closure'][pair_name] = len(list(nx.common_neighbors(G, node1, node2)))
                else:
                    new_metrics['Triadic Closure'][pair_name] = 0
                new_metrics = calc_path_deg_metrics(G, shortest_path, pair_name, new_metrics)

In [88]:
# One column per pair-level metric, one row per node pair
pair_metrics = pd.DataFrame(data=new_metrics)

In [96]:
'''
Place the per-node columns and the pair-level columns side by side
(rows are aligned on the pair-name index)
'''

node_pairs_df = pd.concat(objs=[node_pairs, pair_metrics], axis='columns')

In [98]:
'''
Write the full feature table to disk for now; classification labels
are not available yet
'''

node_pairs_df.to_csv(path_or_buf='../node_pairs_x.csv')