In [1]:
import igraph as ig
import networkx as nx
import pandas as pd
from tqdm import tqdm

In [2]:
from util_funcs import *
from single_funcs import *
from pair_funcs import *

In [4]:
graph_name = "power"
gml_file_path = "datasets/" + graph_name + ".gml"
output_path = graph_name + "_data"

try:
    graph_ig = ig.Graph.Read_GML(gml_file_path)
    print("Graph read successfully.")
except OSError as e:
    print("File not found.")
    quit()

Graph read successfully.


In [5]:
# Converting igraph to networkx
graph_nx = ig_to_nx(graph_ig)

In [6]:
# Assigning a name attribute to each vertex, whose value is the same as the vertex index
graph_ig.vs["name"] = [v.index for v in graph_ig.vs()]

In [7]:
# Listing all functions
single_funcs = [k_degree, eccentricity, triangles, clique_num, coreness, betweenness_centrality, closeness_centrality, harmonic_centrality, eigenvector_centrality, decay_centrality, pagerank, katz_centrality, local_clustering_coeff, global_clustering_coeff, shannon_diversity, h_index, neighborhood_density, rwr, lpi, lrw]
pair_funcs = [are_connected, common_triangles, common_neighbors, cosine_similarity, pearson_similarity, euclidean_distance, manhattan_distance, hamming_distance, covariance, rand_index, jaccard_index, sorenson_dice, hub_promoted, hub_depressed, lhn, res_alloc, adamic_adar, pref_attach, overlap_coeff, nbd_overlap, edge_conn, vertex_conn, katz_index, avg_commute_time, cosine_l, l_plus, mfi]

In [8]:
# Create dataframe to store function outputs
total_vertices = total_vertices(graph_ig)
row_count = int(total_vertices * (total_vertices-1) / 2)
node_1 = []
node_2 = []
for i in range(total_vertices):
  for j in range(i, total_vertices):
    if i!=j:
      node_1.append(i)
      node_2.append(j)

df = pd.DataFrame()
df['node_1'] = node_1
df['node_2'] = node_2

In [12]:
# Computing subgraphs

level = 1 # Keeping it 1 for now
subgraphs_ig = []
subgraphs_nx = []
for vertex_id in range(total_vertices):
    induced_ig = level_induced_subgraph(graph_ig, vertex_id, level)
    subgraphs_ig.append(induced_ig)
    induced_edge_list = induced_ig.get_edgelist()
    induced_nx = nx.Graph(induced_edge_list)
    subgraphs_nx.append(induced_nx)

In [17]:
# Single node features
for func in single_funcs:
    print(func.__name__)
    node_1 = []
    node_2 = []
    for i in tqdm(range(row_count)):
        index_1 = int(df['node_1'][i])
        index_2 = int(df['node_2'][i])
        subgraph_1 = subgraphs_ig[index_1]
        subgraph_2 = subgraphs_ig[index_2]
        new_index_1 = reset_id(subgraph_1, index_1)
        new_index_2 = reset_id(subgraph_2, index_2)
        node_1.append(func(subgraph_1, new_index_1))
        node_2.append(func(subgraph_2, new_index_2))

    df[func.__name__ + '_1'] = node_1
    df[func.__name__ + '_2'] = node_2

k_degree


  6%|▌         | 693225/12204270 [00:26<07:11, 26652.13it/s]


KeyboardInterrupt: 

In [14]:
for func in tqdm(single_funcs):
    print(func.__name__)

100%|██████████| 20/20 [00:00<00:00, 19953.87it/s]

k_degree
eccentricity
triangles
clique_num
coreness
betweenness_centrality
closeness_centrality
harmonic_centrality
eigenvector_centrality
decay_centrality
pagerank
katz_centrality
local_clustering_coeff
global_clustering_coeff
shannon_diversity
h_index
neighborhood_density
rwr
lpi
lrw



