In [1]:
import argparse
import logging
import sys

import networkx as nx
import numpy as np

# The parent directory must be on sys.path before the `deeplp` imports below.
sys.path.append("..")

import deeplp.utils as deeplp_utils
from deeplp.utils import (
    community_features, edge_centralities, get_d_to_b_indices,
    get_r_to_b_indices, get_u_to_b_indices, link_predictions, load_graph,
    node_centralities, node_feature_reduction, node_feature_similarities,
    node_partitions, pos_normalize)

  from ._conv import register_converters as _register_converters


In [2]:
# U, D, B, R, node_features = load_graph('linqs_cora')

In [3]:
# Load the four graph objects and the node feature matrix for the
# 'linqs_pubmed_planetoid' dataset. U is the graph used for connected-component
# analysis and B supplies the edge list in the cells below; the exact meaning
# of D and R is defined by deeplp.utils.load_graph — TODO confirm there.
U, D, B, R, node_features = load_graph('linqs_pubmed_planetoid')

In [4]:
# Edge list of B as a 2-column array: column 0 holds sources, column 1 sinks.
edges = np.array(B.edges())
sources, sinks = edges[:, 0], edges[:, 1]

# Restrict U to its largest connected component.
subU_nodes = max(nx.connected_components(U), key=len)
subU = U.subgraph(subU_nodes)

# Boolean mask over B's edges: True where both endpoints are in that component.
# The node set is converted to a list once (np.isin does not treat a Python set
# as a collection of values), and np.isin replaces np.in1d, which is deprecated
# as of NumPy 2.0.
subU_node_list = list(subU_nodes)
in_subU = np.isin(sources, subU_node_list) & np.isin(sinks, subU_node_list)
# `sources`/`sinks` are the two columns of `edges`, so masking the rows of
# `edges` selects exactly the (source, sink) pairs inside the component.
edges_sub = edges[in_subU]

# if args.raw_only:
#     node_feature_reduction = node_feature_reduction(
#         node_features, sources, n_components=10)
#     features = node_feature_reduction
# else:
# Index arrays relating each graph's edges to B's edge list (presumably row
# positions in `edges` — TODO confirm against deeplp.utils). The U mapping is
# built from the component subgraph and its edge subset; D and R use all edges.
u_to_b_indices = get_u_to_b_indices(subU, edges_sub)
d_to_b_indices = get_d_to_b_indices(D, edges)
r_to_b_indices = get_r_to_b_indices(R, edges)


In [5]:
# Sanity check: node count of the largest-component subgraph (three ways),
# the shape of the edge subset, and the length of the U-to-B index array.
len(subU.nodes()), edges_sub.shape,len(subU), len(subU_nodes), len(u_to_b_indices)

(19717, (88651, 2), 19717, 19717, 88651)

In [6]:
# Configure the root logger to emit INFO-and-above records prefixed with a
# 12-hour timestamp, then fetch the "deeplp" logger.
# (`logging` is already imported in the notebook's first cell.)
_LOG_OPTIONS = {
    'format': '%(asctime)s: %(message)s',
    'datefmt': '%m/%d/%Y %I:%M:%S %p',
    'level': 'INFO',
}
logging.basicConfig(**_LOG_OPTIONS)
logger = logging.getLogger("deeplp")

In [7]:
# U, D, B, R, node_features = load_graph('linqs_cora_planetoid')

# nx.write_edgelist(
#     B, f'data/{args.data}/graph_symmetric.csv', delimiter=',', data=False)
# get list of edges

# Drop self-loops from U in place before computing graph features.
U.remove_edges_from(nx.selfloop_edges(U))

# Every feature function is called through the `deeplp_utils` module alias
# rather than by its bare imported name. The results are bound to those same
# names (the following cell reads them), so a bare call would try to call the
# previous result instead of the function as soon as this cell is re-run.
node_feature_similarities = deeplp_utils.node_feature_similarities(
    node_features, sources, sinks)
node_feature_reduction = deeplp_utils.node_feature_reduction(
    node_features, sources)
node_centralities = deeplp_utils.node_centralities(
    B, D, R, U, sources, sinks)
node_partitions = deeplp_utils.node_partitions(U, sources, sinks)
# Expensive: in the recorded run roughly ten hours passed between the
# node_partitions and edge_centralities log lines.
edge_centralities = deeplp_utils.edge_centralities(
    B, D, R, U, subU, in_subU, edges, d_to_b_indices, r_to_b_indices,
    u_to_b_indices)

# Feature groups are stacked column-wise in this order.
feature_blocks = [
    node_feature_similarities, node_feature_reduction, node_centralities,
    node_partitions, edge_centralities
]

# Link-prediction features are currently disabled. While they are, they must
# stay out of the stack: the bare name `link_predictions` is still the imported
# function, which np.hstack cannot combine with the 2-D feature arrays.
# To re-enable, uncomment both lines (keeps the original column order):
# link_predictions = deeplp_utils.link_predictions(U, edges)
# feature_blocks.append(link_predictions)

# Community features are only computed when U is a single connected component.
# NOTE(review): the recorded run of this cell ended with
# "ZeroDivisionError: float division by zero" after edge_centralities was
# logged; which of the remaining calls raised is not visible here.
if nx.is_connected(U):
    community_features = deeplp_utils.community_features(
        U, node_features, edges, sinks, sources)
    feature_blocks.append(community_features)

features = np.hstack(feature_blocks)
normalized_features = pos_normalize(features)



10/23/2018 10:14:10 PM: node_feature_similarities generated: (88651, 3)
10/23/2018 10:14:11 PM: node_feature_reduction generated: (88651, 3)
10/23/2018 10:14:12 PM: node_centralities generated: (88651, 3)
10/23/2018 10:14:18 PM: node_partitions generated: (88651, 2)
10/24/2018 08:08:41 AM: edge_centralities generated: (88651, 3)


ZeroDivisionError: float division by zero

In [8]:
# Column-wise concatenation of the five feature groups that were computed in
# the previous cell (no link-prediction or community features), followed by
# normalisation with pos_normalize.
feature_groups = (
    node_feature_similarities,
    node_feature_reduction,
    node_centralities,
    node_partitions,
    edge_centralities,
)
features = np.hstack(feature_groups)
normalized_features = pos_normalize(features)

In [9]:
# Display the shape of the normalised feature matrix.
normalized_features.shape

(88651, 14)

In [10]:
# Write the normalised feature matrix as comma-separated text under
# ../data/<dataset>/. The original f-string had no placeholders; the dataset
# name is now interpolated so the path is built from a single variable.
dataset = 'linqs_pubmed_planetoid'  # keep in sync with the load_graph call
feature_fname = f'../data/{dataset}/features.csv'
np.savetxt(feature_fname, normalized_features, delimiter=',')

In [20]:
def listify(d):
    """Return the values of ``d`` as a NumPy array.

    ``d`` may be anything ``dict()`` accepts (a mapping or an iterable of
    key/value pairs). Keys are discarded; values keep the dict's iteration
    order.
    """
    as_mapping = dict(d)
    collected = [value for value in as_mapping.values()]
    return np.array(collected)
# Edge current-flow betweenness centrality of subU, reduced to an array of the
# returned dict's values (the edge keys are discarded by listify).
# NOTE(review): the execution count jumps from In [10] to In [20], and the
# recorded shape of this result, (3679,), is far smaller than the 88651 rows
# reported for edges_sub earlier — this output presumably came from a
# different kernel state; re-run on a fresh kernel to confirm.
edge_current_flow_betweenness = listify(
    nx.edge_current_flow_betweenness_centrality(subU))

In [21]:
# Display the number of centrality values obtained (one per dict entry).
edge_current_flow_betweenness.shape

(3679,)