In [1]:
import numpy as np

In [17]:
from __future__ import print_function
from pathlib import Path
from random import shuffle
import random

import argparse
import copy
import numpy as np
import sys
import tensorflow as tf
import scipy.sparse as sp
import sys

import sys
sys.path.append("..") 
from deeplp.models.deeplp_att import DeepLP_ATT
from deeplp.models.deeplp_edge import DeepLP_Edge
from deeplp.models.deeplp_wrbf import DeepLP_WRBF
from deeplp.models.lp import LP
from deeplp.utils import (calc_masks, create_seed_features, load_data,
                          num_layers_dict, prepare_data, random_unlabel, rbf)

from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import rbf_kernel
from scipy.sparse import coo_matrix, csr_matrix

In [13]:
x = np.loadtxt('../data/linqs_cora/features_raw.csv', delimiter=',')

In [19]:
raw_features = node_features_np_to_sparse(x)

In [15]:
def node_features_np_to_sparse(node_features, values=False):
    """
    Convert np array with each row being (row_index,col_index,value)
    of a graph to a scipy csr matrix.
    """
    num_rows = int(max(node_features[:, 0]) + 1)
    num_cols = int(max(node_features[:, 1]) + 1)
    vals = np.ones(len(node_features[:, 0]))
    csr = csr_matrix(
        (vals, (node_features[:, 0], node_features[:, 1])),
        shape=(num_rows, num_cols))
    return csr

In [20]:
true_labels, _, weights = load_data(
    'linqs_cora', model='edge', feature_type='all')

In [24]:
def create_weighted_graph(features,graph):
    """Use RBF kernel to calculate the weights of edges
    Input:
        features: features to calculate the RBF kernel
        sigma: RBF kernel parameter if None, set it to num_features * 10
        graph: if [], construct a graph using RBF kernel, if given, use it
        num_neighbors: number of neighbors (based on edge weights) to form edges
    Returns:
        weight matrix and graph matrix
    """

    print('Constructing weights...')

    features_dense = features.toarray()

    # estimate sigma by taking the average of the lowest weights for each node
    def get_lowest_weight(row):
        return np.sort(row[np.nonzero(row)])[::-1][-1]
    D = euclidean_distances(features_dense, features_dense)
    lowest_dists = np.apply_along_axis(get_lowest_weight,0,D*graph)
    sigma = np.mean(lowest_dists)**2

    # use rbf kernel to estimate weights between nodes
    weights_dense = rbf_kernel(features_dense, gamma=1/sigma)

    weights_sp = graph.multiply(weights_dense)

    print('Done!')

    return weights_sp, graph, sigma

In [25]:
create_weighted_graph(raw_features, weights)

Constructing weights...
Done!


(<2485x2485 sparse matrix of type '<class 'numpy.float64'>'
 	with 10138 stored elements in COOrdinate format>,
 <2485x2485 sparse matrix of type '<class 'numpy.float64'>'
 	with 10138 stored elements in Compressed Sparse Row format>,
 248.01779798013024)

In [8]:
x.shape,weights.shape

((2708, 1433), (2485, 2485))

In [29]:
np.set_printoptions(suppress=True)

In [36]:
x = np.loadtxt('../data/linqs_pubmed/graph_directed.csv', delimiter=',')

In [37]:
np.savetxt('../data/linqs_pubmed/graph_directed.txt',x.astype(int),fmt='%d')

In [31]:
x.astype(int)

array([[   1, 1147],
       [   1, 1705],
       [   1, 2205],
       ...,
       [2482, 1893],
       [2482, 1895],
       [2484, 1894]])