In [23]:
import networkx as nx
import pandas as pd
from scipy.sparse import *
import numpy as np

In [24]:
def graph_reader(path):
    """
    Function to read the graph from the path.
    :param path: Path to the edge list.
    :return graph: NetworkX object returned.
    """
    graph = nx.from_edgelist(pd.read_csv(path).values.tolist())
    return graph

def feature_reader(path):
    """
    Reading the sparse feature matrix stored as csv from the disk.
    :param path: Path to the csv file.
    :return features: Dense matrix of features.
    """
    features = pd.read_csv(path)
    node_index = features["node_id"].values.tolist()
    feature_index = features["feature_id"].values.tolist()
    feature_values = features["value"].values.tolist()
    node_count = max(node_index)+1
    feature_count = max(feature_index)+1
    features = coo_matrix((feature_values, (node_index, feature_index)), shape=(node_count, feature_count)).toarray()
    return features

def target_reader(path):
    """
    Reading the target vector from disk.
    :param path: Path to the target.
    :return target: Target vector.
    """
    target = np.array(pd.read_csv(path)["target"]).reshape(-1,1)
    return target

In [25]:
G = graph_reader('data/edges.csv')

In [26]:
features = pd.read_csv('data/features.csv')
features

Unnamed: 0,node_id,feature_id,value
0,0,1,87
1,0,2,2011
2,0,3,0
3,1,1,87
4,1,2,2011
...,...,...,...
44233,14744,2,2013
44234,14744,3,0
44235,14745,1,90
44236,14745,2,2012


In [27]:
target = target_reader('data/target.csv')

In [28]:
features = feature_reader('data/features.csv')

## Simple message passing
$ f(H^{i},A) = \sigma(AH^{i}W^{i}) $

In [29]:
A = nx.to_pandas_adjacency(G)
A = A.to_numpy(copy=True)
A

array([[0., 1., 1., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.],
       [1., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 1.],
       [0., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 1., 1., 0.]])

In [30]:
X = features[:,1:]
X

array([[  87, 2011,    0],
       [  87, 2011,    1],
       [  87, 2011,    2],
       ...,
       [  91, 2005,    4],
       [  90, 2013,    0],
       [  90, 2012,    0]])

In [31]:
np.shape(X)

(14746, 3)

In [32]:
I = np.matrix(np.eye(A.shape[0]))

A_hat = A + I

np.shape(A_hat)

(14746, 14746)

## Normalizing the Feature Representations

$ f(X, A) = D^{-1}AX $
then
$ f(H^{i},A) = \sigma(AH^{i}W^{i}) $

In [33]:
D_hat = np.array(np.sum(A_hat, axis=0))[0]
D_hat = np.matrix(np.diag(D_hat))

D_hat_inv  = np.linalg.inv(D_hat)

In [34]:
np.shape(D_hat_inv)

(14746, 14746)

In [35]:
#random assignmentof weights
W =  np.matrix(
    [[1, 0, 0],
    [0, 1, 0],
    [0, 0, 1]])

In [36]:
#adding activation function
def relu(x):
    return (np.maximum(0, X))

In [37]:
f = D_hat_inv * A_hat

In [38]:
f = f * X * W
f

matrix([[8.76428571e+01, 2.01264286e+03, 1.42857143e+00],
        [8.78000000e+01, 2.01280000e+03, 1.33333333e+00],
        [8.78571429e+01, 2.01278571e+03, 1.42857143e+00],
        ...,
        [9.08181818e+01, 2.01127273e+03, 2.00000000e+00],
        [9.08181818e+01, 2.01127273e+03, 2.00000000e+00],
        [9.08181818e+01, 2.01127273e+03, 2.00000000e+00]])

In [39]:
f = relu(f)

In [40]:
f

array([[  87, 2011,    0],
       [  87, 2011,    1],
       [  87, 2011,    2],
       ...,
       [  91, 2005,    4],
       [  90, 2013,    0],
       [  90, 2012,    0]])