In [2]:
import numpy as np
import numpy as np
import scipy.sparse as sp
import torch
from sklearn.preprocessing import LabelBinarizer

In [3]:
"""
load data from data set.

return: adjacency matrix(sparse matrix), features(sparse) and labels
"""
def load_data():
    cat_data = np.load("F:\\3710report\\facebook.npz")
    n = len(cat_data["target"])
    x = np.zeros((n, n), dtype=np.float32)
    for i in cat_data["edges"]:
        x[i[0]][i[1]] = 1
    return sp.csr_matrix(x), sp.csr_matrix(cat_data["features"], dtype=np.float32), cat_data["target"]


"""
normalize the data.

Param: adjacency matrix.
return: normalized adjacency matrix.
"""
def normalize_adj(adjacency):
    adjacency += sp.eye(adjacency.shape[0])
    degree = np.array(adjacency.sum(1))
    d_hat = sp.diags(np.power(degree, -0.5).flatten())
    return d_hat.dot(adjacency).dot(d_hat).tocoo()


"""
normalize features.

Param: featrues.
return: normalized features.
"""
def normalize_features(features):
    return features / features.sum(1)

In [4]:
adjacency, features, labels = load_data()

# One-hot encode the labels.
# NOTE(review): with exactly two classes LabelBinarizer emits a single column,
# which would break the np.where(...)[1] conversion further down - confirm the
# data set has three or more classes.
onehot = LabelBinarizer()
labels = onehot.fit_transform(labels)

# Normalize the adjacency matrix and the features.
adjacency = normalize_adj(adjacency)
features = normalize_features(features)

# Convert to tensors. np.where(labels)[1] holds the column index of every
# non-zero one-hot entry, which is used here as the integer class label.
features = torch.FloatTensor(np.array(features))
labels = torch.LongTensor(np.where(labels)[1])

print(adjacency)
print(features)
print(labels)

  (0, 0)	0.5000000000000001
  (0, 18427)	0.09805806756909202
  (1, 1)	0.028571428571428567
  (1, 2812)	0.02492223931396134
  (1, 4987)	0.016903085094570332
  (1, 5228)	0.020965696734438363
  (1, 5307)	0.015240998561973751
  (1, 5755)	0.020348923188911988
  (1, 6829)	0.031943828249996996
  (1, 7136)	0.033149677206589796
  (1, 8049)	0.012135707849456652
  (1, 8533)	0.027788500718836418
  (1, 8894)	0.023218173010628604
  (1, 9934)	0.019920476822239894
  (1, 10281)	0.04517539514526256
  (1, 10379)	0.007805119495830757
  (1, 10554)	0.02238868314198225
  (1, 11557)	0.019783564706223267
  (1, 12305)	0.020498001542269693
  (1, 13737)	0.021295885499998
  (1, 14344)	0.02366905341655754
  (1, 15026)	0.02577696311132335
  (1, 15785)	0.013894250359418209
  (1, 16260)	0.03253000243161777
  (1, 16590)	0.018018749253911177
  :	:
  (22467, 5339)	0.038235955645093626
  (22467, 6181)	0.04233337566673017
  (22467, 8565)	0.03984095364447979
  (22467, 9367)	0.02703690352179376
  (22467, 9986)	0.033671751485

In [6]:
node_number = features.shape[0]

# Split the data set with three boolean masks over the nodes. Each mask starts
# all-False and is switched on for one contiguous index range:
# [0, 500) train, [500, 1000) validation, [1000, 1500) test.
train_mask, val_mask, test_mask = (
    np.zeros(node_number, dtype=bool) for _ in range(3)
)
train_mask[0:500] = True
val_mask[500:1000] = True
test_mask[1000:1500] = True

In [None]:
"""
Graph convolution network layer.

"""
class GCNLayer(nn.Module):
    """
    initialiser of graph convolution layer.
    
    Params:
    input_dim: input dimension of this layer.
    out_dim: output dimension of this layer.
    use_bias: if use bias(optional).
    
    """
    def __init__(self, input_dim, output_dim, use_bias=True):
        super(GCNLayer, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.use_bias = use_bias
        self.weight = torch.nn.Parameter(torch.Tensor(input_dim, output_dim))
        if self.use_bias:
            self.bias = torch.nn.Parameter(torch.Tensor(output_dim))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
    
    """
    reset parameters.
    """
    def reset_parameters(self):
        #initialise weight.
        init.kaiming_uniform_(self.weight)
        if self.use_bias:
            init.zeros_(self.bias)

    """
    Define computation performed at every call.
    
    Params:
    adjacency: adjacency matrix.
    input_feature: features of every data in dataset.
    """
    def forward(self, adjacency, input_feature):
        device = "cpu"
        support = torch.mm(input_feature, self.weight.to(device))
        # adjacency is sparse matrix so it need torch.sparse.mm instead of torch.mm
        output = torch.sparse.mm(adjacency, support)
        if self.use_bias:
            output += self.bias.to(device)
        return output