# IIC-3641 GML UC

## Actividad en clase

Vamos a usar el VGAE para trabajar en esta actividad.

- Corra el VGAE y vea que puede reproducir el ejemplo de la clase.
- Use el dataset **pubmed** que dejé en la carpeta data de github. 
- Entrene el VGAE. Pruebe dos variantes del VGAE y evalúe no solo el accuracy en la tarea sino que la reconstrucción de A. Corra 100 epochs para cada variante.
- Cuanto termine, me avisa para entregarle una **L (logrado)**.
- Recuerde que las L otorgan un bono en la nota final de la asignatura.

***Tiene hasta el final de la clase.***

In [1]:
import numpy as np
import sys
import pickle as pkl
import networkx as nx
from scipy.sparse import csr_matrix, dia_matrix, lil_matrix, eye, vstack, isspmatrix_coo, coo_matrix, diags, triu

def parse_index_file(filename):
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index

def load_data(dataset):
    # load the data: x, tx, allx, graph
    names = ['x', 'tx', 'allx', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, tx, allx, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended

    features = vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    return adj, features

In [2]:
def sparse_to_tuple(sparse_mx):
    if not isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape

def preprocess_graph(adj):
    adj = coo_matrix(adj)
    adj_ = adj + eye(adj.shape[0])
    rowsum = np.array(adj_.sum(1))
    degree_mat_inv_sqrt = diags(np.power(rowsum, -0.5).flatten())
    adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo()
    return sparse_to_tuple(adj_normalized)

def mask_test_edges(adj):
    # Function to build test set with 10% positive links
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
    # TODO: Clean up.

    # Remove diagonal elements
    adj = adj - dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 20.))

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0)

    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    assert ~ismember(test_edges_false, edges_all)
    assert ~ismember(val_edges_false, edges_all)
    assert ~ismember(val_edges, train_edges)
    assert ~ismember(test_edges, train_edges)
    assert ~ismember(val_edges, test_edges)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false

In [3]:
dataset = 'pubmed'
model = 'VGAE'

input_dim = 500 
hidden1_dim = 32
hidden2_dim = 16
use_feature = True

num_epoch = 100
learning_rate = 0.01

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os

class VGAE(nn.Module):
    def __init__(self, adj):
        super(VGAE,self).__init__()
        self.base_gcn = GraphConvSparse(input_dim, hidden1_dim, adj)
        self.gcn_mean = GraphConvSparse(hidden1_dim, hidden2_dim, adj, activation=lambda x:x)
        self.gcn_logstddev = GraphConvSparse(hidden1_dim, hidden2_dim, adj, activation=lambda x:x)

    def encode(self, X):
        hidden = self.base_gcn(X)
        self.mean = self.gcn_mean(hidden)
        self.logstd = self.gcn_logstddev(hidden)
        gaussian_noise = torch.randn(X.size(0), hidden2_dim)
        sampled_z = gaussian_noise*torch.exp(self.logstd) + self.mean
        return sampled_z

    def forward(self, X):
        Z = self.encode(X)
        A_pred = dot_product_decode(Z)
        return A_pred

    
class GraphConvSparse(nn.Module):
    def __init__(self, input_dim, output_dim, adj, activation = F.relu, **kwargs):
        super(GraphConvSparse, self).__init__(**kwargs)
        self.weight = glorot_init(input_dim, output_dim) 
        self.adj = adj
        self.activation = activation

    def forward(self, inputs):
        x = inputs
        x = torch.mm(x,self.weight)
        x = torch.mm(self.adj, x)
        outputs = self.activation(x)
        return outputs


def dot_product_decode(Z):
    A_pred = torch.sigmoid(torch.matmul(Z,Z.t()))
    return A_pred

def glorot_init(input_dim, output_dim):
    init_range = np.sqrt(6.0/(input_dim + output_dim))
    initial = torch.rand(input_dim, output_dim)*2*init_range - init_range
    return nn.Parameter(initial)

In [5]:
from torch.optim import Adam
from sklearn.metrics import roc_auc_score, average_precision_score
import time


# Train on CPU (hide GPU) due to memory constraints
#os.environ['CUDA_VISIBLE_DEVICES'] = ""

adj, features = load_data(dataset)

A = adj

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
adj = adj_train

# Some preprocessing
adj_norm = preprocess_graph(adj)


num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]

  objects.append(pkl.load(f, encoding='latin1'))


In [6]:
A.shape

(19717, 19717)

In [7]:
# Create Model
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

adj_label = adj_train + eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)

adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T), 
                            torch.FloatTensor(adj_norm[1]), 
                            torch.Size(adj_norm[2]))
adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label[0].T), 
                            torch.FloatTensor(adj_label[1]), 
                            torch.Size(adj_label[2]))
features = torch.sparse.FloatTensor(torch.LongTensor(features[0].T), 
                            torch.FloatTensor(features[1]), 
                            torch.Size(features[2]))

weight_mask = adj_label.to_dense().view(-1) == 1
weight_tensor = torch.ones(weight_mask.size(0)) 
weight_tensor[weight_mask] = pos_weight

  adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T),


In [8]:
model = VGAE(adj_norm)
optimizer = Adam(model.parameters(), lr=learning_rate)

def get_scores(edges_pos, edges_neg, adj_rec):

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # Predict on test set of edges
    preds = []
    pos = []
    for e in edges_pos:
        # print(e)
        # print(adj_rec[e[0], e[1]])
        preds.append(sigmoid(adj_rec[e[0], e[1]].item()))
        pos.append(adj_orig[e[0], e[1]])

    preds_neg = []
    neg = []
    for e in edges_neg:

        preds_neg.append(sigmoid(adj_rec[e[0], e[1]].data))
        neg.append(adj_orig[e[0], e[1]])

    preds_all = np.hstack([preds, preds_neg])
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)

    return roc_score, ap_score


def get_acc(adj_rec, adj_label):
    labels_all = adj_label.to_dense().view(-1).long()
    preds_all = (adj_rec > 0.5).view(-1).long()
    accuracy = (preds_all == labels_all).sum().float() / labels_all.size(0)
    return accuracy

In [9]:
for epoch in range(num_epoch):
    t = time.time()

    A_pred = model(features)
    optimizer.zero_grad()
    loss = log_lik = norm*F.binary_cross_entropy(A_pred.view(-1), adj_label.to_dense().view(-1), weight = weight_tensor)
    kl_divergence = 0.5/ A_pred.size(0) * (1 + 2*model.logstd - model.mean**2 - torch.exp(model.logstd)**2).sum(1).mean()
    loss -= kl_divergence

    loss.backward()
    optimizer.step()

    train_acc = get_acc(A_pred,adj_label)

    val_roc, val_ap = get_scores(val_edges, val_edges_false, A_pred)
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(loss.item()),
          "train_acc=", "{:.5f}".format(train_acc), "val_roc=", "{:.5f}".format(val_roc),
          "val_ap=", "{:.5f}".format(val_ap),
          "time=", "{:.5f}".format(time.time() - t))


test_roc, test_ap = get_scores(test_edges, test_edges_false, A_pred)
print("End of training!", "test_roc=", "{:.5f}".format(test_roc),
      "test_ap=", "{:.5f}".format(test_ap))

Epoch: 0001 train_loss= 1.74199 train_acc= 0.50001 val_roc= 0.49352 val_ap= 0.50256 time= 3.72806
Epoch: 0002 train_loss= 1.70254 train_acc= 0.49977 val_roc= 0.49066 val_ap= 0.48943 time= 3.71223
Epoch: 0003 train_loss= 1.67070 train_acc= 0.49861 val_roc= 0.48446 val_ap= 0.48707 time= 3.70257
Epoch: 0004 train_loss= 1.60658 train_acc= 0.49624 val_roc= 0.51563 val_ap= 0.50776 time= 3.71039
Epoch: 0005 train_loss= 1.54948 train_acc= 0.49252 val_roc= 0.50333 val_ap= 0.49554 time= 3.66675
Epoch: 0006 train_loss= 1.49437 train_acc= 0.48589 val_roc= 0.52313 val_ap= 0.51392 time= 3.66979
Epoch: 0007 train_loss= 1.43495 train_acc= 0.47502 val_roc= 0.51132 val_ap= 0.51271 time= 3.67103
Epoch: 0008 train_loss= 1.35413 train_acc= 0.45943 val_roc= 0.52758 val_ap= 0.52328 time= 3.87068
Epoch: 0009 train_loss= 1.29856 train_acc= 0.43834 val_roc= 0.53999 val_ap= 0.52796 time= 3.75251
Epoch: 0010 train_loss= 1.23275 train_acc= 0.41456 val_roc= 0.56145 val_ap= 0.55194 time= 3.77590
Epoch: 0011 train_lo

Epoch: 0085 train_loss= 0.57499 train_acc= 0.46392 val_roc= 0.82834 val_ap= 0.83978 time= 3.72192
Epoch: 0086 train_loss= 0.57502 train_acc= 0.46356 val_roc= 0.82765 val_ap= 0.83920 time= 3.69672
Epoch: 0087 train_loss= 0.57498 train_acc= 0.46224 val_roc= 0.82528 val_ap= 0.83829 time= 3.76685
Epoch: 0088 train_loss= 0.57450 train_acc= 0.46255 val_roc= 0.82439 val_ap= 0.83779 time= 3.74239
Epoch: 0089 train_loss= 0.57459 train_acc= 0.46361 val_roc= 0.82649 val_ap= 0.83660 time= 3.73788
Epoch: 0090 train_loss= 0.57462 train_acc= 0.46516 val_roc= 0.82574 val_ap= 0.83672 time= 3.77322
Epoch: 0091 train_loss= 0.57382 train_acc= 0.46706 val_roc= 0.82704 val_ap= 0.83944 time= 3.86350
Epoch: 0092 train_loss= 0.57322 train_acc= 0.46887 val_roc= 0.83197 val_ap= 0.84509 time= 3.82832
Epoch: 0093 train_loss= 0.57330 train_acc= 0.46805 val_roc= 0.82779 val_ap= 0.83858 time= 3.78912
Epoch: 0094 train_loss= 0.57220 train_acc= 0.46734 val_roc= 0.83508 val_ap= 0.84537 time= 3.77758
Epoch: 0095 train_lo

In [10]:
Z = model.encode(features)
A_pred = torch.sigmoid(torch.matmul(Z,Z.t()))
dense_A = torch.tensor(A.toarray())
l1_diff = torch.norm(A_pred - dense_A, p=1)
print(l1_diff)

tensor(1.6777e+08, grad_fn=<LinalgVectorNormBackward0>)


In [11]:
hidden1_dim = 16
hidden2_dim = 8

adj, features = load_data(dataset)

A = adj

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
adj = adj_train

# Some preprocessing
adj_norm = preprocess_graph(adj)


num_nodes = adj.shape[0]

features = sparse_to_tuple(features.tocoo())
num_features = features[2][1]
features_nonzero = features[1].shape[0]
# Create Model
pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

adj_label = adj_train + eye(adj_train.shape[0])
adj_label = sparse_to_tuple(adj_label)

adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T), 
                            torch.FloatTensor(adj_norm[1]), 
                            torch.Size(adj_norm[2]))
adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label[0].T), 
                            torch.FloatTensor(adj_label[1]), 
                            torch.Size(adj_label[2]))
features = torch.sparse.FloatTensor(torch.LongTensor(features[0].T), 
                            torch.FloatTensor(features[1]), 
                            torch.Size(features[2]))

weight_mask = adj_label.to_dense().view(-1) == 1
weight_tensor = torch.ones(weight_mask.size(0)) 
weight_tensor[weight_mask] = pos_weight

model = VGAE(adj_norm)
optimizer = Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epoch):
    t = time.time()

    A_pred = model(features)
    optimizer.zero_grad()
    loss = log_lik = norm*F.binary_cross_entropy(A_pred.view(-1), adj_label.to_dense().view(-1), weight = weight_tensor)
    kl_divergence = 0.5/ A_pred.size(0) * (1 + 2*model.logstd - model.mean**2 - torch.exp(model.logstd)**2).sum(1).mean()
    loss -= kl_divergence

    loss.backward()
    optimizer.step()

    train_acc = get_acc(A_pred,adj_label)

    val_roc, val_ap = get_scores(val_edges, val_edges_false, A_pred)
    print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(loss.item()),
          "train_acc=", "{:.5f}".format(train_acc), "val_roc=", "{:.5f}".format(val_roc),
          "val_ap=", "{:.5f}".format(val_ap),
          "time=", "{:.5f}".format(time.time() - t))


test_roc, test_ap = get_scores(test_edges, test_edges_false, A_pred)
print("End of training!", "test_roc=", "{:.5f}".format(test_roc),
      "test_ap=", "{:.5f}".format(test_ap))

  objects.append(pkl.load(f, encoding='latin1'))


Epoch: 0001 train_loss= 1.32252 train_acc= 0.49992 val_roc= 0.51164 val_ap= 0.51012 time= 3.66858
Epoch: 0002 train_loss= 1.28053 train_acc= 0.49968 val_roc= 0.50459 val_ap= 0.49802 time= 3.74474
Epoch: 0003 train_loss= 1.25676 train_acc= 0.49905 val_roc= 0.50801 val_ap= 0.50366 time= 3.73450
Epoch: 0004 train_loss= 1.24472 train_acc= 0.49699 val_roc= 0.50140 val_ap= 0.50404 time= 3.71059
Epoch: 0005 train_loss= 1.19477 train_acc= 0.49449 val_roc= 0.51429 val_ap= 0.50667 time= 3.87550
Epoch: 0006 train_loss= 1.17450 train_acc= 0.49005 val_roc= 0.51093 val_ap= 0.49438 time= 3.78142
Epoch: 0007 train_loss= 1.13654 train_acc= 0.48233 val_roc= 0.51329 val_ap= 0.50293 time= 3.76425
Epoch: 0008 train_loss= 1.08973 train_acc= 0.47345 val_roc= 0.50779 val_ap= 0.50264 time= 3.76641
Epoch: 0009 train_loss= 1.06274 train_acc= 0.45859 val_roc= 0.53389 val_ap= 0.52653 time= 3.73679
Epoch: 0010 train_loss= 1.02962 train_acc= 0.44248 val_roc= 0.54375 val_ap= 0.53495 time= 3.77916
Epoch: 0011 train_lo

Epoch: 0085 train_loss= 0.58116 train_acc= 0.46430 val_roc= 0.82751 val_ap= 0.83878 time= 3.97235
Epoch: 0086 train_loss= 0.58063 train_acc= 0.46421 val_roc= 0.82567 val_ap= 0.83708 time= 3.79700
Epoch: 0087 train_loss= 0.57949 train_acc= 0.46398 val_roc= 0.82700 val_ap= 0.83667 time= 3.74549
Epoch: 0088 train_loss= 0.57943 train_acc= 0.46403 val_roc= 0.83008 val_ap= 0.83987 time= 3.78613
Epoch: 0089 train_loss= 0.57819 train_acc= 0.46486 val_roc= 0.82751 val_ap= 0.83668 time= 3.76624
Epoch: 0090 train_loss= 0.57670 train_acc= 0.46781 val_roc= 0.83428 val_ap= 0.84328 time= 3.76623
Epoch: 0091 train_loss= 0.57607 train_acc= 0.46725 val_roc= 0.83493 val_ap= 0.84416 time= 3.75100
Epoch: 0092 train_loss= 0.57465 train_acc= 0.46739 val_roc= 0.83654 val_ap= 0.84405 time= 3.75778
Epoch: 0093 train_loss= 0.57320 train_acc= 0.46548 val_roc= 0.84162 val_ap= 0.84982 time= 3.79701
Epoch: 0094 train_loss= 0.57188 train_acc= 0.46625 val_roc= 0.83731 val_ap= 0.84580 time= 3.83603
Epoch: 0095 train_lo

In [12]:
Z = model.encode(features)
A_pred = torch.sigmoid(torch.matmul(Z,Z.t()))
dense_A = torch.tensor(A.toarray())
l1_diff = torch.norm(A_pred - dense_A, p=1)
print(l1_diff)

tensor(1.6777e+08, grad_fn=<LinalgVectorNormBackward0>)
