In [1]:
import os

# Remember the directory the kernel started in. A bare os.chdir('..') climbs
# one more level every time the cell is re-run; anchoring on the start
# directory makes the cell idempotent.
try:
    NOTEBOOK_DIR
except NameError:
    NOTEBOOK_DIR = os.getcwd()

# Work from the parent of the start directory.
os.chdir(os.path.join(NOTEBOOK_DIR, '..'))
os.getcwd()

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

import pickle 
import numpy as np
import os

In [168]:
# Read the pickled bundle (adjacency, features, labels) from disk.
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
parent_dir = os.path.abspath('..')
test_file = '/data/cora/cora.pkl'
# test_file begins with '/', so the path is built by plain concatenation
# (os.path.join would discard parent_dir for an absolute second argument).
with open(parent_dir + test_file, 'rb') as f:
    cora_bundle = pickle.load(f)
adj, features, labels = cora_bundle

In [4]:
# Inspect the adjacency matrix; the notebook shows its summary repr
# (shape, dtype, number of stored elements, storage format).
adj

<2708x2708 sparse matrix of type '<class 'numpy.int64'>'
	with 10556 stored elements in Compressed Sparse Row format>

In [5]:
# Dense feature matrix as a plain ndarray. toarray() is used instead of
# todense() because todense() returns np.matrix, which recent scikit-learn
# releases refuse as estimator input.
X = features.toarray()

In [6]:
# Integer class id per row: index of the largest entry along axis 1 of labels.
classes = np.asarray(labels).argmax(axis=1)

## t-SNE with Euclidean (L2) distance

In [109]:
# TSNE is otherwise only imported further down (in the cosine-distance
# section), so in top-to-bottom order this cell would raise NameError.
from sklearn.manifold import TSNE

# 2-D t-SNE of the feature matrix with the estimator's default metric,
# using the exact (non-approximated) method.
embedder = TSNE(n_components=2, method='exact', perplexity=30, verbose=1)
X_emb = embedder.fit_transform(X)

[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 1000 / 2708
[t-SNE] Computed conditional probabilities for sample 2000 / 2708
[t-SNE] Computed conditional probabilities for sample 2708 / 2708
[t-SNE] Mean sigma: 0.884479
[t-SNE] KL divergence after 250 iterations with early exaggeration: 137.396054
[t-SNE] KL divergence after 1000 iterations: 3.070219


In [10]:
# Project-local plotting helper; it is imported as a `util` package, so it
# must be importable from the current working directory / sys.path.
from util.plot_bokeh_embedding import plot_embedding
from bokeh.io import output_notebook
# Route Bokeh output to the notebook.
output_notebook()

In [11]:
# String names "0".."6" for the class ids, then one name per entry of classes.
target_names = list(map(str, range(7)))
named_labels = [target_names[class_id] for class_id in classes]

In [110]:
# Plot the default-metric t-SNE embedding with the project helper.
# NOTE(review): presumably colors points by `classes` and uses `named_labels`
# for the legend -- confirm in util.plot_bokeh_embedding.
plot_embedding(X_emb, classes, named_labels)

## t-SNE with cosine distance

In [107]:
from sklearn.manifold import TSNE

# Same t-SNE settings as the default-metric run, but with pairwise cosine
# distances between feature rows.
embedder = TSNE(
    metric='cosine',
    method='exact',
    n_components=2,
    perplexity=30,
    verbose=1,
)
X_emb_cos = embedder.fit_transform(X)

[t-SNE] Computing pairwise distances...
[t-SNE] Computed conditional probabilities for sample 1000 / 2708
[t-SNE] Computed conditional probabilities for sample 2000 / 2708
[t-SNE] Computed conditional probabilities for sample 2708 / 2708
[t-SNE] Mean sigma: 0.171553
[t-SNE] KL divergence after 250 iterations with early exaggeration: 83.246704
[t-SNE] KL divergence after 1000 iterations: 1.916355


In [108]:
# Plot the cosine-distance t-SNE embedding with the project helper.
# NOTE(review): presumably colors points by `classes` and uses `named_labels`
# for the legend -- confirm in util.plot_bokeh_embedding.
plot_embedding(X_emb_cos, classes, named_labels)

## Spectral embeddings

In [13]:
from sklearn.manifold import SpectralEmbedding

In [14]:
# 2-D spectral embedding; 'precomputed' means the matrix passed to fit is
# used directly as the affinity matrix.
embedding = SpectralEmbedding(affinity='precomputed', n_components=2)

In [15]:
# Use the adjacency matrix itself as the precomputed affinity. toarray()
# gives a plain ndarray; todense() would give np.matrix, which recent
# scikit-learn releases refuse as estimator input.
X_spectral = embedding.fit_transform(adj.toarray())



In [16]:
# Plot the spectral embedding with the project helper.
# NOTE(review): presumably colors points by `classes` and uses `named_labels`
# for the legend -- confirm in util.plot_bokeh_embedding.
plot_embedding(X_spectral, classes, named_labels)

In [64]:
# Dense ndarray copy of the adjacency matrix, used as the weight matrix W.
W = adj.toarray()

In [65]:
# Degree vector: sum of each row of the weight matrix.
D = W.sum(axis=1)
D.shape

(2708,)

In [66]:
# Promote the degree vector to a diagonal matrix.
# Gotcha: np.diag on a 2-D array extracts the diagonal instead, so running
# this cell a second time turns D back into a 1-D vector.
D = np.diag(D)

In [67]:
# Check the shape of D after the np.diag step.
D.shape

(2708, 2708)

In [138]:
# Unnormalized graph Laplacian: D minus the weight matrix W.
L = D - W

In [147]:
# trace(X^T L X) for the spectral embedding.
foo = np.trace(X_spectral.T @ L @ X_spectral)
foo

1.78447981793407e-18

In [149]:
# trace(X^T L X) for the cosine-distance t-SNE embedding.
foo = np.trace(X_emb_cos.T @ L @ X_emb_cos)
foo

28120430.13794901

In [26]:
from scipy.sparse.csgraph import connected_components

In [27]:
# Only the component count is needed. return_labels=False makes
# connected_components return just that count, so nothing has to be thrown
# away into `_` (a name IPython itself uses for the previous cell's output).
n_connected_components = connected_components(W, return_labels=False)
n_connected_components

78

In [52]:
import torch

# Tensor-type aliases passed to Tensor.type(...) elsewhere in the notebook:
# the torch.cuda variants when CUDA is available, the CPU ones otherwise.
if not torch.cuda.is_available():
    dtypeFloat, dtypeLong, dtypeDouble = (
        torch.FloatTensor, torch.LongTensor, torch.DoubleTensor)
else:
    dtypeFloat, dtypeLong, dtypeDouble = (
        torch.cuda.FloatTensor, torch.cuda.LongTensor, torch.cuda.DoubleTensor)

In [151]:
def graph_torch_loss(adj, X_emb):
    """Return the graph-Laplacian term ``trace(X_emb^T L X_emb)``.

    ``L`` is the unnormalized Laplacian of ``adj`` as computed by
    ``scipy.sparse.csgraph.laplacian(adj, normed=False)``.

    Parameters
    ----------
    adj : scipy sparse matrix or numpy.ndarray, shape (n, n)
        Graph adjacency / weight matrix. Both sparse and dense input work.
    X_emb : torch.Tensor, shape (n, d)
        Embedding to score. The Laplacian is converted to this tensor's dtype
        and device before multiplying, so the function does not depend on any
        notebook-level dtype alias.

    Returns
    -------
    torch.Tensor
        Zero-dimensional tensor holding the trace.
    """
    import numpy as np
    import torch
    from scipy.sparse import issparse
    from scipy.sparse.csgraph import laplacian

    L = laplacian(adj, normed=False, return_diag=False)
    # laplacian() returns a sparse result for sparse input and an ndarray for
    # dense input; only the sparse result has (and needs) .toarray().
    if issparse(L):
        L = L.toarray()

    # Match the embedding's dtype and device so torch.mm never sees mixed
    # operands (e.g. float64 embeddings, or CPU tensors on a CUDA machine).
    L = torch.from_numpy(np.ascontiguousarray(L)).to(
        dtype=X_emb.dtype, device=X_emb.device)
    cut = torch.trace(torch.mm(torch.mm(torch.t(X_emb), L), X_emb))
    return cut

In [153]:
# Convert the cosine t-SNE embedding to a torch tensor of type dtypeFloat ...
X_test = torch.from_numpy(X_emb_cos).type(dtypeFloat)
# ... and evaluate the torch implementation of trace(X^T L X) on it.
loss = graph_torch_loss(adj, X_test)
print(loss)

tensor(28120428.)


In [95]:
# Check the adjacency matrix dimensions.
adj.shape

(2708, 2708)

In [101]:
# Feature matrix X as a torch tensor of type dtypeFloat.
inputs = torch.from_numpy(X).type(dtypeFloat)

In [104]:
# Print the shape of each of the four objects, one per line.
for shaped_obj in (inputs, classes, X_emb, adj):
    print(shaped_obj.shape)

torch.Size([2708, 1433])
(2708,)
(2708, 2)
(2708, 2708)


In [105]:
# Write [inputs, classes, X_emb, adj] to disk as a single pickled list.
filename = '/data/cora/cora_train.pkl'
train_bundle = [inputs, classes, X_emb, adj]
# filename begins with '/', so the path is built by plain concatenation.
with open(parent_dir + filename, 'wb') as f:
    pickle.dump(train_bundle, f)

In [112]:
# Inspect the adjacency matrix again (summary repr only).
adj

<2708x2708 sparse matrix of type '<class 'numpy.int64'>'
	with 10556 stored elements in Compressed Sparse Row format>

In [113]:
import scipy.sparse as sp
# COO-format version of the adjacency matrix.
test = sp.coo_matrix(adj)  # sparse matrix

In [117]:
# `random_idx` was never defined in the notebook, so this cell raised
# NameError on a fresh kernel. The recorded result is a 500x500 sub-matrix,
# hence 500 node indices are drawn here.
# NOTE(review): sampling without replacement and the fixed seed are
# assumptions -- confirm against how random_idx was originally created.
random_idx = np.random.RandomState(0).choice(adj.shape[0], size=500, replace=False)

# Sub-graph induced by the sampled nodes: select rows, then the same columns.
boo = adj[random_idx, :][:, random_idx]

In [118]:
# Inspect the sub-sampled adjacency matrix (summary repr only).
boo

<500x500 sparse matrix of type '<class 'numpy.int64'>'
	with 334 stored elements in Compressed Sparse Row format>

In [119]:
# COO-format version of the sub-sampled adjacency matrix (rebinds `test`).
test = sp.coo_matrix(boo)  # sparse matrix

In [123]:
# Dense view of the sub-sampled matrix for a quick visual check.
test.toarray()

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [133]:
# Raise torch's print precision to 10 decimals (a global setting that also
# affects later tensor displays), then print a 5x5 float tensor filled
# in place by uniform_().
torch.set_printoptions(precision=10)
a = torch.empty(5, 5, dtype=torch.float32).uniform_()
print(a)

tensor([[0.1551389098, 0.2436388731, 0.5545241833, 0.4926251173, 0.2751442194],
        [0.9644980431, 0.9566310644, 0.1698558331, 0.4392726421, 0.0705533624],
        [0.2522225976, 0.6503455043, 0.5542429090, 0.7338926792, 0.8931249976],
        [0.8218232393, 0.8646702170, 0.8255894184, 0.1104744077, 0.7994009256],
        [0.3116976619, 0.0828239918, 0.3466773629, 0.7330866456, 0.5388040543]])


In [143]:
# Display the torch copy of the embedding (for comparison with the numpy one).
X_test

tensor([[ 14.4339828491,  11.9211511612],
        [  6.2974553108,   7.0298943520],
        [ -9.4841909409,   9.4321279526],
        ...,
        [-16.3240242004,   6.5191717148],
        [  4.3245906830,  13.3320980072],
        [ 13.4151973724, -15.8165264130]])

In [142]:
# Display the numpy embedding currently bound to X_emb.
X_emb

array([[ 14.433983 ,  11.921151 ],
       [  6.2974553,   7.0298944],
       [ -9.484191 ,   9.432128 ],
       ...,
       [-16.324024 ,   6.5191717],
       [  4.3245907,  13.332098 ],
       [ 13.415197 , -15.816526 ]], dtype=float32)

In [154]:
# Extract the loss as a plain Python number.
loss.item()

28120428.0

# Interesting plots

In [222]:
import bokeh.plotting as bp
from bokeh.plotting import show
from bokeh.models.glyphs import Segment

# This cell rebinds three notebook-level names: X_emb becomes the cosine
# t-SNE layout, labels becomes the integer class ids, and adj is converted to
# COO so that .row / .col hold one entry per stored edge.
X_emb, labels = X_emb_cos, classes
adj = sp.coo_matrix(adj)  # sparse matrix

# 20 colors
colormap = np.array([
    "#1f77b4", "#aec7e8", "#ff7f0e", "#ffbb78", "#2ca02c",
    "#98df8a", "#d62728", "#ff9896", "#9467bd", "#c5b0d5",
    "#8c564b", "#c49c94", "#e377c2", "#f7b6d2", "#7f7f7f",
    "#c7c7c7", "#bcbd22", "#dbdb8d", "#17becf", "#9edae5"
])

# NOTE(review): plot_width / plot_height, the "previewsave" tool and the
# `legend=` keyword below look like older Bokeh API names -- confirm against
# the installed Bokeh version.
plot_fig = bp.figure(
    plot_width=1000,
    plot_height=700,
    tools="pan, wheel_zoom, box_zoom, reset, hover, previewsave",
    x_axis_type=None,
    y_axis_type=None,
    min_border=1,
)

# Node layer: one point per row of X_emb, colored by its class id.
data_dict = dict(
    x=X_emb[:, 0],
    y=X_emb[:, 1],
    color=colormap[labels],
    label=named_labels,
)
mySource = bp.ColumnDataSource(data_dict)

# Edge layer: keep only the edges whose two endpoints share a class.
classA, classB = labels[adj.row], labels[adj.col]
mask = classA == classB
# Class id of each kept edge; both endpoints agree, so classA is enough.
edge_colormask = classA[mask]
p0 = X_emb[adj.row[mask], :]
p1 = X_emb[adj.col[mask], :]

source = bp.ColumnDataSource({
    'x0': p0[:, 0],
    'y0': p0[:, 1],
    'x1': p1[:, 0],
    'y1': p1[:, 1],
    'color': colormap[edge_colormask],
})

# Draw the edges first so the node markers are added on top of them.
glyph = Segment(
    x0="x0", y0="y0", x1="x1", y1="y1",
    line_color="color", line_width=1, line_alpha=0.1,
)
plot_fig.add_glyph(source, glyph)

plot_fig.circle(x='x', y='y', color='color', legend='label', source=mySource)
# Detach the legend from the plot area and re-attach it on the right side.
plot_fig.legend.location = (0, 70)
new_legend = plot_fig.legend[0]
plot_fig.legend[0].plot = None
plot_fig.add_layout(new_legend, 'right')
plot_fig.legend.label_text_font_size = '7pt'

show(plot_fig)


In [161]:
# Dense ndarray copy of the adjacency matrix (rebinds W).
W = adj.toarray()

In [171]:
# Rebind adj to its COO form so .row / .col are available below.
adj = sp.coo_matrix(adj)  # sparse matrix

In [198]:
# Entries of `labels` at the row end and the column end of every stored edge.
# NOTE(review): this indexing expects `labels` to already hold integer class
# ids (as rebound in the plotting cell), not the loaded label matrix -- confirm.
classA, classB = labels[adj.row], labels[adj.col]

In [200]:
# True for edges whose two endpoints carry the same class.
mask = np.equal(classA, classB)

In [212]:
# Per-edge class id for intra-class edges, -1 for inter-class edges.
boo = np.where(mask, classA, -1)

In [216]:
# Share (in percent) of stored edges that connect two nodes of the same class.
mask.sum() / len(classA) * 100

45.130731337627886

In [220]:
# Number of intra-class edges (length of the masked class array).
classA[mask].shape

(4764,)