In [1]:
import os
# Step up one directory from wherever the notebook was launched; presumably this is
# so that the relative 'results/...' paths and the `core` / `util` imports used
# further down resolve from the project root.
# NOTE(review): the move is relative, so re-running this cell without restarting
# the kernel climbs one more level each time.
os.chdir('..')
# Display the resulting working directory as the cell output.
os.getcwd()

'/home/leowyaoyang/fyp-graph-clustering'

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

import pickle 
import numpy as np
import pandas as pd
import torch
import os
from timeit import default_timer as timer

In [3]:
from core.GraphConvNet2 import GraphConvNet2
from core.SimpleNet import SimpleNet
from core.DataEmbeddingGraph import DataEmbeddingGraph

In [4]:
# Choose the compute device once; later cells pass `device` to torch.load.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('cuda available' if device == 'cuda' else 'cuda not available')

cuda available


In [5]:
# Read the pickled test set from the directory above the current working directory
# and unpack its three members: inputs, labels and X_emb.
# NOTE: pickle.load can execute arbitrary code, so only open files from a trusted source.
test_file = '/data/mnist/mnist_test_tsne.pkl'
parent_dir = os.path.abspath('..')
with open(parent_dir + test_file, 'rb') as f:
    inputs, labels, X_emb = pickle.load(f)

In [6]:
# Sanity check: print the input shape, the label count and the X_emb shape, one per line.
print(inputs.shape, len(labels), X_emb.shape, sep='\n')

torch.Size([3000, 1, 28, 28])
3000
(3000, 2)


In [7]:
# Wrap the complete test set in a single DataEmbeddingGraph. The second and third
# constructor arguments are passed as an empty list and None.
# NOTE(review): the meaning of those two arguments is defined in
# core.DataEmbeddingGraph and is not visible here - confirm before changing them.
G = DataEmbeddingGraph(inputs, [], None)

In [8]:
# Hyper-parameters handed to every network constructor in this notebook.
net_parameters = {
    'n_components': 2,
    'D': 784,  # input dimension
    'H': 50,   # number of hidden units
    'L': 10,   # number of hidden layers
}

In [9]:
# Simple net: build the model, move it to the GPU when one is in use, then restore
# the weights stored under 'state_dict' in the saved checkpoint file.
root = 'results/mnist_5/'
filename = root + 'simple_net_800.pkl'
net = SimpleNet(net_parameters)
if device == 'cuda':
    net.cuda()
checkpoint = torch.load(filename, map_location=device)
net.load_state_dict(checkpoint['state_dict'])

In [10]:
# Simple net: embed the whole test set with one forward pass over G.
# This is inference only, so run it under no_grad to avoid building an autograd graph.
# .cpu() returns the tensor unchanged when it is already on the CPU, so one
# expression covers both the CUDA and the CPU case.
with torch.no_grad():
    y_pred = net.forward(G).detach().cpu().numpy()

In [11]:
from util.plot_bokeh_embedding import plot_embedding

In [12]:
# String names '0'..'9', then the name of every sample looked up from its label.
target_names = list(map(str, range(10)))
named_labels = [target_names[label] for label in labels]

In [13]:
from bokeh.io import output_notebook
# Configure Bokeh to display its plots inline in the notebook output cells.
output_notebook()

In [14]:
# Simple net: plot the SimpleNet predictions (y_pred) with plot_embedding,
# passing the labels and their string names.
plot_embedding(y_pred, labels, named_labels)

In [15]:
# tSNE: plot X_emb, the embedding loaded from the pickle file, for comparison
# with the network predictions.
plot_embedding(X_emb, labels, named_labels)

In [16]:
# Graph net: build GraphConvNet2, move it to the GPU when one is in use, then
# restore the weights stored under 'state_dict' in the saved checkpoint file.
# Note that `net` is rebound here; y_pred from the simple net was computed earlier.
root = 'results/mnist_6/'
filename = root + 'graph_net_800.pkl'
net = GraphConvNet2(net_parameters)
if device == 'cuda':
    net.cuda()
checkpoint = torch.load(filename, map_location=device)
net.load_state_dict(checkpoint['state_dict'])

In [17]:
# Graph net: embed the test set chunk by chunk, building a separate
# DataEmbeddingGraph for every chunk of `batch_size` samples and writing each
# chunk's predictions into the matching rows of y_pred_new.
batch_size = 1000
n_samples = inputs.shape[0]
y_pred_new = np.zeros((n_samples, 2))
# Inference only: no_grad avoids building an autograd graph for every chunk.
with torch.no_grad():
    for i in range(0, n_samples, batch_size):
        G_small = DataEmbeddingGraph(inputs[i:i+batch_size], [], None)
        # .cpu() is a no-op for CPU tensors, so no separate CUDA / CPU branch is needed.
        y_pred_new[i:i+batch_size] = net.forward(G_small).detach().cpu().numpy()

In [18]:
# Graph net: plot the GraphConvNet2 predictions (y_pred_new) with plot_embedding,
# passing the labels and their string names.
plot_embedding(y_pred_new, labels, named_labels)

In [25]:
from core.OldGraphConvNet2 import OldGraphConvNet2 
# Old graph net: build OldGraphConvNet2, move it to the GPU when one is in use,
# then restore the weights stored under 'state_dict' in the saved checkpoint file.
root = 'results/mnist_7/'
filename = root + 'graph_net_800.pkl'
net = OldGraphConvNet2(net_parameters)
if device == 'cuda':
    net.cuda()
checkpoint = torch.load(filename, map_location=device)
net.load_state_dict(checkpoint['state_dict'])

In [26]:
# Old graph net: embed the test set chunk by chunk, building a separate
# DataEmbeddingGraph for every chunk of `batch_size` samples and writing each
# chunk's predictions into the matching rows of y_pred_new_2.
batch_size = 1000
n_samples = inputs.shape[0]
y_pred_new_2 = np.zeros((n_samples, 2))
# Inference only: no_grad avoids building an autograd graph for every chunk.
with torch.no_grad():
    for i in range(0, n_samples, batch_size):
        G_small = DataEmbeddingGraph(inputs[i:i+batch_size], [], None)
        # .cpu() is a no-op for CPU tensors, so no separate CUDA / CPU branch is needed.
        y_pred_new_2[i:i+batch_size] = net.forward(G_small).detach().cpu().numpy()

In [27]:
# Old graph net: plot the OldGraphConvNet2 predictions (y_pred_new_2) with
# plot_embedding, passing the labels and their string names.
plot_embedding(y_pred_new_2, labels, named_labels)

In [19]:
from util.evaluation_metrics import nearest_neighbours_generalisation_accuracy
from sklearn.manifold.t_sne import trustworthiness

In [20]:
# Flatten every sample held in G.data into one row (first dimension kept, the rest
# collapsed by view) and convert to a NumPy array; used below as the
# original-space input to trustworthiness.
X_data = G.data.view(G.data.shape[0], -1).numpy()

In [21]:
# Confirm the flattened layout: one row per sample.
X_data.shape

(3000, 784)

In [22]:
# Metrics for the simple net embedding (y_pred): 1-NN and 5-NN generalisation
# accuracy, then trustworthiness against the flattened inputs with 12 neighbours.
X_to_compare, labels_to_compare = y_pred, labels
one_nn, five_nn = [
    nearest_neighbours_generalisation_accuracy(X_to_compare, labels_to_compare, n_neighbors=k)
    for k in (1, 5)
]
trust = trustworthiness(X_data, X_to_compare, n_neighbors=12)
print(one_nn, five_nn, trust, sep='\n')

0.6073333333333333
0.6663333333333333
0.8903155477295173


In [23]:
# Metrics for the GraphConvNet2 embedding (y_pred_new): 1-NN and 5-NN generalisation
# accuracy, then trustworthiness against the flattened inputs with 12 neighbours.
X_to_compare, labels_to_compare = y_pred_new, labels
one_nn, five_nn = [
    nearest_neighbours_generalisation_accuracy(X_to_compare, labels_to_compare, n_neighbors=k)
    for k in (1, 5)
]
trust = trustworthiness(X_data, X_to_compare, n_neighbors=12)
print(one_nn, five_nn, trust, sep='\n')

0.6216666666666668
0.6973333333333332
0.882898690070248


In [24]:
# Metrics for the embedding loaded from the pickle (X_emb): 1-NN and 5-NN
# generalisation accuracy, then trustworthiness against the flattened inputs
# with 12 neighbours.
X_to_compare, labels_to_compare = X_emb, labels
one_nn, five_nn = [
    nearest_neighbours_generalisation_accuracy(X_to_compare, labels_to_compare, n_neighbors=k)
    for k in (1, 5)
]
trust = trustworthiness(X_data, X_to_compare, n_neighbors=12)
print(one_nn, five_nn, trust, sep='\n')

0.9256666666666666
0.9189999999999999
0.9719406432258185


In [28]:
# Metrics for the OldGraphConvNet2 embedding (y_pred_new_2): 1-NN and 5-NN
# generalisation accuracy, then trustworthiness against the flattened inputs
# with 12 neighbours.
X_to_compare, labels_to_compare = y_pred_new_2, labels
one_nn, five_nn = [
    nearest_neighbours_generalisation_accuracy(X_to_compare, labels_to_compare, n_neighbors=k)
    for k in (1, 5)
]
trust = trustworthiness(X_data, X_to_compare, n_neighbors=12)
print(one_nn, five_nn, trust, sep='\n')

0.6686666666666666
0.7236666666666667
0.8870274004509289
