In [1]:
import sys
sys.path.append('../scripts/')
from data import Dataset
import numpy as np
from utils import spectral_clustering
from metrics import purity_score, nmi_score, ri_score
%reload_ext autoreload
%autoreload 2   
#!pip install tabulate
from tabulate import tabulate

In [2]:
def clustering_spectral(dataset, verbose=True, **dataset_kwargs):
    """Run spectral clustering on each layer of a dataset and print a score table.

    For every layer of the loaded dataset, the layer is clustered into as many
    clusters as there are distinct ground-truth labels, and the result is scored
    against the ground truth with purity, NMI and RI. A grid-formatted summary
    table is printed at the end.

    Parameters
    ----------
    dataset :
        Identifier passed as the first argument to ``Dataset``
        (e.g. ``"AUCS"``, ``"MIT"``, ``"Cora"``).
    verbose : bool, default True
        When true, the three scores are also printed for each layer as soon as
        they are computed. The ``Layer <id>`` progress line and the final table
        are printed regardless.
    **dataset_kwargs :
        Extra keyword arguments forwarded unchanged to ``Dataset``
        (for instance ``extended=False``). ``preprocess`` defaults to ``False``
        when not supplied, which is what this function always used.

    Returns
    -------
    None
        Results are only printed.
    """
    # Preserve the historical default while still letting a caller override it.
    dataset_kwargs.setdefault("preprocess", False)
    # Use a separate local name: `dataset` stays the identifier the caller passed in.
    data = Dataset(dataset, **dataset_kwargs)

    # presumably one graph / affinity matrix per layer -- TODO confirm against Dataset
    W = data.MLG
    nb_layers = len(W)

    # The number of clusters is the number of distinct ground-truth labels.
    k = np.unique(data.labels).shape[0]
    print("number of clusers ",k)

    true_labels = data.labels
    N = len(true_labels)
    # The metric helpers are given clusterings as {node index: label} mappings.
    ground_truth_clustering = {i: true_labels[i] for i in range(N)}

    results = []
    for layer_id in range(nb_layers):
        print(f"Layer {layer_id}")
        labels = spectral_clustering(W[layer_id], k)
        clustering = {i: labels[i] for i in range(N)}

        purity = purity_score(clustering, ground_truth_clustering)
        nmi = nmi_score(clustering, ground_truth_clustering)
        ri = ri_score(clustering, ground_truth_clustering)
        if verbose:
            print("purity ",purity)
            print("nmi ",nmi)
            print("ri ",ri)
        results.append([f"Layer {layer_id}", purity, nmi, ri])

    headers = ["Layer", "Purity", "NMI", "RI"]
    print(tabulate(results, headers=headers, tablefmt="grid"))
            
 


# AUCS

In [5]:
# Score every AUCS layer; verbose=False skips the per-layer metric prints and keeps only the table.
clustering_spectral(dataset="AUCS", verbose=False)

number of clusers  7
Layer 0
Layer 1
Layer 2
Layer 3
Layer 4
+---------+----------+----------+----------+
| Layer   |   Purity |      NMI |       RI |
| Layer 0 | 0.87037  | 0.848737 | 0.937107 |
+---------+----------+----------+----------+
| Layer 1 | 0.537037 | 0.449799 | 0.704403 |
+---------+----------+----------+----------+
| Layer 2 | 0.62963  | 0.537216 | 0.792453 |
+---------+----------+----------+----------+
| Layer 3 | 0.777778 | 0.739481 | 0.88819  |
+---------+----------+----------+----------+
| Layer 4 | 0.481481 | 0.391529 | 0.654787 |
+---------+----------+----------+----------+


# MIT 

In [7]:
# Score every MIT layer; verbose=False skips the per-layer metric prints and keeps only the table.
clustering_spectral(dataset="MIT", verbose=False)

number of clusers  7
Layer 0
Layer 1
Layer 2
+---------+----------+----------+----------+
| Layer   |   Purity |      NMI |       RI |
| Layer 0 | 0.588889 | 0.347634 | 0.693633 |
+---------+----------+----------+----------+
| Layer 1 | 0.5      | 0.180501 | 0.414232 |
+---------+----------+----------+----------+
| Layer 2 | 0.588889 | 0.295921 | 0.64819  |
+---------+----------+----------+----------+


# Cora

In [9]:
# Score every Cora layer; verbose=False skips the per-layer metric prints and keeps only the table.
clustering_spectral(dataset="Cora", verbose=False)

number of clusers  3
Layer 0
Layer 1
Layer 2
+---------+----------+------------+----------+
| Layer   |   Purity |        NMI |       RI |
| Layer 0 | 0.521643 | 0.00723857 | 0.40495  |
+---------+----------+------------+----------+
| Layer 1 | 0.609323 | 0.236837   | 0.523628 |
+---------+----------+------------+----------+
| Layer 2 | 0.709212 | 0.364078   | 0.631807 |
+---------+----------+------------+----------+


# Cora small (`extended=False`)

In [10]:
# Per-layer spectral clustering on Cora loaded with extended=False.
# The cell works on notebook-level variables, so every name stays available afterwards.
dataset = Dataset("Cora", preprocess=False, extended=False)
W = dataset.MLG
nb_layers = len(W)

# One cluster per distinct ground-truth label.
k = len(np.unique(dataset.labels))
print("number of clusers ", k)

true_labels = dataset.labels
N = len(true_labels)
# Clusterings are handed to the metric helpers as {node index: label} mappings.
ground_truth_clustering = {idx: true_labels[idx] for idx in range(N)}

headers = ["Layer", "Purity", "NMI", "RI"]
results = []
for layer_id in range(nb_layers):
    print(f"Layer {layer_id}")
    labels = spectral_clustering(W[layer_id], k)
    clustering = {idx: labels[idx] for idx in range(N)}
    # Scores are evaluated in the same order as the table columns: purity, NMI, RI.
    purity, nmi, ri = (
        score(clustering, ground_truth_clustering)
        for score in (purity_score, nmi_score, ri_score)
    )
    results.append([f"Layer {layer_id}", purity, nmi, ri])

print(tabulate(results, headers=headers, tablefmt="grid"))

number of clusers  3
Layer 0
Layer 1
Layer 2
+---------+----------+-----------+----------+
| Layer   |   Purity |       NMI |       RI |
| Layer 0 | 0.444444 | 0.0846439 | 0.372784 |
+---------+----------+-----------+----------+
| Layer 1 | 0.722222 | 0.390399  | 0.666167 |
+---------+----------+-----------+----------+
| Layer 2 | 0.455556 | 0.104469  | 0.480899 |
+---------+----------+-----------+----------+
