# This notebook compares the different clustering algorithms

In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
from utils import *

In [3]:
import os
import pandas as pd
import time

In [4]:
from tqdm import tqdm_notebook

## For a simple SBM graph

In [5]:
# Directories scanned by the benchmark loops; each entry keeps its trailing
# slash because file names are appended to it directly.
paths_to_graphs = [
    '../data/simple_sbm1/',
    '../data/simple_sbm2/',
    '../data/complex_sbm1/',
    '../data/complex_sbm2/',
]

In [6]:
# Accumulator for the benchmark: the loops below append one dict per run.
res = list()

In [19]:
# Benchmark settings: number of clusters requested from every algorithm and
# number of passes over the whole set of graphs.
nb_clusters, number_of_rounds = 3, 1

In [8]:
# Benchmark every clustering method on every stored graph and append one
# record (a dict) per (graph, method) run to `res`.
#
# NOTE(review): `round_number` is not stored in the records, so runs from
# different rounds cannot be told apart afterwards.
# NOTE: time.clock() was removed in Python 3.8; switch to time.perf_counter()
# when porting this notebook to Python 3.
for round_number in range(0, number_of_rounds):
    for path in paths_to_graphs:
        graphs = os.listdir(path)
        for graph in tqdm_notebook(graphs):
            # os.path.join works whether or not `path` ends with a separator.
            adjency = np.load(os.path.join(path, graph))

            # Spectral clustering on the Laplacian, once per normalization.
            for norm in ['sym', 'rw', None]:
                t1 = time.clock()
                clusters = spectral_clustering(adjency, nb_clusters, laplacian_normalization=norm)
                tot_time = time.clock() - t1
                # The reference passed to accuracy_clustering follows nb_clusters
                # instead of a hard-coded 3, so it stays consistent when the
                # setting changes (presumably 1000 nodes per cluster - confirm
                # against utils).
                acc = accuracy_clustering(clusters, nb_clusters*[1000])
                res.append({'name' : graph, 'norm': str(norm), 'accuracy': acc, 'time':tot_time,
                            'input_graph' : 'Laplacian', 'algo' : 'Kmeans', 'source' : path})

            # Clustering computed directly from the adjacency matrix:
            # first spherical k-means, then plain k-means.
            t1 = time.clock()
            clusters = spherical_clustering_from_adjency(adjency, nb_clusters)
            tot_time = time.clock() - t1
            acc = accuracy_clustering(clusters, nb_clusters*[1000])
            res.append({'name' : graph, 'norm': str(None), 'accuracy': acc, 'time':tot_time,
                        'input_graph' : 'Adjency', 'algo' : 'Spherical-Kmeans','source' : path})
            t1 = time.clock()
            clusters = clustering_from_adjency(adjency, nb_clusters)
            tot_time = time.clock() - t1
            acc = accuracy_clustering(clusters, nb_clusters*[1000])
            res.append({'name' : graph, 'norm': str(None), 'accuracy': acc, 'time':tot_time,
                        'input_graph' : 'Adjency', 'algo' : 'Kmeans', 'source' : path})
        







In [15]:
# Reload previously saved results and discard the 'round' column.
final_results = (
    pd.read_json('./res_2_clusters.json')
    .drop('round', axis=1)
)

In [7]:
# Build the results table from the in-memory records; `res` must already
# exist in the kernel, otherwise this cell raises NameError.
final_results = pd.DataFrame.from_records(data=res)

NameError: name 'res' is not defined

In [16]:
# Preview the first 20 result rows.
final_results.head(n=20)

Unnamed: 0,accuracy,algo,input_graph,name,norm,source,time
0,1.0,Kmeans,Laplacian,matrix_49.npy,sym,../data/simple_sbm1/,0.527315
1,1.0,Kmeans,Laplacian,matrix_49.npy,rw,../data/simple_sbm1/,1.913224
10,1.0,Kmeans,Laplacian,matrix_28.npy,sym,../data/simple_sbm1/,1.816381
100,1.0,Kmeans,Laplacian,matrix_45.npy,sym,../data/simple_sbm1/,0.930318
1000,1.0,Kmeans,Laplacian,matrix_49.npy,sym,../data/simple_sbm1/,0.884655
1001,1.0,Kmeans,Laplacian,matrix_49.npy,rw,../data/simple_sbm1/,2.086665
1002,1.0,Kmeans,Laplacian,matrix_49.npy,,../data/simple_sbm1/,1.460389
1003,0.5,Spherical-Kmeans,Adjency,matrix_49.npy,,../data/simple_sbm1/,2.598611
1004,0.5,Kmeans,Adjency,matrix_49.npy,,../data/simple_sbm1/,0.849183
1005,1.0,Kmeans,Laplacian,matrix_15.npy,sym,../data/simple_sbm1/,1.54906


In [17]:
# Average the numeric columns for every
# (algo, input_graph, norm, source) combination.
final_output = (
    final_results
    .groupby(['algo', 'input_graph', 'norm', 'source'])
    .mean()
)

In [18]:
# Display the aggregated table (bare last expression -> rich notebook output).
final_output

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,accuracy,time
algo,input_graph,norm,source,Unnamed: 4_level_1,Unnamed: 5_level_1
Kmeans,Adjency,,../data/complex_sbm1/,0.725036,0.935084
Kmeans,Adjency,,../data/complex_sbm2/,0.615032,0.938747
Kmeans,Adjency,,../data/simple_sbm1/,0.5,0.892481
Kmeans,Adjency,,../data/simple_sbm2/,0.5,0.937912
Kmeans,Laplacian,,../data/complex_sbm1/,1.0,2.14461
Kmeans,Laplacian,,../data/complex_sbm2/,1.0,2.335604
Kmeans,Laplacian,,../data/simple_sbm1/,1.0,2.152081
Kmeans,Laplacian,,../data/simple_sbm2/,1.0,2.35271
Kmeans,Laplacian,rw,../data/complex_sbm1/,1.0,2.194236
Kmeans,Laplacian,rw,../data/complex_sbm2/,1.0,1.974757


In [24]:
# Persist the aggregated table as JSON in the working directory.
final_output.to_json(path_or_buf='res.json')

In [23]:
# Export the LaTeX rendering to its own file. When a path is passed,
# to_latex() writes the table there and returns None, so there is nothing to
# print; writing to 'res.json' would also overwrite the JSON export with
# LaTeX markup, hence the dedicated .tex target.
final_output.to_latex('res.tex')

None


In [19]:
# Print the raw LaTeX source of the aggregated table. The parenthesised call
# is valid under both Python 2 and Python 3 and prints the same text.
print(final_output.to_latex())

\begin{tabular}{llllrr}
\toprule
       &         &      &                       &  accuracy &      time \\
algo & input\_graph & norm & source &           &           \\
\midrule
Kmeans & Adjency & None & ../data/complex\_sbm1/ &  0.725036 &  0.935084 \\
       &         &      & ../data/complex\_sbm2/ &  0.615032 &  0.938747 \\
       &         &      & ../data/simple\_sbm1/ &  0.500000 &  0.892481 \\
       &         &      & ../data/simple\_sbm2/ &  0.500000 &  0.937912 \\
       & Laplacian &      & ../data/complex\_sbm1/ &  1.000000 &  2.144610 \\
       &         &      & ../data/complex\_sbm2/ &  1.000000 &  2.335604 \\
       &         &      & ../data/simple\_sbm1/ &  1.000000 &  2.152081 \\
       &         &      & ../data/simple\_sbm2/ &  1.000000 &  2.352710 \\
       &         & rw & ../data/complex\_sbm1/ &  1.000000 &  2.194236 \\
       &         &      & ../data/complex\_sbm2/ &  1.000000 &  1.974757 \\
       &         &      & ../data/simple\_sbm1/ &  1.000000 &  2

In [7]:
# Settings for this run: two clusters, a single pass over the graphs.
nb_clusters = 2
number_of_rounds = 1

# For every stored graph, time each clustering method, score it with
# accuracy_clustering and append one record per run to `res`.
for round_number in range(number_of_rounds):
    for path in paths_to_graphs:
        graphs = os.listdir(path)
        for graph in tqdm_notebook(graphs):
            adjency = np.load(path + graph)

            # Spectral clustering on the Laplacian, one run per normalization.
            for norm in ('sym', 'rw', None):
                t1 = time.clock()
                clusters = spectral_clustering(
                    adjency, nb_clusters, laplacian_normalization=norm)
                tot_time = time.clock() - t1
                acc = accuracy_clustering(clusters, nb_clusters * [1000])
                res.append({
                    'name': graph, 'norm': str(norm), 'accuracy': acc, 'time': tot_time,
                    'input_graph': 'Laplacian', 'algo': 'Kmeans', 'source': path,
                    'round': str(round_number),
                })

            # Methods working directly on the adjacency matrix, run in the
            # same order as before: spherical k-means first, then k-means.
            adjacency_methods = (
                ('Spherical-Kmeans', spherical_clustering_from_adjency),
                ('Kmeans', clustering_from_adjency),
            )
            for algo_name, cluster_fn in adjacency_methods:
                t1 = time.clock()
                clusters = cluster_fn(adjency, nb_clusters)
                tot_time = time.clock() - t1
                acc = accuracy_clustering(clusters, nb_clusters * [1000])
                res.append({
                    'name': graph, 'norm': str(None), 'accuracy': acc, 'time': tot_time,
                    'input_graph': 'Adjency', 'algo': algo_name, 'source': path,
                    'round': str(round_number),
                })







In [8]:
# Turn the accumulated run records into a DataFrame.
f = pd.DataFrame.from_records(data=res)

In [9]:
# Preview the first five records (head's default row count, made explicit).
f.head(n=5)

Unnamed: 0,accuracy,algo,input_graph,name,norm,round,source,time
0,1.0,Kmeans,Laplacian,matrix_49.npy,sym,0,../data/simple_sbm1/,0.480049
1,1.0,Kmeans,Laplacian,matrix_49.npy,rw,0,../data/simple_sbm1/,1.902022
2,1.0,Kmeans,Laplacian,matrix_49.npy,,0,../data/simple_sbm1/,1.889271
3,0.5,Spherical-Kmeans,Adjency,matrix_49.npy,,0,../data/simple_sbm1/,2.44993
4,0.5,Kmeans,Adjency,matrix_49.npy,,0,../data/simple_sbm1/,0.689414
