In [1]:
import scipy as sp
import numpy as np
import time

try:
    from localgraphclustering import *
except:
    # when the package is not installed, import the local version instead. 
    # the notebook must be placed in the original "notebooks/" folder
    sys.path.append("../")
    from localgraphclustering import * 

import time

import networkx as nx

import random

import statistics as stat_

## Load data

In [2]:
g = GraphLocal('../datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml','graphml',' ')


Loading a graphml is not efficient, we suggest using an edgelist format for this API.



## TEMP

In [3]:
G = nx.read_graphml('../datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml')

In [4]:
# groups = np.loadtxt('./datasets/ppi_mips.class', dtype = 'float')
groups = np.loadtxt('../datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh_ground_truth.csv', dtype = 'str')

groups_by_id = dict()

for node in groups:
    groups_by_id[node[0]] = node[1]

ids_clusters = set()

for node in groups:
    ids_clusters.add(node[1])
    
ids_clusters = list(ids_clusters)

ground_truth_clusters_by_id = dict()

for node in groups:
    ground_truth_clusters_by_id[node[1]] =  []
    
for node in groups:
    ground_truth_clusters_by_id[node[1]].append(node[0])
    
ground_truth_clusters_by_number = dict()

for node in groups:
    ground_truth_clusters_by_number[node[1]] =  []

counter = 0
for node in G.node:
    
    if node == '1.0':
        counter += 1
        continue
    
    what_group = groups_by_id[node]
    ground_truth_clusters_by_number[what_group].append(counter)
    counter += 1
    
all_clusters = []
    
counter = 0
for cluster_id in ground_truth_clusters_by_number:
    
    cluster = ground_truth_clusters_by_number[cluster_id]
    
    if len(cluster) == 1 or len(cluster) == 0:
        counter += 1
        continue

    cond = g.compute_conductance(cluster)
    counter += 1
    
    if cond <= 0.57 and len(cluster) >= 10:
        print("Id: ", cluster_id)
        print("Cluster: ", counter, " conductance: ", cond, "Size: ", len(cluster))
        all_clusters.append(cluster)

Id:  AMP
Cluster:  6  conductance:  0.5607205113306217 Size:  28
Id:  urease.0
Cluster:  29  conductance:  0.42254370102471367 Size:  100


## Collect data for ACL (with rounding)

In [5]:
nodes = {}
external_best_cond_acl = {}
external_best_pre_cond_acl = {}
vol_best_cond_acl = {}
vol_best_pre_acl = {}
size_clust_best_cond_acl = {}
size_clust_best_pre_acl = {}
f1score_best_cond_acl = {}
f1score_best_pre_acl = {}
true_positives_best_cond_acl = {}
true_positives_best_pre_acl = {}
precision_best_cond_acl = {}
precision_best_pre_acl = {}
recall_best_cond_acl = {}
recall_best_pre_acl = {}
cuts_best_cond_acl = {}
cuts_best_pre_acl = {}
cuts_acl_ALL = {}

ct_outer = 0

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    ct = 0
    
    start = time.time()
    
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "acl", rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            cuts_acl_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_acl_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_acl_ = set(rr).intersection(S)
            if len(true_positives_acl_) == 0:
                true_positives_acl_ = set(ref_node)
                vol_ = g.d[ref_node][0,0]
            precision = sum(g.d[np.array(list(true_positives_acl_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_acl_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_pre_acl[ct_outer,node] = vol_
                
                size_clust_best_pre_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_pre_acl[ct_outer,node] = true_positives_acl_
                precision_best_pre_acl[ct_outer,node] = precision
                recall_best_pre_acl[ct_outer,node] = recall
                f1score_best_pre_acl[ct_outer,node] = f1_score_
                
                cuts_best_pre_acl[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_cond_acl[ct_outer,node] = vol_
                
                size_clust_best_cond_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_cond_acl[ct_outer,node] = true_positives_acl_
                precision_best_cond_acl[ct_outer,node] = precision
                recall_best_cond_acl[ct_outer,node] = recall
                f1score_best_cond_acl[ct_outer,node] = f1_score_
                
                cuts_best_cond_acl[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_acl[ct_outer,node], 'f1score: ', f1score_best_cond_acl[ct_outer,node], 'precision: ', precision_best_cond_acl[ct_outer,node], 'recall: ', recall_best_cond_acl[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time ACL with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
outer: 0 number of node:  215  completed:  0.0  degree:  65.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
outer: 0 number of node:  195  completed:  0.03571428571428571  degree:  76.0
conductance:  0.43638525564803804 f1score:  0.8110273327049954 precision:  0.6821244550138724 recall:  1.0
outer: 0 number of node:  197  completed:  0.07142857142857142  degree:  52.0
conductance:  0.4329023629068212 f1score:  0.8683148335015136 precision:  0.7672759696834597 recall:  1.0
outer: 0 number of node:  198  completed:  0.10714285714285714  degree:  52.0
conductance:  0.4329023629068212 f1score:  0.8683148335015136 precision:  0.7672759696834597 recall:  1.0
outer: 0 number of node:  216  completed:  0.14285714285714285  degree:  72.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  202  completed:  0.17857142857142858  degree:  54.0
conductance:  0.

outer: 1 number of node:  112  completed:  0.18  degree:  164.0
conductance:  0.4508435136707388 f1score:  0.6907585168454733 precision:  0.7026613057629715 recall:  0.6792522672589302
outer: 1 number of node:  91  completed:  0.19  degree:  155.0
conductance:  0.4413695456300682 f1score:  0.8588224199176385 precision:  0.8755287783617485 recall:  0.8427416867172558
outer: 1 number of node:  62  completed:  0.2  degree:  160.0
conductance:  0.44914816726897266 f1score:  0.7207484381376951 precision:  0.7337637432881616 recall:  0.7081868097970263
outer: 1 number of node:  97  completed:  0.21  degree:  163.0
conductance:  0.42488888888888887 f1score:  0.7188185990820195 precision:  0.5610591900311527 recall:  1.0
outer: 1 number of node:  72  completed:  0.22  degree:  166.0
conductance:  0.4411492122335496 f1score:  0.7173552255980173 precision:  0.5592781726588917 recall:  1.0
outer: 1 number of node:  116  completed:  0.23  degree:  156.0
conductance:  0.4406714009038089 f1score:  0

outer: 1 number of node:  89  completed:  0.66  degree:  149.0
conductance:  0.4370341814443588 f1score:  0.76488909863143 precision:  0.7804314329738059 recall:  0.7499537294095873
outer: 1 number of node:  117  completed:  0.67  degree:  163.0
conductance:  0.46531586758727495 f1score:  0.6766542610231502 precision:  0.6921984900303284 recall:  0.6617928311431921
outer: 1 number of node:  77  completed:  0.68  degree:  167.0
conductance:  0.44236902050113897 f1score:  0.7179430393763565 precision:  0.5599930903437554 recall:  1.0
outer: 1 number of node:  17  completed:  0.69  degree:  164.0
conductance:  0.46182519280205653 f1score:  0.6843148981711733 precision:  0.6985861182519281 recall:  0.6706150903818866
outer: 1 number of node:  88  completed:  0.7  degree:  161.0
conductance:  0.4126234435379991 f1score:  0.7200799644602399 precision:  0.5625976189649786 recall:  1.0
outer: 1 number of node:  48  completed:  0.71  degree:  159.0
conductance:  0.4126234435379991 f1score:  0.7

## Performance of ACL (with rounding).

In [6]:
all_data = []
xlabels_ = []

print('Results for ACL with rounding')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_acl[i,j])
        temp_rec.append(recall_best_cond_acl[i,j])
        temp_f1.append(f1score_best_cond_acl[i,j])
        temp_conductance.append(external_best_cond_acl[i,j])
    
    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))

np.save('results/f1score_best_pre_acl_ORKUT', f1score_best_pre_acl) 
np.save('results/precision_best_pre_acl_ORKUT', precision_best_pre_acl) 
np.save('results/recall_best_pre_acl_ORKUT', recall_best_pre_acl) 

Results for ACL with rounding
Feature: 0 Precision 0.7616010784971015 Recall 1.0 F1 0.8639590510056822 Cond. 0.4298763868141641
Feature: 1 Precision 0.6213475956513441 Recall 0.869310876673453 F1 0.7087703363309276 Cond. 0.4418880201742084


## Collect data for l1-reg. PR (with rounding)

In [7]:
nodes = {}
external_best_cond_acl = {}
external_best_pre_cond_acl = {}
vol_best_cond_acl = {}
vol_best_pre_acl = {}
size_clust_best_cond_acl = {}
size_clust_best_pre_acl = {}
f1score_best_cond_acl = {}
f1score_best_pre_acl = {}
true_positives_best_cond_acl = {}
true_positives_best_pre_acl = {}
precision_best_cond_acl = {}
precision_best_pre_acl = {}
recall_best_cond_acl = {}
recall_best_pre_acl = {}
cuts_best_cond_acl = {}
cuts_best_pre_acl = {}
cuts_acl_ALL = {}

ct_outer = 0

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    ct = 0
    
    start = time.time()
    
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "l1reg-rand", epsilon=1.0e-2, rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            cuts_acl_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_acl_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_acl_ = set(rr).intersection(S)
            if len(true_positives_acl_) == 0:
                true_positives_acl_ = set(ref_node)
                vol_ = g.d[ref_node][0,0]
            precision = sum(g.d[np.array(list(true_positives_acl_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_acl_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_pre_acl[ct_outer,node] = vol_
                
                size_clust_best_pre_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_pre_acl[ct_outer,node] = true_positives_acl_
                precision_best_pre_acl[ct_outer,node] = precision
                recall_best_pre_acl[ct_outer,node] = recall
                f1score_best_pre_acl[ct_outer,node] = f1_score_
                
                cuts_best_pre_acl[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_cond_acl[ct_outer,node] = vol_
                
                size_clust_best_cond_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_cond_acl[ct_outer,node] = true_positives_acl_
                precision_best_cond_acl[ct_outer,node] = precision
                recall_best_cond_acl[ct_outer,node] = recall
                f1score_best_cond_acl[ct_outer,node] = f1_score_
                
                cuts_best_cond_acl[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_acl[ct_outer,node], 'f1score: ', f1score_best_cond_acl[ct_outer,node], 'precision: ', precision_best_cond_acl[ct_outer,node], 'recall: ', recall_best_cond_acl[ct_outer,node])
        ct += 1

    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time l1-reg. with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
outer: 0 number of node:  208  completed:  0.0  degree:  63.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
outer: 0 number of node:  202  completed:  0.03571428571428571  degree:  54.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  213  completed:  0.07142857142857142  degree:  63.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
outer: 0 number of node:  216  completed:  0.10714285714285714  degree:  72.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  195  completed:  0.14285714285714285  degree:  76.0
conductance:  0.43638525564803804 f1score:  0.8110273327049954 precision:  0.6821244550138724 recall:  1.0
outer: 0 number of node:  160  completed:  0.17857142857142858  degree:  66.0
conductance:  0.

outer: 1 number of node:  65  completed:  0.24  degree:  165.0
conductance:  0.45503701319600903 f1score:  0.7919783743006223 precision:  0.8073053508490868 recall:  0.777222530692825
outer: 1 number of node:  85  completed:  0.25  degree:  144.0
conductance:  0.412263787119167 f1score:  0.9098750275443069 precision:  0.928911171101684 recall:  0.8916034301931026
outer: 1 number of node:  64  completed:  0.26  degree:  167.0
conductance:  0.44236902050113897 f1score:  0.7179430393763565 precision:  0.5599930903437554 recall:  1.0
outer: 1 number of node:  114  completed:  0.27  degree:  163.0
conductance:  0.45672952725393956 f1score:  0.7814216224697944 precision:  0.7952222789984671 recall:  0.7680918008513788
outer: 1 number of node:  130  completed:  0.28  degree:  170.0
conductance:  0.4683935820606998 f1score:  0.7535300050428644 precision:  0.770281590308654 recall:  0.7374915170584243
outer: 1 number of node:  81  completed:  0.29  degree:  160.0
conductance:  0.443700077299665

outer: 1 number of node:  80  completed:  0.75  degree:  166.0
conductance:  0.4411492122335496 f1score:  0.7173552255980173 precision:  0.5592781726588917 recall:  1.0
outer: 1 number of node:  52  completed:  0.76  degree:  159.0
conductance:  0.4385805277525023 f1score:  0.7179907421762529 precision:  0.5600511367562712 recall:  1.0
outer: 1 number of node:  20  completed:  0.77  degree:  181.0
conductance:  0.49607515120319134 f1score:  0.6596957576139334 precision:  0.6738514991635568 recall:  0.6461225245234129
outer: 1 number of node:  84  completed:  0.78  degree:  157.0
conductance:  0.42488888888888887 f1score:  0.7188185990820195 precision:  0.5610591900311527 recall:  1.0
outer: 1 number of node:  103  completed:  0.79  degree:  150.0
conductance:  0.4158541754980924 f1score:  0.7205601244721048 precision:  0.5631840450297071 recall:  1.0
outer: 1 number of node:  51  completed:  0.8  degree:  157.0
conductance:  0.4126234435379991 f1score:  0.7200799644602399 precision:  0

## Performance of l1-reg. PR (with rounding).

In [8]:
all_data = []
xlabels_ = []

print('Results for l1-reg with rounding')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_acl[i,j])
        temp_rec.append(recall_best_cond_acl[i,j])
        temp_f1.append(f1score_best_cond_acl[i,j])
        temp_conductance.append(external_best_cond_acl[i,j])
    
    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))



Results for l1-reg with rounding
Feature: 0 Precision 0.7616010784971015 Recall 1.0 F1 0.8639590510056822 Cond. 0.4298763868141641
Feature: 1 Precision 0.6756571753039263 Recall 0.9029434264914553 F1 0.7552208139349571 Cond. 0.43918351578430237


## Function for seed set expansion using BFS

In [9]:
import queue
def seed_grow_bfs_steps(g,seeds,steps,vol_target,target_cluster):
    """
    grow the initial seed set through BFS until its size reaches 
    a given ratio of the total number of nodes.
    """
    Q = queue.Queue()
    visited = np.zeros(g._num_vertices)
    visited[seeds] = 1
    for s in seeds:
        Q.put(s)
    if isinstance(seeds,np.ndarray):
        seeds = seeds.tolist()
    else:
        seeds = list(seeds)
    for step in range(steps):
        for k in range(Q.qsize()):
            node = Q.get()
            si,ei = g.adjacency_matrix.indptr[node],g.adjacency_matrix.indptr[node+1]
            neighs = g.adjacency_matrix.indices[si:ei]
            for i in range(len(neighs)):
                if visited[neighs[i]] == 0:
                    visited[neighs[i]] = 1
                    seeds.append(neighs[i])
                    Q.put(neighs[i])
                    
                    vol_seeds = np.sum(g.d[seeds])
                    vol_target_intersection_input = np.sum(g.d[list(set(target_cluster).intersection(set(seeds)))])
                    sigma = vol_target_intersection_input/vol_target
                    
                    if sigma > 0.75 or vol_seeds > 0.25*g.vol_G:
                        break
                 
            vol_seeds = np.sum(g.d[seeds])
            vol_target_intersection_input = np.sum(g.d[list(set(target_cluster).intersection(set(seeds)))])
            sigma = vol_target_intersection_input/vol_target   
            
            if sigma > 0.75 or vol_seeds > 0.25*g.vol_G:
                break
               
        vol_seeds = np.sum(g.d[seeds])
        vol_target_intersection_input = np.sum(g.d[list(set(target_cluster).intersection(set(seeds)))])
        sigma = vol_target_intersection_input/vol_target
                
        if sigma > 0.75 or vol_seeds > 0.25*g.vol_G:
            break
    return seeds

## Collect data for seed set expansion + FlowImprove, try a lot of parameters

In [10]:
nodes = {}
external_best_cond_flBFS = {}
external_best_pre_cond_flBFS = {}
vol_best_cond_flBFS = {}
vol_best_pre_flBFS = {}
size_clust_best_cond_flBFS = {}
size_clust_best_pre_flBFS = {}
f1score_best_cond_flBFS = {}
f1score_best_pre_flBFS = {}
true_positives_best_cond_flBFS = {}
true_positives_best_pre_flBFS = {}
precision_best_cond_flBFS = {}
precision_best_pre_flBFS = {}
recall_best_cond_flBFS = {}
recall_best_pre_flBFS = {}
cuts_best_cond_flBFS = {}
cuts_best_pre_flBFS = {}
cuts_flBFS_ALL = {}

ct_outer = 0

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    n_step = 24
    
    vol_target = np.sum(g.d[rr])
    
    ct = 0
    
    start = time.time()
    
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
                
        seeds = seed_grow_bfs_steps(g,[node],g._num_vertices,vol_target,rr)

        vol_input = np.sum(g.d[seeds])

        vol_graph_minus_input = np.sum(g.d[list(set(range(g._num_vertices)) - set(seeds))])

        vol_target_intersection_input = np.sum(g.d[list(set(rr).intersection(set(seeds)))])

        gamma = vol_input/vol_graph_minus_input
                
        sigma = max(vol_target_intersection_input/vol_target,gamma)
        
        delta = min(max((1/3)*(1.0/(1.0/sigma - 1)) - gamma,0),1)
                        
        S = flow_clustering(g,seeds,method="sl",delta=delta)[0]
        number_experiments += 1

        cuts_flBFS_ALL[ct_outer,node] = S

        size_clust_flBFS_ = len(S)

        cond_val_l1pr = g.compute_conductance(S)

        vol_ = sum(g.d[S])
        true_positives_flBFS_ = set(rr).intersection(S)
        if len(true_positives_flBFS_) == 0:
            true_positives_flBFS_ = set(ref_node)
            vol_ = g.d[ref_node][0]
        precision = sum(g.d[np.array(list(true_positives_flBFS_))])/vol_
        recall = sum(g.d[np.array(list(true_positives_flBFS_))])/sum(g.d[rr])
        f1_score_ = 2*(precision*recall)/(precision + recall)

        if f1_score_ >= max_precision:

            max_precision = f1_score_

            external_best_pre_cond_flBFS[ct_outer,node] = cond_val_l1pr
            vol_best_pre_flBFS[ct_outer,node] = vol_

            size_clust_best_pre_flBFS[ct_outer,node] = size_clust_flBFS_
            true_positives_best_pre_flBFS[ct_outer,node] = true_positives_flBFS_
            precision_best_pre_flBFS[ct_outer,node] = precision
            recall_best_pre_flBFS[ct_outer,node] = recall
            f1score_best_pre_flBFS[ct_outer,node] = f1_score_

            cuts_best_pre_flBFS[ct_outer,node] = S

        if cond_val_l1pr <= min_conduct:

            min_conduct = cond_val_l1pr

            external_best_cond_flBFS[ct_outer,node] = cond_val_l1pr
            vol_best_cond_flBFS[ct_outer,node] = vol_

            size_clust_best_cond_flBFS[ct_outer,node] = size_clust_flBFS_
            true_positives_best_cond_flBFS[ct_outer,node] = true_positives_flBFS_
            precision_best_cond_flBFS[ct_outer,node] = precision
            recall_best_cond_flBFS[ct_outer,node] = recall
            f1score_best_cond_flBFS[ct_outer,node] = f1_score_

            cuts_best_cond_flBFS[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_flBFS[ct_outer,node], 'f1score: ', f1score_best_cond_flBFS[ct_outer,node], 'precision: ', precision_best_cond_flBFS[ct_outer,node], 'recall: ', recall_best_cond_flBFS[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time BFS+SL: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
outer: 0 number of node:  206  completed:  0.0  degree:  62.0
conductance:  0.6493030080704328 f1score:  0.8657587548638133 precision:  0.979457079970653 recall:  0.7757117954677513
outer: 0 number of node:  200  completed:  0.03571428571428571  degree:  45.0
conductance:  0.6539325842696629 f1score:  0.8736910994764397 precision:  1.0 recall:  0.7757117954677513
outer: 0 number of node:  160  completed:  0.07142857142857142  degree:  66.0
conductance:  0.6539325842696629 f1score:  0.8736910994764397 precision:  1.0 recall:  0.7757117954677513
outer: 0 number of node:  204  completed:  0.10714285714285714  degree:  52.0
conductance:  0.6539325842696629 f1score:  0.8736910994764397 precision:  1.0 recall:  0.7757117954677513
outer: 0 number of node:  214  completed:  0.14285714285714285  degree:  67.0
conductance:  0.6570155902004454 f1score:  0.878096479791395 precision:  1.0 recall:  0.7826844857640907
outer: 0 number of node:  218  completed:  0.17857142857142858  degree:  52.0
co

outer: 1 number of node:  67  completed:  0.16  degree:  166.0
conductance:  0.7102953899394389 f1score:  0.37292181069958846 precision:  0.5600049437646768 recall:  0.27953606021346167
outer: 1 number of node:  61  completed:  0.17  degree:  155.0
conductance:  0.7294538029606943 f1score:  0.4573092118943647 precision:  0.7016334864726902 recall:  0.3391942747856129
outer: 1 number of node:  114  completed:  0.18  degree:  163.0
conductance:  0.7344770195877609 f1score:  0.44462947543713566 precision:  0.68365126104212 recall:  0.3294466037386637
outer: 1 number of node:  119  completed:  0.19  degree:  178.0
conductance:  0.7059480938780579 f1score:  0.34844612975022665 precision:  0.5248975537066931 recall:  0.26078104756616693
outer: 1 number of node:  81  completed:  0.2  degree:  160.0
conductance:  0.7371794871794872 f1score:  0.430838435586655 precision:  0.6630769230769231 recall:  0.31908199148621136
outer: 1 number of node:  74  completed:  0.21  degree:  163.0
conductance: 

outer: 1 number of node:  46  completed:  0.61  degree:  161.0
conductance:  0.7377132671250318 f1score:  0.4165731621161119 precision:  0.6381461675579323 recall:  0.30921093219816154
outer: 1 number of node:  99  completed:  0.62  degree:  157.0
conductance:  0.7254452926208651 f1score:  0.46989904025925455 precision:  0.7194656488549618 recall:  0.34888025171201187
outer: 1 number of node:  100  completed:  0.63  degree:  163.0
conductance:  0.7361288672973664 f1score:  0.4444259498148225 precision:  0.6826898491434416 recall:  0.3294466037386637
outer: 1 number of node:  89  completed:  0.64  degree:  149.0
conductance:  0.7251269035532995 f1score:  0.4826269251525592 precision:  0.7376903553299492 recall:  0.35862792275896105
outer: 1 number of node:  64  completed:  0.65  degree:  167.0
conductance:  0.709068167604753 f1score:  0.400512312014543 precision:  0.606253908692933 recall:  0.2990314023073601
outer: 1 number of node:  50  completed:  0.66  degree:  171.0
conductance:  0

## Performance of BFS+FlowImp.

In [11]:
all_data = []
xlabels_ = []

print('Results for BFS+SL')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_flBFS[i,j])
        temp_rec.append(recall_best_cond_flBFS[i,j])
        temp_f1.append(f1score_best_cond_flBFS[i,j])
        temp_conductance.append(external_best_cond_flBFS[i,j])

    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))



Results for BFS+SL
Feature: 0 Precision 0.983088390480847 Recall 0.7768739105171412 F1 0.8671746612477108 Cond. 0.6527827968252733
Feature: 1 Precision 0.6497698357105165 Recall 0.318420630513912 F1 0.4273749111275312 Cond. 0.7208422960860279


## Collect data for APPR+SL

In [12]:
nodes = {}
external_best_cond_apprSL = {}
external_best_pre_cond_apprSL = {}
vol_best_cond_apprSL = {}
vol_best_pre_apprSL = {}
size_clust_best_cond_apprSL = {}
size_clust_best_pre_apprSL = {}
f1score_best_cond_apprSL = {}
f1score_best_pre_apprSL = {}
true_positives_best_cond_apprSL = {}
true_positives_best_pre_apprSL = {}
precision_best_cond_apprSL = {}
precision_best_pre_apprSL = {}
recall_best_cond_apprSL = {}
recall_best_pre_apprSL = {}
cuts_best_cond_apprSL = {}
cuts_best_pre_apprSL = {}
cuts_apprSL_ALL = {}

ct_outer = 0

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    vol_target = np.sum(g.d[rr])
    
    ct = 0
    
    start = time.time()
    
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "acl", rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            vol_input = np.sum(g.d[S])

            vol_graph_minus_input = np.sum(g.d[list(set(range(g._num_vertices)) - set(S))])

            vol_target_intersection_input = np.sum(g.d[list(set(rr).intersection(set(S)))])

            gamma = vol_input/vol_graph_minus_input

            sigma = max(vol_target_intersection_input/vol_target,gamma)

            delta = min(max((1/3)*(1.0/(1.0/sigma - 1)) - gamma,0),1)

            S = flow_clustering(g,S,method="sl",delta=delta)[0]
            
            cuts_apprSL_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_apprSL_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_apprSL_ = set(rr).intersection(S)
            if len(true_positives_apprSL_) == 0:
                true_positives_apprSL_ = set(ref_node)
                vol_ = g.d[ref_node][0]
            precision = sum(g.d[np.array(list(true_positives_apprSL_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_apprSL_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_apprSL[ct_outer,node] = cond_val_l1pr
                vol_best_pre_apprSL[ct_outer,node] = vol_
                
                size_clust_best_pre_apprSL[ct_outer,node] = size_clust_apprSL_
                true_positives_best_pre_apprSL[ct_outer,node] = true_positives_apprSL_
                precision_best_pre_apprSL[ct_outer,node] = precision
                recall_best_pre_apprSL[ct_outer,node] = recall
                f1score_best_pre_apprSL[ct_outer,node] = f1_score_
                
                cuts_best_pre_apprSL[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_apprSL[ct_outer,node] = cond_val_l1pr
                vol_best_cond_apprSL[ct_outer,node] = vol_
                
                size_clust_best_cond_apprSL[ct_outer,node] = size_clust_apprSL_
                true_positives_best_cond_apprSL[ct_outer,node] = true_positives_apprSL_
                precision_best_cond_apprSL[ct_outer,node] = precision
                recall_best_cond_apprSL[ct_outer,node] = recall
                f1score_best_cond_apprSL[ct_outer,node] = f1_score_
                
                cuts_best_cond_apprSL[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_apprSL[ct_outer,node], 'f1score: ', f1score_best_cond_apprSL[ct_outer,node], 'precision: ', precision_best_cond_apprSL[ct_outer,node], 'recall: ', recall_best_cond_apprSL[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time APPR+SL with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
outer: 0 number of node:  160  completed:  0.0  degree:  66.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0



divide by zero encountered in double_scalars



outer: 0 number of node:  201  completed:  0.03571428571428571  degree:  63.0
conductance:  0.4411492122335496 f1score:  0.8873420984789894 precision:  0.7974976830398517 recall:  1.0
outer: 0 number of node:  200  completed:  0.07142857142857142  degree:  45.0
conductance:  0.4935930292157868 f1score:  0.9373638344226579 precision:  0.8821117375704767 recall:  1.0
outer: 0 number of node:  213  completed:  0.10714285714285714  degree:  63.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
outer: 0 number of node:  216  completed:  0.14285714285714285  degree:  72.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  203  completed:  0.17857142857142858  degree:  53.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  208  completed:  0.21428571428571427  degree:  63.0
cond

outer: 1 number of node:  132  completed:  0.19  degree:  162.0
conductance:  0.4126234435379991 f1score:  0.019791094007696537 precision:  1.0 recall:  0.009994447529150472
outer: 1 number of node:  89  completed:  0.2  degree:  149.0
conductance:  0.4355987055016181 f1score:  0.7679332891121009 precision:  0.7867961165048544 recall:  0.7499537294095873
outer: 1 number of node:  43  completed:  0.21  degree:  151.0
conductance:  0.4375885611232771 f1score:  0.7650858673389003 precision:  0.7819142084245781 recall:  0.7489666234807822
outer: 1 number of node:  57  completed:  0.22  degree:  162.0
conductance:  0.4126234435379991 f1score:  0.019791094007696537 precision:  1.0 recall:  0.009994447529150472
outer: 1 number of node:  96  completed:  0.23  degree:  168.0
conductance:  0.4126234435379991 f1score:  0.020516578127862246 precision:  1.0 recall:  0.01036461225245234
outer: 1 number of node:  127  completed:  0.24  degree:  157.0
conductance:  0.4473563218390805 f1score:  0.01918

outer: 1 number of node:  69  completed:  0.66  degree:  152.0
conductance:  0.4126234435379991 f1score:  0.01858077134649471 precision:  1.0 recall:  0.009377506323647356
outer: 1 number of node:  52  completed:  0.67  degree:  159.0
conductance:  0.4385805277525023 f1score:  0.019428152492668622 precision:  1.0 recall:  0.009809365167499537
outer: 1 number of node:  130  completed:  0.68  degree:  170.0
conductance:  0.4588311268969971 f1score:  0.6363980564144632 precision:  0.6512754278333871 recall:  0.622185205749892
outer: 1 number of node:  83  completed:  0.69  degree:  148.0
conductance:  0.4126234435379991 f1score:  0.018096227914654275 precision:  1.0 recall:  0.00913072984144611
outer: 1 number of node:  122  completed:  0.7  degree:  166.0
conductance:  0.4677027683341428 f1score:  0.020274809160305347 precision:  1.0 recall:  0.010241224011351718
outer: 1 number of node:  80  completed:  0.71  degree:  166.0
conductance:  0.4411492122335496 f1score:  0.020274809160305347

## Performance of APPR+SL

In [13]:
all_data = []
xlabels_ = []

print('Results for APPR+SL')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_apprSL[i,j])
        temp_rec.append(recall_best_cond_apprSL[i,j])
        temp_f1.append(f1score_best_cond_apprSL[i,j])
        temp_conductance.append(external_best_cond_apprSL[i,j])

    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))



Results for APPR+SL
Feature: 0 Precision 0.7683214659749378 Recall 1.0 F1 0.8685372472776756 Cond. 0.427663502821075
Feature: 1 Precision 0.89414672073611 Recall 0.2574433956443951 F1 0.2699017442541204 Cond. 0.4376144431843467


## Collect data for L1+SL

In [14]:
nodes = {}
external_best_cond_l1SL = {}
external_best_pre_cond_l1SL = {}
vol_best_cond_l1SL = {}
vol_best_pre_l1SL = {}
size_clust_best_cond_l1SL = {}
size_clust_best_pre_l1SL = {}
f1score_best_cond_l1SL = {}
f1score_best_pre_l1SL = {}
true_positives_best_cond_l1SL = {}
true_positives_best_pre_l1SL = {}
precision_best_cond_l1SL = {}
precision_best_pre_l1SL = {}
recall_best_cond_l1SL = {}
recall_best_pre_l1SL = {}
cuts_best_cond_l1SL = {}
cuts_best_pre_l1SL = {}
cuts_l1SL_ALL = {}

ct_outer = 0

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    vol_target = np.sum(g.d[rr])
    
    ct = 0
    
    start = time.time()
    
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "l1reg-rand", epsilon=1.0e-2, rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            vol_input = np.sum(g.d[S])

            vol_graph_minus_input = np.sum(g.d[list(set(range(g._num_vertices)) - set(S))])

            vol_target_intersection_input = np.sum(g.d[list(set(rr).intersection(set(S)))])

            gamma = vol_input/vol_graph_minus_input

            sigma = max(vol_target_intersection_input/vol_target,gamma)

            delta = min(max((1/3)*(1.0/(1.0/sigma - 1)) - gamma,0),1)

            S = flow_clustering(g,S,method="sl",delta=delta)[0]
            
            cuts_l1SL_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_l1SL_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_l1SL_ = set(rr).intersection(S)
            if len(true_positives_l1SL_) == 0:
                true_positives_l1SL_ = set(ref_node)
                vol_ = g.d[ref_node][0]
            precision = sum(g.d[np.array(list(true_positives_l1SL_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_l1SL_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_l1SL[ct_outer,node] = cond_val_l1pr
                vol_best_pre_l1SL[ct_outer,node] = vol_
                
                size_clust_best_pre_l1SL[ct_outer,node] = size_clust_l1SL_
                true_positives_best_pre_l1SL[ct_outer,node] = true_positives_l1SL_
                precision_best_pre_l1SL[ct_outer,node] = precision
                recall_best_pre_l1SL[ct_outer,node] = recall
                f1score_best_pre_l1SL[ct_outer,node] = f1_score_
                
                cuts_best_pre_l1SL[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_l1SL[ct_outer,node] = cond_val_l1pr
                vol_best_cond_l1SL[ct_outer,node] = vol_
                
                size_clust_best_cond_l1SL[ct_outer,node] = size_clust_l1SL_
                true_positives_best_cond_l1SL[ct_outer,node] = true_positives_l1SL_
                precision_best_cond_l1SL[ct_outer,node] = precision
                recall_best_cond_l1SL[ct_outer,node] = recall
                f1score_best_cond_l1SL[ct_outer,node] = f1_score_
                
                cuts_best_cond_l1SL[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_l1SL[ct_outer,node], 'f1score: ', f1score_best_cond_l1SL[ct_outer,node], 'precision: ', precision_best_cond_l1SL[ct_outer,node], 'recall: ', recall_best_cond_l1SL[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time L1+SL with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
outer: 0 number of node:  218  completed:  0.0  degree:  52.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
outer: 0 number of node:  156  completed:  0.03571428571428571  degree:  79.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  215  completed:  0.07142857142857142  degree:  65.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
outer: 0 number of node:  211  completed:  0.10714285714285714  degree:  63.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0



divide by zero encountered in double_scalars



outer: 0 number of node:  209  completed:  0.14285714285714285  degree:  63.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
outer: 0 number of node:  196  completed:  0.17857142857142858  degree:  67.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
outer: 0 number of node:  216  completed:  0.21428571428571427  degree:  72.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  195  completed:  0.25  degree:  76.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  186  completed:  0.2857142857142857  degree:  63.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
outer: 0 number of node:  200  completed:  0.32142857142857145  degree:  45.0
conductance:  0.49359

outer: 1 number of node:  41  completed:  0.22  degree:  153.0
conductance:  0.42921696468917436 f1score:  0.8691482649842271 precision:  0.8892905558065973 recall:  0.849898204701092
outer: 1 number of node:  68  completed:  0.23  degree:  166.0
conductance:  0.4411492122335496 f1score:  0.020274809160305347 precision:  1.0 recall:  0.010241224011351718
outer: 1 number of node:  82  completed:  0.24  degree:  162.0
conductance:  0.43960084033613445 f1score:  0.8438026780318693 precision:  0.8708639705882353 recall:  0.8183725090998828
outer: 1 number of node:  75  completed:  0.25  degree:  167.0
conductance:  0.4256546826453617 f1score:  0.020395701025891548 precision:  1.0 recall:  0.01030291813190203
outer: 1 number of node:  112  completed:  0.26  degree:  164.0
conductance:  0.4351006667961416 f1score:  0.8076825878190549 precision:  0.8276040655143394 recall:  0.7886976371151829
outer: 1 number of node:  98  completed:  0.27  degree:  159.0
conductance:  0.431287589730324 f1scor

outer: 1 number of node:  100  completed:  0.69  degree:  163.0
conductance:  0.43623376623376625 f1score:  0.8503907115062166 precision:  0.8727272727272727 recall:  0.8291689801961873
outer: 1 number of node:  53  completed:  0.7  degree:  179.0
conductance:  0.4823670852257547 f1score:  0.734549886148616 precision:  0.7648944696767299 recall:  0.7065210685421679
outer: 1 number of node:  62  completed:  0.71  degree:  160.0
conductance:  0.4467534149025701 f1score:  0.8062294667677534 precision:  0.8261151032562957 recall:  0.7872786723425258
outer: 1 number of node:  116  completed:  0.72  degree:  156.0
conductance:  0.4318697710616772 f1score:  0.860629821676995 precision:  0.8826772164212984 recall:  0.8396569806897403
outer: 1 number of node:  55  completed:  0.73  degree:  159.0
conductance:  0.4126234435379991 f1score:  0.019428152492668622 precision:  1.0 recall:  0.009809365167499537
outer: 1 number of node:  20  completed:  0.74  degree:  181.0
conductance:  0.495197085127

## Performance of l1+SL

In [15]:
all_data = []
xlabels_ = []

print('Results for L1+SL')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_l1SL[i,j])
        temp_rec.append(recall_best_cond_l1SL[i,j])
        temp_f1.append(f1score_best_cond_l1SL[i,j])
        temp_conductance.append(external_best_cond_l1SL[i,j])

    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))
    

Results for L1+SL
Feature: 0 Precision 0.7683214659749378 Recall 1.0 F1 0.8685372472776756 Cond. 0.427663502821075
Feature: 1 Precision 0.9205054772869868 Recall 0.3664982417175643 F1 0.381185986391101 Cond. 0.4354129237780968
