In [118]:
import scipy as sp
import numpy as np
import time

try:
    from localgraphclustering import *
except:
    # when the package is not installed, import the local version instead. 
    # the notebook must be placed in the original "notebooks/" folder
    sys.path.append("../")
    from localgraphclustering import * 

import time

import networkx as nx

import random

import statistics as stat_

## Load data

In [119]:
g = GraphLocal('./datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml','graphml',' ')

## TEMP

In [120]:
G = nx.read_graphml('./datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh.graphml')

In [121]:
# groups = np.loadtxt('./datasets/ppi_mips.class', dtype = 'float')
groups = np.loadtxt('./datasets/sfld_brown_et_al_amidohydrolases_protein_similarities_for_beh_ground_truth.csv', dtype = 'str')

groups_by_id = dict()

for node in groups:
    groups_by_id[node[0]] = node[1]

ids_clusters = set()

for node in groups:
    ids_clusters.add(node[1])
    
ids_clusters = list(ids_clusters)

ground_truth_clusters_by_id = dict()

for node in groups:
    ground_truth_clusters_by_id[node[1]] =  []
    
for node in groups:
    ground_truth_clusters_by_id[node[1]].append(node[0])
    
ground_truth_clusters_by_number = dict()

for node in groups:
    ground_truth_clusters_by_number[node[1]] =  []

counter = 0
for node in G.node:
    
    if node == '1.0':
        counter += 1
        continue
    
    what_group = groups_by_id[node]
    ground_truth_clusters_by_number[what_group].append(counter)
    counter += 1
    
all_clusters = []
    
counter = 0
for cluster_id in ground_truth_clusters_by_number:
    
    cluster = ground_truth_clusters_by_number[cluster_id]
    
    if len(cluster) == 1 or len(cluster) == 0:
        counter += 1
        continue

    cond = g.compute_conductance(cluster)
    counter += 1
    
    if cond <= 0.57 and len(cluster) >= 10:
        print("Id: ", cluster_id)
        print("Cluster: ", counter, " conductance: ", cond, "Size: ", len(cluster))
        all_clusters.append(cluster)

Id:  AMP
Cluster:  6  conductance:  0.5607205113306217 Size:  28
Id:  urease.0
Cluster:  29  conductance:  0.42254370102471367 Size:  100


## Collect data for ACL (with rounding)

In [122]:
nodes = {}
external_best_cond_acl = {}
external_best_pre_cond_acl = {}
vol_best_cond_acl = {}
vol_best_pre_acl = {}
size_clust_best_cond_acl = {}
size_clust_best_pre_acl = {}
f1score_best_cond_acl = {}
f1score_best_pre_acl = {}
true_positives_best_cond_acl = {}
true_positives_best_pre_acl = {}
precision_best_cond_acl = {}
precision_best_pre_acl = {}
recall_best_cond_acl = {}
recall_best_pre_acl = {}
cuts_best_cond_acl = {}
cuts_best_pre_acl = {}
cuts_acl_ALL = {}

ct_outer = 0

start = time.time()

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    ct = 0
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "acl", rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            cuts_acl_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_acl_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_acl_ = set(rr).intersection(S)
            if len(true_positives_acl_) == 0:
                true_positives_acl_ = set(ref_node)
                vol_ = g.d[ref_node][0,0]
            precision = sum(g.d[np.array(list(true_positives_acl_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_acl_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_pre_acl[ct_outer,node] = vol_
                
                size_clust_best_pre_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_pre_acl[ct_outer,node] = true_positives_acl_
                precision_best_pre_acl[ct_outer,node] = precision
                recall_best_pre_acl[ct_outer,node] = recall
                f1score_best_pre_acl[ct_outer,node] = f1_score_
                
                cuts_best_pre_acl[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_cond_acl[ct_outer,node] = vol_
                
                size_clust_best_cond_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_cond_acl[ct_outer,node] = true_positives_acl_
                precision_best_cond_acl[ct_outer,node] = precision
                recall_best_cond_acl[ct_outer,node] = recall
                f1score_best_cond_acl[ct_outer,node] = f1_score_
                
                cuts_best_cond_acl[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_acl[ct_outer,node], 'f1score: ', f1score_best_pre_acl[ct_outer,node], 'precision: ', precision_best_pre_acl[ct_outer,node], 'recall: ', recall_best_pre_acl[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time ACL with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
outer: 0 number of node:  214  completed:  0.0  degree:  67.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
outer: 0 number of node:  207  completed:  0.03571428571428571  degree:  63.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
outer: 0 number of node:  198  completed:  0.07142857142857142  degree:  52.0
conductance:  0.4329023629068212 f1score:  0.8683148335015136 precision:  0.7672759696834597 recall:  1.0
outer: 0 number of node:  156  completed:  0.10714285714285714  degree:  79.0
conductance:  0.4397781299524564 f1score:  0.8108362779740872 precision:  0.6818541996830428 recall:  1.0
outer: 0 number of node:  202  completed:  0.14285714285714285  degree:  54.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  210  completed:  0.17857142857142858  degree:  63.0
conductance:  0.4

outer: 1 number of node:  129  completed:  0.18  degree:  169.0
conductance:  0.4411492122335496 f1score:  0.7173552255980173 precision:  0.5592781726588917 recall:  1.0
outer: 1 number of node:  48  completed:  0.19  degree:  159.0
conductance:  0.4126234435379991 f1score:  0.7978114583988427 precision:  0.8135701917527095 recall:  0.7826516133012524
outer: 1 number of node:  104  completed:  0.2  degree:  154.0
conductance:  0.4256546826453617 f1score:  0.8402337123829868 precision:  0.855936 recall:  0.8250971682398668
outer: 1 number of node:  70  completed:  0.21  degree:  169.0
conductance:  0.46107861412868806 f1score:  0.6438330290247435 precision:  0.6573246769942791 recall:  0.6308840767474859
outer: 1 number of node:  41  completed:  0.22  degree:  153.0
conductance:  0.4428947875827495 f1score:  0.8216839386398714 precision:  0.8394259235422834 recall:  0.8046764143377136
outer: 1 number of node:  52  completed:  0.23  degree:  159.0
conductance:  0.4385805277525023 f1score

outer: 1 number of node:  117  completed:  0.65  degree:  163.0
conductance:  0.46531586758727495 f1score:  0.6766542610231502 precision:  0.6921984900303284 recall:  0.6617928311431921
outer: 1 number of node:  102  completed:  0.66  degree:  150.0
conductance:  0.43029680071951687 f1score:  0.8150472186490133 precision:  0.829226251276813 recall:  0.8013449318279968
outer: 1 number of node:  47  completed:  0.67  degree:  159.0
conductance:  0.4126234435379991 f1score:  0.7978114583988427 precision:  0.8135701917527095 recall:  0.7826516133012524
outer: 1 number of node:  82  completed:  0.68  degree:  162.0
conductance:  0.45195591931430046 f1score:  0.6941309966588918 precision:  0.7096088161371399 recall:  0.6793139613794805
outer: 1 number of node:  67  completed:  0.69  degree:  166.0
conductance:  0.4411492122335496 f1score:  0.7173552255980173 precision:  0.5592781726588917 recall:  1.0
outer: 1 number of node:  31  completed:  0.7  degree:  166.0
conductance:  0.4126234435379

## Performance of ACL (with rounding).

In [123]:
all_data = []
xlabels_ = []

print('Results for ACL with rounding')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_acl[i,j])
        temp_rec.append(recall_best_cond_acl[i,j])
        temp_f1.append(f1score_best_cond_acl[i,j])
        temp_conductance.append(external_best_cond_acl[i,j])
    
    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))


Results for ACL with rounding
Feature: 0 Precision 0.7616010784971015 Recall 1.0 F1 0.8639590510056822 Cond. 0.4298763868141641
Feature: 1 Precision 0.6213475956513441 Recall 0.869310876673453 F1 0.7087703363309276 Cond. 0.4418880201742084


## Collect data for l1-reg. PR (with rounding)

In [124]:
nodes = {}
external_best_cond_acl = {}
external_best_pre_cond_acl = {}
vol_best_cond_acl = {}
vol_best_pre_acl = {}
size_clust_best_cond_acl = {}
size_clust_best_pre_acl = {}
f1score_best_cond_acl = {}
f1score_best_pre_acl = {}
true_positives_best_cond_acl = {}
true_positives_best_pre_acl = {}
precision_best_cond_acl = {}
precision_best_pre_acl = {}
recall_best_cond_acl = {}
recall_best_pre_acl = {}
cuts_best_cond_acl = {}
cuts_best_pre_acl = {}
cuts_acl_ALL = {}

ct_outer = 0

start = time.time()

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    ct = 0
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "l1reg-rand", epsilon=1.0e-2, rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            cuts_acl_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_acl_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_acl_ = set(rr).intersection(S)
            if len(true_positives_acl_) == 0:
                true_positives_acl_ = set(ref_node)
                vol_ = g.d[ref_node][0,0]
            precision = sum(g.d[np.array(list(true_positives_acl_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_acl_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_pre_acl[ct_outer,node] = vol_
                
                size_clust_best_pre_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_pre_acl[ct_outer,node] = true_positives_acl_
                precision_best_pre_acl[ct_outer,node] = precision
                recall_best_pre_acl[ct_outer,node] = recall
                f1score_best_pre_acl[ct_outer,node] = f1_score_
                
                cuts_best_pre_acl[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_acl[ct_outer,node] = cond_val_l1pr
                vol_best_cond_acl[ct_outer,node] = vol_
                
                size_clust_best_cond_acl[ct_outer,node] = size_clust_acl_
                true_positives_best_cond_acl[ct_outer,node] = true_positives_acl_
                precision_best_cond_acl[ct_outer,node] = precision
                recall_best_cond_acl[ct_outer,node] = recall
                f1score_best_cond_acl[ct_outer,node] = f1_score_
                
                cuts_best_cond_acl[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_acl[ct_outer,node], 'f1score: ', f1score_best_pre_acl[ct_outer,node], 'precision: ', precision_best_pre_acl[ct_outer,node], 'recall: ', recall_best_pre_acl[ct_outer,node])
        ct += 1

    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time l1-reg. with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
outer: 0 number of node:  195  completed:  0.0  degree:  76.0
conductance:  0.43638525564803804 f1score:  0.8110273327049954 precision:  0.6821244550138724 recall:  1.0
outer: 0 number of node:  185  completed:  0.03571428571428571  degree:  63.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
outer: 0 number of node:  160  completed:  0.07142857142857142  degree:  66.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
outer: 0 number of node:  196  completed:  0.10714285714285714  degree:  67.0
conductance:  0.4319714399047997 f1score:  0.8114097123998114 precision:  0.682665608885363 recall:  1.0
outer: 0 number of node:  203  completed:  0.14285714285714285  degree:  53.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
outer: 0 number of node:  198  completed:  0.17857142857142858  degree:  52.0
conductance:  0.43

outer: 1 number of node:  45  completed:  0.19  degree:  159.0
conductance:  0.4411492122335496 f1score:  0.791894439208294 precision:  0.8067985404263491 recall:  0.7775310012955765
outer: 1 number of node:  117  completed:  0.2  degree:  163.0
conductance:  0.45672952725393956 f1score:  0.7814216224697944 precision:  0.7952222789984671 recall:  0.7680918008513788
outer: 1 number of node:  107  completed:  0.21  degree:  161.0
conductance:  0.4397112843977573 f1score:  0.8548630309122895 precision:  0.8708954746207515 recall:  0.839410204207539
outer: 1 number of node:  103  completed:  0.22  degree:  150.0
conductance:  0.4158541754980924 f1score:  0.8749097982618518 precision:  0.8901302349336058 recall:  0.860201122832994
outer: 1 number of node:  93  completed:  0.23  degree:  161.0
conductance:  0.4126234435379991 f1score:  0.7622078003906495 precision:  0.7787935363419816 recall:  0.7463137762971189
outer: 1 number of node:  87  completed:  0.24  degree:  157.0
conductance:  0.4

outer: 1 number of node:  133  completed:  0.64  degree:  170.0
conductance:  0.47087857847976305 f1score:  0.730995439534518 precision:  0.7456050301552676 recall:  0.7169473749151706
outer: 1 number of node:  85  completed:  0.65  degree:  144.0
conductance:  0.412263787119167 f1score:  0.9098750275443069 precision:  0.928911171101684 recall:  0.8916034301931026
outer: 1 number of node:  132  completed:  0.66  degree:  162.0
conductance:  0.4126234435379991 f1score:  0.7831203977594561 precision:  0.7992163915473055 recall:  0.7676599420075266
outer: 1 number of node:  109  completed:  0.67  degree:  174.0
conductance:  0.4398547435315479 f1score:  0.718070260931201 precision:  0.5601479075232402 recall:  1.0
outer: 1 number of node:  129  completed:  0.68  degree:  169.0
conductance:  0.4411492122335496 f1score:  0.7412042131740292 precision:  0.755770710438574 recall:  0.7271885989265223
outer: 1 number of node:  74  completed:  0.69  degree:  163.0
conductance:  0.4411492122335496

## Performance of l1-reg. PR (with rounding).

In [125]:
all_data = []
xlabels_ = []

print('Results for l1-reg with rounding')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_acl[i,j])
        temp_rec.append(recall_best_cond_acl[i,j])
        temp_f1.append(f1score_best_cond_acl[i,j])
        temp_conductance.append(external_best_cond_acl[i,j])
    
    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))


Results for l1-reg with rounding
Feature: 0 Precision 0.7616010784971015 Recall 1.0 F1 0.8639590510056822 Cond. 0.4298763868141641
Feature: 1 Precision 0.6757603799100437 Recall 0.9030495403788019 F1 0.7553255802685359 Cond. 0.43916714474443713


## Function for seed set expansion using BFS

In [126]:
import queue
def seed_grow_bfs_steps(g,seeds,steps,vol_target,target_cluster):
    """
    grow the initial seed set through BFS until its size reaches 
    a given ratio of the total number of nodes.
    """
    Q = queue.Queue()
    visited = np.zeros(g._num_vertices)
    visited[seeds] = 1
    for s in seeds:
        Q.put(s)
    if isinstance(seeds,np.ndarray):
        seeds = seeds.tolist()
    else:
        seeds = list(seeds)
    for step in range(steps):
        for k in range(Q.qsize()):
            node = Q.get()
            si,ei = g.adjacency_matrix.indptr[node],g.adjacency_matrix.indptr[node+1]
            neighs = g.adjacency_matrix.indices[si:ei]
            for i in range(len(neighs)):
                if visited[neighs[i]] == 0:
                    visited[neighs[i]] = 1
                    seeds.append(neighs[i])
                    Q.put(neighs[i])
                    
                    vol_seeds = np.sum(g.d[seeds])
                    vol_target_intersection_input = np.sum(g.d[list(set(target_cluster).intersection(set(seeds)))])
                    sigma = vol_target_intersection_input/vol_target
                    
                    if sigma > 0.75 or vol_seeds > 0.25*g.vol_G:
                        break
                 
            vol_seeds = np.sum(g.d[seeds])
            vol_target_intersection_input = np.sum(g.d[list(set(target_cluster).intersection(set(seeds)))])
            sigma = vol_target_intersection_input/vol_target   
            
            if sigma > 0.75 or vol_seeds > 0.25*g.vol_G:
                break
               
        vol_seeds = np.sum(g.d[seeds])
        vol_target_intersection_input = np.sum(g.d[list(set(target_cluster).intersection(set(seeds)))])
        sigma = vol_target_intersection_input/vol_target
                
        if sigma > 0.75 or vol_seeds > 0.25*g.vol_G:
            break
    return seeds

## Collect data for seed set expansion + FlowImprove, try a lot of parameters

In [127]:
nodes = {}
external_best_cond_flBFS = {}
external_best_pre_cond_flBFS = {}
vol_best_cond_flBFS = {}
vol_best_pre_flBFS = {}
size_clust_best_cond_flBFS = {}
size_clust_best_pre_flBFS = {}
f1score_best_cond_flBFS = {}
f1score_best_pre_flBFS = {}
true_positives_best_cond_flBFS = {}
true_positives_best_pre_flBFS = {}
precision_best_cond_flBFS = {}
precision_best_pre_flBFS = {}
recall_best_cond_flBFS = {}
recall_best_pre_flBFS = {}
cuts_best_cond_flBFS = {}
cuts_best_pre_flBFS = {}
cuts_flBFS_ALL = {}

ct_outer = 0

start = time.time()

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    n_step = 24
    
    vol_target = np.sum(g.d[rr])
    
    ct = 0
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
                
        seeds = seed_grow_bfs_steps(g,[node],g._num_vertices,vol_target,rr)

        vol_input = np.sum(g.d[seeds])

        vol_graph_minus_input = np.sum(g.d[list(set(range(g._num_vertices)) - set(seeds))])

        vol_target_intersection_input = np.sum(g.d[list(set(rr).intersection(set(seeds)))])

        gamma = vol_input/vol_graph_minus_input
        
#         print("gamma: ", gamma)
        
        sigma = max(vol_target_intersection_input/vol_target,gamma)
        
#         print("sigma: ", sigma)

        delta = min(max((1/3)*(1.0/(1.0/sigma - 1)) - gamma,0),1)
        
#         print("(1.0/3.0)*(1.0/(1.0/sigma - 1)): ", (1.0/3.0)*(1.0/(1.0/sigma - 1)))

        print("DELTA: ", delta)
                
        S = flow_clustering(g,seeds,method="sl",delta=delta)[0]
        number_experiments += 1

        cuts_flBFS_ALL[ct_outer,node] = S

        size_clust_flBFS_ = len(S)

        cond_val_l1pr = g.compute_conductance(S)

        vol_ = sum(g.d[S])
        true_positives_flBFS_ = set(rr).intersection(S)
        if len(true_positives_flBFS_) == 0:
            true_positives_flBFS_ = set(ref_node)
            vol_ = g.d[ref_node][0]
        precision = sum(g.d[np.array(list(true_positives_flBFS_))])/vol_
        recall = sum(g.d[np.array(list(true_positives_flBFS_))])/sum(g.d[rr])
        f1_score_ = 2*(precision*recall)/(precision + recall)

        if f1_score_ >= max_precision:

            max_precision = f1_score_

            external_best_pre_cond_flBFS[ct_outer,node] = cond_val_l1pr
            vol_best_pre_flBFS[ct_outer,node] = vol_

            size_clust_best_pre_flBFS[ct_outer,node] = size_clust_flBFS_
            true_positives_best_pre_flBFS[ct_outer,node] = true_positives_flBFS_
            precision_best_pre_flBFS[ct_outer,node] = precision
            recall_best_pre_flBFS[ct_outer,node] = recall
            f1score_best_pre_flBFS[ct_outer,node] = f1_score_

            cuts_best_pre_flBFS[ct_outer,node] = S

        if cond_val_l1pr <= min_conduct:

            min_conduct = cond_val_l1pr

            external_best_cond_flBFS[ct_outer,node] = cond_val_l1pr
            vol_best_cond_flBFS[ct_outer,node] = vol_

            size_clust_best_cond_flBFS[ct_outer,node] = size_clust_flBFS_
            true_positives_best_cond_flBFS[ct_outer,node] = true_positives_flBFS_
            precision_best_cond_flBFS[ct_outer,node] = precision
            recall_best_cond_flBFS[ct_outer,node] = recall
            f1score_best_cond_flBFS[ct_outer,node] = f1_score_

            cuts_best_cond_flBFS[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_flBFS[ct_outer,node], 'f1score: ', f1score_best_pre_flBFS[ct_outer,node], 'precision: ', precision_best_pre_flBFS[ct_outer,node], 'recall: ', recall_best_pre_flBFS[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time BFS+SL: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
DELTA:  0.973101483992188
outer: 0 number of node:  213  completed:  0.0  degree:  63.0
conductance:  0.649890590809628 f1score:  0.8686934023285898 precision:  0.9795769511305616 recall:  0.7803602556653109
DELTA:  0.8807909174032307
outer: 0 number of node:  156  completed:  0.03571428571428571  degree:  79.0
conductance:  0.6539325842696629 f1score:  0.8736910994764397 precision:  1.0 recall:  0.7757117954677513
DELTA:  0.9408936422325568
outer: 0 number of node:  208  completed:  0.07142857142857142  degree:  63.0
conductance:  0.6493030080704328 f1score:  0.8657587548638133 precision:  0.979457079970653 recall:  0.7757117954677513
DELTA:  0.9080961733585927
outer: 0 number of node:  185  completed:  0.10714285714285714  degree:  63.0
conductance:  0.6539325842696629 f1score:  0.8736910994764397 precision:  1.0 recall:  0.7757117954677513
DELTA:  0.9888681313935253
outer: 0 number of node:  212  completed:  0.14285714285714285  degree:  55.0
conductance:  0.6539325842696629 f1sc

outer: 1 number of node:  96  completed:  0.12  degree:  168.0
conductance:  0.7119464086341645 f1score:  0.3863205603625875 precision:  0.5815655625852871 recall:  0.28922203713986055
DELTA:  0
outer: 1 number of node:  53  completed:  0.13  degree:  179.0
conductance:  0.7077826725403817 f1score:  0.37078052581928556 precision:  0.5531081742535487 recall:  0.2788574248874082
DELTA:  0
outer: 1 number of node:  77  completed:  0.14  degree:  167.0
conductance:  0.709068167604753 f1score:  0.400512312014543 precision:  0.606253908692933 recall:  0.2990314023073601
DELTA:  0
outer: 1 number of node:  119  completed:  0.15  degree:  178.0
conductance:  0.7059480938780579 f1score:  0.34844612975022665 precision:  0.5248975537066931 recall:  0.26078104756616693
DELTA:  0
outer: 1 number of node:  85  completed:  0.16  degree:  144.0
conductance:  0.7201927955352613 f1score:  0.49474951230647907 precision:  0.7559614408929477 recall:  0.3676969584798569
DELTA:  0
outer: 1 number of node:  6

outer: 1 number of node:  83  completed:  0.56  degree:  148.0
conductance:  0.7273190621814475 f1score:  0.46938521012595086 precision:  0.7194189602446484 recall:  0.34832500462705906
DELTA:  0
outer: 1 number of node:  50  completed:  0.57  degree:  171.0
conductance:  0.7200050358806497 f1score:  0.37429612454455113 precision:  0.5690545134080323 recall:  0.2788574248874082
DELTA:  0
outer: 1 number of node:  84  completed:  0.58  degree:  157.0
conductance:  0.725629931280224 f1score:  0.46993808950014543 precision:  0.7196487655892084 recall:  0.34888025171201187
DELTA:  0
outer: 1 number of node:  59  completed:  0.59  degree:  169.0
conductance:  0.7083436494181728 f1score:  0.37221558858648657 precision:  0.5595444416934885 recall:  0.2788574248874082
DELTA:  0
outer: 1 number of node:  67  completed:  0.6  degree:  166.0
conductance:  0.7102953899394389 f1score:  0.37292181069958846 precision:  0.5600049437646768 recall:  0.27953606021346167
DELTA:  0
outer: 1 number of node:

outer: 1 number of node:  130  completed:  0.99  degree:  170.0
conductance:  0.7113861386138614 f1score:  0.38618304582321217 precision:  0.5804455445544554 recall:  0.2893454253809612
 
Outer:  1  Elapsed time BFS+SL:  27.852164030075073
Outer:  1  Number of experiments:  128
 


## Performance of BFS+FlowImp.

In [128]:
all_data = []
xlabels_ = []

print('Results for BFS+SL')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_flBFS[i,j])
        temp_rec.append(recall_best_cond_flBFS[i,j])
        temp_f1.append(f1score_best_cond_flBFS[i,j])
        temp_conductance.append(external_best_cond_flBFS[i,j])

    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))


Results for BFS+SL
Feature: 0 Precision 0.983088390480847 Recall 0.7768739105171412 F1 0.8671746612477108 Cond. 0.6527827968252733
Feature: 1 Precision 0.6497698357105165 Recall 0.318420630513912 F1 0.4273749111275312 Cond. 0.7208422960860279


## Collect data for APPR+SL

In [129]:
nodes = {}
external_best_cond_apprSL = {}
external_best_pre_cond_apprSL = {}
vol_best_cond_apprSL = {}
vol_best_pre_apprSL = {}
size_clust_best_cond_apprSL = {}
size_clust_best_pre_apprSL = {}
f1score_best_cond_apprSL = {}
f1score_best_pre_apprSL = {}
true_positives_best_cond_apprSL = {}
true_positives_best_pre_apprSL = {}
precision_best_cond_apprSL = {}
precision_best_pre_apprSL = {}
recall_best_cond_apprSL = {}
recall_best_pre_apprSL = {}
cuts_best_cond_apprSL = {}
cuts_best_pre_apprSL = {}
cuts_apprSL_ALL = {}

ct_outer = 0

start = time.time()

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    vol_target = np.sum(g.d[rr])
    
    ct = 0
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "acl", rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            vol_input = np.sum(g.d[S])

            vol_graph_minus_input = np.sum(g.d[list(set(range(g._num_vertices)) - set(S))])

            vol_target_intersection_input = np.sum(g.d[list(set(rr).intersection(set(S)))])

            gamma = vol_input/vol_graph_minus_input

    #         print("gamma: ", gamma)

            sigma = max(vol_target_intersection_input/vol_target,gamma)

    #         print("sigma: ", sigma)

            delta = min(max((1/3)*(1.0/(1.0/sigma - 1)) - gamma,0),1)

    #         print("(1.0/3.0)*(1.0/(1.0/sigma - 1)): ", (1.0/3.0)*(1.0/(1.0/sigma - 1)))

            print("DELTA: ", delta)

            S = flow_clustering(g,S,method="sl",delta=delta)[0]
            
            cuts_apprSL_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_apprSL_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_apprSL_ = set(rr).intersection(S)
            if len(true_positives_apprSL_) == 0:
                true_positives_apprSL_ = set(ref_node)
                vol_ = g.d[ref_node][0]
            precision = sum(g.d[np.array(list(true_positives_apprSL_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_apprSL_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_apprSL[ct_outer,node] = cond_val_l1pr
                vol_best_pre_apprSL[ct_outer,node] = vol_
                
                size_clust_best_pre_apprSL[ct_outer,node] = size_clust_apprSL_
                true_positives_best_pre_apprSL[ct_outer,node] = true_positives_apprSL_
                precision_best_pre_apprSL[ct_outer,node] = precision
                recall_best_pre_apprSL[ct_outer,node] = recall
                f1score_best_pre_apprSL[ct_outer,node] = f1_score_
                
                cuts_best_pre_apprSL[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_apprSL[ct_outer,node] = cond_val_l1pr
                vol_best_cond_apprSL[ct_outer,node] = vol_
                
                size_clust_best_cond_apprSL[ct_outer,node] = size_clust_apprSL_
                true_positives_best_cond_apprSL[ct_outer,node] = true_positives_apprSL_
                precision_best_cond_apprSL[ct_outer,node] = precision
                recall_best_cond_apprSL[ct_outer,node] = recall
                f1score_best_cond_apprSL[ct_outer,node] = f1_score_
                
                cuts_best_cond_apprSL[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_apprSL[ct_outer,node], 'f1score: ', f1score_best_pre_apprSL[ct_outer,node], 'precision: ', precision_best_pre_apprSL[ct_outer,node], 'recall: ', recall_best_pre_apprSL[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time APPR+SL with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
DELTA:  1
DELTA:  1
DELTA:  0.008886634058147377
DELTA:  0.008886634058147377
outer: 0 number of node:  205  completed:  0.0  degree:  53.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
DELTA:  1
DELTA:  1



divide by zero encountered in double_scalars



DELTA:  0.009558752871906422
DELTA:  0.010462323224576795
outer: 0 number of node:  206  completed:  0.03571428571428571  degree:  62.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
DELTA:  1
DELTA:  1
DELTA:  0.008712789228379291
DELTA:  0.008712789228379291
outer: 0 number of node:  198  completed:  0.07142857142857142  degree:  52.0
conductance:  0.4329023629068212 f1score:  0.8683148335015136 precision:  0.7672759696834597 recall:  1.0
DELTA:  1
DELTA:  1
DELTA:  0.010638639780027298
DELTA:  0.010638639780027298
outer: 0 number of node:  185  completed:  0.10714285714285714  degree:  63.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
DELTA:  1
DELTA:  1
DELTA:  0.009235059634276744
DELTA:  0.009235059634276744
outer: 0 number of node:  212  completed:  0.14285714285714285  degree:  55.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74

DELTA:  0
DELTA:  1
DELTA:  0.15189630423915712
outer: 1 number of node:  58  completed:  0.05  degree:  162.0
conductance:  0.45161499967505037 f1score:  0.6965438663121915 precision:  0.7151491518814583 recall:  0.6788821025356283
DELTA:  0
DELTA:  1
DELTA:  0
DELTA:  0
outer: 1 number of node:  67  completed:  0.06  degree:  166.0
conductance:  0.4411492122335496 f1score:  0.5247431129042426 precision:  0.5364439002384481 recall:  0.5135418594607933
DELTA:  0
DELTA:  0
DELTA:  1
DELTA:  0
outer: 1 number of node:  96  completed:  0.07  degree:  168.0
conductance:  0.4126234435379991 f1score:  0.47636432303140935 precision:  0.4862190812720848 recall:  0.46690110432475784
DELTA:  1
DELTA:  1
DELTA:  0
DELTA:  0
outer: 1 number of node:  119  completed:  0.08  degree:  178.0
conductance:  0.4794823437601613 f1score:  0.5900715506870133 precision:  0.6060349873187227 recall:  0.5749275094083534
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  120  completed:  0.09  deg

DELTA:  0
DELTA:  1
DELTA:  0
outer: 1 number of node:  97  completed:  0.42  degree:  163.0
conductance:  0.42488888888888887 f1score:  0.4365124331129997 precision:  0.4456011824432877 recall:  0.4277870318958603
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  1
outer: 1 number of node:  88  completed:  0.43  degree:  161.0
conductance:  0.4126234435379991 f1score:  0.7092993951612903 precision:  0.7246861924686192 recall:  0.6945524091554075
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  1
outer: 1 number of node:  116  completed:  0.44  degree:  156.0
conductance:  0.4356136173498938 f1score:  0.8311212236126913 precision:  0.8519598315516683 recall:  0.8112776852365969
DELTA:  0
DELTA:  1
DELTA:  0
DELTA:  0
outer: 1 number of node:  73  completed:  0.45  degree:  161.0
conductance:  0.4383249886208466 f1score:  0.5669301296237749 precision:  0.581414953634654 recall:  0.5531494848540934
DELTA:  0
DELTA:  0
DELTA:  1
DELTA:  0
outer: 1 number of node:  85  completed:  0.46  degree:  144.0
conductanc

DELTA:  0
DELTA:  1
DELTA:  0
outer: 1 number of node:  83  completed:  0.79  degree:  148.0
conductance:  0.4126234435379991 f1score:  0.5933629573603102 precision:  0.6064940085040588 recall:  0.580788450860633
DELTA:  0
DELTA:  1
DELTA:  0
DELTA:  0
outer: 1 number of node:  50  completed:  0.8  degree:  171.0
conductance:  0.4126234435379991 f1score:  0.48457761551051237 precision:  0.4946347105313886 recall:  0.4749213399962984
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  118  completed:  0.81  degree:  175.0
conductance:  0.4791948035243424 f1score:  0.6154088050314467 precision:  0.6276056699377847 recall:  0.6036769695847986
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  129  completed:  0.82  degree:  169.0
conductance:  0.4411492122335496 f1score:  0.020637440468921724 precision:  1.0 recall:  0.010426306373002653
DELTA:  0
DELTA:  1
DELTA:  0
DELTA:  0
outer: 1 number of node:  80  completed:  0.83  degree:  166.0
conductance:  0.44114

## Performance of APPR+SL

In [130]:
all_data = []
xlabels_ = []

print('Results for APPR+SL')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_apprSL[i,j])
        temp_rec.append(recall_best_cond_apprSL[i,j])
        temp_f1.append(f1score_best_cond_apprSL[i,j])
        temp_conductance.append(external_best_cond_apprSL[i,j])

    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))


Results for APPR+SL
Feature: 0 Precision 0.7683214659749378 Recall 1.0 F1 0.8685372472776756 Cond. 0.427663502821075
Feature: 1 Precision 0.89414672073611 Recall 0.2574433956443951 F1 0.2699017442541204 Cond. 0.4376144431843467


## Collect data for L1+SL

In [131]:
nodes = {}
external_best_cond_l1SL = {}
external_best_pre_cond_l1SL = {}
vol_best_cond_l1SL = {}
vol_best_pre_l1SL = {}
size_clust_best_cond_l1SL = {}
size_clust_best_pre_l1SL = {}
f1score_best_cond_l1SL = {}
f1score_best_pre_l1SL = {}
true_positives_best_cond_l1SL = {}
true_positives_best_pre_l1SL = {}
precision_best_cond_l1SL = {}
precision_best_pre_l1SL = {}
recall_best_cond_l1SL = {}
recall_best_pre_l1SL = {}
cuts_best_cond_l1SL = {}
cuts_best_pre_l1SL = {}
cuts_l1SL_ALL = {}

ct_outer = 0

start = time.time()

number_experiments = 0

for rr in all_clusters:
    
    how_many = int(len(rr))
    print(how_many)
    
    random.seed(4)
    
    nodes[ct_outer] = np.random.choice(rr, how_many, replace=False)
    
    eigv, lambda_val = fiedler_local(g, rr)
    lambda_val = np.real(lambda_val)
    
    step = (2*lambda_val - lambda_val/2)/4
    
    a_list = np.arange(lambda_val/2,2*lambda_val,step)
    
    vol_target = np.sum(g.d[rr])
    
    ct = 0
    for node in nodes[ct_outer]:
        ref_node = [node]
        
        max_precision = -1
        min_conduct = 100
        
        ct_inner = 0
        for a in a_list:
            
            if ct_outer <= 1:
                rho = 0.15/np.sum(g.d[rr])
            else:
                rho = 0.2/np.sum(g.d[rr])
            
            output_pr_clustering = approximate_PageRank(g,ref_node,method = "l1reg-rand", epsilon=1.0e-2, rho=rho, alpha=a, cpp = True, normalize=True,normalized_objective=True)
            number_experiments += 1
            
            output_pr_sc = sweep_cut(g,output_pr_clustering,cpp=True)
            
            S = output_pr_sc[0]
            
            vol_input = np.sum(g.d[S])

            vol_graph_minus_input = np.sum(g.d[list(set(range(g._num_vertices)) - set(S))])

            vol_target_intersection_input = np.sum(g.d[list(set(rr).intersection(set(S)))])

            gamma = vol_input/vol_graph_minus_input

    #         print("gamma: ", gamma)

            sigma = max(vol_target_intersection_input/vol_target,gamma)

    #         print("sigma: ", sigma)

            delta = min(max((1/3)*(1.0/(1.0/sigma - 1)) - gamma,0),1)

    #         print("(1.0/3.0)*(1.0/(1.0/sigma - 1)): ", (1.0/3.0)*(1.0/(1.0/sigma - 1)))

            print("DELTA: ", delta)

            S = flow_clustering(g,S,method="sl",delta=delta)[0]
            
            cuts_l1SL_ALL[ct_outer,node,ct_inner] = S
            
            size_clust_l1SL_ = len(S)
            
            cond_val_l1pr = g.compute_conductance(S)
            
            vol_ = sum(g.d[S])
            true_positives_l1SL_ = set(rr).intersection(S)
            if len(true_positives_l1SL_) == 0:
                true_positives_l1SL_ = set(ref_node)
                vol_ = g.d[ref_node][0]
            precision = sum(g.d[np.array(list(true_positives_l1SL_))])/vol_
            recall = sum(g.d[np.array(list(true_positives_l1SL_))])/sum(g.d[rr])
            f1_score_ = 2*(precision*recall)/(precision + recall)
            
            if f1_score_ >= max_precision:
                
                max_precision = f1_score_
                
                external_best_pre_cond_l1SL[ct_outer,node] = cond_val_l1pr
                vol_best_pre_l1SL[ct_outer,node] = vol_
                
                size_clust_best_pre_l1SL[ct_outer,node] = size_clust_l1SL_
                true_positives_best_pre_l1SL[ct_outer,node] = true_positives_l1SL_
                precision_best_pre_l1SL[ct_outer,node] = precision
                recall_best_pre_l1SL[ct_outer,node] = recall
                f1score_best_pre_l1SL[ct_outer,node] = f1_score_
                
                cuts_best_pre_l1SL[ct_outer,node] = S
        
            if cond_val_l1pr <= min_conduct:
                
                min_conduct = cond_val_l1pr
                
                external_best_cond_l1SL[ct_outer,node] = cond_val_l1pr
                vol_best_cond_l1SL[ct_outer,node] = vol_
                
                size_clust_best_cond_l1SL[ct_outer,node] = size_clust_l1SL_
                true_positives_best_cond_l1SL[ct_outer,node] = true_positives_l1SL_
                precision_best_cond_l1SL[ct_outer,node] = precision
                recall_best_cond_l1SL[ct_outer,node] = recall
                f1score_best_cond_l1SL[ct_outer,node] = f1_score_
                
                cuts_best_cond_l1SL[ct_outer,node] = S

        print('outer:', ct_outer, 'number of node: ',node, ' completed: ', ct/how_many, ' degree: ', g.d[node])
        print('conductance: ', external_best_cond_l1SL[ct_outer,node], 'f1score: ', f1score_best_pre_l1SL[ct_outer,node], 'precision: ', precision_best_pre_l1SL[ct_outer,node], 'recall: ', recall_best_pre_l1SL[ct_outer,node])
        ct += 1
    end = time.time()
    print(" ")
    print("Outer: ", ct_outer," Elapsed time L1+SL with rounding: ", end - start)
    print("Outer: ", ct_outer," Number of experiments: ", number_experiments)
    print(" ")
    ct_outer += 1

28
DELTA:  1
DELTA:  0.5305183907126931
DELTA:  0.009735011249777143
DELTA:  0.010638639780027298
outer: 0 number of node:  208  completed:  0.0  degree:  63.0
conductance:  0.4126234435379991 f1score:  0.8498765432098765 precision:  0.7389437526835552 recall:  1.0
DELTA:  1
DELTA:  1
DELTA:  0.0090607240451157
DELTA:  0.0090607240451157
outer: 0 number of node:  202  completed:  0.03571428571428571  degree:  54.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
DELTA:  1
DELTA:  0.5375900253054992
DELTA:  0.010638639780027298
DELTA:  0.010638639780027298
outer: 0 number of node:  185  completed:  0.07142857142857142  degree:  63.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
DELTA:  1
DELTA:  1
DELTA:  0.008712789228379291
DELTA:  0.008712789228379291
outer: 0 number of node:  198  completed:  0.10714285714285714  degree:  52.0
conductance:  0.4329023629068212 f1score:  


divide by zero encountered in double_scalars



DELTA:  0.010638639780027298
DELTA:  0.010638639780027298
outer: 0 number of node:  186  completed:  0.14285714285714285  degree:  63.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
DELTA:  1
DELTA:  0.1214200922829076
DELTA:  0.011346407122087834
DELTA:  0.011346407122087834
outer: 0 number of node:  214  completed:  0.17857142857142858  degree:  67.0
conductance:  0.4460093896713615 f1score:  0.8937938197870683 precision:  0.807981220657277 recall:  1.0
DELTA:  1
DELTA:  1
DELTA:  0.009235059634276744
DELTA:  0.009235059634276744
outer: 0 number of node:  212  completed:  0.21428571428571427  degree:  55.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74793568013907 recall:  1.0
DELTA:  1
DELTA:  1
DELTA:  0.008886634058147377
DELTA:  0.008886634058147377
outer: 0 number of node:  205  completed:  0.25  degree:  53.0
conductance:  0.41677531508039983 f1score:  0.8557931377424167 precision:  0.74

DELTA:  0
DELTA:  1
DELTA:  0
outer: 1 number of node:  129  completed:  0.08  degree:  169.0
conductance:  0.4411492122335496 f1score:  0.7096916438313018 precision:  0.724967824967825 recall:  0.69504596211981
DELTA:  0
DELTA:  1
DELTA:  1
DELTA:  0
outer: 1 number of node:  88  completed:  0.09  degree:  161.0
conductance:  0.4126234435379991 f1score:  0.7520882584712372 precision:  0.7688837329208559 recall:  0.7360108581652168
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  74  completed:  0.1  degree:  163.0
conductance:  0.4411492122335496 f1score:  0.14997745829785103 precision:  0.39877464038359084 recall:  0.0923560984638164
DELTA:  0
DELTA:  0
DELTA:  1
DELTA:  0
outer: 1 number of node:  63  completed:  0.11  degree:  165.0
conductance:  0.4520778129645188 f1score:  0.7952780758790482 precision:  0.8141924642926388 recall:  0.777222530692825
DELTA:  1
DELTA:  1
DELTA:  1
DELTA:  0
outer: 1 number of node:  94  completed:  0.12  degree:  167.0
conductance: 

DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  73  completed:  0.45  degree:  161.0
conductance:  0.4383249886208466 f1score:  0.18143289606458124 precision:  0.4979230130157851 recall:  0.11092602874946017
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  48  completed:  0.46  degree:  159.0
conductance:  0.4126234435379991 f1score:  0.24366592194704567 precision:  0.5308616404308202 recall:  0.15812203097044852
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  103  completed:  0.47  degree:  150.0
conductance:  0.4126234435379991 f1score:  0.6656430503765446 precision:  0.8168685509196949 recall:  0.5616632734900364
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  55  completed:  0.48  degree:  159.0
conductance:  0.4126234435379991 f1score:  0.24366592194704567 precision:  0.5308616404308202 recall:  0.15812203097044852
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  71  completed:  0.49  degree:  166.0
cond

DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  58  completed:  0.82  degree:  162.0
conductance:  0.4457674539657753 f1score:  0.8191779086705935 precision:  0.8415641876504653 recall:  0.7979517551977297
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  47  completed:  0.83  degree:  159.0
conductance:  0.4126234435379991 f1score:  0.24366592194704567 precision:  0.5308616404308202 recall:  0.15812203097044852
DELTA:  1
DELTA:  1
DELTA:  1
DELTA:  0
outer: 1 number of node:  20  completed:  0.84  degree:  181.0
conductance:  0.4951970851275257 f1score:  0.679217541392316 precision:  0.7047157922663659 recall:  0.6555000308470603
DELTA:  0
DELTA:  0
DELTA:  0
DELTA:  0
outer: 1 number of node:  80  completed:  0.85  degree:  166.0
conductance:  0.4411492122335496 f1score:  0.020274809160305347 precision:  1.0 recall:  0.010241224011351718
DELTA:  1
DELTA:  0
DELTA:  0
DELTA:  1
outer: 1 number of node:  60  completed:  0.86  degree:  147.0
conductance:  0.42950

## Performance of l1+SL

In [132]:
all_data = []
xlabels_ = []

print('Results for L1+SL')
sum_precision = 0
sum_recall = 0
sum_f1 = 0
sum_conductance = 0

info_ref_nodes = all_clusters
l_info_ref_nodes = len(info_ref_nodes)

for i in range(l_info_ref_nodes):
    temp_pre = []
    temp_rec = []
    temp_f1 = []
    temp_conductance = []
    
    for j in all_clusters[i]:
        temp_pre.append(precision_best_cond_l1SL[i,j])
        temp_rec.append(recall_best_cond_l1SL[i,j])
        temp_f1.append(f1score_best_cond_l1SL[i,j])
        temp_conductance.append(external_best_cond_l1SL[i,j])

    print('Feature:', i,'Precision', stat_.mean(temp_pre), 'Recall', stat_.mean(temp_rec), 'F1', stat_.mean(temp_f1), 'Cond.', stat_.mean(temp_conductance))
    

Results for L1+SL
Feature: 0 Precision 0.7683214659749378 Recall 1.0 F1 0.8685372472776756 Cond. 0.427663502821075
Feature: 1 Precision 0.9206091246821325 Recall 0.3666037386637053 F1 0.3812906580903924 Cond. 0.4354008547926768
