# Benchmark for Community detection

In [1]:
"""Load/import helper functions"""

import time
import random
from LocalPopular import locally_popular_clustering_with_hop_distance, extract_labels_from_communities, time_tester, calculate_scores_CD

from GraphFunctions import generate_agents, calculate_euclidian_relationships, create_graph, \
    my_make_circles, create_graphs_euclid, create_graphs_kNN, \
    generate_graph,create_graphs_hop_distance, create_graphs_hop_distance_abs,randomize_graph_node_labels

from PlotHelperFunctions import plot_clustering, plot_stuff

from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_moons
from sklearn.metrics import rand_score
import numpy as np
import networkx as nx
from scipy.spatial import distance

from community_detection.leiden import leiden
from community_detection.louvain import louvain
from community_detection.quality_functions import CPM, Modularity


import data.jazz as jazz
import data.cora as cora

## Create Graphs

In [2]:
# Seed both global generators so the relabellings are reproducible across kernel
# restarts.  NOTE(review): which RNG the helper functions draw from is not visible
# in this notebook, hence both `random` and `numpy.random` are seeded — confirm.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

repetitions = 1    # number of random isomorphic relabellings generated for each graph

cora_graph = cora.get_graph()

# Relabel the nodes to 0..n-1 in iteration order.  The mapping is built in a single
# pass; materialising the node list once per index would be quadratic in the number
# of nodes.
cora_graph = nx.relabel_nodes(
    cora_graph,
    {node: idx for idx, node in enumerate(cora_graph.nodes())},
)
# Ground truth: the 'subject' attribute of every node, ordered by the new integer id.
cora_truth = [cora_graph.nodes[idx]['subject'] for idx in range(cora_graph.number_of_nodes())]

# One (graph, truth) pair per requested relabelling.
cora_perm_graph = []
cora_perm_truth = []
for i in range(repetitions):
    g, t = randomize_graph_node_labels(cora_graph, cora_truth)
    cora_perm_graph.append(g)
    cora_perm_truth.append(t)


jazz_graph = jazz.get_graph()
# Shift every node id down by one (presumably the raw ids are 1-based — TODO confirm).
# Keys of the mapping that are not nodes of the graph are ignored by relabel_nodes.
id_shift = {old_id: old_id - 1 for old_id in range(len(jazz_graph) + 1)}
jazz_graph = nx.relabel_nodes(jazz_graph, id_shift)
# The base graph itself is shuffled once; no ground truth exists for this data set.
jazz_graph, _ = randomize_graph_node_labels(jazz_graph, None)
jazz_truth = None

# One (graph, truth) pair per requested relabelling.
jazz_perm_graph, jazz_perm_truth = [], []
for i in range(repetitions):
    g, t = randomize_graph_node_labels(jazz_graph, jazz_truth)
    jazz_perm_graph.append(g)
    jazz_perm_truth.append(t)


karate_graph = nx.karate_club_graph()
# The base graph is deliberately left un-shuffled here
# (previously: karate_graph,_ = randomize_graph_node_labels(karate_graph,None)).
# Ground truth: the "club" attribute of each of the 34 members.
karate_truth = [karate_graph.nodes[member]["club"] for member in range(34)]

karate_perm_graph, karate_perm_truth = [], []
for i in range(repetitions):
    g, t = randomize_graph_node_labels(karate_graph, karate_truth)
    karate_perm_graph.append(g)
    karate_perm_truth.append(t)

# NOTE(review): the relabelled copies built by the loop above are discarded here —
# the benchmark uses five references to the *unpermuted* graph instead.  The loop is
# kept because it may still advance the random state; confirm whether it can go.
karate_perm_graph = [karate_graph] * 5
karate_perm_truth = [karate_truth] * 5

# Synthetic benchmark graph together with its ground-truth labels.
# NOTE(review): the meaning of the four arguments is defined in
# GraphFunctions.generate_graph; the 25 matches the '25 random communities' name and
# the expected cluster count used for this graph in the benchmark cell — confirm.
graph, graph_truth = generate_graph(10, 25, 0.2, 0.05)

# One (graph, truth) pair per requested relabelling.
graph_perm_graph, graph_perm_truth = [], []
for i in range(repetitions):
    g, t = randomize_graph_node_labels(graph, graph_truth)
    graph_perm_graph.append(g)
    graph_perm_truth.append(t)




## Run the algorithms


## Gather the numbers

We can use the `collected_data` dictionary to build a table for easier comparison.


In [3]:
import itertools
import timeit 
import pandas as pd
import numbers

N_RUNS = 10               # independent benchmark runs aggregated per threshold pair
CORA_START_CLUSTERS = 6   # Cora is not started from singletons; six initial clusters are used instead


def _report_scores(collected, method, dataset, scores, avg_time):
    """Store one run's score dict under (method, dataset) and echo it to stdout."""
    collected.setdefault((method, dataset), []).append(scores)
    print(f"execution time: ~ {avg_time:.6f} seconds.")
    for score_name, value in scores.items():
        print(score_name, ": ~", value)


def _mean_rand_score(outputs, references):
    """Average Rand score between paired label mappings.

    Each element of `outputs` / `references` is a mapping whose values are cluster
    labels.  When both mappings of a pair have the same key set, the labels are
    aligned by key, so that a different insertion order cannot pair up labels of
    different nodes.  Otherwise the values are compared positionally.
    """
    total = 0
    for out, ref in zip(outputs, references):
        if out.keys() == ref.keys():
            keys = list(out)
            total += rand_score([out[k] for k in keys], [ref[k] for k in keys])
        else:
            total += rand_score(list(out.values()), list(ref.values()))
    return total / len(outputs)


def _summarise_runs(collected):
    """Collapse the per-run score dicts into one record per (method, dataset).

    Numeric metrics become a (mean, population std) tuple over all runs; any other
    metric is copied from the first run.
    """
    records = []
    for (method, dataset), metrics_list in collected.items():
        record = {'Method': method, 'Dataset': dataset}
        for key, first_value in metrics_list[0].items():
            if isinstance(first_value, numbers.Number):
                values = [m[key] for m in metrics_list]
                mean = sum(values) / len(values)
                std = (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
                record[key] = (mean, std)
            else:
                record[key] = first_value
        records.append(record)
    return records


dfs = []
labels = [(0.2, 0.2), (0.25, 0.35), (0.4, 0.4)]   # (f, e) threshold pairs passed to the LP heuristics

# (name, permuted graphs, expected number of clusters, ground truths)
# NOTE(review): Jazz has no expected cluster count, so the "predicted number of
# clusters" variants receive None as initial clustering — confirm the LP function
# accepts that.
datasets = [
    ('Karate Club', karate_perm_graph, 2, karate_perm_truth),
    ('Cora', cora_perm_graph, 7, cora_perm_truth),
    ('Jazz', jazz_perm_graph, None, jazz_perm_truth),
    ('25 random communities', graph_perm_graph, 25, graph_perm_truth),
]

for threshold in labels:
    f, e = threshold
    collected_data = {}

    # A dedicated loop variable is used so the `repetitions` setting of the graph
    # set-up cell is not overwritten.
    for run_idx in range(N_RUNS):
        𝓗 = Modularity(1.0)

        # The second positional argument is ignored by both baselines.
        fn_louvain_mod = lambda G, _: louvain(G, 𝓗)
        fn_leiden_mod = lambda G, _: leiden(G, 𝓗)

        def make_lp(mode):
            """Build an LP heuristic callable for the given mode ('B', 'F' or 'E')."""
            return lambda agents, initial_clustering, pre: locally_popular_clustering_with_hop_distance(
                agents, f, e, initial_clustering, mode=mode, pre=pre)

        # Leiden must come before the LP heuristics: its labels are the reference
        # for the "Rand Score with initial clustering" metric.
        algorithms = [
            ('Louvain (Mod)', fn_louvain_mod, False),
            ('Leiden (Mod)', fn_leiden_mod, False),
            ('LP (Balanced) Heuristic', make_lp('B'), True),
            ('LP (Friend-Oriented) Heuristic', make_lp('F'), True),
            ('LP (Enemy-Averse) Heuristic', make_lp('E'), True),
        ]

        for g_name, graph_perms, clusters, truth in datasets:
            lei_output = None   # Leiden labels of this data set in this run

            for a_name, algo, lp_heuristic in algorithms:
                if not lp_heuristic:
                    print(f"Running {a_name} on {g_name} … ", end='')
                    # Pass an explicit None instead of relying on a leftover `_`.
                    times, outputs = time_tester(lambda a: algo(a, None), graph_perms)
                    outputs = [extract_labels_from_communities(c.communities) for c in outputs]

                    if algo is fn_leiden_mod:
                        lei_output = outputs

                    avg_time = sum(times) / len(times)
                    scores = calculate_scores_CD(outputs, truth, graph_perms)
                    scores['Time'] = avg_time
                    _report_scores(collected_data, a_name, g_name, scores, avg_time)
                    continue

                # Every LP heuristic is run from three different starting points.
                if graph_perms is cora_perm_graph:
                    alone = (' starting with everyone alone *6 starting clusters', CORA_START_CLUSTERS, None)
                else:
                    # one initial cluster per node of the first graph
                    alone = (' starting with everyone alone', graph_perms[0].number_of_nodes(), None)
                variants = [
                    alone,
                    (' starting with predicted number of clusters', clusters, None),
                    (' starting with the output of leiden', clusters, fn_leiden_mod),
                ]

                for suffix, initial_clusters, pre in variants:
                    a_name_modified = a_name + suffix
                    print(f"Running {a_name_modified} on {g_name} … ", end='')

                    times, outputs = time_tester(lambda a: algo(a, initial_clusters, pre), graph_perms)
                    avg_time = sum(times) / len(times)
                    scores = calculate_scores_CD(outputs, truth, graph_perms)

                    # NOTE(review): the reference is the Leiden output of the baseline
                    # run above, not necessarily the clustering `pre` produced inside
                    # the LP call — confirm this is the intended comparison.
                    if pre is not None and lei_output is not None:
                        scores['Rand Score with initial clustering'] = _mean_rand_score(outputs, lei_output)

                    scores['Time'] = avg_time
                    _report_scores(collected_data, a_name_modified, g_name, scores, avg_time)

    # Each entry of collected_data now holds N_RUNS score dicts.
    df = pd.DataFrame(_summarise_runs(collected_data))

    dfs.append(df)

dfs[0].head()

Running Louvain (Mod) on Karate Club … execution time: ~ 0.014735 seconds.
Rand Index : ~ 0.6634581105169339
Modularity : ~ 0.44182267948501713
Time : ~ 0.01473462000139989
Running Leiden (Mod) on Karate Club … execution time: ~ 0.031532 seconds.
Rand Index : ~ 0.669162210338681
Modularity : ~ 0.44353928899383444
Time : ~ 0.03153191998717375
Running LP (Balanced) Heuristic starting with everyone alone on Karate Club … execution time: ~ 0.016852 seconds.
Rand Index : ~ 0.5525846702317291
Modularity : ~ 0.20661157024793392
Time : ~ 0.016852080007083714
Running LP (Balanced) Heuristic starting with predicted number of clusters on Karate Club … execution time: ~ 0.004869 seconds.
Rand Index : ~ 0.5294117647058824
Modularity : ~ 0.2790708569929349
Time : ~ 0.0048687199945561584
Running LP (Balanced) Heuristic starting with the output of leiden on Karate Club … execution time: ~ 0.037900 seconds.
Rand Index : ~ 0.704812834224599
Modularity : ~ 0.4293866306853319
Rand Score with initial clust

Unnamed: 0,Method,Dataset,Rand Index,Modularity,Time,Rand Score with initial clustering
0,Louvain (Mod),Karate Club,"(0.6623172905525847, 0.012461206288743516)","(0.43996214463746935, 0.0035712819982026854)","(0.010712952003232202, 0.0016492329878081678)",
1,Leiden (Mod),Karate Club,"(0.6676292335115864, 0.011112534502960564)","(0.43833005378459927, 0.004394759371961409)","(0.021314589998219162, 0.003843045547673766)",
2,LP (Balanced) Heuristic starting with everyone...,Karate Club,"(0.5525846702317291, 0.0)","(0.20661157024793392, 0.0)","(0.011682751998887398, 0.0019216067305451944)",
3,LP (Balanced) Heuristic starting with predicte...,Karate Club,"(0.5294117647058824, 0.0)","(0.2790708569929349, 0.0)","(0.003238090000813827, 0.0007277515430596899)",
4,LP (Balanced) Heuristic starting with the outp...,Karate Club,"(0.7030303030303029, 0.0029094054564140085)","(0.4292205918179944, 0.0006752578735055873)","(0.02531908599950839, 0.004742882419540857)","(0.6920142602495545, 0.005633362637882694)"


In [34]:
# Average, per results frame, of the std component stored in each (mean, std) cell.
avg_stds = {}

for frame in dfs:
    for col in ['Rand Index']:
        per_row_std = frame[col].apply(
            lambda cell: cell[1] if isinstance(cell, (tuple, list)) else float('nan')
        )
        avg_stds.setdefault(col, []).append(per_row_std.mean())

# Overall average across all DataFrames, ignoring frames whose average is NaN.
overall_avg_stds = {col: np.nanmean(per_frame) for col, per_frame in avg_stds.items()}

print(overall_avg_stds)

{'Rand Index': 0.0025383914328214623}


In [52]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib.lines import Line2D

# Larger default font for all figures produced by this notebook.
plt.rcParams['font.size'] = 18

def plot_custom_thresholds_with_louvain_leiden(dfs, labels, dataset_name, score_col='Rand Index'):
    """Plot one score per method for a single data set, across threshold settings.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One results frame per threshold setting.  Each needs the columns 'Method',
        'Dataset' and `score_col`; the cells of `score_col` are indexed as
        (mean, std) pairs.  The frames are not modified.
    labels : sequence
        Threshold label for each frame in `dfs` (converted with `str`).  Any number
        of labels is supported; markers are assigned in order and repeat once the
        marker list is exhausted.
    dataset_name : str
        Only rows whose 'Dataset' equals this value are plotted.
    score_col : str
        Name of the score column to plot.

    Returns
    -------
    (fig, ax) : the matplotlib figure and axes that were drawn on.
    """
    # Tag *copies* of the frames with their threshold so the caller's data stays
    # untouched; frames without a matching label are passed through unchanged.
    frames = list(dfs)
    tagged = [frame.assign(Threshold=str(label)) for frame, label in zip(frames, labels)]
    tagged.extend(frames[len(tagged):])

    # Combine all frames and keep only the selected data set.
    df_all = pd.concat(tagged)
    df_all = df_all[df_all['Dataset'] == dataset_name].copy()

    def method_group(method):
        """Map a method name to the group it is plotted under."""
        if 'everyone alone' in method:
            return 'LocPop-S'
        elif 'predicted number of clusters' in method:
            return 'LocPop-P'
        elif 'output of leiden' in method:
            return 'LocPop-Ld'
        elif 'louvain' in method.lower():
            return 'Louvain'
        elif 'leiden' in method.lower():
            return 'Leiden'
        else:
            return 'Other'

    def variant_label(method):
        """Map a method name to the short label of its LP variant."""
        if 'Enemy-Averse' in method:
            return 'AE'
        elif 'Balanced' in method:
            return 'B'
        elif 'Friend-Oriented' in method:
            return 'AF'
        else:
            return 'Other'

    # Element-wise map on the 'Method' column (also well defined for an empty frame).
    df_all['method_group'] = df_all['Method'].map(method_group)
    df_all['variant'] = df_all['Method'].map(variant_label)

    # Order of the method groups along the x axis; every x position is shifted by
    # `x_shift`, and the tick positions below use the same shift.
    method_order = ['Louvain', 'Leiden', 'LocPop-S', 'LocPop-P',
                    'LocPop-Ld']
    method_pos = {method: i for i, method in enumerate(method_order)}
    x_shift = 2

    variant_colors = {
        'B': 'green',
        'AF': 'orange',
        'AE': 'blue'
    }
    # One marker per threshold label; the first three match the historical choice.
    marker_cycle = ['^', 'o', 's', 'D', 'v', 'P', 'X', '*']
    threshold_markers = {
        str(label): marker_cycle[i % len(marker_cycle)]
        for i, label in enumerate(labels)
    }
    first_threshold = str(labels[0]) if len(labels) else None
    # Horizontal offset so the three variants of one group do not overlap.
    variant_offset = {
        'AF': -0.2,
        'B': 0.0,
        'AE': 0.2
    }

    fig, ax = plt.subplots(figsize=(12, 6))

    # LP heuristics: one point per (group, variant, threshold).
    for method in method_order[2:]:  # Louvain/Leiden are handled separately below
        for variant in ['B', 'AF', 'AE']:
            for threshold, marker in threshold_markers.items():
                sub_df = df_all[
                    (df_all['method_group'] == method) &
                    (df_all['variant'] == variant) &
                    (df_all['Threshold'] == threshold)
                ]
                if sub_df.empty:
                    continue

                x_pos = method_pos[method] + variant_offset[variant] + x_shift
                means = sub_df[score_col].apply(lambda x: float(x[0]) if not pd.isna(x[0]) else np.nan).values
                stds = sub_df[score_col].apply(lambda x: float(x[1]) if not pd.isna(x[1]) else np.nan).values

                ax.scatter(
                    [x_pos] * len(means), means,
                    color=variant_colors[variant],
                    marker=marker,
                    s=80, edgecolor='black', alpha=0.8,
                    label=f'{variant} - {threshold}' if threshold == first_threshold else None
                )

                # 2-sigma error bars
                for mean, std in zip(means, stds):
                    ax.plot(
                        [x_pos, x_pos],
                        [mean - 2*std, mean + 2*std],
                        color=variant_colors[variant],
                        alpha=0.4,
                        linewidth=1
                    )

    # Leiden and Louvain: a single point each, averaged over all threshold frames.
    for method in ['Leiden', 'Louvain']:
        sub_df = df_all[df_all['method_group'] == method]
        if sub_df.empty or sub_df[score_col].isna().all():
            continue  # no valid scores for this baseline

        pairs = sub_df[score_col].dropna().values
        y_vals = [pair[0] for pair in pairs]
        y_stds = [pair[1] for pair in pairs]

        avg = np.mean(y_vals)
        # combine the per-frame stds as the root of the mean variance
        std = np.sqrt(np.mean([s**2 for s in y_stds]))

        ax.scatter(
            method_pos[method] + x_shift, avg,
            color='black', marker='o', s=80, edgecolor='white',
            label=None
        )

        ax.errorbar(
            method_pos[method] + x_shift, avg,
            yerr=2*std,
            fmt='none',
            ecolor='black',
            elinewidth=1,
            capsize=3,
            alpha=0.6
        )

    ax.set_xticks(range(x_shift, len(method_order) + x_shift))
    ax.set_xticklabels(method_order, rotation=90)
    ax.set_ylabel(score_col)
    ax.set_title(f'{score_col} by Method on {dataset_name}')
    ax.grid(True, axis='y', linestyle='--', alpha=0.6)

    # Legend: colours encode the LP variant, marker shapes encode the threshold.
    color_legend = [
        Line2D([0], [0], marker='o', color='w', label=v, markerfacecolor=c, markersize=10, markeredgecolor='black')
        for v, c in variant_colors.items()
    ]
    shape_legend = [
        Line2D([0], [0], marker=m, color='w', label=thresh, markerfacecolor='gray', markersize=10, markeredgecolor='black')
        for thresh, m in threshold_markers.items()
    ]

    # The legend is placed to the right of the axes.
    plt.legend(
        handles=color_legend + shape_legend,
        title='',
        loc='upper right',
        bbox_to_anchor=(1.28, 1),
        borderaxespad=0.5,
        fontsize=16,
        ncol=1
    )
    #plt.ylim(0,1.1)
    # The call order below is kept as it was: the layout is computed while the tick
    # labels are still rotated, then the labels are reset to horizontal.
    plt.tight_layout()
    plt.xticks(rotation=0)
    plt.subplots_adjust(right=0.75)

    return fig, ax


In [53]:
import ast
import numpy as np

def try_parse_tuple(x):
    """Coerce a table cell into a ``(mean, std)`` pair of floats.

    Accepted inputs:
      * a tuple or list of length two, e.g. ``(0.85, 0.05)``;
      * the string form of such a pair, e.g. ``'(0.85, 0.05)'`` or
        ``'[0.85, 0.05]'``; surrounding whitespace is ignored.

    Everything else — NaN, ``None``, ``'n.A.'``, empty strings, malformed text,
    pairs whose elements cannot be converted to float — yields the fallback
    ``(nan, 0.0)``.  The function never raises on bad input.
    """
    fallback = (np.nan, 0.0)

    if isinstance(x, str):
        text = x.strip()
        if not text.startswith(('(', '[')):
            return fallback
        try:
            # literal_eval only evaluates Python literals, never arbitrary code
            x = ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return fallback

    if isinstance(x, (tuple, list)) and len(x) == 2:
        try:
            return (float(x[0]), float(x[1]))
        except (TypeError, ValueError, OverflowError):
            # elements that are not numbers, e.g. ('a', 'b') or (None, 1.0)
            return fallback

    return fallback

def normalize_score_column(df, score_cols):
    """Parse every cell of the given columns with `try_parse_tuple`.

    The columns are overwritten in `df` itself, and the same frame is returned.
    """
    for column in score_cols:
        parsed = df[column].apply(try_parse_tuple)
        df[column] = parsed
    return df
def plot_and_save(dfs, labels, dataset, score, save_path=None):
    """Normalise the score column, draw the comparison plot and optionally save it.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One results frame per threshold setting.  The frames and the sequence are
        left untouched; normalisation happens on copies, so cells that other code
        still expects in their original (e.g. string) form are not rewritten.
    labels : sequence
        Threshold label for each frame.
    dataset : str
        Name of the data set to plot.
    score : str
        Name of the score column to plot.
    save_path : str or path-like, optional
        When given, the figure is written there as a 300 dpi PNG.
    """
    # `replace` returns a new frame, so the in-place column rewrite done by
    # normalize_score_column only affects that copy.
    prepared = [
        normalize_score_column(frame.replace("n.A.", np.nan), [score])
        for frame in dfs
    ]

    fig, ax = plot_custom_thresholds_with_louvain_leiden(
        dfs=prepared,
        labels=labels,
        dataset_name=dataset,
        score_col=score)
    if save_path:
        # save the figure that was just created rather than the implicit current one
        fig.savefig(save_path, format='png', dpi=300, bbox_inches='tight')
        print(f"Figure saved as {save_path}")
    plt.show()



In [49]:
import ast
import numpy as np

def _first_component(cell):
    """Return the first element of a (mean, std) cell, or NaN if the cell holds no pair.

    Handles both representations that occur in the results frames: real
    tuples/lists (frames built in memory) and their string form such as
    '(0.69, 0.005)' (frames read back from CSV).
    """
    if isinstance(cell, str) and cell.startswith('('):
        return ast.literal_eval(cell)[0]
    if isinstance(cell, (tuple, list)) and len(cell) > 0:
        return cell[0]
    return float('nan')


avg_vals = {}          # column -> list of per-frame averages
all_used_values = {}   # column -> every non-NaN value that entered an average

for df in dfs:
    for col in ['Rand Score with initial clustering']:
        vals = df[col].apply(_first_component)

        non_nan_vals = vals.dropna().tolist()
        # avoid numpy's "mean of empty slice" warning when a frame has no values
        current_avg = np.nanmean(non_nan_vals) if non_nan_vals else float('nan')

        avg_vals.setdefault(col, []).append(current_avg)
        all_used_values.setdefault(col, []).extend(non_nan_vals)

# Overall average across all DataFrames (mean of the per-frame means), ignoring NaNs
overall_avg_vals = {col: np.nanmean(values) for col, values in avg_vals.items()}

print("Overall averages:", overall_avg_vals)
print("All non-NaN values used per column:")
for col, values in all_used_values.items():
    print(f"{col}: {values}")


Overall averages: {'Rand Score with initial clustering': 0.6694821990833386}
All non-NaN values used per column:
Rand Score with initial clustering: [0.6920142602495545, 0.6921212121212121, 0.6951158645276292, 0.9405973571445332, 0.9401819452712727, 0.9407162567204995, 0.6110752191970465, 0.6109931805363278, 0.6035943188227453, 0.8046361445783132, 0.799540562248996, 0.806859437751004, 0.6925133689839573, 0.6927985739750445, 0.6937611408199643, 0.719093367542653, 0.7186352031141976, 0.7195877911579968, 0.5814233707634722, 0.5799159103727632, 0.5774906424652617, 0.8039807228915663, 0.800841767068273, 0.7952417670682731, 0.6244206773618538, 0.629982174688057, 0.6292691622103387, 0.33514628358340076, 0.3351223017735626, 0.3351594340183746, 0.4323898887350664, 0.4322668307439881, 0.4325437112239142, 0.7965975903614458, 0.800067469879518, 0.8056642570281125]


In [42]:
# Result tables dataset-0.csv .. dataset-2.csv, one per threshold setting.
# NOTE(review): absolute, machine-specific path — this cell only runs on the
# machine that has these files at exactly this location.
dfs2 = [
    pd.read_csv(f'/Users/User/Downloads/local-popular-main-final/cluster/dataset-{i}.csv')
    for i in range(3)
]
    

In [48]:
import ast
import numpy as np

avg_vals = {}          # column -> list of per-frame averages
all_used_values = {}   # column -> every non-NaN value that entered an average

for frame in dfs2:
    for col in ['Rand Score with initial clustering']:
        # Cells are strings such as '(0.69, 0.005)'; keep the first element and
        # treat every other kind of cell as missing.
        vals = frame[col].apply(
            lambda cell: ast.literal_eval(cell)[0]
            if isinstance(cell, str) and cell.startswith('(')
            else float('nan')
        )

        non_nan_vals = vals.dropna().tolist()
        current_avg = np.nanmean(non_nan_vals)

        avg_vals.setdefault(col, []).append(current_avg)
        all_used_values.setdefault(col, []).extend(non_nan_vals)

# Overall average across all DataFrames (mean of the per-frame means), ignoring NaNs
overall_avg_vals = {col: np.nanmean(per_frame) for col, per_frame in avg_vals.items()}

print("Overall averages:", overall_avg_vals)
print("All non-NaN values used per column:")
for col, used in all_used_values.items():
    print(f"{col}: {used}")


Overall averages: {'Rand Score with initial clustering': 0.6176081826379919}
All non-NaN values used per column:
Rand Score with initial clustering: [0.6594202898550724, 0.6594202898550724, 0.6519286510590857, 0.339685618729097, 0.33982831661092533, 0.34046822742474914, 0.7120597044481299, 0.7120597044481299, 0.7120597044481299, 0.43346756152125276, 0.43346756152125276, 0.43346756152125276, 0.6870457079152732, 0.6870457079152732, 0.6829654403567446, 0.4212664437012263, 0.4053199554069118, 0.40730657748049054, 0.8751330478477188, 0.8751330478477188, 0.8751330478477188, 0.5831767337807606, 0.5831767337807606, 0.5756599552572707, 0.6410256410256412, 0.6410256410256412, 0.6410256410256412, 0.50803121516165, 0.50803121516165, 0.5076387959866221, 0.9452461694596401, 0.9452461694596401, 0.9452461694596401, 0.6195973154362415, 0.6264876957494409, 0.6195973154362415]


In [60]:
# (mean, std) timings of every method on Cora for the third threshold setting
dfs[2].loc[dfs[2]['Dataset'] == 'Cora', 'Time']

11     (2.8912171499978285, 0.3872421876362815)
12    (4.8556456200021785, 0.09712032664137751)
13     (37.119912609999304, 0.7066149370785609)
14      (40.03265883999702, 0.8343622965970648)
15      (320.56660711999865, 8.726123003606416)
16      (37.11190594999935, 0.2704258180901129)
17       (40.08539384000178, 0.287763634252405)
18       (315.6107046100049, 11.13768353944769)
19      (37.30912904000142, 0.7955260071139907)
20     (40.468399850002605, 0.6748635377028401)
21     (321.74053790000033, 10.364414221766337)
Name: Time, dtype: object