In [1]:
from sklearn.metrics import adjusted_rand_score as ARI
from sklearn.metrics import adjusted_mutual_info_score as AMI 
import hypernetx as hnx
import hypernetx.algorithms.hypergraph_modularity as hmod
from collections import Counter
import igraph as ig
import pandas as pd
import numpy as np
import h_louvain as hl
import csv
import bayes_opt



In [2]:
def load_ABCDH_from_file(filename):
    """Load a hyperedge list from a CSV-style text file into a HyperNetX hypergraph.

    Every line of the file is parsed by ``csv.reader`` as one hyperedge whose
    comma-separated fields are the node names (kept as strings). Edges are
    keyed by their zero-based line index.

    Blank lines are skipped rather than turned into empty hyperedges; the
    remaining edges keep the index of the line they came from.

    Parameters
    ----------
    filename : str or path-like
        Path to the hyperedge file.

    Returns
    -------
    hnx.Hypergraph
        Hypergraph with one edge per non-empty line of the file.
    """
    # newline="" is what the csv module asks for so it can handle line endings itself.
    with open(filename, "r", newline="") as f:
        rows = list(csv.reader(f))

    # csv.reader yields [] for a blank line; filter after enumerating so the
    # ids of the real edges are unaffected by any skipped lines.
    edges = {idx: row for idx, row in enumerate(rows) if row}
    return hnx.Hypergraph(edges)

In [3]:
# Load the `results_300` example hypergraph from its hyperedge list.
HG = load_ABCDH_from_file("datasets/results_300_he.txt")
## Presumably generated with the ABCD-h command below (its output prefix matches the file name):
## julia --project abcdh.jl -n 300 -d 2.5,5,20 -c 1.5,10,30 -x 0.3 -q 0.0,0.4,0.3,0.2,0.1 -w :linear -s 1234 --stats -o results_300


In [4]:
%%time
# hmod_tau selects the hyperedge weight function:
#   w(d,c) = (c/d)^tau for c>d/2 else 0
# hmod_tau = 1 (linear)
# hmod_tau = 0 (majority)
# hmod_tau = "infinity" (strict)

# Build the hLouvain object for HG with linear weights (hmod_tau=1) and a fixed random seed.
hL = hl.hLouvain(HG, hmod_tau=1, resolution = 1, random_seed = 123) 


CPU times: user 149 ms, sys: 0 ns, total: 149 ms
Wall time: 148 ms


In [5]:
## basic hLouvain algorithm execution (without the last_step optimization)

In [6]:
%%time

# Alpha schedule: alpha_i = 1 - (1 - b)^i for i = 0..29 (first value is 0).
b = 0.8
c = 0.3  # handed to the solver as change_frequency
alphas = [1 - (1 - b) ** i for i in range(30)]

# Single-value schedules that can be tried instead:
#alphas = [0]
#alphas = [1]

A, q2, alphas_out = hL.h_louvain_community(alphas=alphas, change_frequency=c)

print("alphas_out", alphas_out)
print("final_alpha", alphas_out[-1])
print("q2 =", q2)

alphas_out [0.0, 0.8, 0.96, 1]
final_alpha 1
q2 = 0.5309079514468399
CPU times: user 802 ms, sys: 0 ns, total: 802 ms
Wall time: 802 ms


In [7]:
# Check the result: recompute the h-modularity (alpha = 1) of partition A and print the partition.
# In the recorded output this value equals the q2 returned by h_louvain_community (final alpha was 1).
qH = hL.combined_modularity(A, alpha = 1,hmod_tau=1,resolution=1)
print("qH", qH)
print("partition",A)

qH 0.5309079514468399
partition [{'97', '251', '173', '106', '143', '78', '265', '155', '151', '20', '18', '237', '116', '223', '210', '229', '128', '228', '85', '95', '249', '132', '46', '288', '188', '109', '12', '193', '72', '88', '152', '145', '247', '222'}, {'4', '290', '114', '177', '82', '63', '40', '13', '162', '255', '167', '186', '147', '62', '180', '236', '44', '32', '235', '270', '283'}, {'131', '163', '47', '254', '42', '36', '298', '170', '142', '59', '10', '294', '99', '264', '69', '144'}, {'127', '207', '176', '3', '129', '33', '221', '250', '165', '187'}, {'209', '89', '87', '92', '121', '90', '28', '68', '198', '41', '16', '154', '110', '123', '34', '189', '280', '71', '119', '83', '230', '268', '226', '185', '141'}, {'102', '49', '205', '140', '244', '220', '269', '58', '225', '266', '2', '234', '166', '172', '211', '200', '258', '73', '216', '224', '104', '233', '31', '45', '201', '67', '15', '48', '241', '29', '202', '35'}, {'287', '118', '212', '245', '246', '168'

In [8]:
## enhanced algorithm execution (with the last step optimization)

In [9]:
%%time

# Alpha schedule: alpha_i = 1 - (1 - b)^i for i = 0..29 (first value is 0).
b = 0.8
c = 0.3  # handed to the solver as change_frequency
alphas = [1 - (1 - b) ** i for i in range(30)]

# Als / qH_ls: partition and score after the last-step optimization;
# A / qH_basic: partition and score of the basic run.
Als, A, qH_ls, qH_basic, alphas_out = hL.h_louvain_community_plus_last_step(
    alphas=alphas,
    change_frequency=c,
)

print("alphas_out", alphas_out)
print("final_alpha", alphas_out[-1])
print("qH-basic =", qH_basic)
print("qH+last_step =", qH_ls)


alphas_out [0.0, 0.8, 0.96, 1]
final_alpha 1
qH-basic = 0.5309079514468399
qH+last_step = 0.5324466112772114
CPU times: user 4.62 s, sys: 14.4 ms, total: 4.63 s
Wall time: 4.62 s


In [10]:
# Check the results: recompute the h-modularity (alpha = 1) of the basic partition A and print it.
qH = hL.combined_modularity(A, alpha = 1,hmod_tau=1,resolution=1)
print("qH", qH)
print("partition",A)

# Same check for Als, the partition returned by the last-step variant.
qHls = hL.combined_modularity(Als, alpha = 1,hmod_tau=1,resolution=1)
print("qH_last_step", qHls)
print("partition",Als)

qH 0.5309079514468399
partition [{'97', '251', '173', '106', '143', '78', '265', '155', '151', '20', '18', '237', '116', '223', '210', '229', '128', '228', '85', '95', '249', '132', '46', '288', '188', '109', '12', '193', '72', '88', '152', '145', '247', '222'}, {'4', '290', '114', '177', '82', '63', '40', '13', '162', '255', '167', '186', '147', '62', '180', '236', '44', '32', '235', '270', '283'}, {'131', '163', '47', '254', '42', '36', '298', '170', '142', '59', '10', '294', '99', '264', '69', '144'}, {'127', '207', '176', '3', '129', '33', '221', '250', '165', '187'}, {'209', '89', '87', '92', '121', '90', '28', '68', '198', '41', '16', '154', '110', '123', '34', '189', '280', '71', '119', '83', '230', '268', '226', '185', '141'}, {'102', '49', '205', '140', '244', '220', '269', '58', '225', '266', '2', '234', '166', '172', '211', '200', '258', '73', '216', '224', '104', '233', '31', '45', '201', '67', '15', '48', '241', '29', '202', '35'}, {'287', '118', '212', '245', '246', '168'

In [11]:
## ground truth

# One integer community label per line of the assignment file.
with open("datasets/results_300_assign.txt", 'r') as file:
    gt = list(map(int, file))

# The label on line k (1-based) belongs to node str(k). Build the
# node -> label mapping, convert it to a partition and drop empty parts.
A_gt = [
    part
    for part in hmod.dict2part({str(pos): label for pos, label in enumerate(gt, start=1)})
    if len(part) > 0
]

# h-modularity (alpha = 1) of the ground-truth partition.
gt_mod = hL.combined_modularity(A_gt, alpha=1, hmod_tau=1, resolution=1)

print("ground-truth partition")
print(A_gt)
print("qh-gt:", gt_mod)

def getAMI_ARI(HG, gt, A):
    """Compare partition ``A`` with the ground-truth labels ``gt``.

    ``A`` is converted with ``hmod.part2dict`` into a lookup keyed by node
    name. Nodes are then read in the order "1", "2", ..., up to the number of
    nodes in ``HG``, and the resulting label list is scored against ``gt``.

    Returns
    -------
    tuple
        ``(ARI, AMI)`` of ``gt`` versus the labels derived from ``A``.
    """
    membership = hmod.part2dict(A)
    node_count = len(HG.nodes)
    predicted = [membership[str(node_id)] for node_id in range(1, node_count + 1)]
    return ARI(gt, predicted), AMI(gt, predicted)

ground-truth partition
[{'96', '191', '203', '286', '271', '77', '117', '227', '254', '197', '300', '112', '81', '125', '135', '124', '79', '65', '285', '272', '70', '183', '134', '260', '182', '148', '14', '144'}, {'209', '89', '87', '92', '121', '90', '28', '68', '198', '41', '16', '154', '110', '123', '34', '189', '280', '71', '119', '83', '230', '268', '226', '185', '141'}, {'251', '173', '266', '143', '78', '265', '151', '18', '116', '223', '210', '229', '228', '95', '221', '46', '288', '296', '12', '109', '72', '88', '152', '247', '222'}, {'4', '290', '114', '82', '177', '63', '40', '13', '162', '255', '167', '186', '147', '261', '62', '180', '236', '44', '32', '235', '270', '283'}, {'102', '49', '140', '257', '269', '58', '2', '166', '172', '200', '258', '216', '104', '299', '31', '45', '67', '174', '195', '29', '35', '201'}, {'66', '297', '43', '93', '50', '238', '80', '276', '211', '57', '284', '190', '169', '103', '149', '241', '213', '74', '179'}, {'157', '208', '214', '184'

In [12]:
# AMI and ARI of the basic partition A against the ground truth,
# plus the number of communities found vs. the number in the ground truth.

ari, ami = getAMI_ARI(HG,gt,A)
print("ARI =", ari)
print("AMI =", ami)
print("comm =", len(A))
print("comm-gt =", len(A_gt))

ARI = 0.7607284749397808
AMI = 0.8582215418720631
comm = 15
comm-gt = 18


In [13]:
# AMI and ARI of Als (the result after the last-step optimization) against the ground truth,
# plus the number of communities found vs. the number in the ground truth.

ari, ami = getAMI_ARI(HG,gt,Als)
print("ARI =", ari)
print("AMI =", ami)
print("comm =", len(Als))
print("comm-gt =", len(A_gt))

ARI = 0.7647769209702481
AMI = 0.8592088864373395
comm = 15
comm-gt = 18


### Bigger example 

In [14]:
# Load the larger `results_3000` example; this rebinds HG, replacing the 300-node hypergraph.
HG = load_ABCDH_from_file("datasets/results_3000_he.txt")
## Presumably generated with the ABCD-h command below (its output prefix matches the file name):
## julia --project abcdh.jl -n 3000 -d 2.5,5,20 -c 1.5,10,30 -x 0.3 -q 0.0,0.4,0.3,0.2,0.1 -w :linear -s 1234 --stats -o results_3000


In [15]:
%%time

# Rebuild the hLouvain object for the hypergraph currently bound to HG (hmod_tau=1, fixed random seed).
hL = hl.hLouvain(HG, hmod_tau=1, resolution = 1, random_seed = 5673) 

CPU times: user 7.08 s, sys: 11.4 ms, total: 7.09 s
Wall time: 7.09 s


In [16]:
%%time

# Alpha schedule: alpha_i = 1 - (1 - b)^i for i = 0..29 (first value is 0).
b = 0.8
c = 0.3  # handed to the solver as change_frequency
alphas = [1 - (1 - b) ** i for i in range(30)]

# Single-value schedules that can be tried instead:
#alphas = [0]
#alphas = [1]

A, q2, alphas_out = hL.h_louvain_community(alphas=alphas, change_frequency=c)

print("alphas_out", alphas_out)
print("final_alpha", alphas_out[-1])
print("q2 =", q2)

alphas_out [0.0, 0.8, 0.96, 0.992, 1]
final_alpha 1
q2 = 0.568983501700291
CPU times: user 18.5 s, sys: 47.9 ms, total: 18.6 s
Wall time: 18.6 s


In [17]:
%%time

# Alpha schedule: alpha_i = 1 - (1 - b)^i for i = 0..29 (first value is 0).
b = 0.8
c = 0.3  # handed to the solver as change_frequency
alphas = [1 - (1 - b) ** i for i in range(30)]

# Als / qH_ls: partition and score after the last-step optimization;
# A / qH_basic: partition and score of the basic run.
Als, A, qH_ls, qH_basic, alphas_out = hL.h_louvain_community_plus_last_step(
    alphas=alphas,
    change_frequency=c,
)

print("alphas_out", alphas_out)
print("final_alpha", alphas_out[-1])
print("qH-basic =", qH_basic)
print("qH+last_step =", qH_ls)

alphas_out [0.0, 0.8, 0.96, 0.992, 1]
final_alpha 1
qH-basic = 0.568983501700291
qH+last_step = 0.5720527848869881
CPU times: user 1min 6s, sys: 292 ms, total: 1min 6s
Wall time: 1min 6s


## Weighted graph

In [18]:
import pickle

def load_GoT():
    """Load the GoT dataset as a HyperNetX hypergraph with per-edge weights.

    Reads the tuple ``(Edges, Names, Weights)`` from ``datasets/GoT.pkl``,
    prints the node and edge counts, builds a hypergraph whose edges are keyed
    by their position in ``Edges``, and stores ``Weights[e]`` as the ``weight``
    attribute of every edge ``e``.

    Returns
    -------
    hnx.Hypergraph
        The hypergraph with edge weights attached.
    """
    # NOTE: unpickling can execute arbitrary code -- only load a trusted GoT.pkl.
    # The context manager closes the file handle, which the bare open() call leaked.
    with open("datasets/GoT.pkl", "rb") as f:
        Edges, Names, Weights = pickle.load(f)
    print(len(Names),'nodes and',len(Edges),'edges')

    HG = hnx.Hypergraph(dict(enumerate(Edges)))
    ## add edge weights (Weights is indexed by the same ids used as edge keys)
    for e in HG.edges:
        HG.edges[e].weight = Weights[e]
    return HG

# Rebind HG to the weighted GoT hypergraph (the loader also prints its node and edge counts).
HG = load_GoT()

198 nodes and 1492 edges


In [19]:
# Build the hLouvain object for the hypergraph currently bound to HG, using hmod_tau="infinity" and a fixed random seed.
hL = hl.hLouvain(HG, hmod_tau="infinity", resolution = 1, random_seed = 5673) 

In [20]:
%%time

# Alpha schedule: alpha_i = 1 - (1 - b)^i for i = 0..29 (first value is 0).
b = 0.7
c = 0.2  # handed to the solver as change_frequency
alphas = [1 - (1 - b) ** i for i in range(30)]

# Single-value schedules that can be tried instead:
#alphas = [0]
#alphas = [1]

A, q2, alphas_out = hL.h_louvain_community(alphas=alphas, change_frequency=c)

print("alphas_out", alphas_out)
print("final_alpha", alphas_out[-1])
print("q2 =", q2)

  [wdc[k[1]][k[0]]* v[0] for (k, v) in _df.iterrows() if k[0] > k[1] / 2]


alphas_out [0.0, 0.7, 0.9099999999999999, 1]
final_alpha 1
q2 = 0.5700210291240112
CPU times: user 1min 46s, sys: 249 ms, total: 1min 46s
Wall time: 1min 46s


In [21]:
%%time

### Second run on the same hL object. c and b can be changed here; as written they
### repeat the previous cell's values. Faster since the degree taxes are precalculated.

# Alpha schedule: alpha_i = 1 - (1 - b)^i for i = 0..29 (first value is 0).
b = 0.7
c = 0.2  # handed to the solver as change_frequency
alphas = [1 - (1 - b) ** i for i in range(30)]

# Single-value schedules that can be tried instead:
#alphas = [0]
#alphas = [1]

A, q2, alphas_out = hL.h_louvain_community(alphas=alphas, change_frequency=c)

print("alphas_out", alphas_out)
print("final_alpha", alphas_out[-1])
print("q2 =", q2)

  [wdc[k[1]][k[0]]* v[0] for (k, v) in _df.iterrows() if k[0] > k[1] / 2]


alphas_out [0.0, 0.7, 0.9099999999999999, 1]
final_alpha 1
q2 = 0.5700210291240112
CPU times: user 1.01 s, sys: 112 µs, total: 1.01 s
Wall time: 990 ms


In [22]:
%%time

import warnings

# Run the last-step variant with all warnings silenced for the duration of the block
# (the `action` argument of catch_warnings needs Python 3.11+).
with warnings.catch_warnings(action="ignore"):
    # Alpha schedule: alpha_i = 1 - (1 - b)^i for i = 0..29 (first value is 0).
    b = 0.8
    c = 0.3  # handed to the solver as change_frequency
    alphas = [1 - (1 - b) ** i for i in range(30)]

    # Als / qH_ls: partition and score after the last-step optimization;
    # A / qH_basic: partition and score of the basic run.
    Als, A, qH_ls, qH_basic, alphas_out = hL.h_louvain_community_plus_last_step(
        alphas=alphas,
        change_frequency=c,
    )

    print("alphas_out", alphas_out)
    print("final_alpha", alphas_out[-1])
    print("qH-basic =", qH_basic)
    print("qH+last_step =", qH_ls)
    

alphas_out [0.0, 0.8, 0.96, 0.992, 1]
final_alpha 1
qH-basic = 0.5700585597535348
qH+last_step = 0.5717924615951986
CPU times: user 1min 52s, sys: 144 ms, total: 1min 52s
Wall time: 1min 52s
