# Notebook to show how to analyse the results of the workplace dataset

In [1]:
import pandas as pd
import numpy as np
import sklearn.metrics as sm

import tools as tl

In [2]:
''' Notebook parameters '''
# Folder that holds the input files `<data>.npz` and `<data>_meta.csv`.
in_folder = '../data/input/'
# Dataset name: used as the file stem of the inputs and inside the result file names.
data = 'workplace'
# Flag interpolated into the result file names as `cU{cU}`; it has to match the value used
# when the results were produced. Presumably the "constraint on u" model option -- TODO confirm.
cU = False

In [3]:
''' Import data '''
# Reads the dataset archive and its node metadata, then prints a short summary
# (number of nodes / hyperedges, hyperedge-size distribution, metadata class counts).
filename = in_folder + data + '.npz'
df_meta = pd.read_csv(in_folder + data + '_meta.csv')
# NOTE(security): allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code. Only load archives that you produced yourself or otherwise trust.
Data = np.load(filename, allow_pickle=True)
A = Data['A']  # not used below in this notebook; presumably an adjacency array -- TODO confirm
B = Data['B']  # its shape is unpacked below as (number of nodes, number of hyperedges)
hye = Data['hyperedges']
hyL = [len(e) for e in hye]  # size of every hyperedge

N, E = B.shape
Ds, countDs = np.unique(hyL, return_counts=True)
groups, countG = np.unique(df_meta['class'], return_counts=True)

print(f'Number of nodes N = {N}')
print(f'Number of hyperedges E = {E}')
print('Hyperedges degree, counts, and proportions:', Ds, countDs, np.round(countDs / countDs.sum(), 3))
print('Metadata classes and counts:', groups, countG)

Number of nodes N = 92
Number of hyperedges E = 788
Hyperedges degree, counts, and proportions: [2 3 4] [742  44   2] [0.942 0.056 0.003]
Metadata classes and counts: ['DISQ' 'DMCT' 'DSE' 'SFLE' 'SRH'] [15 26 34  4 13]


### Communities
Results obtained by running `main.py`.

In [4]:
''' Import inferred parameters '''
# One .npz archive per model, loaded in the order HyMT, GrMT, PaMT.
# The file names embed the dataset name and the cU flag set in the parameters cell.
theta_HyMT, theta_GrMT, theta_PaMT = (
    np.load(path)
    for path in (
        f'../data/output/theta_{data}_cU{cU}_HyMT.npz',
        f'../data/output/theta_{data}_cU{cU}_GrMT.npz',
        f'../data/output/theta_{data}_cU{cU}_PaMT.npz',
    )
)

In [5]:
''' Membership matrices u, one entry per model '''
# Each model's 'u' array is passed through tl.normalize_nonzero_membership before use
# (presumably a row normalisation that skips all-zero rows -- TODO confirm in tools.py).
u = {
    mod: tl.normalize_nonzero_membership(theta['u'])
    for mod, theta in (('HyMT', theta_HyMT), ('GrMT', theta_GrMT), ('PaMT', theta_PaMT))
}

In [8]:
''' Metadata partition '''
# One-hot matrix of the metadata classes with the same shape and dtype as the inferred
# membership matrices: row i has a single 1 in the column given by node i's classID.
# Assumes classID holds integer column indices that fit in u['HyMT'].shape[1] -- TODO confirm.
groups = df_meta['classID'].values
u_gt = np.zeros_like(u['HyMT'])
u_gt[np.arange(N), groups[:N]] = 1

In [9]:
''' Label permutation '''
# Reorder the columns of every inferred membership matrix using the matrix returned by
# tl.CalculatePermutation(u[mod], u_gt); presumably this is the permutation that best
# matches the inferred communities to the metadata classes -- TODO confirm in tools.py.
# Note that u is overwritten in place, so re-running the cell permutes the already-permuted u.
for mod in ('HyMT', 'GrMT', 'PaMT'):
    P = tl.CalculatePermutation(u[mod], u_gt)
    u[mod] = u[mod].dot(P)

In [22]:
''' Weighted F1-score between the inferred communities and the metadata partition '''
# Memberships are turned into hard labels by taking the arg-max column of each row.
print('F1-score')
labels = {'HyMT': 'Hypergraph-MT', 'GrMT': 'Graph-MT', 'PaMT': 'Pairs-MT'}
y_true = u_gt.argmax(axis=1)  # metadata labels, identical for every model
for mod, name in labels.items():
    y_pred = u[mod].argmax(axis=1)
    score = sm.f1_score(y_true, y_pred, average='weighted')
    print(name, np.round(score, 3))

F1-score
Hypergraph-MT 0.829
Graph-MT 0.82
Pairs-MT 0.83


### Hyperedge prediction
Results obtained by running `main_cv.py`.

In [26]:
''' Load the results of the 5-fold Cross-Validation routine '''
folds = pd.read_csv(f'../data/output/5-fold_cv/{data}_cU{cU}.csv')
# The summaries below aggregate over rows, so the table must contain exactly five of them.
assert folds.shape[0] == 5

In [34]:
''' Average over the folds '''
# Select the test-AUC columns by name instead of the positional slice [9:], so the summary
# does not depend on how many other columns precede them in the CSV, and so the mean is
# only computed on the columns that are reported.
folds.filter(regex=r'^auc_test_').mean(axis=0).round(3)

auc_test_HyMT         0.742
auc_test_GrMT         0.744
auc_test_HyMTpairs    0.729
auc_test_GrMTpairs    0.730
auc_test_PaMT         0.734
dtype: float64

In [35]:
''' Standard deviations over the folds '''
# Select the test-AUC columns by name instead of the positional slice [9:].
# ddof=0 (population standard deviation) is what np.std forwards to DataFrame.std, so it is
# spelled out here to keep the reported numbers unchanged; pandas' own default is ddof=1.
folds.filter(regex=r'^auc_test_').std(axis=0, ddof=0).round(3)

auc_test_HyMT         0.025
auc_test_GrMT         0.030
auc_test_HyMTpairs    0.020
auc_test_GrMTpairs    0.022
auc_test_PaMT         0.022
dtype: float64