In [1]:
import pandas as pd
from sklearn.metrics import normalized_mutual_info_score
import numpy as np
import re
from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import jaccard_score

In [3]:
# Node memberships of five resting-state networks, taken from another study
# and used as the reference against which the HbT findings are compared.
# Keys are network labels; the bracketed number is stripped later when the
# display name is built (NOTE(review): its meaning is not stated here - confirm).
# Values are node ids. A node may be listed under several networks
# (e.g. node 10 appears in Sensorimotor, FPC, DMN and Visual).

data = {
    "Sensorimotor (12)": [9, 10.0, 12.0, 14.0, 18.0, 19.0, 20.0, 22.0, 23.0, 24.0, 25.0, 29.0, 41.0, 42.0, 43.0, 52.0, 54.0],
    "Auditory (31)": [13, 14.0, 15.0, 16.0, 21.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 31.0, 59.0, 60.0, 62.0, 63.0],
    "FPC (19)": [4, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 17.0, 19.0, 20.0, 21.0, 23.0, 24.0, 27.0, 28.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 49.0, 50.0, 51.0],
    "DMN (6)": [4, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 17.0, 18.0, 19.0, 20.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 49.0, 50.0, 51.0, 53.0],
    "Visual (4)": [0, 1.0, 2.0, 10.0, 13.0, 15.0, 16.0, 32.0, 33.0, 34.0, 35.0, 45.0, 47.0, 48.0, 63.0],
}

In [4]:
# Integer community id for each reference network, numbered in listing order
network_mapping = {
    label: index
    for index, label in enumerate(
        ("Sensorimotor (12)", "Auditory (31)", "FPC (19)", "DMN (6)", "Visual (4)")
    )
}

In [5]:
# Flatten the reference networks into (node, community_id, network_name) records
node_community_pairs = []

for network, nodes in data.items():
    # Display name is the dictionary key without its bracketed suffix
    label = re.sub(r"\s*\(.*?\)", "", network)
    network_id = network_mapping[network]
    node_community_pairs.extend((node, network_id, label) for node in nodes)

In [6]:
# Tabulate the records: one row per (node, network) membership
df = pd.DataFrame(
    data=node_community_pairs,
    columns=["node", "community_id", "network_name"],
)

In [7]:
df

Unnamed: 0,node,community_id,network_name
0,9.0,0,Sensorimotor
1,10.0,0,Sensorimotor
2,12.0,0,Sensorimotor
3,14.0,0,Sensorimotor
4,18.0,0,Sensorimotor
...,...,...,...
92,35.0,4,Visual
93,45.0,4,Visual
94,47.0,4,Visual
95,48.0,4,Visual


In [8]:
# The node column was inferred as float from the mixed int/float literals; store it as integers
df = df.astype({'node': int})

In [11]:
# Save every (node, network) membership row, overlapping nodes included, without the index column
df.to_csv('HbT_rsNetworks.csv',index=False)

In [16]:
# Keep one row per node: a node listed under several networks retains only
# its first occurrence in the table (the default keep='first')
df_unique = df.drop_duplicates(subset='node')

In [18]:
# Renumber the rows 0..n-1 after the duplicates were removed
df_unique = df_unique.reset_index(drop=True)

In [19]:
# Save the one-row-per-node table.
# NOTE(review): unlike the other to_csv calls in this notebook, index=False is
# not passed here, so the row index is written as an extra unnamed first
# column - confirm this is intended before relying on the file's layout.
df_unique.to_csv('RSN_df_ground_truth.csv')

In [12]:
# Load the reference network table written earlier in this notebook. This is the
# full 'HbT_rsNetworks.csv' file (a node can appear under several networks),
# not the de-duplicated 'RSN_df_ground_truth.csv'.
ground_truth_df = pd.read_csv('HbT_rsNetworks.csv')

ground_truth_df

Unnamed: 0,node,community_id,network_name
0,9,0,Sensorimotor
1,10,0,Sensorimotor
2,12,0,Sensorimotor
3,14,0,Sensorimotor
4,18,0,Sensorimotor
...,...,...,...
92,35,4,Visual
93,45,4,Visual
94,47,4,Visual
95,48,4,Visual


In [10]:
def create_community_dict(df, community_col='community_id', node_col='node'):
    """Group node ids by community.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding at least ``community_col`` and ``node_col``.
    community_col : str
        Column whose values identify the community.
    node_col : str
        Column holding the node ids.

    Returns
    -------
    dict
        Maps each distinct ``community_col`` value to the set of
        ``node_col`` values found in that community's rows.
    """
    return {
        key: set(members[node_col])
        for key, members in df.groupby(community_col)
    }

# Node sets of the reference resting-state networks, keyed by community id
resting_state_dict = create_community_dict(
    ground_truth_df, community_col='community_id', node_col='node'
)

In [11]:
# Read the contrast-3 community assignments produced by each detection method
girvan_newman_df, louvain_df, isomap_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'communities_girvan_newman/contrast_3_final_communities.csv',
        'communities_louvain/contrast_3_final_communities.csv',
        'communities_isomap/contrast_3_final_communities.csv',
    )
)

# Preview the first rows of each table to check the format
for preview_df in (girvan_newman_df, louvain_df, isomap_df):
    print(preview_df.head())

   contrast_id  community_id  node       label_name       lobe hemisphere
0            3             0     0         Cuneus_L  Occipital          L
1            3             0    32         Cuneus_R  Occipital          R
2            3             0    33  Occipital_Sup_R  Occipital          R
3            3             0    34         Cuneus_L  Occipital          L
4            3             0    35         Cuneus_R  Occipital          R
   contrast_id  community_id  node          label_name      lobe hemisphere
0            3             0    31      Temporal_Mid_L  Temporal          L
1            3             0    61           Angular_R  Parietal          R
2            3             0    56     SupraMarginal_R  Parietal          R
3            3             0    54       Postcentral_R  Parietal          R
4            3             0    52  Frontal_Inf_Oper_R   Frontal          R
   contrast_id  community_id  node       label_name       lobe hemisphere
0            3            

In [12]:
# Node sets per detected community, one dictionary per detection method
community_gn_dict, community_ln_dict, community_iso_dict = map(
    create_community_dict,
    (girvan_newman_df, louvain_df, isomap_df),
)

In [13]:
def jaccard_similarity(set1, set2):
    """Return the Jaccard index of two sets as a float.

    The index is ``len(set1 & set2) / len(set1 | set2)``.

    Parameters
    ----------
    set1, set2 : set
        The two collections of node ids to compare.

    Returns
    -------
    float
        A value in [0.0, 1.0]. When both sets are empty the ratio is
        undefined; 0.0 is returned so every code path yields a float.
    """
    union_size = len(set1 | set2)
    if union_size == 0:
        # Nothing to compare: report "no similarity" rather than dividing by zero
        return 0.0
    return len(set1 & set2) / union_size

def compare_communities_with_networks(community_dict, resting_state_dict):
    """Score every detected community against every resting-state network.

    Parameters
    ----------
    community_dict : dict
        Maps a community id to the set of nodes in that community.
    resting_state_dict : dict
        Maps a resting-state network id to the set of nodes in that network.

    Returns
    -------
    list of tuple
        One ``(community_id, rs_network_id, similarity)`` tuple per pair,
        where ``similarity`` comes from ``jaccard_similarity``. Pairs are
        ordered by community first, then by network, following the
        iteration order of the two dictionaries.
    """
    return [
        (comm_key, rs_key, jaccard_similarity(members, rs_members))
        for comm_key, members in community_dict.items()
        for rs_key, rs_members in resting_state_dict.items()
    ]

# Pairwise Jaccard scores against the reference networks, one list per detection method
results_gn, results_ln, results_iso = (
    compare_communities_with_networks(detected, resting_state_dict)
    for detected in (community_gn_dict, community_ln_dict, community_iso_dict)
)

In [14]:
# Put the score tuples into DataFrames so they can be sorted and grouped
result_columns = ['community_id', 'rs_network_id', 'jaccard_similarity']
df_results_gn = pd.DataFrame(results_gn, columns=result_columns)
df_results_ln = pd.DataFrame(results_ln, columns=result_columns)
df_results_iso = pd.DataFrame(results_iso, columns=result_columns)

In [15]:
# Order each score table from highest to lowest similarity; the best-match
# selection that follows depends on this ordering
df_results_gn = df_results_gn.sort_values(by='jaccard_similarity', ascending=False)
df_results_ln = df_results_ln.sort_values(by='jaccard_similarity', ascending=False)
df_results_iso = df_results_iso.sort_values(by='jaccard_similarity', ascending=False)

In [16]:
# For every detected community keep its first row, i.e. the highest-scoring
# reference network given the descending sort applied beforehand
best_matches_gn = df_results_gn.groupby('community_id', as_index=False).first()
best_matches_ln = df_results_ln.groupby('community_id', as_index=False).first()
best_matches_iso = df_results_iso.groupby('community_id', as_index=False).first()

In [17]:
# Lookup from reference community id to its network name
rs_network_dict = dict(
    zip(ground_truth_df['community_id'], ground_truth_df['network_name'])
)

In [18]:
rs_network_dict

{0: 'Sensorimotor', 1: 'Auditory', 2: 'FPC', 3: 'DMN', 4: 'Visual'}

In [19]:
# Attach the readable network name of the best-matching reference network
for matches in (best_matches_gn, best_matches_ln, best_matches_iso):
    matches['network_name'] = matches['rs_network_id'].map(rs_network_dict)

In [24]:
# Show the best match per community for Girvan-Newman, Louvain and Isomap, in that order
for matches_table in (best_matches_gn, best_matches_ln, best_matches_iso):
    print(matches_table)

   community_id  rs_network_id  jaccard_similarity  network_name
0             0              4            0.687500        Visual
1             1              1            0.350000      Auditory
2             2              0            0.304348  Sensorimotor
3             3              0            0.363636  Sensorimotor
4             4              3            0.652174           DMN
   community_id  rs_network_id  jaccard_similarity  network_name
0             0              0            0.461538  Sensorimotor
1             1              3            0.478261           DMN
2             2              1            0.363636      Auditory
3             3              3            0.217391           DMN
4             4              4            0.687500        Visual
   community_id  rs_network_id  jaccard_similarity  network_name
0             0              3            0.233333           DMN
1             1              1            0.133333      Auditory
2             2          

In [32]:
# Lookup from detected community id to the name of its best-matching network
name_mapping_dict_gn, name_mapping_dict_ln, name_mapping_dict_iso = (
    matches.set_index('community_id')['network_name'].to_dict()
    for matches in (best_matches_gn, best_matches_ln, best_matches_iso)
)

In [21]:
name_mapping_dict_gn

{0: 'Visual', 1: 'Auditory', 2: 'Sensorimotor', 3: 'Sensorimotor', 4: 'DMN'}

In [22]:
name_mapping_dict_ln

{0: 'Sensorimotor', 1: 'DMN', 2: 'Auditory', 3: 'DMN', 4: 'Visual'}

In [33]:
# Label every node with the network matched to its community, per detection method
for communities, community_to_network in (
    (girvan_newman_df, name_mapping_dict_gn),
    (louvain_df, name_mapping_dict_ln),
    (isomap_df, name_mapping_dict_iso),
):
    communities['network_name'] = communities['community_id'].map(community_to_network)

In [27]:
# Join the Girvan-Newman table with the Louvain network labels on node, so each
# row carries both labels (network_name_gn / network_name_ln). The default
# inner join keeps only nodes present in both tables.
merged_df = pd.merge(
    girvan_newman_df,
    louvain_df[['node', 'network_name']],
    on='node',
    suffixes=('_gn', '_ln'),
)

In [28]:
merged_df

Unnamed: 0,contrast_id,community_id,node,label_name,lobe,hemisphere,network_name_gn,network_name_ln
0,3,0,0,Cuneus_L,Occipital,L,Visual,Visual
1,3,0,32,Cuneus_R,Occipital,R,Visual,Visual
2,3,0,33,Occipital_Sup_R,Occipital,R,Visual,Visual
3,3,0,34,Cuneus_L,Occipital,L,Visual,Visual
4,3,0,35,Cuneus_R,Occipital,R,Visual,Visual
...,...,...,...,...,...,...,...,...
59,3,4,6,Frontal_Sup_Medial_L,Frontal,L,DMN,DMN
60,3,4,5,Frontal_Sup_L,Frontal,L,DMN,DMN
61,3,4,4,Frontal_Sup_Medial_L,Frontal,L,DMN,DMN
62,3,4,38,Frontal_Sup_R,Frontal,R,DMN,DMN


In [29]:
# Per-node network labels from each method, taken in the same row order
gn_labels, louvain_labels = (
    merged_df[column].tolist()
    for column in ('network_name_gn', 'network_name_ln')
)

In [31]:
# NMI between the two labelings for contrast 3 (HbT). The labels are the matched
# network names, so communities matched to the same network count as one cluster.

nmi_score = normalized_mutual_info_score(labels_true=louvain_labels, labels_pred=gn_labels)

print(f'NMI between Girvan-Newman and Louvain method: {nmi_score}')

NMI between Girvan-Newman and Louvain method: 0.8005166034741567


In [35]:
# Save each community table, now carrying its network_name column, without the index
for labelled_df, csv_name in (
    (girvan_newman_df, 'RSN_df_girvan_newman.csv'),
    (louvain_df, 'RSN_df_louvain.csv'),
    (isomap_df, 'RSN_df_isomap.csv'),
):
    labelled_df.to_csv(csv_name, index=False)