# Community detection

In [None]:
!pip install python-louvain
!pip install scikit-network

In [1]:
from community import community_louvain
import itertools
import networkx as nx
from networkx import edge_betweenness_centrality as betweenness
from networkx.algorithms import community
import numpy as np
from operator import itemgetter
import pandas as pd
import re

In [2]:
def network_from_edges(edges):
  dict_edges = {}
  for ind in range(len(edges)):
    if edges.iloc[ind,2] > 10:
      dict_edges[(edges.iloc[ind,0], edges.iloc[ind,1])] = edges.iloc[ind,2]
  net = pd.DataFrame.from_dict(dict_edges,orient='index')
  net.columns = ["weight"]
  net.sort_values(by="weight",inplace=True, ascending=False)
  return dict_edges, net


def get_graph(network):
  G = nx.Graph()
  for edge in network:
      #we can filter edges by weight by uncommenting the next line and setting desired weight threshold
      G.add_edge(edge[0], edge[1], weight=network[edge])
  
  #G.add_weighted_edges_from(up_weighted, weight='weight')
  return G


def extract_text(df):       #extract the text from the tweets and RT
                            #works ONLY on .csv file
  list_strings = []
  for index in range(len(df)):
    text = df.loc[index]['text']                          #if it is nor trucated nor a RT  i take "text"
    string = -1
    if (df.loc[index,"truncated"] == True):                 #if it is trucated I take "extended_tweet"
        string = df.loc[index,"extended_tweet"]
    if type(df.loc[index,"retweeted_status"]) != float:     #if it is a RT I take retweeted_status
        string = df.loc[index,"retweeted_status"]
    if type(string) == str :
        if(re.search('full_text\':(.+?)https',string) != None):     #if I find "full_text"
          s = re.search('full_text\':(.+?)https',string).group(1)
        if(re.search('text\':(.+?)https',string)!= None):
          s = re.search('text\':(.+?)https',string).group(1)
        else: 
          continue
        list_strings.append(s)
    else:
      list_strings.append(text)

  return list_strings


def frequency_dictionary(df):
  unique_words = {}

  for row in df:
    for word in row.split():
      #if the word is encountered for the first time add to dict as key and set its value to 0
      unique_words.setdefault(word,0)
      #increase the value (i.e the count) of the word by 1 every time it is encountered
      unique_words[word] += 1

  return unique_words


def adj_matrix(node_list, edge_list):   

  words = sorted(list(node_list['Id']))

  A = pd.DataFrame(np.zeros((len(words),len(words))), columns = words, index = words)
  print('A shape: ', A.shape)
  for row in range(len(edge_list)):
    if row%1000==0:
      print(str(round(row/1000)) + '/' + str(round(len(edge_list)/1000)))
    word1 = edge_list.iloc[row, 0]
    word2 = edge_list.iloc[row, 1]
    weight = edge_list.iloc[row, 2]
    A[word1, word2] = weight
    A[word2, word1] = weight
  print('A shape: ', A.shape)
  return A

In [3]:
def community_check(community, dict_1, dict_2, G_1, G_2, thr):
  sum_1 = 0
  sum_2 = 0
  sum_12 = 0
  for key in community:
    if (key in list(dict_1.keys())) and (key in list(dict_2.keys())):
      if dict_1[key] / sum(list(dict_1.values())) - dict_2[key] / sum(list(dict_2.values())) > thr:
        sum_1 += 1
      elif dict_1[key] / sum(list(dict_1.values())) - dict_2[key] / sum(list(dict_2.values())) < -thr:
        sum_2 += 1  
      else:
        sum_12 += 1 
    elif key in list(G_1.nodes()):
      sum_1 += 1
    elif key in list(G_2.nodes()):
      sum_2 += 1
  return sum_1, sum_2, sum_12

In [115]:
# Functions for measuring the quality of a partition (into communities).

def compute_metrics(G, partition):

  # Returns the coverage of a partition.
  coverage = community.coverage(G, partition)

  # Returns the coverage and performance of a partition of G.
  quality = community.partition_quality(G, partition)

  performance = community.performance(G, partition)

  # Returns the modularity of the given partition of the graph.
  modularity = community.modularity(G, partition)

  print("Coverage: ", coverage)
  # print("Quality: ", quality) # Coverage and performance
  print("Performance: ", performance)
  print("Modularity: ", modularity)

  return coverage, quality, performance, modularity


# PageRank Networks

## All periods

In [150]:
period = ''
China = pd.read_csv('/content/China'+period+'.csv')
USA = pd.read_csv('/content/USA'+period+'.csv')

edges = pd.read_csv('/content/edgelist_China_USA'+period+'.csv')
edges_China = pd.read_csv('/content/edgelist_China'+period+'.csv')
edges_USA = pd.read_csv('/content/edgelist_USA'+period+'.csv')

nodes = pd.read_csv('/content/nodelist_China_USA'+period+'.csv')

text_China = extract_text(China)
text_USA = extract_text(USA)

freq_dict_China = frequency_dictionary(text_China)
freq_dict_China = dict(sorted(freq_dict_China.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

freq_dict_USA = frequency_dictionary(text_USA)
freq_dict_USA = dict(sorted(freq_dict_USA.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

net, net_df = network_from_edges(edges)
net_China, net_df_China = network_from_edges(edges_China)
net_USA, net_df_USA = network_from_edges(edges_USA)

G = get_graph(net)
G_China = get_graph(net_China)
G_USA = get_graph(net_USA)

In [151]:
print('China&USA:')
print('Nodes: ',len(G.nodes()))
print('Edges: ',len(G.edges()))
print('Is connected: ',nx.is_connected(G))
print()

print('China:')
print('Nodes: ',len(G_China.nodes()))
print('Edges: ',len(G_China.edges()))
print('Is connected: ',nx.is_connected(G_China))
print()
print('USA:')
print('Nodes: ',len(G_USA.nodes()))
print('Edges: ',len(G_USA.edges()))
print('Is connected: ',nx.is_connected(G_USA))
print()

China&USA:
Nodes:  509
Edges:  11982
Is connected:  True

China:
Nodes:  445
Edges:  3042
Is connected:  False

USA:
Nodes:  522
Edges:  8973
Is connected:  True



### Girvan_Newman

In [7]:
community_gn = community.girvan_newman(G,)
for c_list in community_gn:
  tuple(print(sorted(c)) for c in c_list)

['accord', 'across', 'add', 'administer', 'administration', 'adult', 'adviser', 'affect', 'africa', 'age', 'agency', 'ahead', 'aid', 'air', 'allow', 'almost', 'already', 'also', 'america', 'amid', 'among', 'analysis', 'announce', 'another', 'antibody', 'appear', 'approve', 'area', 'around', 'arrive', 'asian', 'ask', 'australia', 'authority', 'avoid', 'back', 'bad', 'ban', 'bank', 'base', 'batch', 'battle', 'become', 'begin', 'behind', 'beijing', 'benefit', 'big', 'bill', 'billion', 'blood', 'boost', 'brazil', 'break', 'breaking', 'bring', 'british', 'build', 'business', 'california', 'call', 'campaign', 'canada', 'cancel', 'candidate', 'capital', 'care', 'case', 'catch', 'cause', 'center', 'central', 'challenge', 'change', 'chief', 'child', 'china', 'citizen', 'city', 'claim', 'clinical', 'close', 'clot', 'combat', 'come', 'commission', 'committee', 'community', 'company', 'concern', 'condition', 'confidence', 'confirm', 'confirmed', 'contact', 'contain', 'continue', 'contract', 'contr

KeyboardInterrupt: ignored

In [None]:
# VERY HEAVY COMPUTATION
def most_central_edge(G):
  centrality = betweenness(G, weight="weight")
  return max(centrality, key=centrality.get)

community_gn_centrality = community.girvan_newman(G, most_valuable_edge=most_central_edge)
tuple(print(sorted(c)) for c in next(community_gn_centrality))

In [8]:
def heaviest(G):
  u, v, w = max(G.edges(data="weight"), key=itemgetter(2))
  return (u, v)

community_gn_heaviest = community.girvan_newman(G, most_valuable_edge=heaviest)
tuple(print(sorted(c)) for c in next(community_gn_heaviest))

['accord', 'across', 'add', 'administer', 'administration', 'adult', 'adviser', 'affect', 'africa', 'age', 'agency', 'ahead', 'aid', 'air', 'allow', 'almost', 'already', 'also', 'america', 'amid', 'among', 'analysis', 'announce', 'another', 'antibody', 'appear', 'approve', 'area', 'around', 'arrive', 'asian', 'ask', 'australia', 'authority', 'avoid', 'back', 'bad', 'ban', 'bank', 'base', 'batch', 'battle', 'become', 'begin', 'behind', 'beijing', 'benefit', 'big', 'bill', 'billion', 'blood', 'boost', 'brazil', 'break', 'breaking', 'bring', 'british', 'build', 'business', 'california', 'call', 'campaign', 'canada', 'cancel', 'candidate', 'capital', 'care', 'case', 'catch', 'cause', 'center', 'central', 'challenge', 'change', 'chief', 'child', 'china', 'citizen', 'city', 'claim', 'clinical', 'close', 'clot', 'combat', 'come', 'commission', 'committee', 'community', 'company', 'concern', 'condition', 'confidence', 'confirm', 'confirmed', 'contact', 'contain', 'continue', 'contract', 'contr

(None, None)

### Bipartition
Partition a graph into two blocks using the Kernighan–Lin algorithm.

In [152]:
community_bipart = community.kernighan_lin_bisection(G,weight='weight')

In [153]:
print(community_bipart[0])
print(community_bipart[1])

{'stage', 'approve', 'committee', 'korea', 'add', 'express', 'beijing', 'today', 'declare', 'special', 'strong', 'launch', 'population', 'adviser', 'service', 'facility', 'rule', 'federal', 'north', 'family', 'sale', 'share', 'arrive', 'whether', 'keep', 'pass', 'experimental', 'fall', 'loss', 'human', 'democratic', 'experience', 'secretary', 'season', 'game', 'hard', 'person', 'hundred', 'exclusive', 'free', 'export', 'stay', 'side', 'evacuate', 'air', 'young', 'within', 'area', 'press', 'base', 'vice', 'reach', 'regulator', 'track', 'bank', 'transmission', 'almost', 'appear', 'republican', 'step', 'place', 'move', 'hand', 'decline', 'talk', 'drive', 'governor', 'force', 'research', 'group', 'drop', 'confidence', 'stop', 'good', 'past', 'court', 'breaking', 'system', 'produce', 'full', 'poll', 'party', 'hope', 'industry', 'diagnosis', 'deal', 'pneumonia', 'combat', 'italy', 'growth', 'yet', 'rally', 'statement', 'military', 'like', 'department', 'head', 'release', 'post', 'great', 'em

In [154]:
threshold = 0.00001
sum_China_0, sum_USA_0, sum_China_USA_0 = community_check(community_bipart[0], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
sum_China_1, sum_USA_1, sum_China_USA_1 = community_check(community_bipart[1], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)

In [155]:
print('First community:')
print('China: ', round(100 * sum_China_0 / len(community_bipart[0])),'%')
print('USA: ', round(100 * sum_USA_0 / len(community_bipart[0])),'%')
print('China&USA: ', round(100 * sum_China_USA_0 / len(community_bipart[0])),'%')
print()
print('China: ', round(100 * sum_China_1 / len(community_bipart[1])),'%')
print('USA: ', round(100 * sum_USA_1 / len(community_bipart[1])),'%')
print('China&USA: ', round(100 * sum_China_USA_1 / len(community_bipart[1])),'%')

First community:
China:  28 %
USA:  62 %
China&USA:  10 %

China:  38 %
USA:  54 %
China&USA:  7 %


**Metrics**

In [156]:
_ = compute_metrics(G, community_bipart)

Coverage:  0.6846937072275079
Performance:  0.5352164967591232
Modularity:  -0.00705109029492303


### Modularity-based communities
Find communities in G using greedy modularity maximization.

In [157]:
community_mod = community.greedy_modularity_communities(G, n_communities=2, weight='weight')
# communities_naive_mod = community.naive_greedy_modularity_communities(G)

In [158]:
community_mod_list = []
for comm in community_mod:
 print(list(comm))
 community_mod_list.append(list(comm))

 # The first community might be related to

['global', 'express', 'year', 'hospital', 'beijing', 'declare', 'special', 'care', 'coronavirus', 'service', 'family', 'sale', 'epicenter', 'keep', 'human', 'loss', 'hundred', 'japan', 'stay', 'ship', 'evacuate', 'air', 'prevent', 'recover', 'economy', 'almost', 'bank', 'transmission', 'place', 'step', 'flight', 'people', 'medical', 'hand', 'update', 'force', 'take', 'around', 'become', 'patient', 'big', 'infect', 'outside', 'epidemic', 'confidence', 'stop', 'disease', 'full', 'battle', 'large', 'industry', 'passenger', 'help', 'pneumonia', 'combat', 'growth', 'home', 'work', 'like', 'center', 'department', 'great', 'employee', 'business', 'major', 'kill', 'international', 'visit', 'deadly', 'development', 'many', 'look', 'story', 'travel', 'control', 'shut', 'impact', 'street', 'much', 'demand', 'hong', 'support', 'despite', 'cruise', 'continue', 'fund', 'affect', 'central', 'lead', 'call', 'novel', 'world', 'fear', 'outbreak', 'financial', 'measure', 'china', 'already', 'face', 'chal

In [159]:
threshold = 0.00001
for i in range(len(community_mod_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_mod_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_mod_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_mod_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_mod_list[i])),'%')
  print()

Community:  0
China:  44 %
USA:  48 %
China&USA:  9 %

Community:  1
China:  22 %
USA:  68 %
China&USA:  11 %

Community:  2
China:  23 %
USA:  70 %
China&USA:  7 %

Community:  3
China:  46 %
USA:  46 %
China&USA:  7 %

Community:  4
China:  50 %
USA:  25 %
China&USA:  25 %



**Metrics**

In [160]:
_ = compute_metrics(G, community_mod_list)

Coverage:  0.3890836254381572
Performance:  0.718051451819996
Modularity:  0.1837115988393901


### Louvain Community Detection
Find the best partition of a graph using the Louvain Community Detection Algorithm.

In [161]:
community_louv = community_louvain.best_partition(G,weight='weight')

# modularity_louvain = community.modularity(communities_louvain, G) # ERROR: not a partition
# print("The modularity Q based on networkx is {}".format(modularity_louvain))

In [162]:
max = np.max(list(community_louv.values()))
community_louvain_list = []
for i in range(max+1):
  community_louvain_list.append([])
for key in list(community_louv.keys()):
  community_louvain_list[community_louv[key]].append(key)
for i in range(max+1):
  print(community_louvain_list[i])

['infectious', 'disease', 'prevention', 'control', 'south', 'center', 'korea', 'africa', 'north']
['china', 'coronavirus', 'novel', 'outbreak', 'people', 'medical', 'virus', 'challenge', 'face', 'human', 'medium', 'watch', 'great', 'hard', 'outside', 'cruise', 'ship', 'japan', 'live', 'worker', 'across', 'line', 'central', 'epicenter', 'late', 'hubei', 'province', 'support', 'local', 'hospital', 'help', 'fight', 'express', 'contain', 'epidemic', 'try', 'effort', 'send', 'amid', 'drop', 'allow', 'foreign', 'mask', 'critical', 'work', 'service', 'industry', 'combat', 'business', 'national', 'commission', 'treatment', 'transmission', 'update', 'around', 'development', 'die', 'measure', 'leave', 'home', 'city', 'battle', 'win', 'due', 'authority', 'beijing', 'pneumonia', 'economy', 'many', 'stand', 'infect', 'family', 'patient', 'spread', 'already', 'production', 'year', 'old', 'discharge', 'affect', 'evacuate', 'put', 'despite', 'become', 'full', 'recover', 'keep', 'citizen', 'close', 'lo

In [163]:
threshold = 0.00001
for i in range(len(community_louvain_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_louvain_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_louvain_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_louvain_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_louvain_list[i])),'%')
  print()

Community:  0
China:  67 %
USA:  22 %
China&USA:  11 %

Community:  1
China:  47 %
USA:  45 %
China&USA:  8 %

Community:  2
China:  21 %
USA:  69 %
China&USA:  10 %

Community:  3
China:  22 %
USA:  65 %
China&USA:  13 %

Community:  4
China:  38 %
USA:  53 %
China&USA:  9 %

Community:  5
China:  28 %
USA:  65 %
China&USA:  7 %



**Metrics**

In [164]:
_ = compute_metrics(G, community_louvain_list)

Coverage:  0.35645134368218995
Performance:  0.7397862104172146
Modularity:  0.18387625749187272


### Clique percolation

In [9]:
cliques = nx.find_cliques(G)
# in our case cliques correspond to tweets

In [10]:
k = 7
community_clique = community.k_clique_communities(G, k, cliques)

In [None]:
c = next(community_clique)

### Dendogram

In [None]:
from sknetwork.hierarchy import Paris
paris = Paris()
adjacency = adj_matrix(nodes, edges)
dendrogram = paris.fit_transform(adjacency)

A shape:  (509, 509)
0/79
1/79
2/79
3/79
4/79
5/79
6/79
7/79
8/79
9/79
10/79
11/79
12/79
13/79
14/79
15/79
16/79
17/79
18/79
19/79
20/79
21/79
22/79
23/79
24/79
25/79
26/79
27/79
28/79
29/79
30/79
31/79
32/79
33/79
34/79
35/79
36/79
37/79
38/79
39/79
40/79
41/79
42/79
43/79
44/79


### Lukes partitioning

In [8]:
max_size = 100 
community_lukes = community.lukes_partitioning(G, max_size, edge_weight='weight')

NotATree: ignored

### BigCLAM

In [None]:
! git clone https://github.com/RobRomijnders/bigclam.git

## JanFeb2020

In [135]:
period = '_JanFeb2020'
China = pd.read_csv('/content/China'+period+'.csv')
USA = pd.read_csv('/content/USA'+period+'.csv')

edges = pd.read_csv('/content/edgelist_China_USA'+period+'.csv')
edges_China = pd.read_csv('/content/edgelist_China'+period+'.csv')
edges_USA = pd.read_csv('/content/edgelist_USA'+period+'.csv')

text_China = extract_text(China)
text_USA = extract_text(USA)

freq_dict_China = frequency_dictionary(text_China)
freq_dict_China = dict(sorted(freq_dict_China.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

freq_dict_USA = frequency_dictionary(text_USA)
freq_dict_USA = dict(sorted(freq_dict_USA.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

net, net_df = network_from_edges(edges)
net_China, net_df_China = network_from_edges(edges_China)
net_USA, net_df_USA = network_from_edges(edges_USA)

G = get_graph(net)
G_China = get_graph(net_China)
G_USA = get_graph(net_USA)

In [136]:
print('China&USA:')
print('Nodes: ',len(G.nodes()))
print('Edges: ',len(G.edges()))
print('Is connected: ',nx.is_connected(G))
print()
print('China:')
print('Nodes: ',len(G_China.nodes()))
print('Edges: ',len(G_China.edges()))
print('Is connected: ',nx.is_connected(G_China))
print()
print('USA:')
print('Nodes: ',len(G_USA.nodes()))
print('Edges: ',len(G_USA.edges()))
print('Is connected: ',nx.is_connected(G_USA))
print()

China&USA:
Nodes:  454
Edges:  3254
Is connected:  True

China:
Nodes:  300
Edges:  1391
Is connected:  False

USA:
Nodes:  435
Edges:  1746
Is connected:  True



### Bipartition
Partition a graph into two blocks using the Kernighan–Lin algorithm.

In [137]:
community_bipart = community.kernighan_lin_bisection(G,weight='weight')

In [138]:
print(community_bipart[0])
print(community_bipart[1])

{'wish', 'france', 'together', 'korea', 'evacuee', 'today', 'receive', 'special', 'result', 'care', 'news', 'carry', 'australia', 'clinical', 'service', 'facility', 'quarter', 'nurse', 'front', 'family', 'arrive', 'police', 'share', 'keep', 'sale', 'pass', 'singapore', 'whether', 'fall', 'respiratory', 'protective', 'hard', 'dozen', 'person', 'also', 'hundred', 'strand', 'recovery', 'develop', 'animal', 'five', 'air', 'area', 'base', 'recently', 'reach', 'almost', 'bank', 'appear', 'cough', 'place', 'away', 'move', 'six', 'hand', 'dollar', 'talk', 'force', 'plane', 'research', 'group', 'robot', 'firm', 'good', 'mother', 'shortage', 'restriction', 'system', 'south', 'full', 'tokyo', 'asia', 'party', 'industry', 'large', 'deal', 'isolation', 'last', 'growth', 'statement', 'military', 'baby', 'nine', 'way', 'border', 'well', 'head', 'release', 'investor', 'data', 'fly', 'among', 'major', 'set', 'reporter', 'scientist', 'australian', 'issue', 'shanghai', 'thailand', 'low', 'visit', 'fast',

In [139]:
threshold = 0.00001
sum_China_0, sum_USA_0, sum_China_USA_0 = community_check(community_bipart[0], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
sum_China_1, sum_USA_1, sum_China_USA_1 = community_check(community_bipart[1], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)

In [140]:
print('First community:')
print('China: ', round(100 * sum_China_0 / len(community_bipart[0])),'%')
print('USA: ', round(100 * sum_USA_0 / len(community_bipart[0])),'%')
print('China&USA: ', round(100 * sum_China_USA_0 / len(community_bipart[0])),'%')
print()
print('China: ', round(100 * sum_China_1 / len(community_bipart[1])),'%')
print('USA: ', round(100 * sum_USA_1 / len(community_bipart[1])),'%')
print('China&USA: ', round(100 * sum_China_USA_1 / len(community_bipart[1])),'%')

First community:
China:  43 %
USA:  52 %
China&USA:  4 %

China:  45 %
USA:  51 %
China&USA:  4 %


**Metrics**

In [141]:
_ = compute_metrics(G, community_bipart)

Coverage:  0.8097725875845113
Performance:  0.5207087356925441
Modularity:  0.000738799589312081


### Modularity-based communities
Find communities in G using greedy modularity maximization.

In [142]:
community_mod = community.greedy_modularity_communities(G, n_communities=2, weight='weight')
# communities_naive_mod = community.naive_greedy_modularity_communities(G)

In [143]:
community_mod_list = []
for comm in community_mod:
 print(list(comm))
 community_mod_list.append(list(comm))

['evacuee', 'korea', 'today', 'result', 'coronavirus', 'news', 'australia', 'clinical', 'quarter', 'front', 'family', 'arrive', 'police', 'share', 'whether', 'sale', 'level', 'keep', 'respiratory', 'singapore', 'fall', 'hard', 'dozen', 'person', 'hundred', 'may', 'strand', 'five', 'air', 'drug', 'area', 'base', 'recently', 'state', 'economy', 'almost', 'bank', 'appear', 'cough', 'place', 'know', 'flight', 'need', 'move', 'six', 'dollar', 'hand', 'force', 'become', 'big', 'due', 'good', 'firm', 'system', 'restriction', 'south', 'tokyo', 'asia', 'industry', 'last', 'growth', 'statement', 'military', 'nine', 'like', 'risk', 'well', 'department', 'head', 'investor', 'set', 'high', 'fly', 'among', 'business', 'get', 'australian', 'issue', 'shanghai', 'thailand', 'low', 'wash', 'wall', 'possible', 'california', 'come', 'still', 'story', 'four', 'travel', 'since', 'potential', 'impact', 'street', 'much', 'demand', 'despite', 'question', 'woman', 'fund', 'united', 'affect', 'cut', 'near', 'rat

In [144]:
threshold = 0.00001
for i in range(len(community_mod_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_mod_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_mod_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_mod_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_mod_list[i])),'%')
  print()

Community:  0
China:  26 %
USA:  69 %
China&USA:  4 %

Community:  1
China:  63 %
USA:  34 %
China&USA:  2 %

Community:  2
China:  46 %
USA:  46 %
China&USA:  8 %

Community:  3
China:  76 %
USA:  20 %
China&USA:  5 %

Community:  4
China:  47 %
USA:  47 %
China&USA:  6 %

Community:  5
China:  19 %
USA:  78 %
China&USA:  0 %

Community:  6
China:  29 %
USA:  71 %
China&USA:  0 %



**Metrics**

In [145]:
_ = compute_metrics(G, community_mod_list)

Coverage:  0.4164105716041795
Performance:  0.7469342902432146
Modularity:  0.21828728489729782


### Louvain Community Detection
Find the best partition of a graph using the Louvain Community Detection Algorithm.

In [146]:
# communities_louvain = community.louvain.louvain_communities(G)
# partitions_louvain = community.louvain.louvain_partitions(G)

community_louv = community_louvain.best_partition(G,weight='weight')

# modularity_louvain = community.modularity(communities_louvain, G) # ERROR: not a partition
# print("The modularity Q based on networkx is {}".format(modularity_louvain))

In [147]:
max = np.max(list(community_louv.values()))
community_louvain_list = []
for i in range(max+1):
  community_louvain_list.append([])
for key in list(community_louv.keys()):
  community_louvain_list[community_louv[key]].append(key)
for i in range(max+1):
  print(community_louvain_list[i])

['talk', 'china', 'coronavirus', 'novel', 'outbreak', 'university', 'student', 'face', 'deal', 'medium', 'watch', 'make', 'take', 'hard', 'announce', 'team', 'expert', 'among', 'country', 'asia', 'state', 'information', 'office', 'across', 'line', 'since', 'medic', 'arrive', 'support', 'local', 'help', 'fight', 'cut', 'firm', 'express', 'contain', 'epidemic', 'nearly', 'risk', 'drug', 'ensure', 'give', 'trump', 'try', 'share', 'effort', 'send', 'amid', 'offer', 'system', 'wang', 'mask', 'donate', 'show', 'australia', 'daily', 'begin', 'back', 'work', 'service', 'industry', 'combat', 'get', 'business', 'issue', 'plan', 'possible', 'high', 'raise', 'woman', 'measure', 'south', 'leave', 'home', 'city', 'food', 'come', 'fund', 'battle', 'billion', 'dollar', 'win', 'meet', 'due', 'big', 'beijing', 'economy', 'fast', 'part', 'slow', 'many', 'find', 'aid', 'may', 'stand', 'infected', 'see', 'spread', 'response', 'diagnose', 'much', 'yuan', 'company', 'production', 'president', 'solidarity', '

In [148]:
threshold = 0.00001
for i in range(len(community_louvain_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_louvain_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_louvain_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_louvain_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_louvain_list[i])),'%')
  print()

Community:  0
China:  42 %
USA:  54 %
China&USA:  3 %

Community:  1
China:  47 %
USA:  45 %
China&USA:  8 %

Community:  2
China:  33 %
USA:  67 %
China&USA:  0 %

Community:  3
China:  72 %
USA:  26 %
China&USA:  2 %

Community:  4
China:  29 %
USA:  71 %
China&USA:  0 %

Community:  5
China:  16 %
USA:  77 %
China&USA:  3 %

Community:  6
China:  50 %
USA:  42 %
China&USA:  8 %



**Metrics**

In [149]:
_ = compute_metrics(G, community_louvain_list)

Coverage:  0.5009219422249539
Performance:  0.5915045073956298
Modularity:  0.2218519183055515


## SeptOct2020

In [119]:
period = '_SeptOct2020'
China = pd.read_csv('/content/China'+period+'.csv')
USA = pd.read_csv('/content/USA'+period+'.csv')

edges = pd.read_csv('/content/edgelist_China_USA'+period+'.csv')
edges_China = pd.read_csv('/content/edgelist_China'+period+'.csv')
edges_USA = pd.read_csv('/content/edgelist_USA'+period+'.csv')

text_China = extract_text(China)
text_USA = extract_text(USA)

freq_dict_China = frequency_dictionary(text_China)
freq_dict_China = dict(sorted(freq_dict_China.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

freq_dict_USA = frequency_dictionary(text_USA)
freq_dict_USA = dict(sorted(freq_dict_USA.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

net, net_df = network_from_edges(edges)
net_China, net_df_China = network_from_edges(edges_China)
net_USA, net_df_USA = network_from_edges(edges_USA)

G = get_graph(net)
G_China = get_graph(net_China)
G_USA = get_graph(net_USA)

In [120]:
print('China&USA:')
print('Nodes: ',len(G.nodes()))
print('Edges: ',len(G.edges()))
print('Is connected: ',nx.is_connected(G))
print()
print('China:')
print('Nodes: ',len(G_China.nodes()))
print('Edges: ',len(G_China.edges()))
print('Is connected: ',nx.is_connected(G_China))
print()
print('USA:')
print('Nodes: ',len(G_USA.nodes()))
print('Edges: ',len(G_USA.edges()))
print('Is connected: ',nx.is_connected(G_USA))
print()

China&USA:
Nodes:  507
Edges:  5768
Is connected:  True

China:
Nodes:  173
Edges:  556
Is connected:  False

USA:
Nodes:  512
Edges:  4963
Is connected:  True



### Bipartition
Partition a graph into two blocks using the Kernighan–Lin algorithm.

In [122]:
community_bipart = community.kernighan_lin_bisection(G,weight='weight')

In [123]:
print(community_bipart[0])
print(community_bipart[1])

{'company', 'france', 'approve', 'korea', 'add', 'milestone', 'real', 'today', 'receive', 'enough', 'strong', 'boom', 'promise', 'facility', 'service', 'rule', 'federal', 'north', 'family', 'share', 'whether', 'keep', 'loss', 'experience', 'season', 'game', 'hard', 'person', 'also', 'stimulus', 'bar', 'free', 'gathering', 'player', 'physician', 'commission', 'stay', 'mayor', 'immune', 'side', 'sen', 'within', 'prevent', 'air', 'young', 'press', 'base', 'recover', 'democrat', 'senator', 'bank', 'transmission', 'almost', 'third', 'republican', 'place', 'flight', 'move', 'six', 'decline', 'update', 'research', 'group', 'outside', 'epidemic', 'shot', 'stop', 'firm', 'good', 'supply', 'package', 'court', 'breaking', 'system', 'register', 'produce', 'south', 'pay', 'full', 'battle', 'poll', 'party', 'industry', 'voter', 'survive', 'local', 'italy', 'yet', 'nationwide', 'jersey', 'statement', 'nine', 'like', 'way', 'expose', 'feel', 'release', 'post', 'night', 'business', 'among', 'major', 't

In [124]:
threshold = 0.00001
sum_China_0, sum_USA_0, sum_China_USA_0 = community_check(community_bipart[0], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
sum_China_1, sum_USA_1, sum_China_USA_1 = community_check(community_bipart[1], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)

In [125]:
print('First community:')
print('China: ', round(100 * sum_China_0 / len(community_bipart[0])),'%')
print('USA: ', round(100 * sum_USA_0 / len(community_bipart[0])),'%')
print('China&USA: ', round(100 * sum_China_USA_0 / len(community_bipart[0])),'%')
print()
print('China: ', round(100 * sum_China_1 / len(community_bipart[1])),'%')
print('USA: ', round(100 * sum_USA_1 / len(community_bipart[1])),'%')
print('China&USA: ', round(100 * sum_China_USA_1 / len(community_bipart[1])),'%')

First community:
China:  28 %
USA:  65 %
China&USA:  6 %

China:  39 %
USA:  56 %
China&USA:  4 %


**Metrics**

In [126]:
_ = compute_metrics(G, community_bipart)

Coverage:  0.7115117891816921
Performance:  0.5200084196739715
Modularity:  -0.0027572798112494128


### Modularity-based communities
Find communities in G using greedy modularity maximization.

In [127]:
community_mod = community.greedy_modularity_communities(G, n_communities=2, weight='weight')
# communities_naive_mod = community.naive_greedy_modularity_communities(G)

In [128]:
community_mod_list = []
for comm in community_mod:
 print(list(comm))
 community_mod_list.append(list(comm))

['company', 'stage', 'approve', 'johnson', 'real', 'receive', 'enough', 'coverage', 'strong', 'result', 'promise', 'news', 'participant', 'service', 'facility', 'clinical', 'rule', 'adult', 'vaccine', 'share', 'experimental', 'loss', 'season', 'dose', 'even', 'exclusive', 'end', 'free', 'gathering', 'may', 'prime', 'early', 'immune', 'develop', 'side', 'drug', 'young', 'study', 'bank', 'transmission', 'third', 'appear', 'know', 'need', 'become', 'emergency', 'patient', 'infect', 'due', 'find', 'group', 'shot', 'firm', 'supply', 'system', 'pay', 'produce', 'full', 'battle', 'party', 'industry', 'deal', 'help', 'survive', 'home', 'work', 'like', 'risk', 'head', 'data', 'post', 'employee', 'make', 'get', 'effect', 'scientist', 'boost', 'plastic', 'immunity', 'candidate', 'serious', 'likely', 'french', 'protocol', 'seek', 'development', 'possible', 'want', 'russia', 'story', 'right', 'meet', 'potential', 'support', 'despite', 'sign', 'question', 'offer', 'would', 'woman', 'fund', 'wave', '

In [129]:
threshold = 0.00001
for i in range(len(community_mod_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_mod_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_mod_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_mod_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_mod_list[i])),'%')
  print()

Community:  0
China:  23 %
USA:  72 %
China&USA:  5 %

Community:  1
China:  30 %
USA:  65 %
China&USA:  5 %

Community:  2
China:  50 %
USA:  44 %
China&USA:  6 %

Community:  3
China:  50 %
USA:  50 %
China&USA:  0 %

Community:  4
China:  25 %
USA:  75 %
China&USA:  0 %

Community:  5
China:  0 %
USA:  100 %
China&USA:  0 %



**Metrics**

In [130]:
_ = compute_metrics(G, community_mod_list)

Coverage:  0.5409153952843273
Performance:  0.691582664826812
Modularity:  0.22681624599609804


### Louvain Community Detection
Find the best partition of a graph using the Louvain Community Detection Algorithm.

In [131]:
# communities_louvain = community.louvain.louvain_communities(G)
# partitions_louvain = community.louvain.louvain_partitions(G)

community_louv = community_louvain.best_partition(G, weight='weight')

# modularity_louvain = community.modularity(communities_louvain, G) # ERROR: not a partition
# print("The modularity Q based on networkx is {}".format(modularity_louvain))

In [132]:
max = np.max(list(community_louv.values()))
community_louvain_list = []
for i in range(max+1):
  community_louvain_list.append([])
for key in list(community_louv.keys()):
  community_louvain_list[community_louv[key]].append(key)
for i in range(max+1):
  print(community_louvain_list[i])

['daily', 'case', 'coronavirus', 'infection', 'high', 'since', 'number', 'new', 'hit', 'record', 'italy', 'week', 'report', 'health', 'time', 'increase', 'death', 'month', 'country', 'rise', 'france', 'see', 'expert', 'surpass', 'million', 'toll', 'confirm', 'one', 'resurgence', 'accord', 'ministry', 'register', 'national', 'student', 'university', 'local', 'among', 'across', 'pass', 'today', 'second', 'wave', 'surge', 'close', 'government', 'bring', 'total', 'tally', 'india', 'brazil', 'winter', 'could', 'milestone', 'outbreak', 'spread', 'measure', 'past', 'hour', 'city', 'far', 'day', 'state', 'set', 'york', 'qingdao', 'last', 'united', 'fall', 'germany', 'start', 'spike', 'two', 'authority', 'six', 'school', 'region', 'minister', 'restriction', 'seven', 'nine', 'nationwide', 'three', 'decline', 'nearly', 'bar', 'rate', 'australia', 'low', 'impose', 'level', 'ease', 'another', 'warn', 'half', 'child', 'curb', 'force', 'almost', 'research', 'trend', 'slow', 'federal', 'task', 'prime'

In [133]:
threshold = 0.00001
for i in range(len(community_louvain_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_louvain_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_louvain_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_louvain_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_louvain_list[i])),'%')
  print()

Community:  0
China:  52 %
USA:  42 %
China&USA:  6 %

Community:  1
China:  28 %
USA:  68 %
China&USA:  4 %

Community:  2
China:  30 %
USA:  63 %
China&USA:  6 %



**Metrics**

In [134]:
_ = compute_metrics(G, community_louvain_list)

Coverage:  0.5402219140083218
Performance:  0.647441744431711
Modularity:  0.22715740893142422


## MarchApril2021

In [100]:
period = '_MarchApril2021'
China = pd.read_csv('/content/China'+period+'.csv')
USA = pd.read_csv('/content/USA'+period+'.csv')

edges = pd.read_csv('/content/edgelist_China_USA'+period+'.csv')
edges_China = pd.read_csv('/content/edgelist_China'+period+'.csv')
edges_USA = pd.read_csv('/content/edgelist_USA'+period+'.csv')

text_China = extract_text(China)
text_USA = extract_text(USA)

freq_dict_China = frequency_dictionary(text_China)
freq_dict_China = dict(sorted(freq_dict_China.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

freq_dict_USA = frequency_dictionary(text_USA)
freq_dict_USA = dict(sorted(freq_dict_USA.items(), key=lambda item: item[1], reverse = True))   #order from more frequent to less frequent word

net, net_df = network_from_edges(edges)
net_China, net_df_China = network_from_edges(edges_China)
net_USA, net_df_USA = network_from_edges(edges_USA)

G = get_graph(net)
G_China = get_graph(net_China)
G_USA = get_graph(net_USA)

In [101]:
print('China&USA:')
print('Nodes: ',len(G.nodes()))
print('Edges: ',len(G.edges()))
print('Is connected: ',nx.is_connected(G))
print()
print('China:')
print('Nodes: ',len(G_China.nodes()))
print('Edges: ',len(G_China.edges()))
print('Is connected: ',nx.is_connected(G_China))
print()
print('USA:')
print('Nodes: ',len(G_USA.nodes()))
print('Edges: ',len(G_USA.edges()))
print('Is connected: ',nx.is_connected(G_USA))
print()

China&USA:
Nodes:  500
Edges:  3018
Is connected:  True

China:
Nodes:  206
Edges:  638
Is connected:  True

USA:
Nodes:  491
Edges:  2285
Is connected:  True



### Bipartition
Partition a graph into two blocks using the Kernighan–Lin algorithm.

In [102]:
community_bipart = community.kernighan_lin_bisection(G,weight='weight')

In [103]:
print(community_bipart[0])
print(community_bipart[1])

{'wait', 'add', 'double', 'passport', 'mask', 'plant', 'today', 'inoculate', 'strong', 'coverage', 'fourth', 'result', 'launch', 'news', 'service', 'quarter', 'rule', 'average', 'family', 'share', 'overseas', 'level', 'whether', 'keep', 'pass', 'fall', 'human', 'authority', 'experience', 'game', 'hard', 'person', 'also', 'exclusive', 'forecast', 'gathering', 'japan', 'early', 'detect', 'ship', 'five', 'within', 'facebook', 'area', 'cite', 'recover', 'require', 'write', 'almost', 'economy', 'place', 'step', 'immunization', 'move', 'medical', 'investigate', 'decline', 'talk', 'mass', 'governor', 'ever', 'consider', 'big', 'decide', 'drop', 'good', 'similar', 'breaking', 'pay', 'system', 'full', 'large', 'hope', 'tokyo', 'suffer', 'deal', 'growth', 'hold', 'yet', 'home', 'military', 'like', 'threaten', 'extra', 'border', 'protection', 'executive', 'well', 'head', 'release', 'great', 'business', 'among', 'major', 'scientist', 'former', 'boost', 'issue', 'thailand', 'international', 'visit'

In [104]:
threshold = 0.00001
sum_China_0, sum_USA_0, sum_China_USA_0 = community_check(community_bipart[0], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
sum_China_1, sum_USA_1, sum_China_USA_1 = community_check(community_bipart[1], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)

In [105]:
print('First community:')
print('China: ', round(100 * sum_China_0 / len(community_bipart[0])),'%')
print('USA: ', round(100 * sum_USA_0 / len(community_bipart[0])),'%')
print('China&USA: ', round(100 * sum_China_USA_0 / len(community_bipart[0])),'%')
print()
print('China: ', round(100 * sum_China_1 / len(community_bipart[1])),'%')
print('USA: ', round(100 * sum_USA_1 / len(community_bipart[1])),'%')
print('China&USA: ', round(100 * sum_China_USA_1 / len(community_bipart[1])),'%')

First community:
China:  29 %
USA:  64 %
China&USA:  6 %

China:  40 %
USA:  58 %
China&USA:  2 %


**Metrics**

In [118]:
_ = compute_metrics(G, community_bipart)

Coverage:  0.7823061630218688
Performance:  0.5146613226452906
Modularity:  -0.00029443991038140185


### Modularity-based communities
Find communities in G using greedy modularity maximization.

In [106]:
community_mod = community.greedy_modularity_communities(G, n_communities=2, weight='weight')
# communities_naive_mod = community.naive_greedy_modularity_communities(G)

In [107]:
community_mod_list = []
for comm in community_mod:
 print(list(comm))
 community_mod_list.append(list(comm))

['day', 'france', 'add', 'global', 'double', 'surpass', 'year', 'hospital', 'today', 'spike', 'intensive', 'strong', 'coverage', 'fourth', 'brazil', 'care', 'news', 'coronavirus', 'service', 'quarter', 'rule', 'average', 'family', 'share', 'overseas', 'pass', 'new', 'human', 'experience', 'game', 'hard', 'week', 'surge', 'month', 'forecast', 'gathering', 'early', 'japan', 'detect', 'time', 'five', 'area', 'cite', 'recover', 'toll', 'write', 'study', 'almost', 'economy', 'state', 'third', 'place', 'step', 'yunnan', 'move', 'decline', 'two', 'update', 'patient', 'ever', 'big', 'research', 'drop', 'system', 'pay', 'total', 'next', 'restriction', 'suffer', 'tokyo', 'analysis', 'help', 'local', 'last', 'hold', 'growth', 'yet', 'work', 'home', 'province', 'border', 'general', 'head', 'data', 'release', 'set', 'high', 'great', 'business', 'major', 'rise', 'low', 'visit', 'likely', 'international', 'french', 'seek', 'america', 'california', 'story', 'four', 'travel', 'block', 'another', 'bad',

In [108]:
threshold = 0.00001
for i in range(len(community_mod_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_mod_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_mod_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_mod_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_mod_list[i])),'%')
  print()

Community:  0
China:  39 %
USA:  54 %
China&USA:  6 %

Community:  1
China:  40 %
USA:  57 %
China&USA:  3 %

Community:  2
China:  17 %
USA:  81 %
China&USA:  1 %

Community:  3
China:  40 %
USA:  60 %
China&USA:  0 %

Community:  4
China:  12 %
USA:  82 %
China&USA:  6 %

Community:  5
China:  15 %
USA:  77 %
China&USA:  8 %

Community:  6
China:  55 %
USA:  45 %
China&USA:  0 %

Community:  7
China:  43 %
USA:  57 %
China&USA:  0 %

Community:  8
China:  0 %
USA:  100 %
China&USA:  0 %



**Metrics**

In [117]:
_ = compute_metrics(G, community_mod_list)

Coverage:  0.5089463220675944
Performance:  0.7108216432865732
Modularity:  0.17671835270017283


### Louvain Community Detection
Find the best partition of a graph using the Louvain Community Detection Algorithm.

In [109]:
# communities_louvain = community.louvain.louvain_communities(G)
# partitions_louvain = community.louvain.louvain_partitions(G)

community_louv = community_louvain.best_partition(G,weight='weight')

# modularity_louvain = community.modularity(communities_louvain, G) # ERROR: not a partition
# print("The modularity Q based on networkx is {}".format(modularity_louvain))

In [110]:
max = np.max(list(community_louv.values()))
community_louvain_list = []
for i in range(max+1):
  community_louvain_list.append([])
for key in list(community_louv.keys()):
  community_louvain_list[community_louv[key]].append(key)
for i in range(max+1):
  print(community_louvain_list[i])

['covid', 'low', 'pandemic', 'pass', 'bring', 'see', 'response', 'america', 'lose', 'child', 'close', 'like', 'claim', 'lift', 'restriction', 'around', 'business', 'head', 'life', 'team', 'care', 'france', 'reach', 'die', 'full', 'talk', 'school', 'follow', 'increase', 'join', 'rate', 'leader', 'march', 'return', 'gathering', 'warn', 'leave', 'tokyo', 'question', 'asian', 'community', 'fight', 'impact', 'place', 'test', 'positive', 'long', 'patient', 'seek', 'capital', 'germany', 'several', 'thousand', 'part', 'impose', 'spread', 'despite', 'pressure', 'ahead', 'quarter', 'border', 'news', 'general', 'three', 'reopen', 'face', 'recent', 'area', 'help', 'facebook', 'open', 'california', 'late', 'work', 'event', 'hold', 'special', 'hard', 'antibody', 'boost', 'family', 'visit', 'ease', 'thailand', 'allow', 'game', 'without', 'crowd', 'much', 'share', 'major', 'economy', 'big', 'hospital', 'recover', 'institute', 'push', 'agree', 'double', 'detect', 'social', 'medical', 'strong', 'look', 

In [111]:
threshold = 0.00001
for i in range(len(community_louvain_list)):
  sum_China, sum_USA, sum_China_USA = community_check(community_louvain_list[i], freq_dict_China, freq_dict_USA, G_China, G_USA, threshold)
  print('Community: ', i)
  print('China: ', round(100 * sum_China / len(community_louvain_list[i])),'%')
  print('USA: ', round(100 * sum_USA / len(community_louvain_list[i])),'%')
  print('China&USA: ', round(100 * sum_China_USA / len(community_louvain_list[i])),'%')
  print()

Community:  0
China:  30 %
USA:  62 %
China&USA:  7 %

Community:  1
China:  38 %
USA:  59 %
China&USA:  3 %

Community:  2
China:  50 %
USA:  48 %
China&USA:  2 %

Community:  3
China:  60 %
USA:  40 %
China&USA:  0 %

Community:  4
China:  58 %
USA:  42 %
China&USA:  0 %

Community:  5
China:  28 %
USA:  72 %
China&USA:  0 %

Community:  6
China:  17 %
USA:  80 %
China&USA:  3 %



**Metrics**

In [116]:
_ = compute_metrics(G, community_louvain_list)

Coverage:  0.4463220675944334
Performance:  0.7174909819639279
Modularity:  0.1789428682732272
