In [None]:
import numpy as np
import networkx as nx
import pickle
import community
from operator import itemgetter
from scipy import integrate
from matplotlib import pyplot as plt
%matplotlib inline
import os

## Campaign to analyse; selects the data/social_media/<campaign>/ directory
## used by every file path below. Options: [abo | gun | blm]
campaign = 'gun'
## Which connection list to load; becomes part of the connection-list
## pickle file name. Options: [followers | friends]
connection_type = 'followers'
## Year suffix of the input files and of the community cache file.
## Options: [2018 | 2020]
year = 2018

def set_node_community(G, communities):
    '''Store every node's community label as the 'community' node attribute.

    G is the graph whose node attributes are updated in place; communities
    maps each node to its community id. The stored value is the id shifted
    up by one, which keeps 0 free as the marker for external edges.
    '''
    for member, label in communities.items():
        G.nodes[member]['community'] = label + 1

def set_edge_community(G):
    '''Label each edge with its community, using 0 for external edges.

    An edge whose two endpoints carry the same 'community' node attribute is
    internal and inherits that label; any other edge is external and gets 0.
    Node communities must already be set on G.
    '''
    for edge in G.edges:
        v, w = edge
        source_label = G.nodes[v]['community']
        is_internal = source_label == G.nodes[w]['community']
        G.edges[v, w]['community'] = source_label if is_internal else 0

def get_color(i, r_off=1, g_off=1, b_off=1):
    '''Assign a deterministic RGB color to an integer index (e.g. a community id).

    Each channel takes one of n = 16 evenly spaced levels between 0.1 and 0.9.
    The level is ((i + offset) * stride) mod 16 with strides 3, 5 and 7 for
    red, green and blue; all three strides are coprime with 16, so every
    channel visits all 16 levels before repeating and the color as a whole
    repeats with period 16 in i.

    i is the index to colorize; r_off, g_off and b_off are per-channel
    offsets added to i before the stride is applied.
    Returns a tuple (r, g, b) of floats in the range [0.1, 0.9].
    '''
    n = 16
    low, high = 0.1, 0.9
    span = high - low

    def channel(offset, stride):
        # Same arithmetic (and evaluation order) for every channel; only the
        # offset and the stride differ.
        return low + span * (((i + offset) * stride) % n) / (n - 1)

    return (channel(r_off, 3), channel(g_off, 5), channel(b_off, 7))


In [None]:
## read the connection list
# NOTE: pickle can execute arbitrary code while loading; only open files that
# were produced by this project.
connection_list_path = "data/social_media/{}/ea_{}_{}.pkl".format(campaign, connection_type, str(year))
with open(connection_list_path, 'rb') as connection_file:
    connection_list = pickle.load(connection_file)
print("{} list read!".format(connection_type))

## read the connection graph
filtered_graph_file_path = 'data/social_media/{}/graph_edges/ea_filtered_graph_edge_list_{}.gpickle'.format(campaign, str(year))
if not os.path.exists(filtered_graph_file_path):
    # Stop here with an actionable message; continuing with G = None would
    # only fail a few lines later with an opaque AttributeError.
    raise FileNotFoundError(
        "Please do perform disparity filtering first! Missing file: {}".format(filtered_graph_file_path))
# A .gpickle file is an ordinary pickle of the graph object. Loading it with
# pickle directly works on NetworkX 2.x and on 3.x, where nx.read_gpickle
# no longer exists.
with open(filtered_graph_file_path, 'rb') as graph_file:
    G = pickle.load(graph_file)
print('Filtered G read!')

print("# of nodes in G:", G.number_of_nodes())
print("# of edges in G:", G.number_of_edges())

## Add the nodes not having any common followers to the graph.
## They are present in the connection list but have no edge in the filtered
## graph, so they are added as isolated nodes.
diff_users = list(set(connection_list.keys()).difference(G.nodes))
print("# of diff users:", len(diff_users))
G.add_nodes_from(diff_users)

print("# of nodes in G after addition:", G.number_of_nodes())
print("# of edges in G after addition:", G.number_of_edges())

## The following cells operate on `G2`, but no earlier cell defines it, so a
## fresh kernel would stop with a NameError. Bind it to the graph built above.
## NOTE(review): assumes G2 is meant to be the filtered graph including the
## isolated users added above -- confirm against the original analysis.
G2 = G


In [None]:
## detect communities. Check if it is already detected first.
resolution_param = 0.8
## A community keeps its own id only if it has MORE than this many nodes;
## all smaller communities are merged into one shared "other" group.
community_size_threshold = 200
community_results = None
community_file = 'data/social_media/{}/ea_communities_{}_res_{}.pkl'.format(campaign, str(year), resolution_param)
if os.path.exists(community_file):
    # Reuse the cached partition and layout written by the next cell.
    # NOTE: pickle can execute arbitrary code while loading; only open cache
    # files produced by this notebook.
    with open(community_file, 'rb') as cache_file:
        community_results = pickle.load(cache_file)
    communities = community_results['original_com_memberships']
    communities_new = community_results['assigned_com_memberships']
    pos = community_results['layout_pos']
    print('Community file read!')
else:
    print('Community detection started with resolution {} ...'.format(resolution_param))
    # Louvain partition: maps every node of G2 to a community id.
    communities = community.community_louvain.best_partition(G2, resolution=resolution_param)

    ## group the nodes by their community id
    clusters = {}
    for elem, com_id in communities.items():
        clusters.setdefault(com_id, []).append(elem)

    ## sort clusters by number of nodes
    clsuters_by_num_nodes = {com_id: len(members) for com_id, members in clusters.items()}
    sorted_clsuters_by_num_nodes = sorted(clsuters_by_num_nodes.items(), key=itemgetter(1), reverse=True)
    print(sorted_clsuters_by_num_nodes)

    ## keep the large communities and renumber them 0..k-1 in ascending order
    ## of their original id; every other node gets the shared id k.
    comms_to_be_kept = sorted(
        com_id for com_id, size in sorted_clsuters_by_num_nodes if size > community_size_threshold)
    # Dict lookup instead of list.index() for every node.
    new_id_by_com = {com_id: new_id for new_id, com_id in enumerate(comms_to_be_kept)}
    other_com_id = len(comms_to_be_kept)
    communities_new = {
        node: new_id_by_com.get(com_id, other_com_id)
        for node, com_id in communities.items()
    }

    print(comms_to_be_kept)
    print('Communities detected!')
    

In [None]:
## Set node and edge communities, and set community node colors and set layout if not already set.
set_node_community(G2, communities_new)
set_edge_community(G2)

node_color = [get_color(G2.nodes[v]['community']) for v in G2.nodes]

# Split the edges by the label written by set_edge_community: internal edges
# join members of the same community (label > 0), external edges are
# inter-community edges (label 0).
external = [(v, w) for v, w in G2.edges if G2.edges[v, w]['community'] == 0]
internal = [(v, w) for v, w in G2.edges if G2.edges[v, w]['community'] > 0]
internal_color = ['black'] * len(internal)
print('Node and edge communities set!')

# community_results is still None only when no cache file was found, i.e. the
# partition was computed in this session and has no stored layout yet.
if community_results is None:
    # Set the positions of the nodes based on the specific layout.
    pos = nx.spring_layout(G2)
    print('Postions are set for G2!')
    # Save the detected community results
    result = {
        'original_com_memberships': communities,
        'assigned_com_memberships': communities_new,
        'layout_pos': pos,
    }
    # The context manager guarantees the cache is flushed and closed even if
    # pickling fails part-way.
    with open(community_file, 'wb') as cache_file:
        pickle.dump(result, cache_file)
    print('Community results saved!')


In [None]:
# visualize the communities
colors = ['orange', 'blue', 'red', 'green', 'yellow', 'grey']
#colors = ['blue', 'orange', 'red', 'green', 'purple', 'yellow', 'cyan', 'magenta', 'grey']

# Node community labels start at 1 (0 is reserved for external edges), so
# label - 1 is the palette index.
node_labels = [G2.nodes[v]['community'] for v in G2.nodes]
if max(node_labels, default=0) <= len(colors):
    node_color_2 = [colors[label - 1] for label in node_labels]
else:
    # More groups than named colors: indexing the palette would raise an
    # IndexError, so fall back to the generated colors from get_color.
    node_color_2 = [get_color(label) for label in node_labels]

plt.rcParams.update({'figure.figsize': (15, 10)})

# Disabled: drawing the edges as well (external edges in silver, internal
# edges in black). Kept for reference.
# nx.draw_networkx(
#         G2,
#         pos=pos,
#         node_size=0,
#         edgelist=external,
#         edge_color="silver",
#         with_labels=False,
#         alpha=0.4)
#
# nx.draw_networkx(
#         G2,
#         pos=pos,
#         node_size=20,
#         with_labels=False,
#         alpha=0.4,
#         node_color=node_color_2,
#         edgelist=internal,
#         edge_color=internal_color)

# draw_networkx_nodes draws nodes only and has no `with_labels` parameter
# (that option belongs to draw_networkx), so it is not passed here.
nx.draw_networkx_nodes(
        G2,
        pos=pos,
        node_size=20,
        alpha=0.4,
        node_color=node_color_2)

print(G2.number_of_nodes())
print(G2.number_of_edges())
