In [None]:
import numpy as np
import networkx as nx
import pickle
import community
from operator import itemgetter
from scipy import integrate
from matplotlib import pyplot as plt
%matplotlib inline
import os

## Notebook configuration. These values are substituted into the input paths
## (twitter_data_collection/data/...) and output paths (results/...) used below.

## Campaign name; together with ea_type it forms the "<campaign>_<ea_type>" directory.
## [blm | gun | abo | cli | img]
campaign = 'gun'
## Second half of the "<campaign>_<ea_type>" directory name.
ea_type = 'ea_20'
## Which connection list / edge list file to read.
## [followers | friends]
connection_type = 'followers'
## Year suffix of the input and output file names.
## [2017 | 2018 | 2020]
year = 2018
## Sub-directory of the edge list and of the results.
## [sam, pmi]
method_type = "sam"
## Edge filter applied to the graph; also a sub-directory of the results.
## filtering type = [percentile | disparity]
filter_type = 'disparity'


def set_node_community(G, communities):
    '''Store each node's community label on the graph, shifted up by one.

    G           -- graph whose ``G.nodes[node]`` attribute dicts are updated in place
    communities -- mapping of node -> community index
    '''
    for member, label in communities.items():
        # Shift labels by one so that 0 stays free to mark external edges.
        G.nodes[member]['community'] = label + 1

def set_edge_community(G):
    '''Tag every edge with a ``community`` attribute.

    An edge whose endpoints carry the same node ``community`` value is internal
    and receives that value; any other edge is external and receives 0.
    '''
    for edge in G.edges:
        src, dst = edge
        src_label = G.nodes[src]['community']
        same_community = src_label == G.nodes[dst]['community']
        G.edges[src, dst]['community'] = src_label if same_community else 0

def get_color(i, r_off=1, g_off=1, b_off=1):
    '''Map an integer index to a deterministic RGB colour.

    Each channel is computed from ``(i + offset) * multiplier`` modulo 16
    (multipliers 3, 5 and 7 for red, green and blue) and then scaled linearly,
    so that for integer inputs every channel lies between 0.1 and 0.9.

    i                   -- integer index (e.g. a community id)
    r_off, g_off, b_off -- per-channel offsets added to ``i`` before mixing

    Returns an ``(r, g, b)`` tuple of floats.
    '''
    n = 16
    low, high = 0.1, 0.9
    span = high - low

    def channel(offset, multiplier):
        # Same formula for all three channels; only offset and multiplier differ.
        return low + span * (((i + offset) * multiplier) % n) / (n - 1)

    return (channel(r_off, 3), channel(g_off, 5), channel(b_off, 7))

def _edge_alpha(w, total, k):
    '''Disparity-filter significance of one edge, rounded to four decimals.

    w     -- weight of the edge
    total -- sum of the absolute weights of the node's k edges
    k     -- number of edges of the node (callers only pass k > 1)

    With p = |w| / total this evaluates (1 - p)**(k - 1), the closed form of
    1 - (k - 1) * integral_0^p (1 - x)**(k - 2) dx.
    When total is 0 the share p is undefined; the edge is then reported as not
    significant (alpha = 1.0) instead of dividing by zero.
    '''
    p = float(np.absolute(w)) / total if total else 0.0
    return float('%.4f' % ((1.0 - p) ** (k - 1)))


def disparity_filter(G, weight='weight'):
    '''Score every edge of G with its disparity-filter significance (alpha).

    M. A. Serrano et al. (2009) Extracting the Multiscale backbone of complex
    weighted networks. PNAS, 106:16, pp. 6483-6488.

    G      -- networkx Graph or DiGraph whose edges carry the ``weight`` attribute
    weight -- name of the edge attribute holding the weight

    Returns a new graph of the same directedness that contains only scored edges:

    * directed: each edge keeps its weight (stored as ``weight``) and gets
      ``alpha_out`` when its source has out-degree > 1 and/or ``alpha_in`` when
      its target has in-degree > 1. An edge that is the only out-edge of its
      source and the only in-edge of its target is kept with both set to 0.
    * undirected: each edge keeps its weight and gets ``alpha``, the smaller of
      the scores computed from its two endpoints (endpoints of degree 1 do not
      produce a score).

    Edges for which no endpoint produces a score, and nodes without any scored
    edge, are absent from the result.
    '''
    if nx.is_directed(G):  # directed case
        N = nx.DiGraph()
        for u in G:
            k_out = G.out_degree(u)
            k_in = G.in_degree(u)

            if k_out > 1:
                sum_w_out = sum(np.absolute(G[u][v][weight]) for v in G.successors(u))
                for v in G.successors(u):
                    w = G[u][v][weight]
                    N.add_edge(u, v, weight=w, alpha_out=_edge_alpha(w, sum_w_out, k_out))

            elif k_out == 1:
                # successors() may return an iterator, so take its first item
                # with next() instead of subscripting.
                v = next(iter(G.successors(u)))
                if G.in_degree(v) == 1:
                    # Keep the connection: it is the only way to maintain the
                    # connectivity of the network. Nothing to do on the k_in
                    # side, since the link is already built from the tail.
                    N.add_edge(u, v, weight=G[u][v][weight], alpha_out=0., alpha_in=0.)

            if k_in > 1:
                sum_w_in = sum(np.absolute(G[v][u][weight]) for v in G.predecessors(u))
                for v in G.predecessors(u):
                    w = G[v][u][weight]
                    N.add_edge(v, u, weight=w, alpha_in=_edge_alpha(w, sum_w_in, k_in))
        return N

    else:  # undirected case
        B = nx.Graph()
        for u in G:
            k = len(G[u])
            if k > 1:
                sum_w = sum(np.absolute(G[u][v][weight]) for v in G[u])
                for v in G[u]:
                    w = G[u][v][weight]
                    alpha_ij = _edge_alpha(w, sum_w, k)
                    # The edge is visited from both endpoints. Keep the most
                    # significant score so the result does not depend on the
                    # order in which nodes are iterated.
                    if B.has_edge(u, v):
                        alpha_ij = min(alpha_ij, B[u][v]['alpha'])
                    B.add_edge(u, v, weight=w, alpha=alpha_ij)
        return B

def disparity_filter_alpha_cut(G,weight='weight',alpha_t=0.4, cut_mode='or'):
    '''Keep only the edges of G whose significance score is below ``alpha_t``.

    M. A. Serrano et al. (2009) Extracting the Multiscale backbone of complex
    weighted networks. PNAS, 106:16, pp. 6483-6488.

    G        -- graph whose edges carry ``alpha`` (undirected) or
                ``alpha_in`` / ``alpha_out`` (directed) attributes
    weight   -- name of the edge attribute copied to ``weight`` in the result
    alpha_t  -- threshold; an edge passes when its score is strictly below it
    cut_mode -- directed graphs only: 'or' keeps an edge when either score
                passes, 'and' when both pass; any other value keeps nothing

    Returns a new DiGraph (directed input) or Graph (undirected input) holding
    the surviving edges with their weight only.
    '''
    directed = nx.is_directed(G)
    backbone = nx.DiGraph() if directed else nx.Graph()

    for src, dst, attrs in G.edges(data=True):
        # A missing score defaults to 1, which never passes the cut.
        if directed:
            a_in = attrs.get('alpha_in', 1)
            a_out = attrs.get('alpha_out', 1)
            if cut_mode == 'or':
                keep = a_in < alpha_t or a_out < alpha_t
            elif cut_mode == 'and':
                keep = a_in < alpha_t and a_out < alpha_t
            else:
                keep = False
        else:
            keep = attrs.get('alpha', 1) < alpha_t

        if keep:
            backbone.add_edge(src, dst, weight=attrs[weight])

    return backbone




In [None]:
## read the connection list
connection_list_path = "twitter_data_collection/data/{}_{}/{}_{}.pkl".format(campaign,
                                                                             ea_type,
                                                                             connection_type,
                                                                             str(year))
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open(connection_list_path, 'rb') as connection_file:
    connection_list = pickle.load(connection_file)
print("{} list read!".format(connection_type))

## read the connection graph: the cached filtered graph if present, else the full edge list
G = None
G2 = None
already_filtered = False
filtered_graph_file_path = 'results/{}_{}/{}/{}/filtered_graph_edge_list_{}.gpickle'.format(campaign,
                                                                                            ea_type,
                                                                                            method_type,
                                                                                            filter_type,
                                                                                            str(year))
if os.path.exists(filtered_graph_file_path):
    # A .gpickle file is a plain pickle of the graph. It is read with pickle
    # directly because nx.read_gpickle no longer exists in NetworkX 3.x.
    with open(filtered_graph_file_path, 'rb') as graph_file:
        G = pickle.load(graph_file)
    already_filtered = True
    print('Filtered G read!')
else:
    complete_graph_file_path = 'twitter_data_collection/data/{}_{}/graph_edges/{}/{}_{}.txt'.format(campaign,
                                                                                                    ea_type,
                                                                                                    method_type,
                                                                                                    connection_type,
                                                                                                    str(year))
    G = nx.read_weighted_edgelist(complete_graph_file_path)
    print("Initial graph G read!")

print("# of nodes in G:", G.number_of_nodes())
print("# of edges in G:", G.number_of_edges())

## Add the nodes not having any common followers to the graph.
# NOTE(review): assumes the keys of connection_list have the same type as the
# node labels of G (read_weighted_edgelist produces str labels) -- TODO confirm.
diff_users = list(set(connection_list.keys()).difference(set(G.nodes)))
print("# of diff users:", len(diff_users))
G.add_nodes_from(diff_users)

print("# of nodes in G after addition:", G.number_of_nodes())
print("# of edges in G after addition:", G.number_of_edges())

## Apply filtering to G if not already filtered
if not already_filtered:
    if filter_type == 'percentile':
        # Drop every edge whose weight is at or below the 80th percentile.
        all_edge_weights = [data['weight'] for _, _, data in G.edges(data=True)]
        threshold = np.percentile(all_edge_weights, 80)
        edges_to_be_removed = [(u, v) for u, v, data in G.edges(data=True) if data['weight'] <= threshold]
        print("all_edge_weights:", len(all_edge_weights))
        print("edges_to_be_removed:", len(edges_to_be_removed))

        ## Remove those edges from the graph
        G.remove_edges_from(edges_to_be_removed)
        G2 = G
        print(len(edges_to_be_removed), ' removed from G!')

    elif filter_type == 'disparity':
        # Keep positive-weight edges whose disparity significance is below alpha.
        alpha = 0.05
        G = disparity_filter(G)
        G2 = nx.Graph([(u, v, d) for u, v, d in G.edges(data=True) if d['alpha'] < alpha and d['weight'] > 0])

    else:
        # Fail here with a clear message instead of later on G2 being None.
        raise ValueError("Unknown filter_type: {!r} (expected 'percentile' or 'disparity')".format(filter_type))

    print("# of nodes in G2 after filtering:", G2.number_of_nodes())
    print("# of edges in G2 after filtering:", G2.number_of_edges())

    ## Add the nodes not having common followers to the graph.
    diff_users = list(set(connection_list.keys()).difference(set(G2.nodes)))
    print("# of diff users:", len(diff_users))
    G2.add_nodes_from(diff_users)

    print("# of nodes in G2 after addition:", G2.number_of_nodes())
    print("# of edges in G2 after addition:", G2.number_of_edges())

    ## save the filtered network (create the results directory on first use).
    os.makedirs(os.path.dirname(filtered_graph_file_path), exist_ok=True)
    with open(filtered_graph_file_path, 'wb') as graph_file:
        pickle.dump(G2, graph_file, protocol=pickle.HIGHEST_PROTOCOL)

else:
    G2 = G



In [None]:
## detect communities. Check if it is already detected first.
resolution_param = 1
# Communities need more than this many nodes to keep their own label;
# all smaller ones are merged into a single catch-all group.
min_community_size = 200
community_results = None
community_file = 'results/{}_{}/{}/{}/communities_{}_res_{}.pkl'.format(campaign,
                                                                        ea_type,
                                                                        method_type,
                                                                        filter_type,
                                                                        str(year),
                                                                        resolution_param)
if os.path.exists(community_file):
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(community_file, 'rb') as results_file:
        community_results = pickle.load(results_file)
    communities = community_results['original_com_memberships']
    communities_new = community_results['assigned_com_memberships']
    pos = community_results['layout_pos']
    print('Community file read!')
else:
    communities = community.community_louvain.best_partition(G2, resolution=resolution_param)

    # Group the nodes by the community they were assigned to.
    clusters = {}
    for node, com_id in communities.items():
        clusters.setdefault(com_id, []).append(node)

    ## sort clusters by number of nodes: list of (community id, size), largest first
    clsuters_by_num_nodes = {com_id: len(members) for com_id, members in clusters.items()}
    sorted_clsuters_by_num_nodes = sorted(clsuters_by_num_nodes.items(), key=itemgetter(1), reverse=True)
    print(sorted_clsuters_by_num_nodes)

    # Select on the community size (second tuple item), not on the community id.
    comms_to_be_kept = [com_id for com_id, num_nodes in sorted_clsuters_by_num_nodes
                        if num_nodes > min_community_size]

    # Kept communities are renumbered 0..n-1 by decreasing size; every other
    # node falls into the catch-all group n.
    new_label_of = {com_id: label for label, com_id in enumerate(comms_to_be_kept)}
    catch_all_label = len(comms_to_be_kept)
    communities_new = {node: new_label_of.get(com_id, catch_all_label)
                       for node, com_id in communities.items()}

    print(communities_new)
    print('Communities detected!')
    

In [None]:
## Set node and edge communities, and set community node colors and set layout if not already set.
set_node_community(G2, communities_new)
set_edge_community(G2)

node_color = [get_color(G2.nodes[v]['community']) for v in G2.nodes]

# Split the edges into those between members of the same community (internal,
# community > 0) and inter-community edges (external, community == 0).
external = [(v, w) for v, w in G2.edges if G2.edges[v, w]['community'] == 0]
internal = [(v, w) for v, w in G2.edges if G2.edges[v, w]['community'] > 0]
internal_color = ['black' for e in internal]
print('Node and edge communities set!')

# community_results is None when no cached result file was loaded, i.e. the
# communities were just computed and no layout exists yet.
if community_results is None:
    # Set the positions of the nodes based on the specific layout.
    pos = nx.spring_layout(G2)
    print('Postions are set for G2!')
    # Save the detected community results
    result = {}
    result['original_com_memberships'] = communities
    result['assigned_com_memberships'] = communities_new
    result['layout_pos'] = pos
    os.makedirs(os.path.dirname(community_file), exist_ok=True)
    # The with-block closes the file so the pickle is fully flushed to disk.
    with open(community_file, 'wb') as results_file:
        pickle.dump(result, results_file)
    print('Community results saved!')


In [None]:
# visualize the communities
colors = ['orange', 'blue', 'red', 'green', 'yellow', 'grey']
#colors = ['blue', 'orange', 'red', 'grey', 'green', 'yellow']
#print([G2.nodes[v]['community'] for v in G2.nodes])
# Node communities start at 1, hence the -1. The modulo reuses colours when
# there are more groups than palette entries instead of raising IndexError.
node_color_2 = [colors[(G2.nodes[v]['community'] - 1) % len(colors)] for v in G2.nodes]

plt.rcParams.update({'figure.figsize': (15, 10)})
'''
nx.draw_networkx(
        G2,
        pos=pos,
        node_size=0,
        edgelist=external,
        edge_color="silver",
        with_labels=False,
        alpha=0.4)

nx.draw_networkx(
        G2,
        pos=pos,
        node_size=20,
        with_labels=False,
        alpha=0.4,
        node_color=node_color_2,
        edgelist=internal,
        edge_color=internal_color)
'''
# Draw the nodes only. draw_networkx_nodes never draws labels and has no
# with_labels parameter (that option belongs to draw_networkx), so it is not passed.
nx.draw_networkx_nodes(
        G2,
        pos=pos,
        node_size=20,
        alpha=0.4,
        node_color=node_color_2)

print(G2.number_of_nodes())
print(G2.number_of_edges())
