In [None]:
import networkx as nx
import pickle
from random import choices, sample
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Read the pickled collections of bot IDs and banned IDs.
# NOTE: pickle.load can execute arbitrary code, so these files must come
# from a trusted source.

with open('relevant_bots.p', 'rb') as bots_file:
    bots = pickle.load(bots_file)

with open('relevant_bans.p', 'rb') as bans_file:
    bans = pickle.load(bans_file)

In [None]:
# Combine the bot IDs and the banned IDs into one collection with `|`
# (assumes both pickles hold sets -- TODO confirm).
botlist = bots | bans

In [None]:
# Convert every ID to a string (presumably so the IDs compare equal to the
# node labels of the graphs they are intersected with later -- verify).
botlist = {str(account_id) for account_id in botlist}

In [None]:
# Load the retweet network and the partitions

In [None]:
# Load the retweet network used for the baseline score.
G = nx.read_gml("final_test.gml")
# nx.info() was removed in NetworkX 3.0, so print the basic summary
# (graph type, node count, edge count) without relying on it.
print(f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

In [None]:
# User IDs of the two partitions; populated from com1.txt / com2.txt below.
left_partition_users, right_partition_users = [], []

In [None]:
# Read one user ID per line. The list is assigned rather than appended to, so
# re-running this cell cannot duplicate users (duplicates would inflate the
# set of cut edges that BBC_score collects).
with open("com1.txt") as f1:
    left_partition_users = [line.strip() for line in f1]

In [None]:
# Read one user ID per line. The list is assigned rather than appended to, so
# re-running this cell cannot duplicate users (duplicates would inflate the
# set of cut edges that BBC_score collects).
with open("com2.txt") as f2:
    right_partition_users = [line.strip() for line in f2]

In [None]:
# Load the precomputed edge-betweenness values (a pickled mapping that
# BBC_score indexes by edge tuple).
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('ebdict.pickle', 'rb') as eb_file:
    dict_edgebetweenness = pickle.load(eb_file)

In [None]:
def BBC_score(graph, dict_edges, partition1, partition2, n_samples=10000):
    """Score how much the cut between two partitions differs from the whole graph.

    The edge-betweenness values of the edges that cross the cut between
    ``partition1`` and ``partition2`` are compared with the values of all
    edges: both collections are resampled with replacement, the KL divergence
    between the two samples is computed with ``scipy.stats.entropy`` and the
    score is ``1 - exp(-KL)``.

    The result is stochastic: it depends on the state of the global
    ``random`` generator, so repeated calls give a distribution of scores.

    Parameters
    ----------
    graph
        Object exposing ``has_edge(u, v)`` (e.g. a networkx graph).
    dict_edges : dict
        Maps ``(u, v)`` edge tuples to edge-betweenness values. An edge of
        the cut may be stored under either orientation.
    partition1, partition2 : sequence
        Node names on either side of the cut.
    n_samples : int, optional
        Number of draws taken from each collection (default 10000, the value
        that used to be hard-coded).

    Returns
    -------
    float
        ``1 - exp(-KL(all edges || cut edges))``.

    Raises
    ------
    ValueError
        If no edge of ``graph`` connects ``partition1`` to ``partition2``.
    KeyError
        If a cut edge has no entry in ``dict_edges`` in either orientation.
    """
    # Collect the edge-betweenness of every edge that crosses the cut.
    eb_list = []
    for name1 in partition1:
        for name2 in partition2:
            if not graph.has_edge(name1, name2):
                continue
            # The key may be stored as (u, v) or (v, u).
            if (name1, name2) in dict_edges:
                eb_list.append(dict_edges[(name1, name2)])
            else:
                eb_list.append(dict_edges[(name2, name1)])

    if not eb_list:
        # random.choices would otherwise fail with an opaque IndexError.
        raise ValueError("no edges cross the cut between the two partitions")

    # Resample both collections to equal length so that they can be compared
    # element-wise by stats.entropy.
    cut_dist = choices(eb_list, k=n_samples)
    all_dist = choices(list(dict_edges.values()), k=n_samples)

    kl_divergence = stats.entropy(all_dist, cut_dist)

    # np.exp instead of the truncated constant 2.71828.
    BCC = 1 - np.exp(-kl_divergence)

    return BCC

In [None]:
# Score the original network 1000 times; every call resamples, so the
# collected values form a distribution.
BBC_original_graph = [
    BBC_score(G, dict_edgebetweenness, left_partition_users, right_partition_users)
    for _ in range(1000)
]

In [None]:
# Histogram of the repeated scores computed for the original network.
plt.hist(BBC_original_graph, label = "Original network")
plt.legend()

### 1. Measuring bot-polarization

In [None]:
# Topic identifier; used below both as directory name and as file-name prefix.
ht = "ilmastonmuutos"

In [None]:
# Load the retweet network stored under the topic's directory.
G = nx.read_gml(ht + "/" + ht +"_retweet_network_giant.gml")
# nx.info() was removed in NetworkX 3.0, so print the basic summary
# (graph type, node count, edge count) without relying on it.
print(f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

In [None]:
# Count the IDs that are both in botlist and among the nodes of G.
print("The number of bots present in our converstation graph: ", len(botlist & set(G.nodes)))

In [None]:
# Bot IDs that are nodes of G; the removal loop samples from this set.
bots_in_graph = botlist & set(G.nodes)

Random node removal vs. removing the bots from the network

In [None]:
# Working copies of G. The removal loop below creates fresh copies at the
# start of every iteration, so these two are overwritten there.
G_rn = G.copy() # graph from which we remove random nodes
G_bn = G.copy() # graph from which we remove bot nodes

In [None]:
# CONSTANTS
N_sample = 500  # number of nodes removed per trial (random nodes and bot nodes alike)
i = 0  # iteration counter reported by the removal loop

In [None]:
# Scores collected after removing random nodes (RN) and bot nodes (BN).
RN, BN = [], []

In [None]:
# Compare the score after removing random nodes with the score after removing
# the same number of bot nodes.

# The community files are read here so that this cell does not rely on
# `community1_users` / `community2_users` being defined by a later cell: on a
# fresh kernel that is a NameError, and after `ht` is reassigned the lists
# would belong to a different topic than G.
# NOTE(review): assumes the files follow the "<ht>/<ht>_communityN.txt"
# convention used elsewhere in this notebook -- confirm they exist for this topic.
with open(ht + "/" + ht + "_community1.txt") as f1:
    community1_users = [line.strip() for line in f1]

with open(ht + "/" + ht + "_community2.txt") as f2:
    community2_users = [line.strip() for line in f2]

# sample() raises ValueError when asked for more items than are available, so
# cap the request at the number of bots. The same count is used for the random
# removal to keep both trials comparable.
n_remove = min(N_sample, len(bots_in_graph))
if n_remove < N_sample:
    print("Only", n_remove, "bots in the graph; removing", n_remove, "nodes per trial")

# Start from empty result lists so that re-running the cell does not pile new
# scores on top of those of a previous run.
RN, BN = [], []

for i in range(1, 11):

    G_rn = G.copy() # graph from which we remove random nodes
    G_bn = G.copy() # graph from which we remove bot nodes

    to_be_removed_RN = set(sample(list(G.nodes), n_remove))
    to_be_removed_BN = set(sample(list(bots_in_graph), n_remove))

    G_rn.remove_nodes_from(to_be_removed_RN)
    G_bn.remove_nodes_from(to_be_removed_BN)

    # Edge betweenness has to be recomputed because the graphs changed.
    dict_edgebetweenness_rn = nx.edge_betweenness_centrality(G_rn)
    dict_edgebetweenness_bn = nx.edge_betweenness_centrality(G_bn)

    # Drop the removed users from both communities.
    community1_users_rn = [user for user in community1_users if user not in to_be_removed_RN]
    community2_users_rn = [user for user in community2_users if user not in to_be_removed_RN]

    community1_users_bn = [user for user in community1_users if user not in to_be_removed_BN]
    community2_users_bn = [user for user in community2_users if user not in to_be_removed_BN]

    # BBC_score resamples on every call; collect 80 scores per removal.
    for _ in range(80):

        RN.append(BBC_score(G_rn, dict_edgebetweenness_rn, community1_users_rn, community2_users_rn))
        BN.append(BBC_score(G_bn, dict_edgebetweenness_bn, community1_users_bn, community2_users_bn))

    print("Iteration number: ", i)

In [None]:
# Overlay the score distributions of the two removal strategies and save the
# figure under the topic's name.
fig, ax = plt.subplots()

for scores, hist_kwargs in (
    (RN, {"label": "RN removed"}),
    (BN, {"label": "BN removed", "alpha": 0.8}),
):
    ax.hist(scores, **hist_kwargs)

ax.legend()

fig.savefig(ht + "_bcc.png", dpi=200)

In [None]:
# Mean and standard deviation of the scores: first for the random-node
# removal (RN), then for the bot-node removal (BN).
for scores in (RN, BN):
    print("Mean: ", np.mean(scores))
    print("Standard deviation: ", np.std(scores))

### 2. Computing the bot proportions and testing significance

H0: p1 = p2, where p1 is the proportion of bots in the first population (the "anti" bubble) and p2 is the proportion of bots in the second population (the "pro" bubble).

Let the significance level be 0.05

In [None]:
def compute_p_value(p_anti, p_pro, n_anti, n_pro, n_total_bots, two_sided=True):
    """Two-proportion z-test of H0: p_anti == p_pro with a pooled proportion.

    Parameters
    ----------
    p_anti, p_pro : float
        Observed proportions in the two groups.
    n_anti, n_pro : int
        Sizes of the two groups.
    n_total_bots : int
        Number of positives over both groups; used for the pooled
        proportion ``n_total_bots / (n_anti + n_pro)``.
    two_sided : bool, optional
        If True (default) return the two-sided p-value ``2 * sf(|z|)``, which
        matches a test of equality. If False return the single-tail
        probability ``sf(|z|)``, i.e. the value this function used to return.

    Returns
    -------
    float
        The p-value. When the pooled standard error is zero (pooled proportion
        of 0 or 1) the result is 1.0 if the proportions are equal and 0.0
        otherwise.

    Raises
    ------
    ZeroDivisionError
        If ``n_anti`` or ``n_pro`` is zero.
    """
    p_diff = p_anti - p_pro
    p_all = n_total_bots / (n_anti + n_pro)
    standard_error = np.sqrt(p_all * (1 - p_all) * ((1 / n_anti) + (1 / n_pro)))

    if standard_error == 0:
        # No variability under H0: dividing would only give nan/inf plus a
        # runtime warning.
        return 1.0 if p_diff == 0 else 0.0

    # The statistic is compared with the standard normal, so it is a z-score.
    z_statistic = p_diff / standard_error
    tail = stats.norm.sf(abs(z_statistic))

    # abs() discards the sign, so both tails have to be counted for H0: p1 = p2.
    return 2 * tail if two_sided else tail

In [None]:
# Switch the topic identifier; the community files read below are for this topic.
ht = "sote"

In [None]:
# Read the members of the two communities of the current topic, one user ID
# per line, with surrounding whitespace stripped.
community1_users, community2_users = [], []

with open(ht + "/" + ht + "_community1.txt") as f1:
    community1_users.extend(entry.strip() for entry in f1)

with open(ht + "/" + ht + "_community2.txt") as f2:
    community2_users.extend(entry.strip() for entry in f2)

In [None]:
# True: community 1 is the "anti" bubble and community 2 the "pro" bubble.
# False: the roles are swapped.
anti_one = False

In [None]:
# Map the two communities onto the anti / pro roles according to `anti_one`.
anti_users, pro_users = (
    (community1_users, community2_users)
    if anti_one
    else (community2_users, community1_users)
)

# Sizes are taken from the lists as read; the sets are used for intersections.
anti_bubble_size, pro_bubble_size = len(anti_users), len(pro_users)
anti_bubble, pro_bubble = set(anti_users), set(pro_users)

In [None]:
# Bots found in each bubble, and the share of each bubble they make up.
anti_bots = anti_bubble & botlist
pro_bots = pro_bubble & botlist

anti_bot_proportion = len(anti_bots) / anti_bubble_size
pro_bot_proportion = len(pro_bots) / pro_bubble_size
n_total_bots = len(pro_bots) + len(anti_bots)

In [None]:
# Report the bot percentage of each bubble and the p-value of the
# proportion test ("P-arvo" is the label printed for the p-value).
print("%-ANTIBOT: ", round(anti_bot_proportion, 4)*100)
print("%-PROBOT: ", round(pro_bot_proportion, 4)*100)

p_value = compute_p_value(
    anti_bot_proportion,
    pro_bot_proportion,
    anti_bubble_size,
    pro_bubble_size,
    n_total_bots,
)
print("P-arvo: ", p_value)