In [1]:
import os
import random

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

In [2]:
def visualize_RWC(scores, hashtag=None):
    """Plot a normalized histogram of RWC scores and save it as a PNG.

    The histogram uses 20 bins and per-sample weights of 1/len(scores), so the
    bar heights sum to 1. The mean and standard deviation of the scores are
    written onto the axes, rounded to two decimals.

    Parameters
    ----------
    scores : sequence of numbers
        The scores to plot.
    hashtag : str, optional
        Label used for the figure heading and the output file name. When
        omitted, the notebook-level variable ``ht`` is used.

    Side effects
    ------------
    Creates the ``rwc_plots`` directory if it does not exist and writes
    ``rwc_plots/<hashtag>.png`` at 200 dpi.
    """
    if hashtag is None:
        # Backward-compatible default: read the hashtag set in the notebook.
        hashtag = ht

    # Equal weight per sample so the histogram shows fractions, not counts
    weights = np.ones_like(scores)/float(len(scores))

    #Plotting
    fig, ax = plt.subplots(1,1)

    ax.hist(scores, weights = weights, bins = 20, color = "coral")
    ax.text(0.5, 1.05, "#" + hashtag, ha='center', va='center', transform=ax.transAxes)
    ax.text(0.85, 0.8, "μ = " + str(round(np.mean(scores), 2)), ha='center', va='center', transform=ax.transAxes)
    ax.text(0.85, 0.7, "σ = " + str(round(np.std(scores), 2)), ha='center', va='center', transform=ax.transAxes)

    #Save the figure
    # savefig does not create missing directories, so make sure the target exists
    os.makedirs("rwc_plots", exist_ok=True)
    fig.savefig("rwc_plots/" + hashtag + ".png", dpi=200)

In [3]:
def performRandomWalk(G, starting_node, li, ri):
    """Random-walk over G from starting_node until an influencer is picked.

    At every step one neighbor of the current node is drawn with
    ``random.choice``. The walk ends as soon as the drawn node is contained in
    ``li`` (checked first) or in ``ri``; otherwise the walk moves to the drawn
    node and repeats. The starting node itself is never tested for membership.

    Parameters
    ----------
    G : graph-like object exposing ``neighbors(node)``
    starting_node : node the walk starts from
    li : container of "left" influencer nodes
    ri : container of "right" influencer nodes

    Returns
    -------
    str
        "left" if the walk stopped on a node in ``li``, "right" if it stopped
        on a node in ``ri``.
    """
    current = starting_node

    while True:
        # Draw the next hop among the neighbors of the current position
        candidate = random.choice(list(G.neighbors(current)))

        if candidate in li:
            return "left"

        if candidate in ri:
            return "right"

        # Not an influencer: continue the walk from the drawn node
        current = candidate

In [4]:
def simulation(n_walks=1000):
    """Run a batch of random walks and return the resulting RWC score.

    For each walk a starting side ("left" or "right") is drawn at random, a
    starting node is drawn from that side's partition list, and
    ``performRandomWalk`` reports the side on which the walk ended. The walks
    are tallied by (starting side, ending side).

    With LL, LR, RL, RR denoting the tallies (first letter = start, second
    letter = end), the returned value is ``e1*e4 - e2*e3`` where

        e1 = LL / (LL + RL)    e2 = LR / (LR + RR)
        e3 = RL / (LL + RL)    e4 = RR / (LR + RR)

    Parameters
    ----------
    n_walks : int, optional
        Number of random walks to perform (default 1000, the value that was
        previously hard-coded).

    Returns
    -------
    float

    Raises
    ------
    ZeroDivisionError
        If no walk ended on the left side or none ended on the right side
        (including ``n_walks == 0``).

    Notes
    -----
    Reads the notebook-level names ``G``, ``left_partition_users``,
    ``right_partition_users``, ``left_influencers`` and ``right_influencers``.
    """
    # Tally of walks keyed by (starting side, ending side)
    tallies = {
        ("left", "left"): 0,
        ("left", "right"): 0,
        ("right", "right"): 0,
        ("right", "left"): 0,
    }

    for _ in range(n_walks):

        starting_side = random.choice(["left", "right"])

        if starting_side == "left":
            which_random_starting_node = random.choice(left_partition_users)
        else:
            which_random_starting_node = random.choice(right_partition_users)

        end_side = performRandomWalk(G, which_random_starting_node, left_influencers, right_influencers)

        outcome = (starting_side, end_side)
        if outcome in tallies:
            tallies[outcome] += 1
        else:
            # Unexpected side label; the walk is reported and left uncounted
            print("Oops!")

    left_left = tallies[("left", "left")]
    left_right = tallies[("left", "right")]
    right_right = tallies[("right", "right")]
    right_left = tallies[("right", "left")]

    # Denominators: all walks that ended left / all walks that ended right
    ended_left = left_left + right_left
    ended_right = left_right + right_right

    e1 = (left_left)/ended_left
    e2 = (left_right)/ended_right
    e3 = (right_left)/ended_left
    e4 = (right_right)/ended_right

    return(e1*e4 - e2*e3)

In [5]:
# The only variable of this script is the hashtag you want to study. Then you can run the whole notebook.
# The value is used to build the input file paths and to label and name the saved plot.
ht = "kokoomus"

In [6]:
# Load the retweet network for the chosen hashtag from its GML file.
G = nx.read_gml(ht + "/" + ht +"_retweet_network_giant.gml")

# nx.info() was removed in NetworkX 3.0, so the same summary is printed
# explicitly; this works on both old and new NetworkX releases.
n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()
print("Name: " + str(G.name))
print("Type: " + type(G).__name__)
print("Number of nodes: " + str(n_nodes))
print("Number of edges: " + str(n_edges))
if n_nodes > 0:
    # Mean of the per-node degrees; skipped for an empty graph to avoid dividing by zero
    print("Average degree: {:8.4f}".format(sum(deg for _node, deg in G.degree()) / n_nodes))

Name: 
Type: Graph
Number of nodes: 3044
Number of edges: 7975
Average degree:   5.2398


In [7]:
# Empty containers for the members of the two communities ("left" and "right")
left_partition_users = []
right_partition_users = []

In [8]:
# Read the members of community 1 (the "left" side), one entry per line.
with open(ht + "/" + ht + "_community1.txt") as f1:
    lines = f1.readlines()

# Refill the existing list in place instead of appending, so re-running this
# cell does not add every user a second time. Blank lines are skipped so an
# empty string never ends up as a candidate starting node.
left_partition_users[:] = [line.strip() for line in lines if line.strip()]

In [9]:
# Read the members of community 2 (the "right" side), one entry per line.
with open(ht + "/" + ht + "_community2.txt") as f2:
    lines = f2.readlines()

# Refill the existing list in place instead of appending, so re-running this
# cell does not add every user a second time. Blank lines are skipped so an
# empty string never ends up as a candidate starting node.
right_partition_users[:] = [line.strip() for line in lines if line.strip()]

In [10]:
# Report how many members each community list holds
for size_label, community_members in (
    ("Size of community 1 or left: ", left_partition_users),
    ("Size of community 2 or right: ", right_partition_users),
):
    print(size_label, len(community_members))

Size of community 1 or left:  1844
Size of community 2 or right:  1200


In [11]:
# Map every node of G to its degree
dict_degree = {vertex: G.degree(vertex) for vertex in G.nodes()}

# (node, degree) pairs ordered from highest to lowest degree; the sort is
# stable, so nodes with equal degree keep their order from dict_degree
sorted_dict_degree = sorted(dict_degree.items(), key=lambda pair: -pair[1])

In [12]:
# Maximum number of influencers collected per side
k = 8

# Influencer lists for the two sides, initially empty
left_influencers = []
right_influencers = []

In [13]:
# Pick up to k influencers per side by walking the nodes in descending degree
# order. A node found in left_partition_users goes to the left side; every
# other node is treated as belonging to the right side.

# Set lookup avoids rescanning the whole member list for every node
left_member_set = set(left_partition_users)

# Empty the lists in place so re-running this cell does not append the same
# influencers again on top of the previous run
left_influencers.clear()
right_influencers.clear()
count_left, count_right = 0, 0

for node_id, _node_degree in sorted_dict_degree:

    if node_id in left_member_set:

        if (count_left < k):
            left_influencers.append(node_id)
            count_left += 1

    elif (count_right < k):
        right_influencers.append(node_id)
        count_right += 1

    # Both quotas are filled; the remaining lower-degree nodes cannot change the result
    if (count_left >= k) and (count_right >= k):
        break

In [14]:
# Number of independent simulation() runs whose scores are averaged
N_SIMULATIONS = 1000

# Fixed seed so that re-running the notebook reproduces the same scores;
# every random draw in the simulation goes through the `random` module
RWC_SEED = 42
random.seed(RWC_SEED)

rwc_scores = [simulation() for _ in range(N_SIMULATIONS)]

print("Polarization score: ", np.mean(rwc_scores))

Polarization score:  0.765224320958409


In [15]:
# Plot the distribution of the simulated scores and save it under rwc_plots/
visualize_RWC(rwc_scores)