In [1]:
import networkx as nx
import math
import numpy as np
import matplotlib as mat
import matplotlib.pyplot as plt 
import random
%matplotlib inline

In [2]:
# plot settings: apply all matplotlib defaults for this notebook in one update
almost_black = '#262626'
plt.rcParams.update({
    'text.usetex': False,              # render text with matplotlib, not LaTeX
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'axes.edgecolor': almost_black,    # soften axes and text from pure black
    'text.color': almost_black,
    'axes.linewidth': 0.5,
    'axes.labelsize': 12,
})

Implement an information cascade model, in which the probability of node i activating node j is proportional to the weight of the edge from i to j.

In [3]:
def information_cascade(G,t_tot,init,rng=None):
    """Simulate a weighted information cascade on ``G`` for up to ``t_tot`` steps.

    Every node activated at step ``t`` gets exactly one chance, during that
    step, to activate each of its not-yet-activated neighbours ``m``. The
    attempt succeeds when ``weight(n, m) > u * max_weight`` with ``u`` drawn
    uniformly from [0, 1), i.e. with probability ``weight / max_weight`` for
    positive weights, so the heaviest edge in the graph always fires.

    Parameters
    ----------
    G : graph-like
        Must provide ``edges(data=True)``, ``neighbors(n)`` and
        ``G[n][m]['weight']`` (e.g. a networkx ``DiGraph``).
    t_tot : int or float
        Number of time steps to simulate; nothing spreads when ``t_tot <= 0``.
    init : iterable
        Seed nodes, all activated at time 0 (duplicates are collapsed).
    rng : object with a ``uniform(a, b)`` method, optional
        Source of randomness, e.g. ``random.Random(seed)`` for reproducible
        runs. Defaults to the global ``random`` module.

    Returns
    -------
    dict
        Maps every activated node to the step at which it was activated.

    Raises
    ------
    KeyError
        If an edge has no ``'weight'`` attribute.
    """
    # Default to the module-level generator so existing three-argument calls
    # draw from exactly the same random stream as before.
    if rng is None:
        rng = random

    # The largest weight normalises the activation probabilities. default=0
    # keeps a graph without edges from failing on max() of an empty sequence.
    max_weight = max((data['weight'] for _, _, data in G.edges(data=True)), default=0)

    activation_times = {}
    for seed in init:
        activation_times[seed] = 0

    t = 0
    # Nodes activated in the previous step, in activation order.
    frontier = list(activation_times)

    # An empty frontier can never activate anything again, so stop early
    # instead of idling through the remaining steps.
    while t < t_tot and frontier:
        newly_activated = []
        for n in frontier:
            for m in G.neighbors(n):
                if m not in activation_times:
                    p = G[n][m]['weight']
                    if p > rng.uniform(0, 1) * max_weight:
                        activation_times[m] = t + 1
                        newly_activated.append(m)
        frontier = newly_activated
        t += 1

    return activation_times
    

# Selecting the Initial activation nodes

In [33]:
import pandas as pd

In [34]:
nodes = pd.read_csv("../nodes_with_metadata.csv")

In [35]:
nodes.head()

Unnamed: 0,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality,community,polygon
0,1417929933800751104,,,1,,,,,0,1,1,0.0,Liberal,1
1,1052290795942731777,,,1,,,,,0,1,1,0.0,Liberal,1
2,1247907027650838528,,,1,,,,,0,1,1,0.0,Liberal,1
3,1103476860187369472,,,1,,,,,0,1,1,0.0,Liberal,1
4,998590102573023232,,,1,,,,,0,1,1,0.0,Liberal,1


In [36]:
# Keep only the nodes that have a value in "political affiliation"; rows where that
# column is missing are dropped.
retweeting_activists_mps = nodes.dropna(subset=["political affiliation"])

In [37]:
retweeting_activists_mps.head()

Unnamed: 0,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality,community,polygon
21,86384661,,,1,Yasir_Naqvi,Yasir,Naqvi,Liberal,132,0,132,0.230067,Liberal,6
25,2344419362,,,1,SalmaZahid15,Salma,Zahid,Liberal,75,3,78,0.169595,Liberal,6
61,170377354,,,1,AnthonyRota,Anthony,Rota,Liberal,5,0,5,0.012606,Liberal,6
79,2942312619,,,1,YRobillardPLC,Yves,Robillard,Liberal,1,0,1,0.0,Liberal,6
120,3242606862,,,1,MMcLeodNWT,Michael,McLeod,Liberal,19,1,20,0.040971,Liberal,6


In [38]:
retweeting_activists_mps.describe()

Unnamed: 0,Id,Label,timeset,communityid,indegree,outdegree,degree,eigencentrality,polygon
count,277.0,0.0,0.0,277.0,277.0,277.0,277.0,277.0,277.0
mean,1.669641e+17,,,2.833935,50.685921,0.584838,51.270758,0.131738,5.393502
std,4.086269e+17,,,3.159046,57.596034,1.214729,57.90618,0.177564,1.207027
min,3358671.0,,,1.0,0.0,0.0,1.0,0.0,3.0
25%,158095800.0,,,1.0,11.0,0.0,11.0,0.012606,6.0
50%,417389800.0,,,2.0,30.0,0.0,31.0,0.05988,6.0
75%,2874773000.0,,,3.0,68.0,1.0,69.0,0.170186,6.0
max,1.427259e+18,,,21.0,378.0,8.0,381.0,1.0,6.0


In [39]:
# Narrow the selection to nodes with at least one outgoing edge (outdegree > 0).
retweeting_activists_mps = retweeting_activists_mps[retweeting_activists_mps["outdegree"] > 0]

In [40]:
retweeting_activists_mps.head()

Unnamed: 0,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality,community,polygon
25,2344419362,,,1,SalmaZahid15,Salma,Zahid,Liberal,75,3,78,0.169595,Liberal,6
120,3242606862,,,1,MMcLeodNWT,Michael,McLeod,Liberal,19,1,20,0.040971,Liberal,6
152,360677740,,,1,lisahepfner2021,Lisa,Hepfner,Liberal,72,1,73,0.15758,Liberal,6
364,25813888,,,1,,,,Activist Organization,47,1,48,0.034668,Liberal,3
519,2322580746,,,1,jimcarr_wpg,Jim,Carr,Liberal,7,1,8,0.012606,Liberal,6


In [41]:
retweeting_activists_mps.describe()

Unnamed: 0,Id,Label,timeset,communityid,indegree,outdegree,degree,eigencentrality,polygon
count,81.0,0.0,0.0,81.0,81.0,81.0,81.0,81.0,81.0
mean,1.940011e+17,,,1.839506,72.148148,2.0,74.148148,0.228402,5.740741
std,4.506026e+17,,,1.156036,67.549817,1.491643,67.799541,0.219144,0.848201
min,16014400.0,,,1.0,0.0,1.0,1.0,0.0,3.0
25%,234550900.0,,,1.0,22.0,1.0,24.0,0.05988,6.0
50%,488052200.0,,,1.0,54.0,1.0,55.0,0.162127,6.0
75%,2891741000.0,,,3.0,97.0,3.0,102.0,0.330466,6.0
max,1.427259e+18,,,5.0,378.0,8.0,381.0,1.0,6.0


In [42]:
len(retweeting_activists_mps["outdegree"])

81

## Selecting the targets from the edges

In [43]:
# Load the edge list; explicit column names are supplied via `names`, so the first
# line of the file is read as data rather than as a header.
edges = pd.read_csv("../Null Model Comparison/largest_component_networkx_format.csv", names=["Source", "Target", "Weight"])

In [44]:
edges.head()

Unnamed: 0,Source,Target,Weight
0,1385933370090209280,158095776,"{""weight"":1}"
1,1416535505454338050,158095776,"{""weight"":8}"
2,1416535505454338050,1604931252,"{""weight"":1}"
3,544695802,158095776,"{""weight"":1}"
4,716257068538327040,158095776,"{""weight"":1}"


In [45]:
# Inner-join edges to the filtered nodes on Source == Id: only edges whose Source is
# one of the selected nodes survive, each carrying that node's metadata columns.
relevant_paths = pd.merge(edges, retweeting_activists_mps, left_on='Source', right_on='Id')

In [46]:
relevant_paths.head()

Unnamed: 0,Source,Target,Weight,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality,community,polygon
0,150270263,18681111,"{""weight"":1}",150270263,,,1,,,,Activist Organization,30,1,31,0.037561,Liberal,3
1,29545977,59686058,"{""weight"":1}",29545977,,,2,,,,Activist Organization,7,1,8,0.006303,NDP,3
2,25813888,150270263,"{""weight"":1}",25813888,,,1,,,,Activist Organization,47,1,48,0.034668,Liberal,3
3,577727470,119925381,"{""weight"":1}",577727470,,,2,,,,Activist Organization,45,1,46,0.031516,NDP,3
4,294660973,294660973,"{""weight"":1}",294660973,,,5,,,,Activist Organization,63,1,64,0.014769,LGBTQ2S+,3


In [47]:
# The distinct Target ids of those edges become the candidate activation (seed) nodes.
activation_nodes = relevant_paths['Target'].unique()

In [48]:
activation_nodes

array([           18681111,            59686058,           150270263,
                 119925381,           294660973,            14079041,
                  17969963,          2715275551,           377588094,
       1342125115383939073,           273262205,            24990450,
                3025416359, 1425866189780160514,          2530008414,
                2800741820,            16014404, 1063494232126689280,
                 408072407,           234550882,          3402128080,
                 803381983,           261772246,           417389780,
       1170770038208565248,          1707636642, 1143229947932229632,
                2254171724,          2555308646,           739149720,
        989311745100566529,           268832287,  883774859452579840,
                  15810950,           129395750,  791282631006621696,
                 256552850, 1086084557009575936,            36133644,
                  34606493,           564207331,           414218319,
        943174774154

In [49]:
len(activation_nodes)

91

In [50]:
df = pd.DataFrame(data=activation_nodes, columns=["id"])


In [51]:
# Attach node metadata to each activation node id; the inner join keeps only ids
# that also appear in the `Id` column of `nodes`.
activation_nodes_profile_df = pd.merge(df, nodes, how="inner", left_on="id", right_on="Id")
activation_nodes_profile_df

Unnamed: 0,id,Id,Label,timeset,communityid,twittername,first name,last name,political affiliation,indegree,outdegree,degree,eigencentrality,community,polygon
0,18681111,18681111,,,2,,,,Activist Organization,46,0,46,0.025898,NDP,3
1,59686058,59686058,,,2,,,,Activist Organization,11,0,11,0.021728,NDP,3
2,150270263,150270263,,,1,,,,Activist Organization,30,1,31,0.037561,Liberal,3
3,119925381,119925381,,,5,,,,Activist Organization,108,0,108,0.080274,LGBTQ2S+,3
4,294660973,294660973,,,5,,,,Activist Organization,63,1,64,0.014769,LGBTQ2S+,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,165812196,165812196,,,1,stevenmackinnon,Steven,MacKinnon,Liberal,5,0,5,0.010864,Liberal,6
87,175259033,175259033,,,2,taylorbachrach,Taylor,Bachrach,NDP,60,0,60,0.463605,NDP,6
88,215632349,215632349,,,2,CharlieAngusNDP,Charlie,Angus,NDP,378,3,381,0.657356,NDP,6
89,341866567,341866567,,,2,LoriIdlout,Lori,Idlout,NDP,67,2,69,0.456031,NDP,6


In [53]:
activation_nodes_profile_df.to_csv("activation_nodes.csv")

Group the activation nodes by community so that we can randomly pick a subset of nodes from a single community to run our diffusion on.

In [55]:
# Group the activation nodes by their "community" label so seeds can later be drawn
# from a single community with get_group(...).
activation_community_groups = activation_nodes_profile_df.groupby("community")

In [59]:
activation_community_groups.groups

{'Bloc Québécois': [9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 26], 'Conservative': [6, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43], 'LGBTQ2S+': [3, 4], 'Liberal': [2, 18, 19, 23, 24, 25, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86], 'NDP': [0, 1, 5, 7, 8, 87, 88, 89, 90]}

# Preparing Edges

The edges in this network are going to be flipped so that information can travel down from a person to the people who retweeted them.

In [21]:
edges.head()

Unnamed: 0,Source,Target,Weight
0,1385933370090209280,158095776,"{""weight"":1}"
1,1416535505454338050,158095776,"{""weight"":8}"
2,1416535505454338050,1604931252,"{""weight"":1}"
3,544695802,158095776,"{""weight"":1}"
4,716257068538327040,158095776,"{""weight"":1}"


In [22]:
# Put Target before Source. Only the column order changes (names and values are
# untouched); the "flip" takes effect once the columns are read back positionally.
edges = edges[['Target', 'Source', 'Weight']]

In [23]:
edges 


Unnamed: 0,Target,Source,Weight
0,158095776,1385933370090209280,"{""weight"":1}"
1,158095776,1416535505454338050,"{""weight"":8}"
2,1604931252,1416535505454338050,"{""weight"":1}"
3,158095776,544695802,"{""weight"":1}"
4,158095776,716257068538327040,"{""weight"":1}"
...,...,...,...
13996,196717787,77596220,"{""weight"":1}"
13997,196717787,891058415195303939,"{""weight"":1}"
13998,196717787,1095768409449459712,"{""weight"":1}"
13999,196717787,881426857,"{""weight"":1}"


In [24]:
# Use Target as the index so that it is written out as the first CSV column.
edges = edges.set_index("Target")

In [25]:
# Persist the reordered edge list; with Target as the index each row is written as
# Target,Source,Weight.
# NOTE(review): to_csv also writes a header row by default, while the later
# nx.read_edgelist(..., nodetype=int) call converts the first two fields of every line
# to int — confirm the file loads as written, or pass header=False here.
edges.to_csv("Source_retweeted_by_target.csv")

# Running an information cascade 

In [60]:
# Build the directed cascade graph from the reordered edge file. Each line is parsed
# as `u,v,<edge data>` and becomes an edge u -> v with integer node ids; lines (or
# line tails) starting with '#' are ignored.
# The delimiter was previously written as ','"" — a stray empty string literal that
# only worked through implicit string concatenation; it is the plain comma now.
# NOTE(review): every line must start with two integer ids for nodetype=int to
# succeed — confirm the CSV contains no header row.
G = nx.read_edgelist('Source_retweeted_by_target.csv', comments='#',
                     create_using=nx.DiGraph(),
                     delimiter=',',
                     nodetype=int,
                     encoding='utf-8')

Run an initial cascade, starting with the nodes where the path lengths are going to be more than 1. The inputs are:
- G: the graph with the edges reversed, so that information flows down to the retweeters
- Time: the number of cascade steps; keep this small initially (the first run below uses 5) to see what we get with little spread
- activation nodes: the nodes that another MP or activist organization has retweeted

### Liberal Experiment

In [75]:
len(activation_community_groups.get_group("Liberal")['id'].tolist())

49

In [76]:
# Draw 2 distinct seed ids at random from the Liberal group. No random seed is set in
# the visible cells, so the chosen starters will differ between kernel sessions.
liberal_starters = random.sample(activation_community_groups.get_group("Liberal")['id'].tolist(), 2)

In [77]:
liberal_starters

[22849568, 2891740872]

^ These are the starting nodes ^

In [82]:
t5 = information_cascade(G,5,liberal_starters)

In [85]:
# Repeat the 5-step cascade from the Liberal seed nodes. range(1, 1000) yields 999
# runs, labelled activation_time_exp_1 ... activation_time_exp_999.
experiments = []
for i in range(1,1000):
    t5 = information_cascade(G,5,liberal_starters)
    # One single-column frame per run: index = activated node, value = its activation step.
    experiment = pd.DataFrame.from_dict(t5, orient='index',
                       columns=[ "activation_time_exp_{}".format(i)])
    experiments.append(experiment)

In [86]:
# Combine every run into one frame: one row per node reached in at least one run
# (outer join on the node-id index, NaN where a run did not reach the node), one
# column per run. A single concat replaces ~1000 pairwise merges and no longer
# hard-codes the number of runs, so no run is silently dropped if that count changes.
# The list is reversed to keep the newest-run-first column order the pairwise merge
# produced; rows are sorted by node id.
experiments_df = pd.concat(experiments[::-1], axis=1).sort_index()

In [87]:
# Average each node's activation time over the runs that reached it (NaN entries are
# skipped by mean()), then floor to a whole step. Only the per-run columns are
# averaged, so re-running this cell does not fold an already computed
# 'average_activation_time' column back into the mean.
run_columns = experiments_df.filter(like='activation_time_exp_')
experiments_df['average_activation_time'] = np.floor(run_columns.mean(axis=1))

In [89]:
experiments_df.describe()

Unnamed: 0,activation_time_exp_999,activation_time_exp_998,activation_time_exp_997,activation_time_exp_996,activation_time_exp_995,activation_time_exp_994,activation_time_exp_993,activation_time_exp_992,activation_time_exp_991,activation_time_exp_990,...,activation_time_exp_9,activation_time_exp_8,activation_time_exp_7,activation_time_exp_6,activation_time_exp_5,activation_time_exp_4,activation_time_exp_3,activation_time_exp_2,activation_time_exp_1,average_activation_time
count,12.0,9.0,10.0,11.0,11.0,8.0,9.0,16.0,10.0,7.0,...,11.0,15.0,12.0,14.0,9.0,7.0,11.0,9.0,12.0,460.0
mean,0.833333,0.777778,0.8,0.818182,0.818182,0.875,0.777778,0.9375,0.8,0.714286,...,0.818182,0.866667,0.833333,0.857143,0.777778,0.714286,0.909091,0.777778,0.833333,1.258696
std,0.389249,0.440959,0.421637,0.40452,0.40452,0.64087,0.440959,0.442531,0.421637,0.48795,...,0.40452,0.351866,0.389249,0.363137,0.440959,0.48795,0.53936,0.440959,0.389249,0.467262
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,1.0,1.0,1.0,0.75,1.0,1.0,1.0,0.5,...,1.0,1.0,1.0,1.0,1.0,0.5,1.0,1.0,1.0,1.0
50%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0
max,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0


This shows that, across the 999 simulated cascades (the loop runs over `range(1, 1000)`), the largest floored average activation time of any reached node was 3 steps.

In [None]:
t6 = information_cascade(G,6,activation_nodes)

In [None]:
len(t5)

In [None]:
t6

In [None]:
t7 = information_cascade(G,7,activation_nodes)

In [None]:
t7

In [None]:
t8 = information_cascade(G,8,activation_nodes)

In [None]:
t8

In [None]:
len(t8)

In [None]:
len(t6)

In [None]:
activation_times = pd.DataFrame.from_dict(t6, orient='index',
                       columns=[ "activation_time"])

In [None]:
activation_times.to_csv("activation_times.csv")

In [None]:
t8_100 = information_cascade(G,8,activation_nodes)

In [None]:
t8_100


In [None]:
len(t8_100)

In [None]:
activation_times = pd.DataFrame.from_dict(t8, orient='index',
                       columns=[ "activation_time_exp_0"])

activation_times.head()

Run an 8-step information cascade 999 times (the loop below runs over `range(1, 1000)`) and collect every run so that all results can be placed into a common dataframe

In [None]:
# Repeat the 8-step cascade seeded with all activation nodes. range(1, 1000) yields
# 999 runs, labelled activation_time_exp_1 ... activation_time_exp_999.
experiments = []
for i in range(1,1000):
    t8 = information_cascade(G,8,activation_nodes)
    # One single-column frame per run: index = activated node, value = its activation step.
    experiment = pd.DataFrame.from_dict(t8, orient='index',
                       columns=[ "activation_time_exp_{}".format(i)])
    experiments.append(experiment)
    
    

In [None]:
experiments


Perform outer joins to form a dataframe of all nodes that received information

In [None]:
# Combine every run into one frame: one row per node reached in at least one run
# (outer join on the node-id index, NaN where a run did not reach the node), one
# column per run. A single concat replaces ~1000 pairwise merges and no longer
# hard-codes the number of runs, so no run is silently dropped if that count changes.
# The list is reversed to keep the newest-run-first column order the pairwise merge
# produced; rows are sorted by node id.
experiments_df = pd.concat(experiments[::-1], axis=1).sort_index()

In [None]:
experiments_df


In [None]:
# Average each node's activation time over the runs that reached it (mean() skips
# NaN). Restricting the mean to the per-run columns keeps this cell idempotent: a
# re-run no longer averages in the existing 'average_activation_time' column.
experiments_df['average_activation_time'] = experiments_df.filter(like='activation_time_exp_').mean(axis=1)

In [None]:
experiments_df

Floor it so we have consistent integers

In [None]:
# Round every average down to a whole step (values stay floats) in one vectorised call.
experiments_df['average_activation_time'] = np.floor(experiments_df['average_activation_time'])

In [None]:
experiments_df


Persist the Average Activation time

In [None]:
experiments_df['average_activation_time'].to_csv("time_8_Information_Cascade.csv")