Sample some cascades to plot the network (for high- and low-quality messages)

In [1]:
## Group & sort by cascade size.
## Merge node uid with original network to get political affiliation
## Save file to .gml

In [1]:
import simsom.utils as utils
import matplotlib.pyplot as plt
import os
import glob
import json
from collections import defaultdict
import pandas as pd
import seaborn as sns

## Reshare cascade

In [2]:
import igraph as ig

In [3]:
# Read in reshare data
verbose_fpath = "results_manipulation/message_info.json.gz"
data = utils.read_json_compressed(verbose_fpath)
print(data.keys())

messages = pd.DataFrame.from_records(data['all_messages'])

dict_keys(['verbose', 'tracktimestep', 'save_message_info', 'output_cascades', 'mu', 'theta', 'quality', 'diversity', 'discriminative_pow', 'quality_timestep', 'exposure_timestep', 'age_timestep', 'all_messages', 'reshared_messages', 'feeds_message_ids', 'feeds_shares', 'feeds_ages'])


In [4]:
# Read in og graph data
network_path = "data/network_baseline.gml"
net = ig.Graph.Read_GML(network_path)

## Get reshare cascade by message id 

In [5]:
reshares = messages.explode("spread_via_agents")
reshares = reshares.drop_duplicates(subset=["id", "spread_via_agents"])

In [6]:
df = reshares
message_type = "bot"  # bot or human
# CASCADE_DIR = os.path.join(
#     "/N/slate/baotruon/marketplace/results", "cascade_net", folder
# )

# if not os.path.exists(CASCADE_DIR):
#     os.makedirs(CASCADE_DIR)

if message_type == "human":
    filtered = df[df["is_by_bot"] == 0]
    sizes = filtered.groupby(
        "id"
    ).spread_via_agents.count()  # return a Series where message id the index
elif message_type == "bot":
    filtered = df[df["is_by_bot"] == 1]
    sizes = filtered.groupby("id").spread_via_agents.count()

# Get the largest cascade
reshare_cascades = sizes.sort_values(ascending=False).reset_index()
largest = reshare_cascades.head(1)
print(f"Id of the message with the largest cascade (size={largest.loc[0, 'spread_via_agents']}): {largest.loc[0, 'id']} ")

message_id = largest.loc[0, 'id']

Id of the message with the largest cascade (size=109): 4724 


In [7]:
cascade_fpath = "results_manipulation/reshare.csv"
cascade = pd.read_csv(cascade_fpath, dtype={"source": str, "target": str})
cascade.head()

Unnamed: 0,message_id,timestep,source,target
0,2,1,106392,55935
1,1,1,84268,120848
2,1,1,84268,136140
3,1,1,84268,105229
4,3,1,114051,55783


In [8]:
import igraph


def get_reshare_cascade(message_id, cascade_inpath):
    """
    Get reshare cascade by message_id
    cascade_fpath (str): fpath of reshare  data

    """
    cascade = pd.read_csv(cascade_inpath, sep=",")
    cascade_data = cascade[cascade.message_id == message_id]
    # Get vertices
    nodes = list(cascade_data.agent1.unique()) + list(cascade_data.agent2.unique())
    # convert list of lists to list of tuples
    edges = [tuple(pair) for pair in cascade_data[["agent1", "agent2"]].values]

    graph = igraph.Graph(directed=True)
    graph.add_vertices(nodes)
    graph.add_edges(edges)

    # Set attributes of nodes from og graph data
    og_nodes = [i for i in net.vs if i["uid"] in nodes]
    graph.vs["party"] = [
        float(node["party"]) if node["party"] != "None" else 0 for node in og_nodes
    ]
    graph.vs["misinfo"] = [
        float(node["misinfo"]) if node["party"] != "None" else 100 for node in og_nodes
    ]
    graph.vs["bot"] = [int(node["bot"]) for node in og_nodes]
    return graph

In [9]:
cascade.head()

Unnamed: 0,message_id,timestep,source,target
0,2,1,106392,55935
1,1,1,84268,120848
2,1,1,84268,136140
3,1,1,84268,105229
4,3,1,114051,55783


In [10]:
reshare_nodes = df[df.id == message_id]["spread_via_agents"].values
cascade_data = cascade[
    (cascade.message_id == message_id)
    & (cascade.source.isin(reshare_nodes))
    & (cascade.target.isin(reshare_nodes))
]

In [11]:
reshare_nodes

array(['1f', '33657', '53672', '3d', '96498', '128507', '23417', '86753',
       '94462', '41625', '133880', '17707', '11991', '132120', '124546',
       '11715', '97951', '111855', '4862', '99106', '9o', '63279',
       '92806', '79127', '18144', '83156', '29196', '0c', '76804',
       '68255', '61547', '97144', '135404', '115523', '4o', '120273',
       '123631', '70328', '103635', '6616', '90829', '131781', '27382',
       '40840', '114629', '71578', '117659', '31102', '102360', '122771',
       '98468', '119995', '56153', '135311', '51043', '5w', '26474',
       '120058', '127041', '57945', '8s', '95945', '81316', '85762',
       '132711', '86974', '86101', '68602', '124188', '20801', '58703',
       '112341', '112937', '36385', '124055', '18393', '70083', '127773',
       '93628', '51998', '11424', '9751', '110517', '85117', '83815',
       '7943', '102644', '123511', '13860', '64895', '129218', '42481',
       '101480', '105108', '55922', '44355', '29905', '16200', '84125',
     

In [12]:
cascade[
    (cascade.message_id == message_id)]

Unnamed: 0,message_id,timestep,source,target
251456,4724,9,1f,102360
251462,4724,9,1f,119254
251464,4724,9,1f,129218
251476,4724,9,1f,28964
251483,4724,9,1f,21252
...,...,...,...,...
658534,4724,22,169,48921
659317,4724,22,127296,129100
659327,4724,22,127296,105988
659333,4724,22,127296,126600


In [13]:
reshare_nodes

array(['1f', '33657', '53672', '3d', '96498', '128507', '23417', '86753',
       '94462', '41625', '133880', '17707', '11991', '132120', '124546',
       '11715', '97951', '111855', '4862', '99106', '9o', '63279',
       '92806', '79127', '18144', '83156', '29196', '0c', '76804',
       '68255', '61547', '97144', '135404', '115523', '4o', '120273',
       '123631', '70328', '103635', '6616', '90829', '131781', '27382',
       '40840', '114629', '71578', '117659', '31102', '102360', '122771',
       '98468', '119995', '56153', '135311', '51043', '5w', '26474',
       '120058', '127041', '57945', '8s', '95945', '81316', '85762',
       '132711', '86974', '86101', '68602', '124188', '20801', '58703',
       '112341', '112937', '36385', '124055', '18393', '70083', '127773',
       '93628', '51998', '11424', '9751', '110517', '85117', '83815',
       '7943', '102644', '123511', '13860', '64895', '129218', '42481',
       '101480', '105108', '55922', '44355', '29905', '16200', '84125',
     

In [14]:
cascade_data

Unnamed: 0,message_id,timestep,source,target
251456,4724,9,1f,102360
251464,4724,9,1f,129218
251495,4724,9,1f,96498
251502,4724,9,1f,53672
251519,4724,9,1f,33657
...,...,...,...,...
658368,4724,22,169,81316
658401,4724,22,169,70083
658446,4724,22,169,7943
658465,4724,22,169,56153


In [15]:
# Get vertices
nodes = reshare_nodes
# nodes = list(cascade_data.agent1.unique()) + list(cascade_data.agent2.unique())

In [16]:
cascade_data[["source", "target"]].values

array([['1f', '102360'],
       ['1f', '129218'],
       ['1f', '96498'],
       ...,
       ['169', '7943'],
       ['169', '56153'],
       ['127296', '129100']], dtype=object)

In [17]:
# convert list of lists to list of tuples
edges = [tuple(pair) for pair in cascade_data[["source", "target"]].values]

In [18]:
import igraph

graph = igraph.Graph(directed=True)
graph.add_vertices(nodes)
graph.add_edges(edges)

In [19]:
net.vs.attributes()

['id', 'party', 'misinfo', 'bot', 'uid']

In [20]:
og_nodes = [i for i in net.vs if i["uid"] in nodes]

In [21]:
VIZ_DIR = "viz"
if not os.path.exists(VIZ_DIR):
    os.makedirs(VIZ_DIR)
# Set attributes of nodes from og graph data
graph.vs["party"] = [
    float(node["party"]) if node["party"] != "None" else 0 for node in og_nodes
]
graph.vs["misinfo"] = [
    float(node["misinfo"]) if node["party"] != "None" else 100 for node in og_nodes
]
graph.vs["bot"] = [int(node["bot"]) for node in og_nodes]
graph.write_gml(os.path.join(VIZ_DIR, f"{message_type}_{message_id}__reshare.gml"))

In [22]:
graph.vcount()

109

In [23]:
# Check that attributes are properly saved
c = ig.Graph.Read_GML(os.path.join(VIZ_DIR, f"{message_type}_{message_id}__reshare.gml"))
c.vs["party"]

[-0.0367808219178082,
 -0.168448,
 -0.539335,
 -0.169499090909091,
 0.21498,
 0.145421150592216,
 -0.272858620689655,
 0.17007602739726,
 -0.285880357142857,
 -0.204786567164179,
 0.403762790697674,
 -0.0453061855670103,
 -0.261586956521739,
 0.201552631578947,
 -0.597488571428571,
 0.3129925,
 0.0682858536585366,
 0.22887037037037,
 0.238050746268657,
 -0.0571611650485436,
 0.309016666666667,
 -0.190019811320755,
 0.0830473958333333,
 0.436367857142857,
 -0.146476543209876,
 -0.139589423076923,
 0.362811818181818,
 -0.0691559523809523,
 -0.109097368421053,
 -0.185158333333333,
 -0.471924324324324,
 -0.245525949367089,
 -0.206176146788991,
 0.269447619047619,
 -0.202001040118871,
 0.236582716049383,
 -0.225775641025641,
 -0.196617994100295,
 0.166508695652174,
 0.285629078014184,
 0.130035664335664,
 0.17326393442623,
 0.274369230769231,
 0.248670058139535,
 -0.203657692307692,
 -0.138071917808219,
 0.348021153846154,
 -0.167347619047619,
 0.165636470588235,
 0.132208108108108,
 -0.190