Sample some cascades to plot the network (for high- and low-quality messages)

In [2]:
## Group & sort by cascade size.
## Merge node uid with original network to get political affiliation
## Save file to .gml

In [6]:
import simsom.utils as utils
import matplotlib.pyplot as plt
import os
import glob
import json
from collections import defaultdict
import pandas as pd
import seaborn as sns

## Reshare network

In [7]:
import igraph as ig

In [8]:
# Read in og graph data
network_path = "data/network_baseline.gml"
net = ig.Graph.Read_GML(network_path)

In [9]:
cascade_fpath = "results_base/reshare.csv"
cascade = pd.read_csv(cascade_fpath, dtype={"source": str, "target": str})
cascade.head()

Unnamed: 0,message_id,timestep,source,target
0,2,1,159,733
1,3,1,193,736
2,5,1,82,91
3,4,1,118,308
4,4,1,118,362


In [17]:
weights = cascade.groupby(["source", "target"])["message_id"].count().reset_index()
weights = weights.rename(columns={"message_id": "weight"})

In [18]:
weights.describe()

Unnamed: 0,weight
count,2978.0
mean,16.0
std,0.0
min,16.0
25%,16.0
50%,16.0
75%,16.0
max,16.0


In [19]:
weights.weight.unique()

array([16])

In [60]:
# convert list of lists to list of tuples
edges = [tuple(pair) for pair in cascade_data[["source", "target"]].values]

In [61]:
import igraph

graph = igraph.Graph(directed=True)
graph.add_vertices(nodes)
graph.add_edges(edges)

In [62]:
net.vs.attributes()

['id', 'party', 'misinfo', 'bot', 'uid']

In [63]:
og_nodes = [i for i in net.vs if i["uid"] in nodes]

In [64]:
VIZ_DIR = "viz"
if not os.path.exists(VIZ_DIR):
    os.makedirs(VIZ_DIR)
# Set attributes of nodes from og graph data
graph.vs["party"] = [
    float(node["party"]) if node["party"] != "None" else 0 for node in og_nodes
]
graph.vs["misinfo"] = [
    float(node["misinfo"]) if node["party"] != "None" else 100 for node in og_nodes
]
graph.vs["bot"] = [int(node["bot"]) for node in og_nodes]
graph.write_gml(os.path.join(VIZ_DIR, f"{message_type}_{message_id}__reshare.gml"))

In [65]:
graph.vcount()

673

In [None]:
import igraph


def get_reshare_cascade(cascade_inpath, message_id=None):
    """
    Get reshare cascade by message_id
    cascade_fpath (str): fpath of reshare  data

    """
    cascade = pd.read_csv(cascade_inpath, sep=",")
    if message is not None:
        cascade_data = cascade[cascade.message_id == message_id]
    # Get vertices
    nodes = list(cascade_data.source.unique()) + list(cascade_data.target.unique())
    # convert list of lists to list of tuples
    edges = [tuple(pair) for pair in cascade_data[["source", "target"]].values]

    graph = igraph.Graph(directed=True)
    graph.add_vertices(nodes)
    graph.add_edges(edges)

    # Set attributes of nodes from og graph data
    og_nodes = [i for i in net.vs if i["uid"] in nodes]
    graph.vs["party"] = [
        float(node["party"]) if node["party"] != "None" else 0 for node in og_nodes
    ]
    graph.vs["misinfo"] = [
        float(node["misinfo"]) if node["party"] != "None" else 100 for node in og_nodes
    ]
    graph.vs["bot"] = [int(node["bot"]) for node in og_nodes]
    return graph

In [68]:
# Check that attributes are properly saved
c = ig.Graph.Read_GML(
    os.path.join(VIZ_DIR, f"{message_type}_{message_id}__reshare.gml")
)
c.vs["party"]

[0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
 0.188561594202899,
 0.343641935483871,
 -0.202001040118871,
 0.291375342465753,
 0.25182972972973,
