In [2]:
from pyecharts.charts import Bar, Graph
from pyecharts import options as opts
from gapminder import gapminder
import pyecharts

In [4]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [5]:
# Switch the working directory so the relative "results/..." paths used by the
# cells below resolve against the project folder.
# NOTE(review): this is an absolute, machine-specific path; it must be adjusted
# (or replaced by a configurable base directory) before running elsewhere.
os.chdir("/Users/lingchm/Documents/Github/us_sodium_policies")

# Load data

In [6]:
# Adjacency matrix between accounts: the first CSV column is used as the row
# index, so rows and columns are both labelled by account handle.
df_graph = pd.read_csv("results/adjacency_matrix.csv", index_col=0)
# Row labels of the matrix as a plain Python list.
usernames = df_graph.index.tolist()

In [8]:
# Display the adjacency matrix to sanity-check its labels and values.
df_graph

Unnamed: 0,DrTomFrieden,BruceNeal1,SimonCapewell99,KBibbinsDomingo,Dmozaffarian,jesse8850,braun_lynne,KulikovUNIATF,DrSaccoNeuro,chanders4,...,CDCgov,US_FDA,USDA,USDANutrition,FDAfood,CDCDirector,WHO,nycHealthy,HHSGov,TeamNutrition
DrTomFrieden,12672,0,2,4,0,0,0,0,0,0,...,0,0,0,0,0,0,8,1,0,0
BruceNeal1,0,104,8,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SimonCapewell99,2,8,261,1,4,0,0,0,0,0,...,0,3,0,0,0,1,7,1,0,0
KBibbinsDomingo,4,0,1,108,0,0,0,0,0,0,...,1,3,0,0,0,0,0,1,0,0
Dmozaffarian,0,0,4,0,1753,0,0,0,0,0,...,0,2,0,0,0,0,0,0,0,0
jesse8850,0,0,0,0,0,4,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
braun_lynne,0,0,0,0,0,0,24,0,0,0,...,0,1,0,0,0,0,0,0,0,0
KulikovUNIATF,0,0,0,0,0,0,0,5665,0,0,...,0,0,0,0,0,0,8,0,0,0
DrSaccoNeuro,0,0,0,0,0,0,0,0,21,0,...,0,0,0,0,0,0,0,0,0,0
chanders4,0,0,0,0,0,0,0,0,0,42,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Per-account attributes (category, organization, tweet_total, policy_type,
# influence); the first CSV column (the username) becomes the index.
df_by_user = pd.read_csv("results/df_by_user.csv", index_col=0)

In [10]:
# Display the per-account table to check the columns used for plotting.
df_by_user

Unnamed: 0_level_0,category,organization,tweet_total,policy_type,influence
username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SimonCapewell99,Experts,Simon C.,80563,Individuals,261.0
WHO,Public agencies,WHO,66275,PolicyField,55593.0
American_Heart,Professional and advocacy associations,AHA,40867,Organizations,26395.0
CSPI,Professional and advocacy associations,CSPI,39887,Organizations,8993.0
CDCgov,Public agencies,CDC,33124,PolicyField,4304.0
nycHealthy,Public agencies,NYC Department of Health,33049,PolicyField,2830.0
HarvardChanSPH,Research and evaluation organizations,Harvard,32495,Organizations,3909.0
HHSGov,Public agencies,HHS,27955,PolicyField,1328.0
PublicHealth,Professional and advocacy assotiations,American Public Health Association,25656,Organizations,811.0
KBibbinsDomingo,Experts,Kirsten B.D.,22679,Individuals,108.0


In [11]:
# List the distinct policy_type values; these are the names that the
# colour-category list passed to prepare_graph has to cover.
df_by_user['policy_type'].unique()

array(['Individuals', 'PolicyField', 'Organizations'], dtype=object)

# Plot

In [46]:
def prepare_graph(df_graph, df_by_user, color_categories, color_col="category",
                  influence_col="influence_index_all",
                  filename="data/results/graph_all_tweets.html",
                  title="All Tweets", EDGE_LABEL=True, EDGE_CURVE=True, GRAVITY=0.2,
                  MIN_EDGE=2, MIN_NODE=10, MAX_EDGE=15, MAX_NODE=90, LAYOUT="circular",
                  TEXT_SIZE=14, EDGE_LIMIT=0):
    """Render an interaction network as a pyecharts ``Graph`` and save it as HTML.

    One node is drawn per row of ``df_by_user`` and one directed edge per
    off-diagonal entry of ``df_graph`` whose value exceeds ``EDGE_LIMIT``.

    Parameters
    ----------
    df_graph : pandas.DataFrame
        Adjacency matrix; ``df_graph.loc[a, b]`` is the value of the edge
        from ``a`` to ``b``. Its row and column labels must contain every
        index label of ``df_by_user``. The diagonal is ignored.
    df_by_user : pandas.DataFrame
        Node table indexed by the same labels as ``df_graph``. Must contain
        the columns ``"organization"`` (node display name), ``color_col`` and
        ``influence_col``. Index labels must be strings (their first character
        is used to pick the edge curvature).
    color_categories : list of dict
        Legend categories, each with a ``"name"`` key. A node's category id is
        the position of the entry whose name equals the node's ``color_col``
        value.
    color_col : str
        Column of ``df_by_user`` that determines the node colour group.
    influence_col : str
        Column of ``df_by_user`` used as node value and (rescaled) node size.
    filename : str
        Path of the HTML file to write.
    title : str
        Chart title.
    EDGE_LABEL : bool
        Whether edge labels (the rounded edge value) are shown.
    EDGE_CURVE : bool
        If true, edges are curved (0.3 when the source label's first character
        sorts before the target's, otherwise 0.2); if false they are straight.
    GRAVITY : float
        Passed to ``Graph.add`` as ``gravity``.
    MIN_EDGE, MAX_EDGE : float
        Range the edge values are min-max scaled into to obtain line widths.
    MIN_NODE, MAX_NODE : float
        Range the influence values are min-max scaled into to obtain node sizes.
    LAYOUT : str
        Passed to ``Graph.add`` as ``layout`` (e.g. ``"circular"`` or ``"force"``).
    TEXT_SIZE : int
        Font size of node labels; edge labels use ``TEXT_SIZE - 2``.
    EDGE_LIMIT : float
        Only edges with a value strictly greater than this are drawn.

    Returns
    -------
    None
        The chart is written to ``filename`` as a side effect.

    Raises
    ------
    ValueError
        If a value in ``color_col`` has no matching entry in
        ``color_categories``. (Previously such a node silently inherited the
        category of the preceding node.)
    """

    def _native(value):
        # pyecharts serialises options to JSON and expects built-in Python
        # scalars, so unwrap numpy scalars such as int64.
        return value.item() if isinstance(value, np.generic) else value

    # Sort by colour group, then by influence (both descending) so nodes of
    # the same group sit next to each other in the layout.
    df_by_user = df_by_user.sort_values([color_col, influence_col], ascending=False)
    users = df_by_user.index.tolist()

    # Resolve every node's category id up front and fail loudly on names that
    # are missing from `color_categories` (e.g. a misspelled category).
    category_ids = {entry["name"]: idx for idx, entry in enumerate(color_categories)}
    node_categories = df_by_user[color_col].tolist()
    unknown = sorted({str(cat) for cat in node_categories if cat not in category_ids})
    if unknown:
        raise ValueError(
            "Values of column {!r} missing from color_categories: {}".format(
                color_col, ", ".join(unknown)
            )
        )

    # Scale node values (influence) into [MIN_NODE, MAX_NODE]; the result is
    # kept positional so it lines up with the sorted rows of df_by_user.
    node_scaler = MinMaxScaler(feature_range=(MIN_NODE, MAX_NODE))
    node_sizes = node_scaler.fit_transform(np.asarray(df_by_user[[influence_col]])).ravel()

    # Scale edge values (interactions) into [MIN_EDGE, MAX_EDGE]. Work on an
    # explicit writable float copy: the array obtained from a DataFrame can be
    # read-only, and the input frame must not be modified.
    weights = df_graph.to_numpy(dtype=float, copy=True)
    np.fill_diagonal(weights, 0)  # self-interactions must not affect the scaling
    edge_scaler = MinMaxScaler(feature_range=(MIN_EDGE, MAX_EDGE))
    scaled = edge_scaler.fit_transform(weights.reshape(-1, 1)).reshape(weights.shape)
    edge_widths = pd.DataFrame(scaled, columns=df_graph.columns, index=df_graph.index)

    # Create nodes; `tolist()` yields built-in Python values.
    organizations = df_by_user["organization"].tolist()
    influences = df_by_user[influence_col].tolist()
    nodes = [
        {
            "id": str(i),
            "name": organizations[i],  # display name
            "category": category_ids[node_categories[i]],
            "value": influences[i],  # node value
            "symbolSize": round(float(node_sizes[i]), 6),
        }
        for i in range(len(users))
    ]

    # Create edges.
    links = []
    for i, from_ in enumerate(users):
        for j, to_ in enumerate(users):
            # Only plot edges between different accounts ...
            if from_ == to_:
                continue
            val = df_graph.loc[from_, to_]
            # ... whose value is large enough (NaN compares false and is skipped).
            if not val > EDGE_LIMIT:
                continue

            if EDGE_CURVE:
                # Curvature depends on the order of the labels' first characters.
                curveness = 0.3 if from_[0] < to_[0] else 0.2
            else:
                curveness = 0

            links.append({
                "source": str(i),
                "target": str(j),
                "value": _native(val),
                "lineStyle": {
                    "normal": {
                        "width": float(edge_widths.loc[from_, to_]),
                        "curveness": curveness,
                        "color": "source",
                    }
                },
                "label": {
                    "show": EDGE_LABEL,
                    "formatter": "{}".format(round(val)),
                },
            })

    # Build the chart and write it to disk.
    (
        Graph(init_opts=opts.InitOpts(width="1300px", height="1000px"))
        .add(
            "",
            nodes=nodes,
            links=links,
            categories=color_categories,
            is_focusnode=True,
            layout=LAYOUT,
            gravity=GRAVITY,
            edge_symbol=['', 'arrow'],
            is_rotate_label=True,
            edge_label=opts.LabelOpts(is_show=EDGE_LABEL, position="middle", font_size=TEXT_SIZE-2),
            label_opts=opts.LabelOpts(position="right", font_size=TEXT_SIZE),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title),
            legend_opts=opts.LegendOpts(orient="vertical", pos_left="2%", pos_top="20%"),
        )
        .render(filename)
    )
    return

In [47]:
# Legend groups for the graph. The position of each entry in this list is the
# category id assigned to nodes with that name, so the order matters.
color_categories = [
    dict(name=group_name)
    for group_name in ('Individuals', 'PolicyField', 'Organizations')
]

In [48]:
# Render the network for all tweets: nodes are coloured by `policy_type` and
# sized by `influence`, edge widths are scaled into [0.2, 20], edge labels are
# hidden, and the chart is written to results/circular_graph_n33.html.
# All other options keep prepare_graph's defaults (e.g. the circular layout).
prepare_graph(df_graph, df_by_user, color_categories, 
              color_col="policy_type", influence_col = "influence",
              filename = "results/circular_graph_n33.html",
              title="All Tweets", MIN_EDGE=0.2, MAX_EDGE=20, EDGE_LABEL=False)