In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import json

load

In [43]:
# in output_proc_utils.py

def load_data(fileroot):
    """Read a run's event log and print a short summary of its contents.

    Two files are opened: ``fileroot + '.json'`` (parsed as JSON; the parsed
    value is not used afterwards, but a missing or malformed file still
    raises) and ``fileroot + '_output.jsonl'`` (read as JSON lines into a
    DataFrame).

    Side effects: sets the global pandas option ``display.width`` to 1000 and
    prints the head of the frame, the label counts for 'eval' and 'action'
    events, and the counts of the key sets found in the action payloads.

    Parameters
    ----------
    fileroot : str
        Common path prefix of the two files.

    Returns
    -------
    pandas.DataFrame
        The event log, one row per JSON line.
    """
    config_path = fileroot + '.json'
    with open(config_path) as config_handle:
        config_data = json.load(config_handle)  # parsed only; not used below

    events = pd.read_json(fileroot + "_output.jsonl", lines=True)
    pd.set_option('display.width', 1000)
    print(events.head())

    # Label frequencies, one section per event type.
    for heading, kind in (('Evals:', 'eval'), ('Actions:', 'action')):
        print()
        print(heading)
        label_counts = events.loc[events.event_type == kind, 'label'].value_counts()
        print(label_counts)

    # Which payload key combinations occur among the actions.
    print()
    action_payloads = events.loc[events.event_type == 'action', 'data']
    payload_keys = action_payloads.apply(lambda payload: payload.keys())
    print(payload_keys.value_counts())
    return events

def post_process_output(df):
    """Split the raw event log into evaluation, interaction and edge frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Event log with at least the columns ``event_type``, ``label``,
        ``episode``, ``source_user`` and ``data`` (a dict per row).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(eval_df, int_df, edge_df)``:

        * ``eval_df`` - rows whose ``event_type`` is 'eval', with columns
          episode, source_user, label and ``response`` (taken from
          ``data['query_return']``).
        * ``int_df`` - rows labelled post / like_toot / boost_toot / reply,
          all original columns kept.
        * ``edge_df`` - rows labelled follow / unfollow, with columns
          episode, source_user, label and ``target_user`` (taken from
          ``data['target_user']``).

        Each frame has a fresh 0..n-1 index.
    """
    def _lift_payload_field(frame, column, key):
        # Copy one payload entry into its own column, then drop the payload.
        frame[column] = frame.data.apply(lambda payload: payload[key])
        return frame.drop('data', axis=1)

    eval_rows = df.event_type == 'eval'
    evaluations = df.loc[eval_rows, ['episode', 'source_user', 'label', 'data']].reset_index(drop=True)
    evaluations = _lift_payload_field(evaluations, 'response', 'query_return')

    edge_rows = df.label.isin(['follow', 'unfollow'])
    edges = df.loc[edge_rows, ['episode', 'source_user', 'data', 'label']].reset_index(drop=True)
    edges = _lift_payload_field(edges, 'target_user', 'target_user')

    interaction_rows = df.label.isin(['post', 'like_toot', 'boost_toot', 'reply'])
    interactions = df.loc[interaction_rows, :].reset_index(drop=True)

    return evaluations, interactions, edges

def episodewise_graphbuild(edge_df):
    """Build the follow graph by replaying follow/unfollow events per episode.

    Episodes are processed in the order produced by ``groupby('episode')``.
    Within one episode all 'follow' edges are added first and all 'unfollow'
    edges are removed afterwards.

    Parameters
    ----------
    edge_df : pandas.DataFrame
        Must contain the columns ``episode``, ``label``, ``source_user`` and
        ``target_user``.

    Returns
    -------
    networkx.DiGraph
        Directed graph with an edge source_user -> target_user for every
        follow that was not removed by an unfollow in the same or a later
        episode.
    """
    follow_graph = nx.DiGraph()
    operations = (
        ('follow', follow_graph.add_edges_from),
        ('unfollow', follow_graph.remove_edges_from),
    )
    # Iterating a groupby yields (key, sub-frame) pairs; the key is not needed.
    for _, epi_edge_data in edge_df.groupby('episode'):
        for action, operate_on_graph in operations:
            pairs = epi_edge_data.loc[epi_edge_data.label == action, ['source_user', 'target_user']]
            if not pairs.empty:
                operate_on_graph(list(pairs.itertuples(index=False, name=None)))
    return follow_graph

#in dashboard-basic.py:

def get_target_user(row):
    """Return the user an interaction row is directed at.

    Parameters
    ----------
    row : object
        Anything exposing ``label``, ``source_user`` and ``data`` attributes
        (for example a DataFrame row).

    Returns
    -------
    The target user:

    * 'post'       -> the author (``row.source_user``)
    * 'like_toot'  -> ``row.data['target_user']``
    * 'boost_toot' -> ``row.data['target_user']``
    * 'reply'      -> ``row.data['reply_to']['target_user']``

    Raises
    ------
    ValueError
        If ``row.label`` is none of the four interaction labels above.
    """
    label = row.label
    if label == 'post':
        return row.source_user
    if label in ('like_toot', 'boost_toot'):
        return row.data['target_user']
    if label == 'reply':
        return row.data['reply_to']['target_user']
    # Fail with a clear message instead of falling through to an unbound local.
    raise ValueError(f"Unsupported interaction label: {label!r}")

def get_int_dict(int_df):
    """Group interaction records by episode.

    Unlike a column-assignment approach, this does not add columns to (or
    otherwise modify) the frame passed in, and an empty frame yields ``{}``.

    Parameters
    ----------
    int_df : pandas.DataFrame
        Interaction rows with the columns ``label`` (one of post, like_toot,
        boost_toot, reply), ``episode``, ``source_user`` and ``data`` (dict
        containing at least ``toot_id``; replies also need ``reply_to``).

    Returns
    -------
    dict
        ``{episode: [record, ...]}`` where each record has the keys
        ``action``, ``episode``, ``source_user``, ``target_user`` and
        ``toot_id``; records for replies additionally carry
        ``parent_toot_id``. Records keep the row order of ``int_df``.
    """
    past = {'post': 'posted', 'like_toot': 'liked', 'boost_toot': 'boosted', 'reply': 'replied'}

    records = []
    # Named tuples expose the needed columns as attributes, which is all
    # get_target_user requires.
    for row in int_df.itertuples(index=False):
        record = {
            'action': past[row.label],
            'episode': row.episode,
            'source_user': row.source_user,
            'target_user': get_target_user(row),
            'toot_id': row.data['toot_id'],
        }
        if row.label == 'reply':
            record['parent_toot_id'] = row.data['reply_to']['toot_id']
        records.append(record)

    # Group positionally by episode so the caller's frame stays untouched.
    int_data = pd.Series(records, dtype=object)
    return int_data.groupby(int_df['episode'].to_numpy()).apply(list).to_dict()

def get_toot_dict(int_df):
    """Map each toot id to the text data of the post or reply that created it.

    Only rows labelled 'post' or 'reply' are considered. A row whose
    ``data['toot_id']`` is ``None`` receives a synthetic key 'None0',
    'None1', ... numbered in row order. If the same toot id occurs more than
    once, the last row wins. The input frame is not modified, and an input
    without any post/reply rows yields ``{}``.

    Parameters
    ----------
    int_df : pandas.DataFrame
        Interaction rows with the columns ``label``, ``source_user`` and
        ``data`` (dict with ``toot_id`` and ``post_text``; replies also need
        ``reply_to`` containing ``toot_id``).

    Returns
    -------
    dict
        ``{toot_id: {'user', 'action', 'content'}}``; entries for replies
        additionally contain ``parent_toot_id``.
    """
    past = {'post': 'posted', 'like_toot': 'liked', 'boost_toot': 'boosted', 'reply': 'replied'}
    text_rows = int_df.loc[int_df['label'].isin(['post', 'reply'])]

    toot_dict = {}
    missing_id_count = 0
    for user, label, data in zip(text_rows['source_user'], text_rows['label'], text_rows['data']):
        toot_id = data['toot_id']
        if toot_id is None:
            # Handle None toot ids by appending a running index.
            toot_id = 'None' + str(missing_id_count)
            missing_id_count += 1

        text_data = {
            'user': user,
            'action': past[label],
            'content': data['post_text'],
        }
        if label == 'reply':
            text_data['parent_toot_id'] = data['reply_to']['toot_id']
        toot_dict[toot_id] = text_data

    return toot_dict

def load_data_dash(eval_df, int_df, edge_df):
    """Assemble the structures consumed by the dashboard.

    Parameters
    ----------
    eval_df : pandas.DataFrame
        Evaluation rows with columns ``label``, ``source_user``,
        ``response`` and ``episode``.
    int_df : pandas.DataFrame
        Interaction rows; copies are handed to ``get_int_dict`` and
        ``get_toot_dict``.
    edge_df : pandas.DataFrame
        Edge rows with columns ``source_user`` and ``target_user``.

    Returns
    -------
    tuple
        ``(follow_graph, int_dict, posted_users_by_episode, toot_dict, votes)``
        where ``votes`` maps episode -> {source_user: response} for the
        'vote_pref' rows and ``posted_users_by_episode`` maps episode -> set
        of source users appearing in ``int_df``.
    """
    # Votes: one {user: response} mapping per episode.
    vote_pref_rows = eval_df.label == 'vote_pref'
    vote_frame = eval_df.loc[vote_pref_rows, ['source_user', 'response', 'episode']]
    votes_by_episode = vote_frame.groupby('episode').apply(
        lambda episode_rows: dict(zip(episode_rows.source_user, episode_rows.response))
    )
    votes = votes_by_episode.to_dict()

    # Final follow network. Every row becomes an edge, so this is invalid in
    # the presence of unfollows (in which case use episodewise_graphbuild).
    follow_graph = nx.from_pandas_edgelist(
        edge_df,
        source='source_user',
        target='target_user',
        create_using=nx.DiGraph(),
    )

    # Active users, keyed by episode.
    users_per_episode = int_df.groupby('episode')['source_user'].apply(set)
    posted_users_by_episode = users_per_episode.to_dict()

    # Interaction and toot data are built from copies of int_df.
    int_dict = get_int_dict(int_df.copy())
    toot_dict = get_toot_dict(int_df.copy())

    return follow_graph, int_dict, posted_users_by_episode, toot_dict, votes

In [109]:
# NOTE(review): `fileroot` is not assigned in any cell visible above this one;
# a later cell assigns it, so this call appears to rely on out-of-order
# execution and would fail on a fresh kernel - confirm and reorder.
df=load_data(fileroot)

        source_user   label                                 data  episode event_type
0  Bill Fredrickson  follow    {'target_user': 'Jessica Nguyen'}       -1     action
1  Bill Fredrickson  follow     {'target_user': 'Liam Schwartz'}       -1     action
2  Bill Fredrickson  follow    {'target_user': 'Robert Johnson'}       -1     action
3    Bradley Carter  follow  {'target_user': 'Bill Fredrickson'}       -1     action
4  Bill Fredrickson  follow      {'target_user': 'Sophia Patel'}       -1     action

Evals:
label
favorability    1920
vote_pref        960
vote_intent      960
Name: count, dtype: int64

Actions:
label
follow        224
boost_toot    197
like_toot      74
reply          58
post           33
Name: count, dtype: int64

data
(toot_id, target_user)            271
(target_user)                     224
(reply_to, toot_id, post_text)     58
(toot_id, post_text)               33
Name: count, dtype: int64


In [6]:
# Run to analyse. Alternative run, kept for reference:
#   "../output/N20_T_40_None_Big5_independent"
fileroot = "examples/election/output/N20_None_Big5_malicious"

# Load the raw event log (load_data also prints a summary).
df=load_data(fileroot)
print()

# Split into evaluation / interaction / edge frames and show each.
eval_df, int_df, edge_df = post_process_output(df)
print(int_df)
print()
print(eval_df)
print()
print(edge_df)
print()

# Build the dashboard structures and show each under its own heading.
follow_graph, int_dict, posted_users_by_episode, toot_dict, votes = load_data_dash(eval_df, int_df, edge_df)
print()
print('follow_graph')
print(follow_graph)
print()
print('int_dict')
print(int_dict)
print()
print('posted_users_by_episode')
print(posted_users_by_episode)
print()
print('toot_dict')
print(toot_dict)
print()
print('votes')
print(votes)

        source_user   label                                 data  episode event_type
0  Bill Fredrickson  follow    {'target_user': 'Jessica Nguyen'}       -1     action
1  Bill Fredrickson  follow     {'target_user': 'Liam Schwartz'}       -1     action
2  Bill Fredrickson  follow    {'target_user': 'Robert Johnson'}       -1     action
3    Bradley Carter  follow  {'target_user': 'Bill Fredrickson'}       -1     action
4  Bill Fredrickson  follow      {'target_user': 'Sophia Patel'}       -1     action

Evals:
label
favorability    1920
vote_pref        960
vote_intent      960
Name: count, dtype: int64

Actions:
label
follow        224
boost_toot    197
like_toot      74
reply          58
post           33
Name: count, dtype: int64

data
(toot_id, target_user)            271
(target_user)                     224
(reply_to, toot_id, post_text)     58
(toot_id, post_text)               33
Name: count, dtype: int64

        source_user       label                                       

In [7]:
# Fraction of 'Invalid Answer' vote responses in each episode. The denominator
# is the number of voters recorded for that episode rather than a fixed 20, so
# the cell stays correct for runs with a different number of agents.
[
    sum(1 for value in epi_votes.values() if value == 'Invalid Answer') / len(epi_votes)
    for epi_votes in votes.values()
]

[0.65,
 0.65,
 0.25,
 0.25,
 0.25,
 0.15,
 0.15,
 0.25,
 0.35,
 0.25,
 0.05,
 0.2,
 0.15,
 0.1,
 0.2,
 0.2,
 0.1,
 0.05,
 0.1,
 0.05,
 0.2,
 0.1,
 0.2,
 0.1,
 0.2,
 0.15,
 0.25,
 0.2,
 0.45,
 0.15,
 0.2,
 0.15,
 0.3,
 0.25,
 0.2,
 0.15,
 0.25,
 0.45,
 0.2,
 0.2,
 0.3,
 0.05,
 0.2,
 0.1,
 0.1,
 0.25,
 0.1,
 0.1]

In [9]:
def get_int_dict(int_df):
    """Group interaction records by episode, using 'source'/'target' keys.

    The frame passed in is not modified (no helper column is added to it),
    and an empty frame yields ``{}``.

    Parameters
    ----------
    int_df : pandas.DataFrame
        Interaction rows with the columns ``label`` (one of post, like_toot,
        boost_toot, reply), ``episode``, ``source_user`` and ``data`` (dict
        containing at least ``toot_id``; replies also need ``reply_to``).

    Returns
    -------
    dict
        ``{episode: [record, ...]}`` where each record has the keys
        ``action``, ``episode``, ``source``, ``target`` and ``toot_id``;
        records for replies additionally carry ``parent_toot_id``. Records
        keep the row order of ``int_df``.
    """
    past = {
        "post": "posted",
        "like_toot": "liked",
        "boost_toot": "boosted",
        "reply": "replied",
    }

    records = []
    # Named tuples expose the needed columns as attributes, which is all
    # get_target_user requires.
    for row in int_df.itertuples(index=False):
        record = {
            "action": past[row.label],
            "episode": row.episode,
            "source": row.source_user,
            "target": get_target_user(row),
            "toot_id": row.data["toot_id"],
        }
        if row.label == "reply":
            record["parent_toot_id"] = row.data["reply_to"]["toot_id"]
        records.append(record)

    # Group positionally by episode so the caller's frame stays untouched.
    int_data = pd.Series(records, dtype=object)
    return int_data.groupby(int_df["episode"].to_numpy()).apply(list).to_dict()

In [10]:
# Rebuild int_dict with the get_int_dict defined in the cell above (its records
# use 'source'/'target' keys) and display the result; this rebinds the
# int_dict produced earlier by load_data_dash.
int_dict=get_int_dict(int_df)
int_dict

{-1: [{'action': 'posted',
   'episode': -1,
   'source': 'Jessica Nguyen',
   'target': 'Jessica Nguyen',
   'toot_id': 113674293624587527},
  {'action': 'posted',
   'episode': -1,
   'source': 'Janet Thompson',
   'target': 'Janet Thompson',
   'toot_id': 113674293632133311},
  {'action': 'posted',
   'episode': -1,
   'source': 'Liam Schwartz',
   'target': 'Liam Schwartz',
   'toot_id': 113674293673813891},
  {'action': 'posted',
   'episode': -1,
   'source': 'Emily Jacobs',
   'target': 'Emily Jacobs',
   'toot_id': 113674293679164293},
  {'action': 'posted',
   'episode': -1,
   'source': 'Maggie Chen',
   'target': 'Maggie Chen',
   'toot_id': 113674293692327582},
  {'action': 'posted',
   'episode': -1,
   'source': 'Roger Davis',
   'target': 'Roger Davis',
   'toot_id': 113674293714759598},
  {'action': 'posted',
   'episode': -1,
   'source': 'Mark Rodriguez',
   'target': 'Mark Rodriguez',
   'toot_id': 113674293720557675},
  {'action': 'posted',
   'episode': -1,
   'sou