In [None]:
import pickle
import json 
import pandas as pd
import glob

from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import re

from itertools import product

from socialsim_scoring.cp4 import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Narrative labels handed to the metric and measurement loaders below.
narratives = [
    'arrests',
    'arrests/opposition',
    'guaido/legitimate',
    'international/aid',
    'international/aid_rejected',
    'international/respect_sovereignty',
    'maduro/cuba_support',
    'maduro/dictator',
    'maduro/legitimate',
    'maduro/narco',
    'military',
    'military/desertions',
    'other/anti_socialism',
    'other/censorship_outage',
    'other/chavez',
    'other/chavez/anti',
    'protests',
    'violence',
]

In [None]:
# Time-split labels; this order also defines the categorical ordering of the
# 'split' column used for sorting further down.
split_names = [
    'february1-february14',
    'february8-february21',
    'february15-february28',
    # NOTE(review): every other split spans two weeks; confirm this label
    # matches the split names stored in the result files.
    'february22-february28',
    'march1-march14',
    'march8-march21',
    'march15-march28',
    'march22-april4',
]

# meas_list and metric_list are parallel: metric_list[i] is the metric
# reported for meas_list[i].
meas_list = [
    'number_of_shares',
    'number_of_shares_over_time',
    'activated_users',
    'activated_users_over_time',
    'degree_distribution',
    'page_rank',
]
metric_list = [
    'APE',
    'RMSE',
    'APE',
    'RMSE',
    'RH Distance',
    'EM Distance',
]

# Measurement names passed to load_measurements, split by scalar vs. temporal.
meas_list_scalar = ['number_of_shares', 'activated_users']
meas_list_temporal = ['number_of_shares_over_time', 'activated_users_over_time']

# Not referenced by any of the cells shown in this notebook.
save_plots = False

## Load Data
Specify file path and pickled measurement and metric results files to load

In [None]:
# Substring a file name must contain to be picked up; '' matches every model.
model_identifier = ''
# Directory to search. It is concatenated directly with the pattern, so it
# must end with a path separator.
path = './'

# Sorted list of the matching pickled result files.
fns = sorted(glob.glob(path + f'*{model_identifier}*measurement*.pickle'))

fns

In [None]:
# Load the metric results for the selected narratives from the files found above,
# for the Twitter and YouTube platforms.
# NOTE(review): load_metrics is not defined in this notebook (presumably it comes
# from the socialsim_scoring.cp4 star import); later cells rely on the result having
# 'model', 'narrative', 'split', 'measurement', 'metric', 'platform' and 'value' columns.
df = load_metrics(fns, narratives,platforms=['twitter','youtube'])

In [None]:
# Sanity check: the rest of the notebook assumes a single model. This only prints
# a message; execution continues even when several models were loaded.
if df['model'].nunique() > 1:
    print('More than one model found. This notebook is for analyzing one model at a time. Please refine file list above.')

Specify path to baseline metrics and measurement pickle files

In [None]:
# Locate and load the baseline metric results, then keep only the splits that
# also appear in the simulation results.
bl_path = './'
bl_fns = sorted(glob.glob(bl_path + '*.pkl'))
bl_df = load_metrics(bl_fns, narratives,platforms=['twitter','youtube'])
bl_df = bl_df[bl_df['split'].isin(df['split'])]

# Mean and standard deviation of each metric value per
# narrative/split/measurement/metric/platform group.
# The aggregations are given by name: passing the np.mean / np.std callables to
# groupby.agg is deprecated in pandas, and once a callable is used directly
# np.std is the population std (ddof=0) instead of the sample std that
# 'std' computes.
bl_grouped = (
    bl_df.groupby(['narrative','split','measurement','metric','platform'])['value']
    .agg(['mean','std'])
    .reset_index()
    .rename(columns={'mean':'bl_mean',
                     'std':'bl_std'})
)

# Ordered categorical so that sorting follows split_names rather than the
# alphabetical order of the labels. Labels missing from split_names become NaN.
bl_grouped['split'] = pd.Categorical(bl_grouped['split'],
                      categories=split_names,
                      ordered=True)

bl_grouped = bl_grouped.sort_values('split')


## Metric-Level Plots
Plot the metric performance of the model

In [None]:
# Mean and standard deviation of each metric value per
# model/narrative/split/measurement/metric/platform group.
# The aggregations are given by name: passing the np.mean / np.std callables to
# groupby.agg is deprecated in pandas, and once a callable is used directly
# np.std is the population std (ddof=0) instead of the sample std that
# 'std' computes.
grouped = (
    df.groupby(['model','narrative','split','measurement','metric','platform'])['value']
    .agg(['mean','std'])
    .reset_index()
    .rename(columns={'mean':'sim_mean',
                     'std':'sim_std'})
)

# Ordered categorical so that sorting follows split_names rather than the
# alphabetical order of the labels. Labels missing from split_names become NaN.
grouped['split'] = pd.Categorical(grouped['split'],
                      categories=split_names,
                      ordered=True)

grouped = grouped.sort_values('split')


grouped

In [None]:
# Merge simulation and baseline data.
# Baseline columns left behind by an earlier run of this cell are dropped first,
# so re-running the cell does not produce suffixed duplicates
# (bl_mean_x / bl_mean_y) in `grouped`.
# The merge is an inner join: groups without a baseline counterpart are dropped.
grouped = (
    grouped
    .drop(columns=['bl_mean','bl_std'], errors='ignore')
    .merge(bl_grouped,on=['narrative','split','measurement','metric','platform'])
)

# Row-wise stack of the un-aggregated simulation and baseline metric results.
# NOTE(review): bl_df is rebound to the baseline *measurements* further down the
# notebook; run this cell before that one.
df_sim = pd.concat([df,bl_df])

### Metric distributions by time split

The plots below show the variability in metric results across narratives and time splits for both the simulation (orange) and the baseline (blue).  Each point is an individual narrative, and the error bars show the variability across multiple submissions of the same method.

In [None]:
# Plot the merged simulation/baseline metric summary for both platforms and every
# measurement/metric pair, with the time splits given by split_names.
# NOTE(review): strip_plot_by_split is defined outside this notebook; its exact
# rendering is not visible here.
strip_plot_by_split(grouped,['twitter','youtube'],meas_list,metric_list,split_names=split_names)

### Metric distributions by time split: narrative focus

We can also highlight individual narratives in these plots.  The narrative variable in the block below can be used to select a specific narrative.

In [None]:
# Platform and narrative to highlight in the plots below.
platform = 'twitter'
narrative = 'arrests'

# meas_list and metric_list are parallel lists, so iterate over them pairwise.
# This avoids a list.index() lookup per iteration, which would pick the wrong
# metric if a measurement name ever appeared twice in meas_list.
for meas, metric in zip(meas_list, metric_list):
    strip_plot_by_split(grouped,[platform],[meas],[metric],split_names,narrative=narrative)

### Metric scatter plots

The plots below compare simulation performance with the baseline: each is a scatter plot of the simulation metric result versus the baseline metric result at the narrative level.  The error bars indicate the variation due to multiple submissions from the same method.

In [None]:
# Simulation-versus-baseline plots built from the merged metric summary, for both
# platforms and every measurement/metric pair.
# NOTE(review): grid_scatterplot is defined outside this notebook.
grid_scatterplot(grouped,['twitter','youtube'],meas_list,metric_list,split_names)

### CCDF Plots

The plots below show a comparison of the distribution of metrics across narratives for the baseline (orange) and simulation (blue) for each time split. The error band indicates variability due to multiple submissions from the same model.

In [None]:
# Uses the un-aggregated simulation + baseline metric rows (df_sim), not the
# per-group summary in `grouped`.
# NOTE(review): grid_ccdf is defined outside this notebook.
grid_ccdf(df_sim,['twitter','youtube'],meas_list,metric_list,split_names)

## Measurement-Level Plots
Plot the measurements of the model and the ground truth

In [None]:
# Load measurements from the simulation files; load_measurements returns a pair
# that is unpacked here as (ground truth, simulation).
gt_df, sim_df = load_measurements(fns,narratives,meas_list_scalar,meas_list_temporal)
# From the baseline files only the second element is kept.
# NOTE(review): this rebinds bl_df, which earlier held the baseline *metric*
# results; cells above that read bl_df must not be re-run after this one.
_, bl_df = load_measurements(bl_fns,narratives,meas_list_scalar,meas_list_temporal)
# Keep only the baseline rows for splits that are present in the simulation data.
bl_df = bl_df[bl_df['split'].isin(sim_df['split'])]

In [None]:
# Mean and standard deviation of each measurement value per
# model/platform/narrative/measurement/timestamp/split group.
# The aggregations are given by name: passing the np.mean / np.std callables to
# groupby.agg is deprecated in pandas, and once a callable is used directly
# np.std is the population std (ddof=0) instead of the sample std that
# 'std' computes.
# NOTE(review): rows with a null nodeTime (the scalar measurements, handled in a
# later cell) are presumably excluded here by groupby's default NaN-key handling.
sim_time_df = (
    sim_df.groupby(['model','platform','informationID','meas','nodeTime','split'])['value']
    .agg(['mean','std'])
    .reset_index()
)
bl_time_df = (
    bl_df.groupby(['model','platform','informationID','meas','nodeTime','split'])['value']
    .agg(['mean','std'])
    .reset_index()
)
# Give every baseline row one common model label.
bl_time_df['model'] = 'Baseline'

# Ground truth: mean value per platform/narrative/measurement/timestamp.
gt_time_df = gt_df.groupby(['platform','informationID','meas',
                              'nodeTime'])['value'].mean().reset_index()

# Attach the ground-truth 'value' column to both tables (inner join).
sim_time_df = sim_time_df.merge(gt_time_df,on=['platform','informationID','meas','nodeTime'])
bl_time_df = bl_time_df.merge(gt_time_df,on=['platform','informationID','meas','nodeTime'])

In [None]:
# Inspect the simulation time-series table: per-group 'mean'/'std' plus the
# ground-truth 'value' column.
sim_time_df

In [None]:
# Inspect the baseline time-series table: per-group 'mean'/'std' plus the
# ground-truth 'value' column.
bl_time_df

In [None]:
# Stack simulation and baseline rows into one table; the 'model' column tells
# them apart (baseline rows carry the label 'Baseline').
# NOTE(review): this cell is not idempotent - running it a second time appends
# the baseline rows again. Re-run the aggregation cell above first.
sim_time_df = pd.concat([sim_time_df,bl_time_df])

### Temporal Measurements

The plots below compare the ground truth time series measurements with the measurements of both the simulation and baseline across multiple (overlapping) time splits.  The baseline measurements are shown with a dotted line and the simulation for the corresponding time split is shown in the same color with a solid line. Each cell below sets the measurement (`meas`) and the platform (`platform`) and then draws one plot for every narrative in the data.

In [None]:
# Measurement and platform to plot in this cell.
meas = 'number_of_shares_over_time'
platform = 'twitter'

# One plot per narrative (informationID) present in the combined table.
for narrative in sim_time_df['informationID'].unique():
    time_series_plot_by_split(sim_time_df,platform,meas,narrative)

In [None]:
# Measurement and platform to plot in this cell.
meas = 'number_of_shares_over_time'
platform = 'youtube'

# One plot per narrative (informationID) present in the combined table.
for narrative in sim_time_df['informationID'].unique():
    time_series_plot_by_split(sim_time_df,platform,meas,narrative)

In [None]:
# Measurement and platform to plot in this cell.
meas = 'activated_users_over_time'
platform = 'twitter'

# One plot per narrative (informationID) present in the combined table.
for narrative in sim_time_df['informationID'].unique():
    time_series_plot_by_split(sim_time_df,platform,meas,narrative)

In [None]:
# Measurement and platform to plot in this cell.
meas = 'activated_users_over_time'
platform = 'youtube'

# One plot per narrative (informationID) present in the combined table.
for narrative in sim_time_df['informationID'].unique():
    time_series_plot_by_split(sim_time_df,platform,meas,narrative)

In [None]:
# Scalar measurements are the rows without a timestamp: keep those and drop the
# now-empty nodeTime column.
sim_scalar_df = sim_df[sim_df['nodeTime'].isnull()].drop('nodeTime',axis=1)
bl_scalar_df = bl_df[bl_df['nodeTime'].isnull()].drop('nodeTime',axis=1)

gt_scalar_df = gt_df[gt_df['nodeTime'].isnull()].drop('nodeTime',axis=1)

# Mean and standard deviation per platform/narrative/measurement/split.
# The aggregations are given by name: passing the np.mean / np.std callables to
# groupby.agg is deprecated in pandas, and once a callable is used directly
# np.std is the population std (ddof=0) instead of the sample std that
# 'std' computes.
sim_scalar_df = (
    sim_scalar_df.groupby(['platform','informationID','meas','split'])['value']
    .agg(['mean','std'])
    .reset_index()
    .rename(columns={'mean':'sim_mean',
                     'std':'sim_std'})
)
bl_scalar_df = (
    bl_scalar_df.groupby(['platform','informationID','meas','split'])['value']
    .agg(['mean','std'])
    .reset_index()
    .rename(columns={'mean':'bl_mean',
                     'std':'bl_std'})
)

# Ground truth: mean value per platform/narrative/measurement.
gt_scalar_df = gt_scalar_df.groupby(['platform','informationID','meas'])['value'].mean().reset_index()

# Attach the ground-truth 'value' column to both tables (inner join).
bl_scalar_df = bl_scalar_df.merge(gt_scalar_df,on=['platform','informationID','meas'])
sim_scalar_df = sim_scalar_df.merge(gt_scalar_df,on=['platform','informationID','meas'])
sim_scalar_df

In [None]:
# Join the baseline summary onto the simulation summary.
# Baseline columns left behind by an earlier run of this cell are dropped first,
# so re-running the cell does not produce suffixed duplicates
# (bl_mean_x / bl_mean_y), which would break the delta_mean computation that follows.
# 'value' (the ground-truth measurement) is part of the key so that it appears
# once in the result; both sides took it from the same ground-truth table.
sim_scalar_df = (
    sim_scalar_df
    .drop(columns=['bl_mean','bl_std'], errors='ignore')
    .merge(bl_scalar_df,on = ['platform',
                              'informationID',
                              'meas',
                              'split',
                              'value'])
)

In [None]:
# Baseline mean minus simulation mean, per row (positive when the baseline is higher).
sim_scalar_df['delta_mean'] = sim_scalar_df['bl_mean'] - sim_scalar_df['sim_mean']

### Scalar Measurements

In the plots below we show a comparison of the scalar measurements for the simulation with the baseline values versus the ground truth values.  The colored markers show the scatter plot of the simulation measurement with the ground truth measurement for each narrative.  The arrows show how the simulation measurements are changed compared with the baseline values.  For example, if the arrows uniformly point up it means that the simulation measurements are uniformly higher than the baseline measurements for all narratives.  If the arrows all point towards the one-to-one line, it means that the simulation measurements are closer to the ground truth values than the baseline.

In [None]:
# Draw the scalar-measurement comparison for both platforms from the merged
# simulation/baseline/ground-truth table.
# NOTE(review): grid_quiver is defined outside this notebook; which columns it
# reads (e.g. delta_mean) is not visible here.
grid_quiver(sim_scalar_df,['twitter','youtube'],meas_list_scalar,split_names = split_names)