In [None]:
import pickle
import json 
import pandas as pd
import glob

from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import re

from itertools import product

from socialsim_scoring.cp4 import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

Specify the narratives you want to include in the evaluation:

In [None]:
# Narratives to evaluate; this list is handed to the loader calls further down.
narratives = [
    'arrests',
    'arrests/opposition',
    'guaido/legitimate',
    'international/aid',
    'international/aid_rejected',
    'international/respect_sovereignty',
    'maduro/cuba_support',
    'maduro/dictator',
    'maduro/legitimate',
    'maduro/narco',
    'military',
    'military/desertions',
    'other/anti_socialism',
    'other/censorship_outage',
    'other/chavez',
    'other/chavez/anti',
    'protests',
    'violence',
]

In [None]:
# Passed as save_plots= to the plotting calls in this notebook.
save_plots = False

# Scalar and temporal measurement names, passed to load_measurements.
meas_list_scalar = ['number_of_shares', 'activated_users']
meas_list_temporal = ['number_of_shares_over_time', 'activated_users_over_time']

# Measurements shown in the metric-level plots.
meas_list = [
    'number_of_shares',
    'number_of_shares_over_time',
    'activated_users',
    'activated_users_over_time',
    'degree_distribution',
    'page_rank',
]

## Load Data
Specify the file path and the pickled measurement and metric results files to load. This notebook is for analyzing the performance of multiple models across a single split, so please point it to the files for a single split. To generate these pickle files, run the EvaluationRunner (see the example in /examples/eval_runner.py) and pickle the output.

In [None]:
# Directory searched for the pickled measurement/metric result files.
path = './'

# Collect the matching files in sorted (deterministic) order.
# The pattern is a plain string: it has no placeholders, so no f-prefix is needed.
# NOTE(review): the loaders presumably unpickle these files - only point this
# at pickles you generated yourself, since unpickling can execute arbitrary code.
fns = sorted(glob.glob(path + '*measurement*.pickle'))

if not fns:
    # Make an empty match explicit before any loading is attempted.
    print(f'No measurement pickle files found in {path!r}. Please check the path above.')

fns

In [None]:
# Load the metric results from the selected files for the chosen narratives
# on the two listed platforms.
df = load_metrics(
    fns,
    narratives,
    platforms=['twitter', 'youtube'],
)

In [None]:
# Warn when the loaded results mix several splits; nothing is raised, so the
# message is only advisory.
if df['split'].nunique() > 1:
    print(
        'More than one split found. '
        'This notebook is for analyzing one split at a time. '
        'Please refine file list above.'
    )

## Metric-Level Plots
Plot the metric performance of the model

In [None]:
# Mean and standard deviation of the metric value for every
# narrative/model/measurement/metric/platform combination.
# The aggregators are given as strings rather than np.mean/np.std: recent
# pandas versions emit a FutureWarning for NumPy callables in agg, and the
# string names pin the pandas implementations (std with ddof=1) while
# producing the same 'mean' and 'std' output columns.
grouped = (
    df.groupby(['narrative', 'model', 'measurement', 'metric', 'platform'])['value']
    .agg(['mean', 'std'])
    .reset_index()
)
grouped

### Metric distributions by model

The plots below show the variability in metric results across narratives and models.  Each point is an individual narrative and the error bars show the variability across multiple submissions of the same method. You can specify which platforms and measurements you would like to plot.  Additionally, some measurements tend to show a large range of metric results, so there is a log option to plot these metrics on a log scale to better see this range (if log=True, the metrics that span more than a factor of 100 will be plotted on a log scale while the others will remain on a linear scale).

In [None]:
# Strip plots of the aggregated metrics for both platforms and all listed
# measurements, with the log option switched on.
strip_plot(
    grouped,
    ['twitter', 'youtube'],
    meas_list,
    log=True,
    save_plots=save_plots,
)

### Metric distributions by model: narrative focus

We can also highlight individual narratives in these plots.  The narrative variable in the block below can be used to select a specific narrative.

In [None]:
# Selection for the narrative-focused strip plot.
meas = 'number_of_shares'
platform = 'twitter'
narrative = 'arrests'

strip_plot(
    grouped,
    [platform],
    [meas],
    narrative=narrative,
    log=False,
    save_plots=save_plots,
)

### Pairwise model comparisons

The plots below show a scatter plot comparison of each pair of models, with the values plotted being the metric result.  Each point is a single narrative and the error bars represent variation among multiple submissions from the same model.

In [None]:
# Pairwise comparison is only attempted when at least two models are present.
if grouped['model'].nunique() <= 1:
    print('Must have more than one model.')
else:
    pairwise_scatterplots(
        grouped,
        ['twitter', 'youtube'],
        meas_list,
        log=True,
        save_plots=save_plots,
    )

### CCDF Plots

The plots below show a comparison of the distribution of metrics across narratives for the different models. The line shows the percentage of narratives that have an error value as good or better than the value on the x-axis for a given model. The error band indicates variability due to multiple submissions from the same model. As with the above plots, you can set the log option to True to apply a log scale to any metrics which span more than a factor of 100.

In [None]:
# CCDF plots are drawn from the un-aggregated metric frame (df), not from
# the grouped summary.
ccdf_plots(
    df,
    ['twitter', 'youtube'],
    meas_list,
    log=True,
    save_plots=save_plots,
)

## Measurement-Level Plots
Plot the measurements of the model and the ground truth

In [None]:
# Load ground-truth and simulation measurement frames from the same files.
gt_df, sim_df = load_measurements(
    fns,
    narratives,
    meas_list_scalar,
    meas_list_temporal,
)

In [None]:
# Per-timestep simulation statistics: mean and standard deviation of the value
# for each platform/informationID/meas/nodeTime/model combination.
# String aggregator names replace np.mean/np.std because recent pandas versions
# warn on NumPy callables in agg; the strings keep the pandas implementations
# (std with ddof=1) and the same 'mean'/'std' column names.
# NOTE(review): with pandas' default groupby(dropna=True), rows whose nodeTime
# is null are excluded here - presumably those are the scalar measurements.
sim_time_df = (
    sim_df.groupby(['platform', 'informationID', 'meas', 'nodeTime', 'model'])['value']
    .agg(['mean', 'std'])
    .reset_index()
)

# Ground truth: mean value per platform/informationID/meas/nodeTime.
gt_time_df = (
    gt_df.groupby(['platform', 'informationID', 'meas', 'nodeTime'])['value']
    .mean()
    .reset_index()
)

# Attach the ground-truth 'value' column next to the simulation 'mean'/'std'.
sim_time_df = sim_time_df.merge(
    gt_time_df,
    on=['platform', 'informationID', 'meas', 'nodeTime'],
)

### Temporal Measurements

The plots below compare the ground truth time series measurements with the simulation measurements for temporal measurements. The specific narrative to plot can be specified using the narrative variable in the block below.

In [None]:
# Selection for the time series comparison; `narrative` is reused by the
# following time series cells, which do not set it again.
narrative = 'arrests'
platform = 'twitter'
meas = 'number_of_shares_over_time'

time_series_plot(sim_time_df, platform, meas, narrative, save_plots=save_plots)

In [None]:
# Shares over time on YouTube; `narrative` keeps the value set in an earlier cell.
platform = 'youtube'
meas = 'number_of_shares_over_time'

time_series_plot(sim_time_df, platform, meas, narrative, save_plots=save_plots)

In [None]:
# Activated users over time on Twitter; `narrative` keeps the value set in an
# earlier cell.
platform = 'twitter'
meas = 'activated_users_over_time'

time_series_plot(sim_time_df, platform, meas, narrative, save_plots=save_plots)

In [None]:
# Activated users over time on YouTube; `narrative` keeps the value set in an
# earlier cell.
platform = 'youtube'
meas = 'activated_users_over_time'

time_series_plot(sim_time_df, platform, meas, narrative, save_plots=save_plots)

In [None]:
# Keep only the rows without a timestamp (null nodeTime) and drop that column.
sim_scalar_df = sim_df[sim_df['nodeTime'].isnull()].drop('nodeTime', axis=1)
gt_scalar_df = gt_df[gt_df['nodeTime'].isnull()].drop('nodeTime', axis=1)

# Simulation statistics per platform/informationID/meas/model.
# String aggregator names replace np.mean/np.std because recent pandas versions
# warn on NumPy callables in agg; the strings keep the pandas implementations
# (std with ddof=1) and the same 'mean'/'std' column names.
sim_scalar_df = (
    sim_scalar_df.groupby(['platform', 'informationID', 'meas', 'model'])['value']
    .agg(['mean', 'std'])
    .reset_index()
)

# Ground truth: mean value per platform/informationID/meas.
gt_scalar_df = (
    gt_scalar_df.groupby(['platform', 'informationID', 'meas'])['value']
    .mean()
    .reset_index()
)

# Attach the ground-truth 'value' column next to the simulation 'mean'/'std'.
sim_scalar_df = sim_scalar_df.merge(
    gt_scalar_df,
    on=['platform', 'informationID', 'meas'],
)
sim_scalar_df

### Scalar measurements plot
The plots below show the measured value of each scalar measurement in the simulation versus the ground truth value.  Each point is an individual narrative and the error bars indicate variation over multiple submissions.

In [None]:
scatter_plot(sim_scalar_df,['twitter','youtube'],meas_list_scalar,log=False,save_plots=save_plots)