# Visualize Simulated Boycott Results

In [25]:
from collections import defaultdict
import json

In [26]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="dark")
from scipy import stats

from scipy.interpolate import interp1d

NUM_USERS = 6040

First things first: let's load our master data file (a single, aggregated csv)
and take a glance at the data.

In [27]:
df = pd.read_csv('all_results.csv', dtype={'indices': str})
print(len(df.index))
df = df[df.ratingfrac.notna()]
print(len(df.index))
df.sample(20)

240
63


Unnamed: 0.2,Unnamed: 0,name,algo_name,ratingfrac,userfrac,Unnamed: 0.1,fit_time_all,fit_time_all-like-boycott,fit_time_boycott,fit_time_non-boycott,...,tailrec10t4_non-boycott,tailrec5t4_all,tailrec5t4_all-like-boycott,tailrec5t4_boycott,tailrec5t4_non-boycott,test_times_all,test_times_all-like-boycott,test_times_boycott,test_times_non-boycott,type
147,11,users from state US_TX excluded,SVD,1.0,1.0,0005_SVD,90.904163,90.904163,90.904163,90.904163,...,0.738558,0.562542,0.544054,0.544054,0.563542,16.464619,40.729066,39.83599,31.845351,state
231,95,users from state US_WY excluded,SVD,1.0,1.0,0047_SVD,94.942842,94.942842,94.942842,94.942842,...,0.738677,0.563861,0.52693,0.52693,0.563906,22.548624,47.897437,47.888953,42.250958,state
171,35,users from state US_PA excluded,SVD,1.0,1.0,0017_SVD,94.274341,94.274341,94.274341,94.274341,...,0.737835,0.562981,0.57309,0.57309,0.562676,23.299574,46.355781,46.123752,41.465493,state
133,1,Top 10% contributors excluded,SVD,1.0,1.0,0000_SVD,27.699954,27.699954,27.699954,27.699954,...,0.795404,0.557856,0.103379,0.103379,0.611477,3.238245,11.626934,9.740503,7.218202,power
15,1,male users excluded,SVD,1.0,1.0,0000_SVD,10.492612,10.492612,10.492612,10.492612,...,0.750788,0.550342,0.53917,0.53917,0.578677,2.800397,11.560548,7.983919,5.431609,gender
223,87,users from state US_AR excluded,SVD,1.0,1.0,0043_SVD,91.973524,91.973524,91.973524,91.973524,...,0.738467,0.562201,0.484207,0.484207,0.562583,33.277314,55.088475,55.05379,50.056825,state
161,25,users from state US_KY excluded,SVD,1.0,1.0,0012_SVD,97.046745,97.046745,97.046745,97.046745,...,0.738902,0.562158,0.518297,0.518297,0.562495,13.469376,37.730448,37.674502,32.513923,state
167,31,users from state US_IA excluded,SVD,1.0,1.0,0015_SVD,95.863534,95.863534,95.863534,95.863534,...,0.737296,0.563003,0.583082,0.583082,0.562812,15.259125,35.563554,35.503796,30.784526,state
173,37,users from state US_MD excluded,SVD,1.0,1.0,0018_SVD,94.571637,94.571637,94.571637,94.571637,...,0.738232,0.563094,0.552291,0.552291,0.563248,23.317024,50.075086,49.39214,44.823855,state
7,7,35-44 excluded,SVD,1.0,1.0,0003_SVD,39.469553,39.469553,39.469553,39.469553,...,0.739259,0.560704,0.551851,0.551851,0.562888,5.009918,14.136927,12.733035,10.12427,age


Let's apply transformation that will affect the entire dataframe.
1. Calculate the number of users included in each experiment.

In [30]:
df = df.assign(num_users_boycotting = [NUM_USERS - int(x) for x in df.num_users])

Define all the lists that can be used throughout. By editing this cell we can easily modify the full notebook.

In [42]:
id_vars = ['name','algo_name', 'indices', 'ratingfrac', 'userfrac', 'num_ratings', 'num_users', 'num_users_boycotting']
metrics = [
    'rmse',
    'ndcg10',
    'prec10t4',
    'ndcgfull',
]
organized_experiments = [
    'gender', 'age', 'state',
    'occupation', 'power', 'genre'
]
algo_names = [
    #'KNNBaseline_item_msd',
    'SVD']
standard_algo_names = ['KNNBaseline_item_msd', 'SVD', 'GlobalMean', 'MovieMean', 'GuessThree']
standard_results = {}
algo_to_metric_vs_global_mean = defaultdict(dict)
algo_to_metric_vs_movie_mean = defaultdict(dict)
algo_to_metric_vs_guess_three = defaultdict(dict)

for algo_name in standard_algo_names:
    filename_ratingcv_standards = 'standard_results/{}_ratingcv_standards_for_{}.json'.format(
        'ml-1m', algo_name)
    with open(filename_ratingcv_standards, 'r') as f:
        standard_results[algo_name] = json.load(f)
for main_algo_name in algo_names:
    for metric in metrics:
        goodval = standard_results[main_algo_name][metric]
        gmean = standard_results['GlobalMean'][metric]
        mmean = standard_results['MovieMean'][metric]
        guess_three = standard_results['GuessThree'][metric]
        algo_to_metric_vs_global_mean[main_algo_name][metric] = (gmean - goodval) / goodval * 100
        algo_to_metric_vs_movie_mean[main_algo_name][metric] = (mmean - goodval) / goodval * 100
        algo_to_metric_vs_guess_three[main_algo_name][metric] = (guess_three - goodval) / goodval * 100
print(algo_to_metric_vs_global_mean)
print(algo_to_metric_vs_movie_mean)
print(algo_to_metric_vs_guess_three)

defaultdict(<class 'dict'>, {'SVD': {'rmse': 27.757625644825417, 'ndcg10': -25.181601807059277, 'prec10t4': -29.910310919612286, 'ndcgfull': -9.072962376431905}})
defaultdict(<class 'dict'>, {'SVD': {'rmse': 12.02164117801497, 'ndcg10': -2.4938085680150364, 'prec10t4': -12.206143440573765, 'ndcgfull': -0.764087492022332}})
defaultdict(<class 'dict'>, {'SVD': {'rmse': 44.03351674123327, 'ndcg10': -25.31588987035041, 'prec10t4': -29.787873088855292, 'ndcgfull': -8.970830512291785}})


In [43]:
algo_to_metric_to_slope = defaultdict(dict)
algo_to_metric_to_intercept = defaultdict(dict)
algo_to_metric_to_average_boycott = defaultdict(dict)


In [44]:
samples_df = df[df['type'] == 'sample_users']
if not samples_df.empty:
    print(samples_df[['num_users', 'num_users_boycotting']].sample(1))
org_df = df[df['type'].isin(organized_experiments)]

In [45]:
org_df.name = [
    x.replace('excluded', '')
    .replace('users from', '')
    .replace('US_', '')
    .replace('state', '')
    .strip()
    for x in list(org_df.name)
]

First, let's see how well the # of ratings correlates with error.
This will be an important piece of context for analyzing the experiments.

In [49]:
for algo_name in algo_names:
    filt = org_df[org_df.algo_name == algo_name]
    for metric in metrics:
        key = 'percent_increase_{}_non-boycott'.format(metric)
        print(filt)
        sns.jointplot(filt.num_ratings, y=filt[key], kind="reg").fig.suptitle(algo_name)
        plt.show()
        slope, intercept, r_value, p_value, std_err = stats.linregress(filt.num_ratings, y=filt[key])
        print(slope, intercept, r_value, p_value)
        algo_to_metric_to_slope[algo_name][metric] = slope
        algo_to_metric_to_intercept[algo_name][metric] = intercept
        
        # calculate the average error for all boycott users in each dataframe
        algo_to_metric_to_average_boycott[algo_name][metric] = np.mean(
            samples_df[(
                (samples_df.algo_name == algo_name) &
                (samples_df.userfrac == 1.0) & 
                (samples_df.ratingfrac == 1.0)
            )][key.replace('non-boycott', 'boycott')]
        )

        print(algo_to_metric_to_average_boycott)

percent_increase_rmse_non-boycott
1      [-0.24132923  0.11279136 -0.45995888 -0.445859...
3      [-0.29912427  0.05479116 -0.51762726 -0.503536...
5      [1.70677741 2.06781334 1.48387832 1.49825267 1...
7      [1.16636669 1.52548428 0.94465196 0.95894993 0...
9      [0.54866895 0.90559385 0.32830795 0.34251863 0...
11     [0.45420517 0.81079475 0.2340512  0.24824852 0...
13     [ 0.22965113  0.5854436   0.00998929  0.024154...
15     [7.13159164 7.51188443 6.89680361 6.91194466 6...
17     [-0.73362629 -0.38125324 -0.95117702 -0.937147...
133    [4.03078281 4.40006843 3.80279046 3.81749327 3...
135    [-0.12294083  0.23160001 -0.34182993 -0.327714...
137    [0.83135279 1.18928116 0.61037227 0.62462289 0...
139    [ 0.18037054  0.53598807 -0.03918329 -0.025024...
141    [ 0.14972174  0.50523047 -0.06976493 -0.055610...
143    [ 0.24988576  0.60575005  0.03017957  0.044348...
145    [ 0.17696846  0.53257391 -0.04257792 -0.028419...
147    [ 0.25191444  0.60778594  0.03220381  0.046372.

TypeError: can't multiply sequence by non-int of type 'float'

In [None]:
# list(samples_df.columns.values)

In [None]:
def select_cols(cols, metrics, groups, percents):
    """ rename?"""
    increase_cols = [
        x for x in cols if (any(metric in x for metric in metrics) and 'increase' in x)
    ]
    increase_cols = [
        x for x in increase_cols if any(group == x.split('_')[-1] for group in groups)
    ]
    if percents:
        increase_cols = [x for x in increase_cols if 'percent' in x]
    else:
        increase_cols = [x for x in increase_cols if 'percent' not in x]
    return increase_cols

In [None]:
def fill_in_longform(df):
    """Fill in a longform dataframe with metric, group, and name information"""
    df = df.assign(
        metric=[x.split('_')[-2] for x in df.increase_type]
    )
    df = df.assign(
        group=[x.split('_')[-1] for x in df.increase_type]
    )
    return df
    

In [None]:
def p_b_curve(df, metrics, groups, percents=False, lm_plot=True, reg_plot=False):
    algo_to_metric_to_interp = defaultdict(dict)
    if lm_plot:
        increase_cols = select_cols(list(df.columns.values), metrics, groups, percents)
        longform = df[increase_cols + id_vars].melt(
            id_vars = id_vars,
            var_name='increase_type'
        )
        longform = fill_in_longform(longform)
        
        grid = sns.lmplot(
            x="num_users_boycotting", y="value", hue="group", data=longform,
            sharey='row', sharex='col',
            size=7, row='metric', col='algo_name',
            fit_reg=False,
            x_estimator=np.mean, ci=99,
        )
        # CALC LOG PLOT
#         vals = [float(x) for x in list(longform.value)]
#         minval = np.min(vals)
#         shifted = vals + - minval + 0.00001
#         print(np.min(shifted))
#         longform = longform.assign(logy = np.log2(shifted))
#         g = sns.lmplot(
#             x="num_users_boycotting", y="logy", hue="algo_name", data=longform,
#             size=6, row='metric',
#             x_estimator=np.mean,
#             order=2
#         )
    
    for metric in metrics:
        for algo_name in algo_names:
            filt = df[df.algo_name == algo_name]
            key = 'increase_{}_non-boycott'.format(metric)
            if percents:
                key = 'percent_' + key
            x = filt.num_users_boycotting
            user_nums = list(set(filt.num_users_boycotting))
            y = filt[key]
            
            num_to_mean = {}
            for num_users_boycotting in user_nums:
                filt_by_name = filt[filt.num_users_boycotting == num_users_boycotting]
                num_to_mean[num_users_boycotting] = np.mean(filt_by_name[key])
            meany = np.array(list(num_to_mean.values()))
            
            smoothf = interp1d(user_nums, meany, kind='cubic', bounds_error=False, fill_value='extrapolate')
            algo_to_metric_to_interp[algo_name][metric] = smoothf
            xnew = np.linspace(min(user_nums), max(user_nums), num=500)
            if reg_plot:
                _, ax = plt.subplots()
                g = sns.regplot(
                    x=x, y=y,
                    x_estimator=np.mean,
                    x_bins=user_nums,
                    ax=ax,
                    ci=99,
                    fit_reg=False,
                )
                g.set_title(algo_name)
                
                plt.plot(xnew, smoothf(xnew), '-')
                plt.axhline(0, color='0.5', linestyle='--')
                plt.axhline(algo_to_metric_vs_movie_mean[algo_name][metric], color='0.5', linestyle='--')
    for x in grid.facet_data():
        i_row, i_col, i_hue = x[0]
        metric = grid.row_names[i_row]
        algo_name = grid.col_names[i_col]
        group = grid.hue_names[i_hue]
        if group != 'non-boycott':
            continue
        # flag. 
        ax = grid.axes[i_row, i_col]
        ax.plot(xnew, algo_to_metric_to_interp[algo_name][metric](xnew), '-', color=grid._colors[i_hue])
        ax.axhline(0, color='0.5', linestyle='--')
        ax.axhline(algo_to_metric_vs_movie_mean[algo_name][metric], color='0.5', linestyle='--')
#     for metric in metrics:
#         for algo_name in algo_names

    return algo_to_metric_to_interp


Working through log plots
y = log(x)
y = e ^ x
ln(y) = x

In [None]:
algo_to_metric_to_interp = p_b_curve(
    samples_df,
    metrics=metrics,
    percents=True,
    groups=['non-boycott', 'all', ]
)
plt.show()

In [None]:
algo_to_metric_to_interp = p_b_curve(
    samples_df,
    metrics=metrics,
    percents=True,
    groups=['all',]
)
plt.show()

In [None]:
for metric in metrics:
    for algo_name in algo_names:
        key = 'percent_increase_{}_expected'.format(metric)
        expected_vals = algo_to_metric_to_interp[algo_name][metric](org_df.num_users_boycotting)
        print(key)
        kwargs = {key: expected_vals}
        org_df = org_df.assign(**kwargs)

In [None]:
# Use this cell to show raw values instead of percents.

# g = plot1(
#     samples_df,
#     metrics=['rmse', 'ndcg', 'prec'],
#     percents=False,
#     groups=['non-boycott', 'all',]
# )
# plt.show()

In [None]:
algo_to_metric_vs_movie_mean

In [None]:
def plot2(df, metrics, groups, percents=False, kind='bar', size=10):
    increase_cols = select_cols(list(df.columns.values), metrics, groups, percents)
    longform = df[increase_cols + id_vars].melt(
        id_vars = id_vars,
        var_name='increase_type'
    )
    longform = fill_in_longform(longform)
    longform = longform.assign(
        name_plus_ratings=['{} ({}k)'.format(
            name, int(round(num_ratings/1000,0))
        ) for name, num_ratings in zip(longform.name, longform.num_ratings)
    ])
    g = sns.factorplot(
        x="value", y="name", hue="group", data=longform,
        size=size, kind=kind, col='algo_name', row='metric',
        sharex=False,
        # linestyles='None',
        # capsize=0.1
    )
    a = g.axes
    
    # flag: hardcoded right now. Not good.
    # use .facet_data() ?
    algo_order = ['KNNBaseline_item_msd', 'SVD']
    metric_order = ['ndcg10', 'ndcgfull', 'rmse']
    for y in range(a.shape[0]):
        for x in range(a.shape[1]):
            val = algo_to_metric_vs_movie_mean[algo_order[x]][metric_order[y]]
            g.axes[y,x].axvline(0, color='0.5', linestyle='--')
            g.axes[y,x].axvline(val, color='0.5', linestyle='--')
    return g


In [None]:
state_boycotts = org_df[org_df['type'] == 'state']
gender_boycotts = org_df[org_df['type'] == 'gender']
age_boycotts = org_df[org_df['type'] == 'age']
occupation_boycotts = org_df[org_df['type'] == 'occupation']
power_boycotts = org_df[org_df['type'] == 'power']
genre_boycotts = org_df[org_df['type'] == 'genre']

In [None]:
def all_users_all_ratings(df):
    return df[(
        (df.userfrac == 1.0) & (df.ratingfrac == 1.0)
    )]

In [None]:
def half_users(df):
    return df[(
        (df.userfrac == 0.5) & (df.ratingfrac == 1.0)
    )]

In [None]:
def half_ratings(df):
    return df[(
        (df.userfrac == 1.0) & (df.ratingfrac == 0.5)
    )]

In [None]:
def plot_all_three_scenarios(df, size=6):
    plot2(
        all_users_all_ratings(df),
        metrics=metrics,
        percents=True,
        groups=['non-boycott', 'expected'],
        size=size
    ).fig.suptitle('All Users, All Ratings')
    plot2(
        half_users(df),
        metrics=metrics,
        percents=True,
        groups=['non-boycott', 'boycott', 'like-boycott', 'expected'],
        size=size
    ).fig.suptitle('Half Users')
    plot2(
        half_ratings(df),
        metrics=metrics,
        percents=True,
        groups=['non-boycott', 'boycott', 'like-boycott', 'expected'],
        size=size,
    ).fig.suptitle('Half Ratings')
    plt.show()

## Let's do gender first...

In [None]:
plot_all_three_scenarios(gender_boycotts)

States seemed interesting and slowed the page down a bit, so I commented out for now.
Main conclusion was that state-based boycotts seem to be pretty ineffective for basically every state.
One funny result was that it seems California users have especially low-value ratings - the performance degradation was quite off from the expected degradation (b/c CA has a lot of users).

In [None]:
# plot_all_three_scenarios(state_boycotts, size=12)

Below, the power boycotts show a very weird results. Warrants double checks.

It appears for KNN, removing all power users actually improves performance (as opposed to the large expected drop).
But at the same time, RMSE increases! This is one of the example where RMSE and NDCG do not correlate...

Note the artifact in plot #5 below: it looks like ndcg has a huge increase when bottom 10% users do a half boycott... but this is b/c ndcg@10 is just getting easier to "succeed" for these users

In [None]:
plot_all_three_scenarios(power_boycotts)

In [None]:
plot_all_three_scenarios(age_boycotts)

In [None]:
plot_all_three_scenarios(occupation_boycotts, size=10)

In [None]:
plot_all_three_scenarios(genre_boycotts, size=10)

### Looking at our real numbers
Full dataset precision@10 is 
SVD: 0.8026069529047541
KNN: 0.7999920231446164
w/ 3000 users boycotting:
KNN: 0.785440336
SVD: 0.784764938

In [None]:
prec_w_0 = {
    'SVD': 0.8026069529047541,
    'KNN': 0.7999920231446164
}
prec_w_3000 = {
    'SVD': 0.784764938,
    'KNN': 0.785440336
}

In [None]:
for n in ('SVD', 'KNN'):
    old = prec_w_0[n]
    new = prec_w_3000[n]
    err_rate_old = (1 - old) * 10
    err_rate_new = (1 - new) * 10
    delta = err_rate_new - err_rate_old
    print('err rate old vs new:', err_rate_old, err_rate_new, err_rate_new / err_rate_old)
    print('% change err rate', delta / err_rate_old * 100)
    percent_change = (new - old) / old * 100
    raw_change = new - old
    print(percent_change, '%', raw_change, 'raw')
    err_per_user = raw_change * 10
    print(err_per_user / err_rate_old * 100)
    
    err_per_1m = err_per_user * 1e6
    users_needed_to_see_err = 1 / err_per_user
    print('Error per user:', err_per_user)
    print('Error per 1m users:', err_per_1m)
    print(users_needed_to_see_err, 'users')