In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%load_ext autoreload
%autoreload 2
from generic.latexify import *

In [None]:
from visualization.settings import *
from visualization.helpers import *
from visualization.paperplots import *

In [None]:
from optimize.analyze_results import *

# Load the data

In [None]:
# File-name template and cache directory handed to load_organized_df when the
# per-rule frames are loaded below (`path` is also reused for the STV loads).
template = 'combined_directfromoptimization3'
path = 'cached_values/outputs/organized'

In [None]:
# Voting rules analysed in this notebook; they become the keys of `dfs` and `distributions`.
methods = ['thiele_pav', 'thiele_independent', 'thiele_squared']

In [None]:
# One organized results frame per voting rule, keyed by rule name.
dfs = {method:load_organized_df(path, template, method, directfromoptimization = True) for method in methods}

In [None]:
dfs[methods[0]].head()

In [None]:
dfs[methods[0]].tail()

In [None]:
# dfs[methods[0]].N_districts.unique()

In [None]:
# State codes found in the first rule's frame. The helper functions defined later
# (get_proportionality_gaps, get_competitiveness_per_method, ...) read this global directly.
states = dfs[methods[0]].state.unique()

In [None]:
# Party labels iterated by the cohesion rescaling cell further down.
parties = ['Republican', 'Democrat']
# Rules that get a panel / line in the comparison plots (currently the same list as `methods`).
methods_to_plot = ['thiele_pav', 'thiele_independent', 'thiele_squared']
# Display the rule -> label mapping. `method_names` is not defined in this notebook's own cells;
# presumably it comes from one of the star imports above — TODO confirm.
method_names

In [None]:
print(dfs[methods[0]].columns)

In [None]:
# STV results are loaded without `directfromoptimization = True`, unlike the Thiele rules
# (original note: "STV still loading from my way").
# This frame uses the 'combined_partisan' template.
dfstvpartisan = load_organized_df(path, 'combined_partisan', 'stv')

In [None]:
# Second STV frame, from the 'combined' template, stored next to the Thiele frames.
# 'stv' is not listed in `methods`, so comprehensions over `methods` skip it.
dfs['stv'] = load_organized_df(path, 'combined', 'stv')

# Proportionality stuff

In [None]:
# Per-rule output of state_seat_share_distributions_nikhil; later cells index it as
# distributions[method][state] and select rows such as 'Median' or 'Most Republican'.
distributions = {method: state_seat_share_distributions_nikhil(dfs[method]) for method in methods}

In [None]:
distributions

## Overall proportionality for each method

In [None]:
# One panel per voting rule; every panel shares the y-axis of the first one.
ax1 = None
fig = plt.figure(figsize = (20, 5))
n_panels = len(methods_to_plot)
for enn, method in enumerate(methods_to_plot):
    # Three-argument subplot form: the packed-integer form (e.g. 131) only works
    # while both the column count and the panel index stay below 10.
    if ax1 is None:
        ax = plt.subplot(1, n_panels, enn + 1)
        ax1 = ax
    else:
        ax = plt.subplot(1, n_panels, enn + 1, sharey = ax1)
    print(method)
    _ = plot_all_state_distribution_generic(
        distributions[method],
        prop_val = get_prop(dfs[method]),
        do_vertical_integers = True,
        # Only the last panel carries the legend.
        legend = enn == n_panels - 1,
        ax = ax, party_colors = True, bbox_to_anchor=(-2.25, 1.055),
        legendncol = 4, legendfontsize = 20,
    )
    ax.set_title(method_names[method], fontsize = 20)
saveimage('prop_differentmethods', extension = 'pdf')

## Proportionality gap for a few relevant states

In [None]:
# Main-text figure: per-district box plots for four states under a single rule.
from visualization.fancy_plots import *
method = 'thiele_pav'  # alternative: 'stv'
ax1 = None
fig = plt.figure(figsize = (12, 5))
state_names = {}  # optional state code -> display name overrides; empty means the code itself is shown
# Alternatives tried: ['NY', 'MA', 'OK', 'FL', 'TX'] or dfs[method].state.unique()
states_to_do= ['CA', 'MA', 'FL', 'TX']
n_panels = len(states_to_do)
for enn, state in enumerate(states_to_do):
    # Three-argument subplot form: the packed-integer form (e.g. 141) breaks as soon as
    # more than 9 states are requested.
    if ax1 is None:
        ax = plt.subplot(1, n_panels, enn + 1)
        ax1 = ax
    else:
        ax = plt.subplot(1, n_panels, enn + 1, sharey = ax1)

    seats = state_constants[state]["seats"]
    # Not passed to the plotting call below; kept as a global for interactive inspection.
    xbins = [x/seats for x in range(1, int(seats)+1)]
    print(state, seats)
    dfstate = dfs[method].query('state==@state')

    boxplot_per_district_for_single_state_per_method(
        dfstate,
        state,
        do_extremes_and_prop_line=True,
        additional_filters={},ax = ax
    )
    ax.set_title(state_names.get(state, state), fontsize = 20)
# The y-axis is shared, so labelling and limiting the first panel is enough.
ax1.set_ylabel('Republican seat share', fontsize = 20)
ax1.set_ylim((0, 1))
saveimage('prop_states_boxNY', extension = 'pdf')

In [None]:
# Appendix figure: one per-district box plot per state, laid out on a 6-column grid.
from visualization.fancy_plots import *
method = 'thiele_pav'  # alternative: 'stv'
ax1 = None
fig = plt.figure(figsize = (24, 42))
state_names = {}
states_to_do = dfs[method].state.unique()
width = 6
height = int(np.ceil(len(states_to_do) / width))
for enn, state in enumerate(states_to_do):
    # 1-based grid row / column of this panel, printed as a progress trace.
    grid_row, grid_col = divmod(enn, width)
    print(state, grid_row + 1, width, grid_col + 1)
    ax = plt.subplot(height, width, enn + 1)

    seats = state_constants[state]["seats"]
    xbins = [step / seats for step in range(1, int(seats) + 1)]
    print(state, seats)
    dfstate = dfs[method].query('state==@state')

    boxplot_per_district_for_single_state_per_method(
        dfstate, state,
        do_extremes_and_prop_line=True,
        additional_filters={},
        ax=ax,
    )
    # Per-panel x labels are blanked; the state code goes in the title.
    ax.set_xlabel('')
    ax.set_title(state_names.get(state, state), fontsize = 20)
saveimage('prop_states_all', extension = 'pdf')

## Cumulative proportionality gap

In [None]:
import copy
#Appendix plot -- difference between parties
def get_prop_gap_by_state_demrepdifference(dfsmet, baseline = .485):
    """Return per-state 'Most Republican' / 'Most Democratic' rows, offset by a fixed baseline.

    Parameters
    ----------
    dfsmet : dict
        Mapping state -> DataFrame whose index contains the rows
        'Most Republican' and 'Most Democratic' (e.g. ``distributions[method]``).
    baseline : float, default .485
        Constant subtracted from every value before the two rows are selected.
        NOTE(review): presumably a reference Republican share; it is a single constant
        for all states, not the per-state value from ``get_prop`` — confirm this is intended.

    Returns
    -------
    dict
        Deep copy of ``dfsmet`` in which every state listed in the global ``states``
        is replaced by its two-row, baseline-shifted frame. The input is not modified.
    """
    ret = copy.deepcopy(dfsmet)
    for state in states:
        shifted = ret[state] - baseline
        ret[state] = shifted.loc[['Most Republican', 'Most Democratic']]
    return ret

# Three side-by-side panels, one per rule. Only the per-panel differences are listed here;
# everything else is shared by the single plotting call in the loop.
fig = plt.figure(figsize = (18, 5))
panel_specs = [
    ('thiele_pav', 'STV and PAV',
     dict(legend=False, bbox_to_anchor=(0.01, .8), legendncol=2)),
    ('thiele_independent', 'Winner takes all',
     dict(legend=False, bbox_to_anchor=(0.01, .7), legendncol=1)),
    ('thiele_squared', 'Thiele Squared',
     dict(legend=True, bbox_to_anchor=(0.01, .7), legendncol=1)),
]
for panel_index, (rule_key, panel_title, panel_kwargs) in enumerate(panel_specs, start=1):
    ax = plt.subplot(1, 3, panel_index)
    ret = get_prop_gap_by_state_demrepdifference(distributions[rule_key])
    _ = plot_all_state_distribution_generic(
        ret,
        prop_val=None,
        do_vertical_integers=False,
        ymin=-.05,
        ymax=.25,
        xbins=None,
        xlabel="Avg proportionality gap",
        do_zoom=False,
        do_broken_axes=False,
        legendfontsize=15,
        loc="lower left",
        do_abs_after_combining=True,
        ax=ax,
        party_colors=True,
        **panel_kwargs
    )
    ax.set_title(panel_title, fontsize = 20)

saveimage('gerrymandering_advantage_by_rule_fixed', extension = 'pdf')

In [None]:
# re-orienting it so each line is a method, not min/max/median/most fair
def get_proportionality_gaps(optimization = 'Most Fair in each state', get_max_instead = False):
    """Build, for every state, a table of absolute proportionality gaps with one row per rule.

    For each state in the global ``states`` and each rule in ``methods_to_plot`` the
    absolute difference between ``distributions[rule][state]`` and ``get_prop`` of that
    state's rows is taken. Either the row named ``optimization`` is kept, or, when
    ``get_max_instead`` is true, the column-wise maximum over all rows (``optimization``
    is then ignored).

    Returns a dict state -> DataFrame indexed by the display name of each rule
    (index name 'method').
    """
    gaps_by_state = {}
    for state in states:
        table = {'method': [method_names[met] for met in methods_to_plot]}
        for method in methods_to_plot:
            # `state` must keep this name: the query string refers to it via @state.
            prop = get_prop(dfs[method].query('state==@state'))
            abs_gap = (distributions[method][state] - prop).abs()
            vals = abs_gap.max(axis = 0) if get_max_instead else abs_gap.loc[optimization]
            for label in vals.index:
                table.setdefault(label, []).append(vals[label])
        gaps_by_state[state] = pd.DataFrame(table).set_index('method')
    return gaps_by_state


In [None]:
# distributions_gaps

In [None]:
# Gaps taken from the 'Most Fair in each state' row of every rule's distribution.
distributions_gaps = get_proportionality_gaps(optimization = 'Most Fair in each state')

In [None]:
# Proportionality gap of the 'Most Fair in each state' rows, one line per rule.
_ = plot_all_state_distribution_generic(
    distributions_gaps,
    prop_val=None,
    do_vertical_integers=False,
    ymin=0,
    ymax=.10,
    xbins=None,
    legend=True,
    xlabel="Avg proportionality gap",
    do_zoom=False,
    bbox_to_anchor=(0.5, .6),
    do_broken_axes=False,
    loc="lower left",
)
saveimage('prop_gap', extension = 'pdf')

In [None]:
# Gaps taken from the 'Median' row of every rule's distribution.
distributions_gaps_median = get_proportionality_gaps(optimization = 'Median')

In [None]:
# Column-wise maximum gap over all rows; `optimization` is unused when get_max_instead is True,
# so an empty string is passed as a placeholder.
distributions_gaps_max = get_proportionality_gaps(optimization = '', get_max_instead = True)

In [None]:
# Gaps taken from the 'Most Republican' and 'Most Democratic' rows respectively.
distributions_gaps_rep = get_proportionality_gaps(optimization = 'Most Republican')
distributions_gaps_dem = get_proportionality_gaps(optimization = 'Most Democratic')

In [None]:
distributions_gaps_dem

In [None]:
# Three panels (median / most Republican / most Democratic maps); only the last one shows a legend.
fig = plt.figure(figsize = (18, 5))
gap_panels = [
    (distributions_gaps_median, 'Median maps', False),
    (distributions_gaps_rep, 'Most Republican maps', False),
    (distributions_gaps_dem, 'Most Democratic maps', True),
]
for panel_number, (panel_gaps, panel_title, show_legend) in enumerate(gap_panels, start=1):
    ax = plt.subplot(1, 3, panel_number)
    _ = plot_all_state_distribution_generic(
        panel_gaps,
        prop_val=None,
        do_vertical_integers=False,
        ymin=0,
        ymax=.3,
        xbins=None,
        legend=show_legend,
        xlabel="Avg proportionality gap",
        do_zoom=False,
        bbox_to_anchor=(0.5, .6),
        do_broken_axes=False,
        ax=ax,
        loc="lower left",
    )
    ax.set_title(panel_title, fontsize = 20)
saveimage('prop_gap_medianrepdem', extension = 'pdf')

In [None]:
# Stand-alone version of the median-map gap panel, with its own legend.
_ = plot_all_state_distribution_generic(
    distributions_gaps_median,
    prop_val=None,
    do_vertical_integers=False,
    ymin=0,
    ymax=.3,
    xbins=None,
    legend=True,
    xlabel="Avg proportionality gap",
    do_zoom=False,
    bbox_to_anchor=(0.5, .6),
    do_broken_axes=False,
    loc="lower left",
)
saveimage('prop_gap_median', extension = 'pdf')

In [None]:
# Proportionality gap under the most extreme gerrymander (maximum gap over all rows).
_ = plot_all_state_distribution_generic(
    distributions_gaps_max, prop_val=None, do_vertical_integers=False
    , ymin=0, ymax=.6, xbins=None, legend=True, xlabel="Avg proportionality gap", do_zoom = False,
    bbox_to_anchor=(0.5, .6),do_broken_axes = False,
    loc="lower left",
)
saveimage('prop_gap_maxgerrymandered', extension = 'pdf')
# NOTE(review): the call below repeats the one above with identical arguments and is never
# saved; it looks like a leftover from experimenting with a two-column legend (`ncols = 2`).
_ = plot_all_state_distribution_generic(
    distributions_gaps_max, prop_val=None, do_vertical_integers=False
    , ymin=0, ymax=.6, xbins=None, legend=True, xlabel="Avg proportionality gap", do_zoom = False,
    bbox_to_anchor=(0.5, .6),do_broken_axes = False,#ncols = 2,
    loc="lower left",
)

# Competitiveness

In [None]:
# For each district, "wasted votes" is the distance from its vote share to the nearest Droop-quota threshold.
# For each plan (one row of the master dataframe), sum the wasted votes across its districts.
# The seat-share distribution helper can then turn those per-plan values into the data behind the line plot.

In [None]:
from elections.median_voter import thiele_voting_methods
from elections.thiele_competitiveness_helpers import get_vote_thresholds_for_n_winners
# thiele_voting_methods

In [None]:
methods

In [None]:
# Cache filled by get_wasted_votes_per_row: rule name -> {number of winners -> vote-share thresholds}.
method_share_thresholds_all_rules = {met:{} for met in methods}
def get_wasted_votes_per_row(row, rule, rulename):
    """Winner-weighted average distance from each district's vote share to its nearest threshold.

    Parameters
    ----------
    row : object with attributes ``district_vote_shares`` (sequence of shares),
        ``district_n_winners`` (sequence of winner counts, same order) and
        ``total_winners`` (number the weights are divided by).
    rule : passed through to ``get_vote_thresholds_for_n_winners``.
    rulename : hashable key under which the thresholds for ``rule`` are cached in
        the module-level ``method_share_thresholds_all_rules``.

    Returns
    -------
    The sum over districts of ``min|share - threshold| * n_winners / total_winners``.
    """
    vs = row.district_vote_shares
    nwinners = row.district_n_winners
    # setdefault (rather than plain indexing) so a rule name that was not pre-seeded
    # in the cache, e.g. one missing from `methods`, no longer raises KeyError.
    thresholds_by_nw = method_share_thresholds_all_rules.setdefault(rulename, {})
    wasted = 0
    for en, nw in enumerate(nwinners):
        if nw not in thresholds_by_nw:
            thresholds_by_nw[nw] = get_vote_thresholds_for_n_winners(rule, nw)
        nearest = min(abs(vs[en] - x) for x in thresholds_by_nw[nw])
        wasted += nearest*nw/row.total_winners
    return wasted

In [None]:
def evalmaybelist(val):
    """Evaluate ``val`` if it is a string (e.g. ``"[0.4, 0.6]"``); return anything else unchanged.

    Used to turn list-valued columns that were stored as text back into Python objects
    while leaving already-parsed values alone.
    """
    # isinstance (not type(...) == str) so str subclasses such as numpy.str_ are parsed too.
    if isinstance(val, str):
        # NOTE(review): eval executes arbitrary code. Acceptable for locally generated cache
        # files; consider ast.literal_eval if the stored strings are always plain literals.
        return eval(val)
    return val

In [None]:
from functools import partial
def add_wasted_votes_to_df(dft, rule, rulename, do_just_subsampled = False):
    """Parse the per-district list columns and add a ``wasted_votes`` column, in place.

    Parameters
    ----------
    dft : DataFrame with columns ``district_vote_shares`` and ``district_n_winners``
        (lists, or their string representation) plus whatever
        ``get_wasted_votes_per_row`` reads from a row. An ``optimization`` column is
        only required when ``do_just_subsampled`` is true.
    rule, rulename : forwarded to ``get_wasted_votes_per_row``.
    do_just_subsampled : if true, only rows whose ``optimization`` is ``"subsampled"``
        or ``"single_district_for_state"`` are processed; otherwise every row is.

    Returns
    -------
    The same ``dft`` object, modified in place.
    """
    if do_just_subsampled:
        is_subsampled = dft['optimization'].isin(['subsampled', 'single_district_for_state'])
    else:
        # Plain all-rows mask; no need to route a constant True through DataFrame.eval.
        is_subsampled = pd.Series(True, index = dft.index)
    print(rule, int(is_subsampled.sum()))
    for list_col in ('district_vote_shares', 'district_n_winners'):
        dft.loc[is_subsampled, list_col] = dft.loc[is_subsampled, list_col].apply(evalmaybelist)
    fun = partial(get_wasted_votes_per_row, rule = rule, rulename = rulename)
    dft.loc[is_subsampled,'wasted_votes'] = dft.loc[is_subsampled].apply(fun, axis = 1)
    return dft

In [None]:
# re-orienting it so each line is a method, not min/max/median/most fair
def get_competitiveness_per_method(distribibutions_compet, optimization = 'Median'):
    """Re-orient competitiveness distributions so that each row is a rule.

    For every state in the global ``states`` the row named ``optimization`` is taken
    from ``distribibutions_compet[rule][state]`` for each rule in ``methods_to_plot``.

    Returns a dict state -> DataFrame indexed by the rules' display names
    (index name 'method'), with one column per entry of the selected row.
    """
    per_state = {}
    for state in states:
        table = {'method': [method_names[met] for met in methods_to_plot]}
        for method in methods_to_plot:
            vals = distribibutions_compet[method][state].loc[optimization]
            for label in vals.index:
                table.setdefault(label, []).append(vals[label])
        per_state[state] = pd.DataFrame(table).set_index('method')
    return per_state

In [None]:
# Slow cell: takes about 10-20 minutes.
# add_wasted_votes_to_df modifies each frame in place and returns the same object, so the
# reassignment is only for readability.
# NOTE(review): thiele_pav / thiele_approvalindependent / thiele_squared are not imported by name
# in this notebook; presumably they arrive through one of the star imports — TODO confirm.
dfs['thiele_pav'] = add_wasted_votes_to_df(dfs['thiele_pav'], thiele_pav, rulename = 'thiele_pav')
dfs['thiele_independent'] = add_wasted_votes_to_df(dfs['thiele_independent'], thiele_approvalindependent, rulename = 'thiele_independent')
dfs['thiele_squared'] = add_wasted_votes_to_df(dfs['thiele_squared'], thiele_squared, rulename = 'thiele_squared')

In [None]:
# Same distribution helper as for seat shares, but summarising the `wasted_votes` column
# (divide = False) while the min / max / most-fair rows are chosen on 'N_winners_Republican'.
distributions_compet = {method: state_seat_share_distributions_nikhil(dfs[method]
                               , col = 'wasted_votes'
                                      , divide = False
                                      , col_for_minmaxmostfair = 'N_winners_Republican') for method in methods}
# Displays the whole nested dict; can be a large output.
distributions_compet

In [None]:
distributions_compet['thiele_pav']['AL']

In [None]:
distributions_compet['thiele_independent']['AL']

In [None]:
# Competitiveness of the median map, one line per rule.
distributions_compet_methods_median = get_competitiveness_per_method(distributions_compet)
_ = plot_all_state_distribution_generic(
    distributions_compet_methods_median,
    ymin=0,
    ymax=.25,
    xbins=None,
    legend=False,
    xlabel="Avg Vote shift needed",
    bbox_to_anchor=(0.5, .6),
    loc="lower left",
)
saveimage('competitiveness_median', extension = 'pdf')

In [None]:
# Competitiveness of the 'Most Fair in each state' map, one line per rule.
distributions_compet_methods_mostfair = get_competitiveness_per_method(
    distributions_compet,
    optimization='Most Fair in each state',
)
_ = plot_all_state_distribution_generic(
    distributions_compet_methods_mostfair,
    ymin=0,
    ymax=.25,
    xbins=None,
    legend=False,
    xlabel="Avg Vote shift needed",
    bbox_to_anchor=(0.5, .6),
    loc="lower left",
)
saveimage('competitiveness_mostfair', extension = 'pdf')

In [None]:
# Competitiveness of the 'Most Republican' map, one line per rule.
distributions_compet_methods_mostrep = get_competitiveness_per_method(
    distributions_compet,
    optimization='Most Republican',
)
_ = plot_all_state_distribution_generic(
    distributions_compet_methods_mostrep,
    ymin=0,
    ymax=.25,
    xbins=None,
    legend=False,
    xlabel="Avg Vote shift needed",
    bbox_to_anchor=(0.5, .6),
    loc="lower left",
)
saveimage('competitiveness_mostrep', extension = 'pdf')

In [None]:
# Competitiveness of the 'Most Democratic' map, one line per rule; this is the only
# competitiveness figure drawn with a legend.
distributions_compet_methods_mostdem = get_competitiveness_per_method(
    distributions_compet,
    optimization='Most Democratic',
)
_ = plot_all_state_distribution_generic(
    distributions_compet_methods_mostdem,
    ymin=0,
    ymax=.25,
    xbins=None,
    legend=True,
    xlabel="Avg Vote shift needed",
    bbox_to_anchor=(0.5, .6),
    loc="lower left",
)
saveimage('competitiveness_mostdem', extension = 'pdf')

# Intra party stuff

## Cohesion

In [None]:
def get_cohesion_df(dfstv = None, coltemplate = "cohesion_partisan_score_{}"):
    """Collect the median cohesion distribution of both parties, per state.

    Parameters
    ----------
    dfstv : DataFrame, optional
        Frame with a ``state`` column and one column per party named by
        ``coltemplate``. Defaults to ``dfs['stv']``, looked up when the function is
        called (not when it is defined) so a reloaded frame is picked up.
    coltemplate : str
        Format string with one ``{}`` slot that receives 'Republican' / 'Democrat'.

    Returns
    -------
    dict state -> DataFrame indexed by party display name ('Republican', 'Democratic',
    index name 'party') holding the 'Median' row of each party's distribution.
    """
    if dfstv is None:
        dfstv = dfs['stv']
    parties = ['Republican','Democrat']
    party_names = {'Republican':'Republican', 'Democrat':'Democratic'}

    # One distribution dict (state -> frame) per party.
    distributions_cohesion_party = {
        party: state_seat_share_distributions_nikhil(
            dfstv, col=coltemplate.format(party), do_most_fair=False, divide=False
            , min_name="Least cohesive", max_name="Most cohesive"
        )
        for party in parties
    }

    # Same layout as the per-rule tables: one row per party, columns from the 'Median' row.
    distributions_partisan_cohesion = {}
    for state in dfstv.state.unique():
        d = {'party': [party_names[party] for party in parties]}
        for party in parties:
            vals = distributions_cohesion_party[party][state].loc['Median']
            for i in vals.index:
                d.setdefault(i, []).append(vals[i])
        distributions_partisan_cohesion[state] = pd.DataFrame(d).set_index('party')
    return distributions_partisan_cohesion

def plot_cohesion(dfstv = None, coltemplate = "cohesion_partisan_score_{}"
                  , cohesionlabel = 'Coalition diversity: Partisan'
                  , bbox_to_anchor=(.6,1), ax = None, do_legend = True):
    """Plot the per-party median cohesion distribution built by ``get_cohesion_df``.

    Parameters
    ----------
    dfstv : DataFrame, optional
        Source frame; defaults to ``dfs['stv']``, looked up at call time.
    coltemplate : str
        Column-name template forwarded to ``get_cohesion_df``.
    cohesionlabel : str
        Passed as ``xlabel`` to the plotting helper.
    bbox_to_anchor : tuple
        Legend anchor forwarded to the plotting helper.
    ax : matplotlib Axes, optional
        Axes to draw on; forwarded unchanged.
    do_legend : bool
        Whether the plotting helper should draw a legend.

    Returns ``None``; the figure is not saved here.
    """
    if dfstv is None:
        dfstv = dfs['stv']
    distributions_partisan_cohesion = get_cohesion_df(dfstv = dfstv, coltemplate = coltemplate)
    _ = plot_all_state_distribution_generic(
        distributions_partisan_cohesion, prop_val=None, do_vertical_integers=False
        , ymin=None, ymax=None, xbins=None, legend=do_legend, xlabel=cohesionlabel, do_zoom = False,
        bbox_to_anchor=bbox_to_anchor,do_broken_axes = False, set_ylim = False,
        loc="upper left", party_colors = True, ax = ax
    )

In [None]:
# Rescale the raw cohesion columns in place: the partisan / education / income columns become
# sqrt(-value) and the geographic columns become -value / 1000.
# The transformation is not idempotent (a second pass would take sqrt of negative numbers and
# flip the geographic sign again), so each frame is flagged once it has been rescaled and is
# skipped if this cell is run again. A freshly reloaded frame carries no flag and is rescaled.
for ddd in [dfs['stv'], dfstvpartisan]:
    if ddd.attrs.get('cohesion_rescaled', False):
        continue
    for party in parties:
        for cohesion in ["partisan_score", "education", "income"]:
            print(cohesion, party)
            col = "cohesion_{}_{}".format(cohesion, party)
            ddd.loc[:,col] = (-ddd.loc[:,col]).apply(np.sqrt)

        for cohesion in ["geographic"]:
            print(cohesion, party)
            col = "cohesion_{}_{}".format(cohesion, party)
            ddd.loc[:,col] = -ddd.loc[:,col]/1000
    ddd.attrs['cohesion_rescaled'] = True

In [None]:
plot_cohesion(dfstv = dfs['stv'], coltemplate = "cohesion_partisan_score_{}")

In [None]:
plot_cohesion(dfstv = dfs['stv'], coltemplate = "cohesion_geographic_{}", cohesionlabel = 'Coalition diversity: Geographic')

Save plot when ranking second by partisan

In [None]:
# Two panels from the 'combined_partisan' STV frame: partisan (left, no legend) and
# geographic (right, with legend) coalition diversity.
fig = plt.figure(figsize = (12, 5))
cohesion_panels = [
    ("cohesion_partisan_score_{}", 'Coalition diversity', False, 'Partisan diversity'),
    ("cohesion_geographic_{}", 'Coalition diversity (km)', True, 'Geographic diversity'),
]
for panel_number, (panel_template, panel_label, panel_legend, panel_title) in enumerate(cohesion_panels, start=1):
    ax = plt.subplot(1, 2, panel_number)
    plot_cohesion(
        dfstv=dfstvpartisan,
        coltemplate=panel_template,
        cohesionlabel=panel_label,
        ax=ax,
        do_legend=panel_legend,
    )
    ax.set_title(panel_title, fontsize = 20)
saveimage('cohesion_whenrankpartisan_both', extension = 'pdf')

Save plot when ranking by geography

In [None]:
# Same two panels as the partisan-ranking figure, drawn from the 'combined' STV frame.
fig = plt.figure(figsize = (12, 5))
cohesion_panels = [
    ("cohesion_partisan_score_{}", 'Coalition diversity', False, 'Partisan diversity'),
    ("cohesion_geographic_{}", 'Coalition diversity (km)', True, 'Geographic diversity'),
]
for panel_number, (panel_template, panel_label, panel_legend, panel_title) in enumerate(cohesion_panels, start=1):
    ax = plt.subplot(1, 2, panel_number)
    plot_cohesion(
        dfstv=dfs['stv'],
        coltemplate=panel_template,
        cohesionlabel=panel_label,
        ax=ax,
        do_legend=panel_legend,
    )
    ax.set_title(panel_title, fontsize = 20)
saveimage('cohesion_whenrankgeog_both', extension = 'pdf')

## Use medians -- intra-party winner diversity

In [None]:
def deal_with_medians(df):
    """Parse the text-encoded ``medians`` column into Python objects and return ``df``.

    The column is replaced on ``df`` itself. Values that are already parsed (not
    strings) are left untouched, so calling this twice, e.g. when a notebook cell is
    re-run, is harmless.
    """
    def _parse(value):
        # NOTE(review): eval executes arbitrary code. Acceptable for locally generated cache
        # files; consider ast.literal_eval if the stored strings are always plain literals.
        return eval(value) if isinstance(value, str) else value

    # Column replacement rather than `.loc[:, col] = ...`, which writes into the existing
    # column in place and can fail when that column's dtype cannot hold lists.
    df['medians'] = df['medians'].apply(_parse)
    return df

def add_intraparty_variances(df):
    """Add per-row spread of the winners' medians on each side of 50, in place.

    ``df.medians`` must hold an iterable of numbers per row. Values ``<= 50`` feed
    ``Republican_variance`` and values ``> 50`` feed ``Democrat_variance``. Despite the
    column names, the statistic is ``np.std`` (a standard deviation); a side with no
    values gets NaN. Returns the same ``df``.
    """
    def _spread(values, belongs):
        # Standard deviation of the values on one side, NaN when that side is empty.
        side = [v for v in values if belongs(v)]
        return np.std(side) if side else np.nan

    df['Republican_variance'] = df.medians.apply(lambda meds: _spread(meds, lambda v: v <= 50))
    df['Democrat_variance'] = df.medians.apply(lambda meds: _spread(meds, lambda v: v > 50))
    return df

In [None]:
# Turn the text-encoded `medians` column of the partisan-ranking STV frame into Python objects.
dfstvpartisan = deal_with_medians(dfstvpartisan)

In [None]:
# Adds the Republican_variance / Democrat_variance columns consumed by the "{}_variance" plot below.
dfstvpartisan = add_intraparty_variances(dfstvpartisan)

In [None]:
# Intra-party winner diversity for the partisan-ranking STV frame.
plot_cohesion(
    dfstv=dfstvpartisan,
    coltemplate="{}_variance",
    bbox_to_anchor=(.6,1),
    cohesionlabel='Intra-Party diversity: Partisan',
)
saveimage('winnerdiversity_partisan_whenrankpartisan', extension = 'pdf')

In [None]:
# Same two preprocessing steps for the 'combined' STV frame: parse `medians`, then add the
# per-party spread columns.
dfs['stv'] = deal_with_medians(dfs['stv'])
dfs['stv'] = add_intraparty_variances(dfs['stv'])

In [None]:
# Intra-party winner diversity for the 'combined' STV frame (legend anchored lower and further left).
plot_cohesion(
    dfstv=dfs['stv'],
    coltemplate="{}_variance",
    bbox_to_anchor=(.4,.8),
    cohesionlabel='Intra-Party diversity: Partisan',
)
saveimage('winnerdiversity_partisan_whenrankgeo', extension = 'pdf')