In [1]:
import os
import pandas as pd

import sys; sys.path.append("..") # Adds parent directory to python modules path.
from topdown_parsers import *

import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import geopandas as gpd
from sklearn.linear_model import LinearRegression

from ER_noise import *

In [2]:
csv_dir_without_hhs = "er_csvs/new_runs_without_hhs"
csv_dir_with_hhs = "er_csvs/new_runs_with_hhs"
runoff_filepath = "reconstructed_dallas_precicnts_w_elects.csv"

epsilon_values = ["0.25", "0.5", "1", "2"]
epsilon_splits = ["equal", "bottom-heavy", "mid-heavy", "top-heavy"]

In [3]:
def label_split_and_budget(df, eps_split, eps_budget):
    """
    """
    if eps_split == "mid":
        df["split"] = "mid-heavy"
    elif eps_split == "top":
        df["split"] = "top-heavy"
    elif eps_split == "bottom":
        df["split"] = "bottom-heavy"
    elif eps_split == "eq":
        df["split"] = "equal"
    else:
        raise ValueError("Split value is {}, but was expecting one of [equal, top, mid, heavy]".format(eps_split))
        
    if eps_budget == "0pt25":
        df["epsilon"] = "0.25"
    elif eps_budget == "0pt5":
        df["epsilon"] = "0.5"
    elif eps_budget == "1":
        df["epsilon"] = "1"
    elif eps_budget == "2":
        df["epsilon"] = "2"
    else:
        raise ValueError("Budget value is {}, but was expecting one of [0pt25, 0pt5, 1, 2]".format(eps_budget))
        
    return df
    
def add_runoff(runoff_filepath, df, precinct_col="Precinct"):
    """ Adds the runoff vote data in runoff_filepath to `df`.
    """
    runoff = pd.read_csv(runoff_filepath)
    runoff = runoff[runoff["CNTYVTD"].isin(df[precinct_col])] # only keeping dallas
    
    df = df.merge(runoff, how="left", left_on="Precinct", right_on="CNTYVTD")
    return df
    
def combine_csvs(csv_dir, runoff_filepath, with_hh):
    """ Combines all the csvs in `csv_dir`, labels their budget and split, and then 
        merges the `runoff_filepath` file to it to combine vote data.
        `with_hh` is a Bool that is True if the csvs are runs with Household Constraints.
    """
    dfs = []
    
    for root, dirs, files in os.walk(csv_dir):
        for file in files:
            
            if os.path.splitext(file)[1] != ".csv":
                continue
            
            if with_hh: 
                eps_split = file[:-4].split("_")[3]
                eps_budget = file[:-4].split("_")[4]
            else:
                eps_split = file[:-4].split("_")[2]
                eps_budget = file[:-4].split("_")[3]
            
            df = pd.read_csv(os.path.join(root, file))
            df = label_split_and_budget(df, eps_split, eps_budget)
            df = add_runoff(runoff_filepath, df)
            
            dfs.append(df)
    
    main_df = pd.concat(dfs)
    
    return main_df

def plot_er_and_point_estimates(df, epsilon_values, epsilon_splits, race, with_hh=False):
    """
    """
    save_hh_str = "with_hh" if with_hh else "without_hh"
    title_hh_str = "with HH constraints" if with_hh else "without HH constraints"

    # configs
    settings = ["weight", "filter", "no_filter"]
    for sett in settings:
        if sett == "weight":
            title_sett_str = "weighted"; save_sett_str = "weighted"; weight = True; filt = False
        elif sett == "filter":
            title_sett_str = "filter at thresh=10"; save_sett_str = "filt_10"; weight = False; filt = True
        else:
            title_sett_str = "no_filter"; save_sett_str = "no_filt"; weight = False; filt = False

        # ER
        fig, axs = plot_elect_grid(epsilon_values, 
                                   epsilon_splits, 
                                   df, 
                                   "Valdez", 
                                   race, 
                                   "D_18R_Governor_pct", 
                                   "18R_Governor_D_tot", 
                                   figsize=(20,20), 
                                   filt=filt, 
                                   weight=weight, 
                                   n_samps=20, 
                                   title="ER: Votes for Valdez: TX Statewide 2018 Dem Runoff Governor | TopDown Noise with {}, {}".format(title_sett_str, title_hh_str)
                                  )
        plt.savefig("TopDown_{}_{}_{}_er_Valdez.png".format(race, 
                                                            save_sett_str, 
                                                            save_hh_str), 
                    dpi=300)

        # Point estimates
        fig, axs = plot_point_estimate_grid(epsilon_values, 
                                            epsilon_splits, 
                                            df, 
                                            "Valdez", 
                                            race, 
                                            "2018 Dem Governor Runoff", 
                                            "D_18R_Governor_pct", 
                                            "18R_Governor_D_tot", 
                                            20, 
                                            figsize=(20,20), 
                                            filt=filt, 
                                            weight=weight, 
                                            title="Point Estimates: Votes for Valdez: TX Statewide 2018 Dem Runoff Governor | TopDown Noise with {}, {}".format(title_sett_str, title_hh_str),
                                            x_lims=(0,1)
                                           )
        plt.savefig("TopDown_{}_{}_{}_point_estimates_Valdez.png".format(race, 
                                                                         save_sett_str, 
                                                                         save_hh_str), 
                    dpi=300)

        plt.close(fig="all")

In [4]:
df_without_hhs = combine_csvs(csv_dir_without_hhs, runoff_filepath, False)
df_with_hhs = combine_csvs(csv_dir_with_hhs, runoff_filepath, True)

plot_er_and_point_estimates(df_without_hhs, epsilon_values, epsilon_splits, "HVAP", with_hh=False)
plot_er_and_point_estimates(df_without_hhs, epsilon_values, epsilon_splits, "BVAP", with_hh=False)
plot_er_and_point_estimates(df_without_hhs, epsilon_values, epsilon_splits, "WVAP", with_hh=False)

plot_er_and_point_estimates(df_with_hhs, epsilon_values, epsilon_splits, "HVAP", with_hh=True)
plot_er_and_point_estimates(df_with_hhs, epsilon_values, epsilon_splits, "BVAP", with_hh=True)
plot_er_and_point_estimates(df_with_hhs, epsilon_values, epsilon_splits, "WVAP", with_hh=True)

In [7]:
df_with_hhs[(df_with_hhs["epsilon"] == "1") ]

Unnamed: 0,Precinct,0_HVAP_noise,1_HVAP_noise,2_HVAP_noise,3_HVAP_noise,4_HVAP_noise,5_HVAP_noise,6_HVAP_noise,7_HVAP_noise,8_HVAP_noise,9_HVAP_noise,10_HVAP_noise,11_HVAP_noise,12_HVAP_noise,13_HVAP_noise,14_HVAP_noise,15_HVAP_noise,16_HVAP_noise,17_HVAP_noise,18_HVAP_noise,19_HVAP_noise,0_WVAP_noise,1_WVAP_noise,2_WVAP_noise,3_WVAP_noise,4_WVAP_noise,5_WVAP_noise,6_WVAP_noise,7_WVAP_noise,8_WVAP_noise,9_WVAP_noise,10_WVAP_noise,11_WVAP_noise,12_WVAP_noise,13_WVAP_noise,14_WVAP_noise,15_WVAP_noise,16_WVAP_noise,17_WVAP_noise,18_WVAP_noise,19_WVAP_noise,0_BVAP_noise,1_BVAP_noise,2_BVAP_noise,3_BVAP_noise,4_BVAP_noise,5_BVAP_noise,6_BVAP_noise,7_BVAP_noise,8_BVAP_noise,9_BVAP_noise,10_BVAP_noise,11_BVAP_noise,12_BVAP_noise,13_BVAP_noise,14_BVAP_noise,15_BVAP_noise,16_BVAP_noise,17_BVAP_noise,18_BVAP_noise,19_BVAP_noise,0_VAP_noise,1_VAP_noise,2_VAP_noise,3_VAP_noise,4_VAP_noise,5_VAP_noise,6_VAP_noise,7_VAP_noise,8_VAP_noise,9_VAP_noise,10_VAP_noise,11_VAP_noise,12_VAP_noise,13_VAP_noise,14_VAP_noise,15_VAP_noise,16_VAP_noise,17_VAP_noise,18_VAP_noise,19_VAP_noise,split,epsilon,CNTYVTD,18R_Governor_D_tot,ValdezD_18R_Governor_pct,WhiteD_18R_Governor_pct,TOTPOP,HISP,NH_WHITE,NH_BLACK,NH_AMIN,NH_ASIAN,NH_NHPI,NH_OTHER*,VAP,HVAP,WVAP,BVAP,AMINVAP,ASIANVAP,NHPIVAP,OTHERVAP*,HVAP_pct,BVAP_pct,WVAP_pct
0,1131000,866.0,837.0,904.0,854.0,886.0,915.0,785.0,915.0,868.0,814.0,772.0,863.0,984.0,921.0,903.0,871.0,818.0,819.0,904.0,871.0,261.0,298.0,269.0,268.0,273.0,328.0,313.0,298.0,290.0,278.0,345.0,386.0,348.0,256.0,247.0,311.0,314.0,231.0,275.0,310.0,609.0,707.0,648.0,610.0,699.0,700.0,608.0,697.0,700.0,625.0,615.0,643.0,598.0,634.0,574.0,650.0,673.0,654.0,660.0,608.0,1849.0,1932.0,1906.0,1809.0,2010.0,1978.0,1805.0,2008.0,1925.0,1840.0,1854.0,1990.0,1971.0,1867.0,1804.0,1941.0,1872.0,1831.0,1913.0,1892.0,mid-heavy,1,1131000,12,0.916667,0.083333,2740.0,1604.0,447.0,534.0,7.0,53.0,1.0,94.0,2012.0,1090.0,412.0,417.0,7.0,43.0,1.0,42.0,0.541750,0.207256,0.204771
1,1131001,735.0,736.0,752.0,729.0,735.0,723.0,747.0,707.0,735.0,726.0,720.0,745.0,721.0,739.0,730.0,726.0,724.0,738.0,741.0,735.0,309.0,307.0,318.0,305.0,311.0,308.0,307.0,303.0,298.0,300.0,302.0,315.0,301.0,319.0,308.0,304.0,292.0,299.0,308.0,311.0,396.0,401.0,399.0,394.0,405.0,399.0,396.0,391.0,394.0,403.0,398.0,395.0,402.0,391.0,388.0,405.0,400.0,406.0,400.0,397.0,1518.0,1530.0,1566.0,1511.0,1564.0,1533.0,1543.0,1492.0,1508.0,1549.0,1510.0,1548.0,1530.0,1534.0,1504.0,1517.0,1515.0,1527.0,1516.0,1523.0,mid-heavy,1,1131001,18,1.000000,0.000000,2022.0,1378.0,287.0,297.0,19.0,38.0,0.0,3.0,1486.0,984.0,266.0,188.0,12.0,34.0,0.0,2.0,0.662180,0.126514,0.179004
2,1131002,1954.0,1852.0,1898.0,1866.0,1906.0,1855.0,1855.0,1922.0,1861.0,1880.0,1891.0,1990.0,1944.0,1826.0,1769.0,1816.0,1885.0,1962.0,1912.0,1884.0,600.0,585.0,490.0,539.0,549.0,514.0,533.0,544.0,477.0,586.0,525.0,570.0,541.0,507.0,569.0,512.0,548.0,539.0,458.0,499.0,902.0,936.0,882.0,884.0,892.0,896.0,841.0,898.0,793.0,872.0,973.0,858.0,867.0,897.0,963.0,807.0,865.0,946.0,864.0,824.0,3599.0,3539.0,3400.0,3385.0,3479.0,3373.0,3385.0,3543.0,3314.0,3451.0,3550.0,3508.0,3451.0,3307.0,3436.0,3247.0,3460.0,3599.0,3348.0,3314.0,mid-heavy,1,1131002,28,0.785714,0.214286,4872.0,3241.0,896.0,596.0,5.0,100.0,2.0,32.0,3488.0,2215.0,741.0,433.0,4.0,64.0,2.0,29.0,0.635034,0.124140,0.212443
3,1131003,552.0,587.0,541.0,536.0,539.0,573.0,531.0,567.0,570.0,564.0,590.0,521.0,530.0,618.0,548.0,535.0,535.0,563.0,563.0,590.0,722.0,694.0,704.0,690.0,709.0,777.0,720.0,754.0,690.0,838.0,716.0,739.0,704.0,689.0,699.0,733.0,718.0,701.0,709.0,713.0,2363.0,2381.0,2350.0,2359.0,2359.0,2366.0,2371.0,2358.0,2350.0,2399.0,2365.0,2395.0,2398.0,2366.0,2376.0,2356.0,2369.0,2366.0,2371.0,2349.0,3961.0,4019.0,3907.0,3942.0,3906.0,4039.0,3983.0,3990.0,3899.0,4156.0,4011.0,3997.0,3981.0,3980.0,3983.0,3961.0,4024.0,4061.0,4005.0,4007.0,mid-heavy,1,1131003,260,0.834615,0.165385,5037.0,1388.0,663.0,2757.0,9.0,160.0,1.0,59.0,4073.0,1013.0,549.0,2329.0,9.0,135.0,1.0,37.0,0.248711,0.571814,0.134790
4,1131004,244.0,258.0,241.0,233.0,234.0,262.0,219.0,195.0,252.0,268.0,242.0,222.0,234.0,293.0,261.0,175.0,208.0,234.0,250.0,243.0,1089.0,1058.0,1060.0,1086.0,1090.0,1069.0,1059.0,1104.0,1084.0,1085.0,1085.0,1057.0,1119.0,1092.0,1095.0,1083.0,1063.0,1050.0,1064.0,1069.0,407.0,350.0,374.0,348.0,388.0,405.0,415.0,384.0,366.0,390.0,405.0,325.0,376.0,397.0,384.0,393.0,439.0,403.0,389.0,446.0,1843.0,1781.0,1771.0,1776.0,1800.0,1850.0,1778.0,1762.0,1824.0,1828.0,1837.0,1724.0,1822.0,1850.0,1833.0,1779.0,1819.0,1776.0,1818.0,1832.0,mid-heavy,1,1131004,99,0.363636,0.636364,2022.0,547.0,1185.0,209.0,6.0,62.0,0.0,13.0,1650.0,368.0,1037.0,177.0,5.0,53.0,0.0,10.0,0.223030,0.107273,0.628485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
822,1134660,0.0,33.0,45.0,32.0,74.0,38.0,40.0,49.0,79.0,40.0,27.0,47.0,41.0,48.0,10.0,53.0,17.0,36.0,23.0,33.0,13.0,61.0,18.0,0.0,25.0,13.0,17.0,19.0,21.0,24.0,0.0,10.0,0.0,32.0,5.0,20.0,14.0,10.0,21.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,26.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,13.0,94.0,63.0,32.0,99.0,51.0,60.0,68.0,107.0,64.0,27.0,83.0,41.0,80.0,15.0,73.0,31.0,48.0,44.0,64.0,equal,1,1134660,0,0.000000,0.000000,84.0,49.0,35.0,0.0,0.0,0.0,0.0,0.0,79.0,44.0,35.0,0.0,0.0,0.0,0.0,0.0,0.556962,0.000000,0.443038
823,1134661,1717.0,1784.0,1738.0,1706.0,1733.0,1766.0,1679.0,1736.0,1704.0,1796.0,1673.0,1677.0,1762.0,1602.0,1808.0,1759.0,1722.0,1727.0,1742.0,1621.0,370.0,249.0,284.0,269.0,316.0,254.0,297.0,292.0,314.0,331.0,301.0,264.0,275.0,320.0,369.0,324.0,250.0,283.0,253.0,229.0,64.0,122.0,60.0,53.0,71.0,118.0,77.0,37.0,52.0,56.0,84.0,33.0,85.0,41.0,54.0,74.0,56.0,92.0,39.0,80.0,2211.0,2238.0,2132.0,2082.0,2162.0,2197.0,2149.0,2119.0,2147.0,2281.0,2173.0,2033.0,2176.0,2051.0,2309.0,2208.0,2143.0,2161.0,2086.0,2000.0,equal,1,1134661,19,0.684211,0.315789,4074.0,3050.0,869.0,88.0,5.0,57.0,0.0,5.0,3175.0,2291.0,765.0,67.0,3.0,46.0,0.0,3.0,0.721575,0.021102,0.240945
824,1134662,360.0,392.0,403.0,322.0,304.0,328.0,382.0,595.0,399.0,370.0,302.0,297.0,379.0,410.0,324.0,391.0,370.0,363.0,298.0,431.0,221.0,241.0,204.0,145.0,153.0,144.0,145.0,165.0,152.0,145.0,224.0,235.0,159.0,191.0,140.0,160.0,121.0,118.0,151.0,124.0,0.0,0.0,1.0,19.0,0.0,9.0,21.0,17.0,1.0,1.0,26.0,39.0,12.0,22.0,16.0,31.0,35.0,28.0,32.0,35.0,588.0,666.0,608.0,489.0,487.0,499.0,560.0,798.0,574.0,557.0,573.0,624.0,562.0,627.0,518.0,621.0,529.0,528.0,491.0,607.0,equal,1,1134662,8,0.750000,0.250000,703.0,451.0,224.0,9.0,0.0,19.0,0.0,0.0,567.0,349.0,194.0,8.0,0.0,16.0,0.0,0.0,0.615520,0.014109,0.342152
825,1134663,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,equal,1,1134663,0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
