In [1]:
import os
import shutil
import zipfile
from functools import reduce

import numpy as np
import pandas as pd

import sys; sys.path.append("..") # Adds parent directory to python modules path.
from topdown_parsers import *

*** Race Key ***

    White: 1
    Black: 2
    American Indian or Alaska Native: 3
    Asian: 4
    Hawaiian: 5
    Other, Multiracial: 6

In [2]:
# Directories holding the zipped runs. Keep the trailing slash: build_csvs() derives
# the extracted directory name with `runs_dirname[:-1]`.
with_hh_dirname = "./with_hhs/"
without_hh_dirname = "./without_hhs/"
# Name of the MDF file that is opened for each run.
dallas_filename = "DALLAS.dat"
# CSV read by get_precinct_assignments(); its two columns become "GEOID10" and "Precinct".
precinct_assignments_fp = "block_prec_assign.csv"
# FIPS code of the state the runs were run on (48 is TX).
state_id = 48

In [3]:
def get_precinct_assignments(precinct_assignments_fp):
    """ Reads `precinct_assignments_fp` as Pandas DataFrame, cleans it and returns it.

        The file must hold exactly two columns: a block GEOID followed by the precinct
        that block is assigned to. Whatever the header says, the columns are renamed
        to "GEOID10" and "Precinct".

        Args:
            precinct_assignments_fp (str) : Filepath of the block-to-precinct CSV.

        Returns:
            DataFrame with a string "GEOID10" column and a "Precinct" column.

        Raises:
            ValueError : if the file does not have exactly two columns.
    """
    # Peek at the header only, so the GEOID column can be pinned to `str` below.
    geoid_header = pd.read_csv(precinct_assignments_fp, nrows=0).columns[0]

    # Parsing the GEOID as text keeps leading zeros (state FIPS codes below 10);
    # letting pandas infer an integer and casting afterwards silently drops them.
    precinct_assignments = pd.read_csv(precinct_assignments_fp,
                                       dtype={geoid_header: str})
    precinct_assignments.columns = ["GEOID10", "Precinct"]
    # Missing GEOIDs become the string "nan", so the column is uniformly `str`.
    precinct_assignments["GEOID10"] = precinct_assignments["GEOID10"].astype(str)
    return precinct_assignments

def extract_from_zip(zipfile_fp, 
                     destination_fp):
    """ Extracts every member of the archive `zipfile_fp` into the directory
        `destination_fp`, printing the archive path first.
    """
    print(f"Extracting {zipfile_fp}")

    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(zipfile_fp, mode="r") as archive:
        archive.extractall(path=destination_fp)
        
def clean_df(df):
    """ Some simple cleanups on `df` to ready it to make ER csvs.

        Works on a copy, so the caller's frame is left untouched.

        Args:
            df (DataFrame) : needs "State", "County" and "Enumdist" columns.

        Returns:
            New DataFrame in which
              * "Enumdist" is a string left-padded with zeros to 11 characters,
              * "County" is a string left-padded with zeros to 3 characters,
              * "GEOID10" is the first 11 and the last 4 characters of
                State + County + Enumdist,
              * "State" is dropped,
              * missing values are replaced by 0.
    """
    cleaned = df.copy()

    cleaned["Enumdist"] = cleaned["Enumdist"].astype(str).str.pad(width=11, side='left', fillchar='0')
    cleaned["County"] = cleaned["County"].astype(str).str.pad(width=3, side='left', fillchar='0')

    # Pad the state code to two characters as well: the fixed-width slices below
    # only line up when State + County + Enumdist is always 2 + 3 + 11 characters.
    state = cleaned["State"].astype(str).str.pad(width=2, side='left', fillchar='0')
    full_id = state + cleaned["County"] + cleaned["Enumdist"]
    cleaned["GEOID10"] = full_id.str[:11] + full_id.str[-4:]

    cleaned = cleaned.drop(columns=["State"])
    cleaned = cleaned.fillna(0)

    return cleaned

def rename_cols(df, string):
    """ Returns a copy of `df` with missing values set to 0 and every column whose
        name starts with "Run" relabelled as (x-1)_`string`_noise, where x is the
        integer that follows the first four characters of the name.
        Eg with string `HVAP`, the column Run_1 becomes "0_HVAP_noise".
    """
    filled = df.fillna(0)
    suffix = "_{}_noise".format(string)

    # Collect all the new labels first, then relabel in a single pass.
    relabel = {}
    for label in filled.columns:
        if label[:3] == "Run":
            run_index = int(label[4:]) - 1
            relabel[label] = str(run_index) + suffix

    return filled.rename(columns=relabel)

def build_er_df(dir_name, state_id, filename, precinct_assignments_fp):
    """ Builds the per-precinct DataFrame that, once saved as a CSV, can be fed into
        plot_elect_grid() to easily make ER plots.

        Four population groups are collected with collect_by_enumdist(): HVAP
        (hisp=True), WVAP (race=1), BVAP (race=2) and VAP, all with vap=True. Each is
        cleaned, joined to the precinct assignments on "GEOID10", has its Run_x
        columns renamed to (x-1)_<GROUP>_noise and is summed per precinct. The four
        results are then outer-merged on the precinct index.
        
        Args:
            dir_name (str) : Filepath where `filename` exists.
            state_id (int) : FIPS code of state the runs were run on.
            filename (str) : Name of MDF file in `dir_name` to open.
            precinct_assignments_fp (str) : Filepath to where the precinct_assignments file is.

        Returns:
            DataFrame indexed by "Precinct" holding the summed numeric columns of
            all four groups.
    """
    # (label used in the column names, filters forwarded to collect_by_enumdist)
    groups = [
        ("HVAP", {"hisp": True, "vap": True}),
        ("WVAP", {"race": 1, "vap": True}),
        ("BVAP", {"race": 2, "vap": True}),
        ("VAP", {"vap": True}),
    ]

    # The assignments are the same for every group, so read them once.
    precinct_assignments = get_precinct_assignments(precinct_assignments_fp)

    per_precinct = []
    for label, filters in groups:
        counts = collect_by_enumdist(dir_name, state_id, filename, **filters)
        counts = clean_df(counts)
        counts = counts.merge(precinct_assignments, on="GEOID10", how="outer")
        counts = rename_cols(counts, label)
        # numeric_only=True keeps the string id columns (GEOID10, County, Enumdist)
        # out of the sum. pandas < 2.0 dropped them by default; pandas >= 2.0 would
        # try to add them up and they would then clash in the merges below.
        per_precinct.append(counts.groupby("Precinct").sum(numeric_only=True))

    df_merged = reduce(lambda left, right: pd.merge(left, right, how='outer',
                                                    left_index=True, right_index=True),
                       per_precinct)
    return df_merged

def get_pops(dir_name, state_id, filename):
    """ Collects the populations for `filename` under `dir_name` with
        collect_by_enumdist() (no race / hisp / vap filter), runs the result through
        clean_df() and returns the cleaned Pandas DataFrame.
        `state_id` is the FIPS code of the state where the runs are run on.
    """
    raw_pops = collect_by_enumdist(dir_name, state_id, filename)
    return clean_df(raw_pops)

def save_populations_at_block_level(runs_dirname, 
                                    zips_arr, 
                                    dallas_filename,
                                    state_id=48,
                                    save_filename=None):
    """ Save the population at the blocks level for the runs in `zips_arr`.

        For every archive: unzip it into `runs_dirname`, read the populations from
        the extracted run, write them to "<run name>_block_pops.csv" in the current
        working directory, and delete the extracted files again.
        
        Args:
            runs_dirname (str) : directory where the files in `zips_arr` are stored.
                                 Must end with a path separator, paths are built by
                                 string concatenation.
            zips_arr (list str): list of zip files that contain the runs. The last 8
                                 characters (".ini.zip" in the runs used here) are
                                 stripped to name the output CSV.
            state_id     (int) : FIPS code of state the runs are from. Defaults to 48 for TX
            dallas_filename (str) : Name of Dallas MDF file in each output dir
            save_filename (str) : Currently ignored; the output name is always
                                  derived from the archive name.
    """
    # The run is read from runs_dirname + runs_dirname + <archive name>, i.e. the
    # archives presumably unpack into a nested copy of `runs_dirname` (TODO confirm
    # against the zip layout). That nested directory is what gets cleaned up.
    extracted_root = runs_dirname + runs_dirname

    for run in zips_arr:
        filename = run[:-8]
        extract_from_zip(runs_dirname + run, runs_dirname)
        try:
            tot_pops = get_pops(extracted_root + run[:-4], state_id, dallas_filename)
            tot_pops.to_csv(filename + "_block_pops.csv")
        finally:
            # Remove the extracted files even when parsing fails. shutil.rmtree
            # avoids building a shell command from paths; ignore_errors mirrors the
            # old `rm -r`, which never raised either.
            shutil.rmtree(extracted_root, ignore_errors=True)
        
def build_csvs(runs_dirname, dallas_filename, precinct_assignments_fp, state_id):
    """ Builds one ER csv per zipped run found under `runs_dirname`.

        Every "*.zip" file found while walking `runs_dirname` is extracted next to
        itself, turned into a per-precinct DataFrame with build_er_df() and saved in
        the current working directory as "<archive name minus its last 8
        characters>.csv". The extracted files are deleted afterwards.

        Args:
            runs_dirname (str) : Directory holding the zipped runs. Must end with a
                                 path separator (it is stripped with `[:-1]` below).
            dallas_filename (str) : Name of the MDF file to open in each run.
            precinct_assignments_fp (str) : Filepath to the precinct assignments file.
            state_id (int) : FIPS code of the state the runs were run on.
    """
    # Directory removed after each run; presumably where the archives unpack to
    # (TODO confirm against the zip layout).
    extracted_dir = runs_dirname + runs_dirname[:-1]

    for root, dirs, files in os.walk(runs_dirname):
        for file in files:
            if not file.endswith(".zip"):
                continue

            # os.path.join also works for sub-directories, where `root` has no
            # trailing separator and plain concatenation gave a wrong path.
            texas_fp = os.path.join(root, file)
            extract_from_zip(texas_fp, root)
            try:
                df = build_er_df(runs_dirname, state_id, dallas_filename, precinct_assignments_fp)
                save_filename = file[:-8]
                df.to_csv(save_filename + ".csv")
            finally:
                # Clean up even if building the csv failed, so the next run does
                # not pick up stale extracted files.
                print("Deleting {}".format(extracted_dir))
                shutil.rmtree(extracted_dir, ignore_errors=True)
            print()
In [5]:
# Build the per-precinct csv for every zipped run found under `with_hh_dirname`.
build_csvs(with_hh_dirname, dallas_filename, precinct_assignments_fp, state_id)

Extracting ./with_hhs/TEXAS_STUB_HH_eq_2.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_mid_1.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_eq_0pt25.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_bottom_0pt25.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_eq_1.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_mid_2.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_top_0pt5.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_mid_0pt5.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_top_0pt25.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_top_2.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_eq_0pt5.ini.zip
Deleting ./with_hhs/./with_hhs

Extracting ./with_hhs/TEXAS_STUB_HH_bottom_0pt5.ini.zip
Deleting ./with_hhs/./

In [6]:
# Build the per-precinct csv for every zipped run found under `without_hh_dirname`.
build_csvs(without_hh_dirname, dallas_filename, precinct_assignments_fp, state_id)

Extracting ./without_hhs/TEXAS_STUB_bottom_0pt25.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_eq_0pt5.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_top_0pt25.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_eq_1.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_top_1.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_top_0pt5.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_mid_0pt5.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_eq_2.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_top_2.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_mid_0pt25.ini.zip
Deleting ./without_hhs/./without_hhs

Extracting ./without_hhs/TEXAS_STUB_eq_0pt25.ini.zip
Deleting ./without_hhs/./without_hhs

Extracti

In [22]:
# Archives whose block-level populations are saved below.
runs_with_hhs = ['TEXAS_STUB_HH_mid_1.ini.zip',
                 'TEXAS_STUB_HH_eq_1.ini.zip',
                 'TEXAS_STUB_HH_top_1.ini.zip']

# Writes one "<run name>_block_pops.csv" per archive; state_id is left at its default (48).
save_populations_at_block_level("with_hhs/", runs_with_hhs, dallas_filename)

Extracting with_hhs/TEXAS_STUB_HH_mid_1.ini.zip
Extracting with_hhs/TEXAS_STUB_HH_eq_1.ini.zip
Extracting with_hhs/TEXAS_STUB_HH_top_1.ini.zip


In [7]:
# Build the per-precinct csv for every zipped run found under `remaining_runs_dir`.
remaining_runs_dir = "./remaining_runs/"
build_csvs(remaining_runs_dir, dallas_filename, precinct_assignments_fp, state_id)

Extracting ./remaining_runs/TEXAS_STUB_HH_bottom_2.ini.zip
Deleting ./remaining_runs/./remaining_runs

Extracting ./remaining_runs/TEXAS_STUB_HH_bottom_1.ini.zip
Deleting ./remaining_runs/./remaining_runs

Extracting ./remaining_runs/TEXAS_STUB_bottom_1.ini.zip
Deleting ./remaining_runs/./remaining_runs

Extracting ./remaining_runs/TEXAS_STUB_bottom_2.ini.zip
Deleting ./remaining_runs/./remaining_runs



In [19]:
# NOTE(review): despite its name, this list is read from "./remaining_runs/" and
# also contains an HH run ('TEXAS_STUB_HH_bottom_1.ini.zip').
runs_without_hhs = ['TEXAS_STUB_HH_bottom_1.ini.zip',
                     'TEXAS_STUB_eq_1_eps_100.ini.zip',
                     'TEXAS_STUB_eq_1_not_detailed.ini.zip',
                     'TEXAS_STUB_bottom_1.ini.zip',
                     'TEXAS_STUB_mid_1_bg_weighted.ini.zip']
                     
# Writes one "<run name>_block_pops.csv" per archive; state_id is left at its default (48).
save_populations_at_block_level("./remaining_runs/", runs_without_hhs, dallas_filename)

Extracting ./remaining_runs/TEXAS_STUB_HH_bottom_1.ini.zip
Extracting ./remaining_runs/TEXAS_STUB_eq_1_eps_100.ini.zip
Extracting ./remaining_runs/TEXAS_STUB_eq_1_not_detailed.ini.zip
Extracting ./remaining_runs/TEXAS_STUB_bottom_1.ini.zip
Extracting ./remaining_runs/TEXAS_STUB_mid_1_bg_weighted.ini.zip


In [21]:
# Show what is currently stored in the "./with_hhs" directory.
os.listdir("./with_hhs")

['TEXAS_STUB_HH_eq_2.ini.zip',
 '.DS_Store',
 'TEXAS_STUB_HH_bottom_2.ini.zip',
 'TEXAS_STUB_HH_mid_1.ini.zip',
 'TEXAS_STUB_HH_eq_0pt25.ini.zip',
 'TEXAS_STUB_HH_bottom_0pt25.ini.zip',
 'TEXAS_STUB_HH_eq_1.ini.zip',
 'TEXAS_STUB_HH_mid_2.ini.zip',
 'TEXAS_STUB_HH_top_0pt5.ini.zip',
 'TEXAS_STUB_HH_mid_0pt5.ini.zip',
 'TEXAS_STUB_HH_top_0pt25.ini.zip',
 'TEXAS_STUB_HH_top_2.ini.zip',
 'TEXAS_STUB_HH_eq_0pt5.ini.zip',
 'TEXAS_STUB_HH_bottom_0pt5.ini.zip',
 'TEXAS_STUB_HH_mid_0pt25.ini.zip',
 'TEXAS_STUB_HH_top_1.ini.zip']