In [1]:
import os
import pandas as pd
import maup
import geopandas as gp

import sys; sys.path.append("..") # Adds parent directory to python modules path.
from topdown_parsers import *

In [2]:
def modify_race(race):
    """
    """
    if race == "w":
        return 1
    elif race == "b":
        return 2
    elif race == "i":
        return 3
    elif race == "a":
        return 4
    elif race == "h":
        return 5
    else:
        return 6

def compute_pops_and_vaps(df, groupby_cols):
    # make bool columns for the counts
    df["race"] = df["race"].map(lambda race: modify_race(race))

    # sex
    df["male"] = df["sex"] == 0
    df["female"] = df["sex"] == 1

    # voting age
    df["VAP"] = df["age"] >= 18

    df = df.rename(columns={"ethn": "HISP"})

    # ethnicity vap
    df["HVAP"] = (df["HISP"] == 1) & (df["VAP"] == True)

    # race pops
    df["NH_WHITEPOP"] = (df["race"] == 1) &  (df["HISP"] != 1)
    df["NH_BLACKPOP"] = (df["race"] == 2) & (df["HISP"] != 1)
    df["NH_AMINPOP"] = (df["race"] == 3) & (df["HISP"] != 1)
    df["NH_ASIANPOP"] = (df["race"] == 4) & (df["HISP"] != 1)
    df["NH_HAWAIIANPOP"] = (df["race"] == 5) & (df["HISP"] != 1)
    df["NH_OTHERPOP"] = (df["race"] == 6) & (df["HISP"] != 1)

    # race vaps
    df["NH_WVAP"] = (df["race"] == 1) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_BVAP"] = (df["race"] == 2) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_AMINVAP"] = (df["race"] == 3) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_ASIANVAP"] = (df["race"] == 4) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_HAWAIIANVAP"] = (df["race"] == 5) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_OTHERVAP"] = (df["race"] == 6) & (df["VAP"] == True) & (df["HISP"] != 1)
    
    df["TOTPOP"] = 1

    df = df.drop(columns=["sex", "age", "GEOID", "race", "sol"])
    df = df.groupby(groupby_cols).sum().reset_index()
    
    return df

In [None]:
# do it for all of Texas
texas_df = read_and_process_reconstructed_csvs("./Reconstructed")
print("Computing pops and vaps")
texas_df = compute_pops_and_vaps(texas_df, ["state", "county"])
texas_df

In [None]:
# do dallas at the block level
dallas_df = read_and_process_reconstructed_csvs("./Reconstructed/Texas_Dallas County")
dallas_df = compute_pops_and_vaps(dallas_df, ["state", "county", "tract", "bg", "block"])
dallas_df

In [None]:
texas_df.to_csv("texas_reconstructions.csv", index=False)
dallas_df.to_csv("dallas_reconstructions.csv", index=False)

In [3]:
# put all of the county in the same enumdist (except for Dallas)
all_dirs = []
for root, dirs, files in os.walk("./Reconstructed"):
    for d in dirs:
        all_dirs.append(os.path.join(root, d))
        
for index, d in enumerate(sorted(all_dirs)):
    county_name = d[:-7].split("_")[1]
    print()
    print(county_name)
    print("County {} of {}".format(index, len(all_dirs)))
    if county_name == "Dallas":
        continue
    convert_reconstructions_to_ipums_same_block(d, county_name + ".dat", break_size=1000)


Anderson
County 0 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Andrews
County 1 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Angelina
County 2 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Aransas
County 3 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleani


Callahan
County 29 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Cameron
County 30 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Camp
County 31 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Carson
County 32 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleanin


Deaf Smith
County 59 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Delta
County 60 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Denton
County 61 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Dickens
County 62 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Clea


Gonzales
County 88 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Gray
County 89 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Grayson
County 90 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Gregg
County 91 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning


Irion
County 117 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Jack
County 118 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Jackson
County 119 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Jasper
County 120 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleani


Limestone
County 146 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Lipscomb
County 147 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Live Oak
County 148 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Llano
County 149 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits..


Newton
County 175 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Nolan
County 176 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Nueces
County 177 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Ochiltree
County 178 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cl

Building Enumdist column
Done with reading and preparing data.

San Patricio
County 204 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

San Saba
County 205 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Schleicher
County 206 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Scurry
County 207 of 254
Reading files...
Duplicating rows... (based on 

Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Van Zandt
County 233 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Victoria
County 234 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Walker
County 235 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.

Waller
County 236 of 254
Readi