In [1]:
import os
import pandas as pd
import maup
import geopandas as gp

import sys; sys.path.append("..") # Adds parent directory to python modules path.
from topdown_parsers import *

In [2]:
def modify_race(race):
    """
    """
    if race == "w":
        return 1
    elif race == "b":
        return 2
    elif race == "i":
        return 3
    elif race == "a":
        return 4
    elif race == "h":
        return 5
    else:
        return 6

def compute_pops_and_vaps(df, groupby_cols):
    # make bool columns for the counts
    df["race"] = df["race"].map(lambda race: modify_race(race))

    # sex
    df["male"] = df["sex"] == 0
    df["female"] = df["sex"] == 1

    # voting age
    df["VAP"] = df["age"] >= 18

    df = df.rename(columns={"ethn": "HISP"})

    # ethnicity vap
    df["HVAP"] = (df["HISP"] == 1) & (df["VAP"] == True)

    # race pops
    df["NH_WHITEPOP"] = (df["race"] == 1) &  (df["HISP"] != 1)
    df["NH_BLACKPOP"] = (df["race"] == 2) & (df["HISP"] != 1)
    df["NH_AMINPOP"] = (df["race"] == 3) & (df["HISP"] != 1)
    df["NH_ASIANPOP"] = (df["race"] == 4) & (df["HISP"] != 1)
    df["NH_HAWAIIANPOP"] = (df["race"] == 5) & (df["HISP"] != 1)
    df["NH_OTHERPOP"] = (df["race"] == 6) & (df["HISP"] != 1)

    # race vaps
    df["NH_WVAP"] = (df["race"] == 1) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_BVAP"] = (df["race"] == 2) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_AMINVAP"] = (df["race"] == 3) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_ASIANVAP"] = (df["race"] == 4) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_HAWAIIANVAP"] = (df["race"] == 5) & (df["VAP"] == True) & (df["HISP"] != 1)
    df["NH_OTHERVAP"] = (df["race"] == 6) & (df["VAP"] == True) & (df["HISP"] != 1)
    
    df["TOTPOP"] = 1

    df = df.drop(columns=["sex", "age", "GEOID", "race", "sol"])
    df = df.groupby(groupby_cols).sum().reset_index()
    
    return df

In [None]:
# do it for all of Texas
texas_df = read_and_process_reconstructed_csvs("./Reconstructed")
print("Computing pops and vaps")
texas_df = compute_pops_and_vaps(texas_df, ["state", "county"])
texas_df

In [None]:
# do dallas at the block level
dallas_df = read_and_process_reconstructed_csvs("./Reconstructed/Texas_Dallas County")
dallas_df = compute_pops_and_vaps(dallas_df, ["state", "county", "tract", "bg", "block"])
dallas_df

In [None]:
texas_df.to_csv("texas_reconstructions.csv", index=False)
dallas_df.to_csv("dallas_reconstructions.csv", index=False)

In [3]:
all_dirs = []
for root, dirs, files in os.walk("./Reconstructed"):
    for d in dirs:
        all_dirs.append(os.path.join(root, d))
        
for index, d in enumerate(sorted(all_dirs)):
    county_name = d[:-7].split("_")[1]
    print()
    print(county_name)
    print("County {} of {}".format(index, len(all_dirs)))
    convert_reconstructions_to_ipums(d, county_name + ".dat", break_size=1000)


Anderson
County 0 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 1604.
Writing block 1000 of 1604.

Andrews
County 1 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 543.

Angelina
County 2 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Bui

Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 4891.
Writing block 1000 of 4891.
Writing block 2000 of 4891.
Writing block 3000 of 4891.
Writing block 4000 of 4891.

Brazos
County 20 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 2396.
Writing block 1000 of 2396.
Writing block 2000 of 2396.

Brewster
County 21 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data a


Coleman
County 41 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 1003.
Writing block 1000 of 1003.

Collin
County 42 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 9604.
Writing block 1000 of 9604.
Writing block 2000 of 9604.
Writing block 3000 of 9604.
Writing block 4000 of 9604.
Writing block 5000 of 9604.
Writing block 6000 of 9604.
Writing block 7000 of 9604.
Writing block 8000 of 9604.


Delta
County 60 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 407.

Denton
County 61 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 8362.
Writing block 1000 of 8362.
Writing block 2000 of 8362.
Writing block 3000 of 8362.
Writing block 4000 of 8362.
Writing block 5000 of 8362.
Writing block 6000 of 8362.
Writing block 7000 of 8362.
Writing block 8000 of 8362.

Dickens
County 62 of 254
Read


Freestone
County 80 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 962.

Frio
County 81 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 760.

Gaines
County 82 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done wit

Finished grouping the data.
Writing block 0 of 39294.
Writing block 1000 of 39294.
Writing block 2000 of 39294.
Writing block 3000 of 39294.
Writing block 4000 of 39294.
Writing block 5000 of 39294.
Writing block 6000 of 39294.
Writing block 7000 of 39294.
Writing block 8000 of 39294.
Writing block 9000 of 39294.
Writing block 10000 of 39294.
Writing block 11000 of 39294.
Writing block 12000 of 39294.
Writing block 13000 of 39294.
Writing block 14000 of 39294.
Writing block 15000 of 39294.
Writing block 16000 of 39294.
Writing block 17000 of 39294.
Writing block 18000 of 39294.
Writing block 19000 of 39294.
Writing block 20000 of 39294.
Writing block 21000 of 39294.
Writing block 22000 of 39294.
Writing block 23000 of 39294.
Writing block 24000 of 39294.
Writing block 25000 of 39294.
Writing block 26000 of 39294.
Writing block 27000 of 39294.
Writing block 28000 of 39294.
Writing block 29000 of 39294.
Writing block 30000 of 39294.
Writing block 31000 of 39294.
Writing block 32000 of 39


Jack
County 118 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 583.

Jackson
County 119 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 775.

Jasper
County 120 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done wi


Lamar
County 139 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 1583.
Writing block 1000 of 1583.

Lamb
County 140 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 1190.
Writing block 1000 of 1190.

Lampasas
County 141 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips code


McLennan
County 160 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 5115.
Writing block 1000 of 5115.
Writing block 2000 of 5115.
Writing block 3000 of 5115.
Writing block 4000 of 5115.
Writing block 5000 of 5115.

McMullen
County 161 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 103.

Medina
County 162 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processin

Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 1737.
Writing block 1000 of 1737.

Palo Pinto
County 181 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 1499.
Writing block 1000 of 1499.

Panola
County 182 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished gro


San Augustine
County 202 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 475.

San Jacinto
County 203 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 811.

San Patricio
County 204 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enum


Terry
County 222 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 700.

Throckmorton
County 223 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 199.

Titus
County 224 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Do

Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 2769.
Writing block 1000 of 2769.
Writing block 2000 of 2769.

Wilbarger
County 243 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enumdist column
Done with reading and preparing data.
Grouping the data at a block level...
Finished grouping the data.
Writing block 0 of 717.

Willacy
County 244 of 254
Reading files...
Duplicating rows... (based on reconstructed solutions)
Processing Geo IDs
Splitting into geounits...
Cleaning up the geounits...
Converting to fips codes and paddings them up...
Building Enum