## data
### for grabbing / processing data

In [1]:
from evaltools.data import *
import pandas as pd
import us

### census
Uses the US Census Bureau's API to retrieve 2020 Decennial Census PL 94-171 data at the stated geometry level. The five tables are:
 * P1: Race
 * P2: Hispanic or Latino, and Not Hispanic or Latino by Race
 * P3: Race for the Population 18 Years and Over (Race by VAP)
 * P4: Hispanic or Latino, and Not Hispanic or Latino by Race for the Population 18 Years and Over
 * P5: Group Quarters Population by Major Group Quarters Type

In [2]:
%%time
df = census(us.states.MA, 
            table="P3", # Table from which we retrieve data, defaults to "P1"
            columns={}, # mapping Census column names from the table to human-readable names, if desired
            geometry="tract", # data granularity, one of "block" (default), "block group", or "tract"
           )

JSONDecodeError: [Errno Expecting value] 
<html>
    <head>
        <title>Invalid Key</title>
    </head>
    <body>
        <p>
            A valid <em>key</em> must be included with each data API request.
            You included a key with this request, however, it is not valid.
            Please check your key and try again.
        </p>
        <p>
            If you do not have a key you my sign up for one <a href="key_signup.html">here</a>.
        </p>
    </body>
</html>
: 1

In [3]:
# `variables()` produces the default mapping that `census()` uses to translate
# Census column names into human-readable ones; here it is requested for table "P4".
mapping = variables("P4")

### acs5
Uses the US Census Bureau's API to retrieve 5-year population estimates from the American Community Survey (ACS) for the provided state, geometry level, and year.

In [4]:
%%time
acs5_df = acs5(us.states.MA,
          geometry="tract", # data granularity, either "tract" (default) or "block group"
          year=2019, # Year for which data is retrieved. Defaults to 2019, i.e. 2015-19 ACS 5-year
         )

CPU times: user 485 ms, sys: 58.9 ms, total: 543 ms
Wall time: 10 s


### cvap
Uses the US Census Bureau's API to retrieve the 2019 5-year CVAP (Citizen Voting Age Population) data for the provided state at the specified geometry. Please note that the geometries are from the **2010 Census**.

In [5]:
%%time
cvap_df = cvap(us.states.MA,
          geometry="tract", # data granularity, either "tract" (default) or "block group"
         )

CPU times: user 3.17 s, sys: 236 ms, total: 3.41 s
Wall time: 3.42 s


In [6]:
# Print the totals from both frames side by side for every column name they share.
# TRACT10 is excluded from the comparison (presumably the tract identifier
# rather than a count — confirm).
for col in set(cvap_df.columns) & set(acs5_df.columns):
    if col != "TRACT10":
        print(f"acs5 {col}: {acs5_df[col].sum()}")
        print(f"cvap {col}: {cvap_df[col].sum()}")

acs5 HCVAP19: 427807
cvap HCVAP19: 427852
acs5 CVAP19: 4992533
cvap CVAP19: 4992540
acs5 NHWCVAP19: 3931402
cvap NHWCVAP19: 3954475


In [7]:
# List the columns of the frame returned by cvap().
cvap_df.columns

Index(['TRACT10', 'CVAP19', 'CVAP19e', 'NHCVAP19', 'NHCVAP19e', 'NHAICVAP19',
       'NHAICVAP19e', 'NHACVAP19', 'NHACVAP19e', 'NHBCVAP19', 'NHBCVAP19e',
       'NHNHPICVAP19', 'NHNHPICVAP19e', 'NHWCVAP19', 'NHWCVAP19e',
       'NHAIWCVAP19', 'NHAIWCVAP19e', 'NHAWCVAP19', 'NHAWCVAP19e',
       'NHBWCVAP19', 'NHBWCVAP19e', 'NHAIBCVAP19', 'NHAIBCVAP19e',
       'NHOTHCVAP19', 'NHOTHCVAP19e', 'HCVAP19', 'HCVAP19e', 'POCCVAP19'],
      dtype='object')

In [8]:
# List the columns of the frame returned by acs5().
acs5_df.columns

Index(['TRACT10', 'TOTPOP19', 'WHITE19', 'BLACK19', 'AMIN19', 'ASIAN19',
       'NHPI19', 'OTH19', '2MORE19', 'NHISP19', 'WVAP19', 'BVAP19',
       'AMINVAP19', 'ASIANVAP19', 'NHPIVAP19', 'OTHVAP19', '2MOREVAP19',
       'NHWVAP19', 'HVAP19', 'WCVAP19', 'BCVAP19', 'AMINCVAP19', 'ASIANCVAP19',
       'NHPICVAP19', 'OTHCVAP19', '2MORECVAP19', 'NHWCVAP19', 'HCVAP19',
       'VAP19', 'CVAP19', 'POCVAP19'],
      dtype='object')

In [9]:
# Statewide total of the WCVAP19 column in the ACS frame.
acs5_df["WCVAP19"].sum()

4183054

In [10]:
# Statewide total of the NHWCVAP19 column in the CVAP frame.
cvap_df["NHWCVAP19"].sum()

3954475