In [1]:
# data science imports
import pandas as pd
import matplotlib.pyplot as plt

# census data imports
import censusdis.data as ced
import censusdis.maps as cem

# other imports
import os

In [24]:
# improves pandas readability
pd.set_option('display.max_columns', None)
pd.set_option("display.max_colwidth", 250)

In [25]:
with open("../census_api_key.txt", "r") as f:
    CENSUS_API_KEY = f.read().strip()

# print(CENSUS_API_KEY)

In [22]:
# grab a list of all the available datasets
df_datasets = ced.variables.all_data_sets()
df_datasets.head()

Unnamed: 0,YEAR,SYMBOL,DATASET,TITLE,DESCRIPTION,API BASE URL
0,1986,CBP,cbp,1986 County Business Patterns: Business Patterns,"County Business Patterns (CBP) is an annual series that provides economic data by industry at the U.S., State, County and Metropolitan Area levels. This series includes the number of establishments, employment during the week of March 12, first quarter payroll, and annual payroll. CBP provides s...",http://api.census.gov/data/1986/cbp
1,1987,CBP,cbp,1987 County Business Patterns: Business Patterns,"County Business Patterns (CBP) is an annual series that provides economic data by industry at the U.S., State, County and Metropolitan Area levels. This series includes the number of establishments, employment during the week of March 12, first quarter payroll, and annual payroll. CBP provides s...",http://api.census.gov/data/1987/cbp
2,1988,CBP,cbp,1988 County Business Patterns: Business Patterns,"County Business Patterns (CBP) is an annual series that provides economic data by industry at the U.S., State, County and Metropolitan Area levels. This series includes the number of establishments, employment during the week of March 12, first quarter payroll, and annual payroll. CBP provides s...",http://api.census.gov/data/1988/cbp
3,1989,CBP,cbp,1989 County Business Patterns: Business Patterns,"County Business Patterns (CBP) is an annual series that provides economic data by industry at the U.S., State, County and Metropolitan Area levels. This series includes the number of establishments, employment during the week of March 12, first quarter payroll, and annual payroll. CBP provides s...",http://api.census.gov/data/1989/cbp
4,1989,CPS_BASIC_APR,cps/basic/apr,Apr 1989 Current Population Survey: Basic Monthly,"To provide estimates of employment, unemployment, and other characteristics of the general labor force, of the population as a whole, and of various subgroups of the population. Monthly labor force data for the country are used by the Bureau of Labor Statistics (BLS) to determine the distributio...",http://api.census.gov/data/1989/cps/basic/apr


In [56]:
# you can search the df_datasets dataframe for a specific term
df_datasets.query("DESCRIPTION.str.contains('migration', case=False)").query("YEAR > 2020").head()

Unnamed: 0,YEAR,SYMBOL,DATASET,TITLE,DESCRIPTION,API BASE URL
1444,2021,ACS_FLOWS,acs/flows,American Community Survey: 5-Year Migration Flows,"Migration flows are derived from the relationship between the location of current residence in the American Community Survey (ACS) sample and the responses given to the migration question ""Where did you live 1 year ago?"". There are flow statistic...",http://api.census.gov/data/2021/acs/flows
1447,2021,CPS_BASIC_APR,cps/basic/apr,Current Population Survey: Basic Monthly,"To provide estimates of employment, unemployment, and other characteristics of the general labor force, of the population as a whole, and of various subgroups of the population. Monthly labor force data for the country are used by the Bureau of ...",http://api.census.gov/data/2021/cps/basic/apr
1448,2021,CPS_BASIC_AUG,cps/basic/aug,Current Population Survey: Basic Monthly,"To provide estimates of employment, unemployment, and other characteristics of the general labor force, of the population as a whole, and of various subgroups of the population. Monthly labor force data for the country are used by the Bureau of ...",http://api.census.gov/data/2021/cps/basic/aug
1449,2021,CPS_BASIC_DEC,cps/basic/dec,Current Population Survey: Basic Monthly,"To provide estimates of employment, unemployment, and other characteristics of the general labor force, of the population as a whole, and of various subgroups of the population. Monthly labor force data for the country are used by the Bureau of ...",http://api.census.gov/data/2021/cps/basic/dec
1450,2021,CPS_BASIC_FEB,cps/basic/feb,Current Population Survey: Basic Monthly,"To provide estimates of employment, unemployment, and other characteristics of the general labor force, of the population as a whole, and of various subgroups of the population. Monthly labor force data for the country are used by the Bureau of ...",http://api.census.gov/data/2021/cps/basic/feb


In [65]:
# just the acs
df_datasets.query("DATASET.str.contains('acs', case=False)").head()

Unnamed: 0,YEAR,SYMBOL,DATASET,TITLE,DESCRIPTION,API BASE URL
443,2004,ACS1_PUMS,acs/acs1/pums,2004 American Community Survey: 1-Year Estimates - Public Use Microdata Sample,"The American Community Survey (ACS) Public Use Microdata Sample (PUMS) contains a sample of responses to the ACS. The ACS PUMS dataset includes variables for nearly every question on the survey, as well as many new variables that were derived aft...",http://api.census.gov/data/2004/acs/acs1/pums
487,2005,ACS1,acs/acs1,American Community Survey: 1-Year Estimates: Detailed Tables 1-Year,"The American Community Survey (ACS) is an ongoing survey that provides data every year -- giving communities the current information they need to plan investments and services. The ACS covers a broad range of topics about social, economic, demogr...",http://api.census.gov/data/2005/acs/acs1
488,2005,ACS1_PROFILE,acs/acs1/profile,American Community Survey: 1-Year Estimates: Data Profiles 1-Year,"The American Community Survey (ACS) is an ongoing survey that provides data every year -- giving communities the current information they need to plan investments and services. The ACS covers a broad range of topics about social, economic, demogr...",http://api.census.gov/data/2005/acs/acs1/profile
489,2005,ACS1_PUMS,acs/acs1/pums,2005 American Community Survey: 1-Year Estimates - Public Use Microdata Sample,"The American Community Survey (ACS) Public Use Microdata Sample (PUMS) contains a sample of responses to the ACS. The ACS PUMS dataset includes variables for nearly every question on the survey, as well as many new variables that were derived aft...",http://api.census.gov/data/2005/acs/acs1/pums
490,2005,ACS1_PUMSPR,acs/acs1/pumspr,2005 American Community Survey: 1-Year Estimates - Puerto Rico Public Use Microdata Sample,"The Public Use Microdata Sample (PUMS) for Puerto Rico (PR) contains a sample of responses to the Puerto Rico Community Survey (PRCS). The PRCS is similar to, but separate from, the American Community Survey (ACS). The PRCS collects data about th...",http://api.census.gov/data/2005/acs/acs1/pumspr


In [None]:
# grab a list of all the available groups
acs_groups = ced.variables.all_groups('acs/acs1', 2023)
acs_groups

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,acs/acs1,2023,B01001,Sex by Age
1,acs/acs1,2023,B01001A,Sex by Age (White Alone)
2,acs/acs1,2023,B01001B,Sex by Age (Black or African American Alone)
3,acs/acs1,2023,B01001C,Sex by Age (American Indian and Alaska Native Alone)
4,acs/acs1,2023,B01001D,Sex by Age (Asian Alone)
...,...,...,...,...
1427,acs/acs1,2023,C27014,Public Health Insurance by Work Experience
1428,acs/acs1,2023,C27016,Health Insurance Coverage Status by Ratio of Income to Poverty Level in the Past 12 Months by Age
1429,acs/acs1,2023,C27017,Private Health Insurance by Ratio of Income to Poverty Level in the Past 12 Months by Age
1430,acs/acs1,2023,C27018,Public Health Insurance by Ratio of Income to Poverty Level in the Past 12 Months by Age


In [116]:
# you can use this to search
acs_groups.query("DESCRIPTION.str.contains('rent', case=False) & DESCRIPTION.str.contains('dollars', case=False)")

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
136,acs/acs1,2023,B07010,Geographical Mobility in the Past Year by Individual Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) for Current Residence in the United States
137,acs/acs1,2023,B07010PR,Geographical Mobility in the Past Year by Individual Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) for Current Residence in Puerto Rico
138,acs/acs1,2023,B07011,Median Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) by Geographical Mobility in the Past Year for Current Residence in the United States
139,acs/acs1,2023,B07011PR,Median Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) by Geographical Mobility in the Past Year for Current Residence in Puerto Rico
560,acs/acs1,2023,B19064,"Aggregate Interest, Dividends, or Net Rental Income in the Past 12 Months (in 2023 Inflation-Adjusted Dollars) for Households"
855,acs/acs1,2023,B25057,Lower Contract Rent Quartile (Dollars)
856,acs/acs1,2023,B25058,Median Contract Rent (Dollars)
857,acs/acs1,2023,B25059,Upper Contract Rent Quartile (Dollars)
858,acs/acs1,2023,B25060,Aggregate Contract Rent (Dollars)
860,acs/acs1,2023,B25062,Aggregate Rent Asked (Dollars)


In [139]:
# there is also a handy function to search for variables
ced.variables.search_groups('acs/acs1', 2023, pattern = 'means of transportation', case=False).head(30)

Unnamed: 0,DATASET,YEAR,GROUP,DESCRIPTION
0,acs/acs1,2023,B08006,Sex of Workers by Means of Transportation to Work
1,acs/acs1,2023,B08101,Means of Transportation to Work by Age
2,acs/acs1,2023,B08103,Median Age by Means of Transportation to Work
3,acs/acs1,2023,B08105A,Means of Transportation to Work (White Alone)
4,acs/acs1,2023,B08105B,Means of Transportation to Work (Black or African American Alone)
5,acs/acs1,2023,B08105C,Means of Transportation to Work (American Indian and Alaska Native Alone)
6,acs/acs1,2023,B08105D,Means of Transportation to Work (Asian Alone)
7,acs/acs1,2023,B08105E,Means of Transportation to Work (Native Hawaiian and Other Pacific Islander Alone)
8,acs/acs1,2023,B08105F,Means of Transportation to Work (Some Other Race Alone)
9,acs/acs1,2023,B08105G,Means of Transportation to Work (Two or More Races)


In [88]:
# lets say we're interested in two tables. B25071 (rent and income) and B08301 (means of transport to work)

# simplest way to explore
ced.variables.all_variables('acs/acs1', 2023, 'B25071')

Unnamed: 0,YEAR,DATASET,GROUP,VARIABLE,LABEL,SUGGESTED_WEIGHT,VALUES
0,2023,acs/acs1,B25071,B25071_001E,Estimate!!Median gross rent as a percentage of household income,,
1,2023,acs/acs1,B25070,GEO_ID,Geography,,
2,2023,acs/acs1,B25070,NAME,Geographic Area Name,,


In [143]:

ced.variables.all_variables('acs/acs1', 2023, 'B08301')

Unnamed: 0,YEAR,DATASET,GROUP,VARIABLE,LABEL,SUGGESTED_WEIGHT,VALUES
0,2023,acs/acs1,B08301,B08301_001E,Estimate!!Total:,,
1,2023,acs/acs1,B08301,B08301_002E,"Estimate!!Total:!!Car, truck, or van:",,
2,2023,acs/acs1,B08301,B08301_003E,"Estimate!!Total:!!Car, truck, or van:!!Drove alone",,
3,2023,acs/acs1,B08301,B08301_004E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:",,
4,2023,acs/acs1,B08301,B08301_005E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 2-person carpool",,
5,2023,acs/acs1,B08301,B08301_006E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 3-person carpool",,
6,2023,acs/acs1,B08301,B08301_007E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 4-person carpool",,
7,2023,acs/acs1,B08301,B08301_008E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 5- or 6-person carpool",,
8,2023,acs/acs1,B08301,B08301_009E,"Estimate!!Total:!!Car, truck, or van:!!Carpooled:!!In 7-or-more-person carpool",,
9,2023,acs/acs1,B08301,B08301_010E,Estimate!!Total:!!Public transportation (excluding taxicab):,,


In [141]:
# easier to see in a tree format
ced.variables.group_tree('acs/acs1', 2023, 'B08301')

+ Estimate
    + Total: (B08301_001E)
        + Car, truck, or van: (B08301_002E)
            + Drove alone (B08301_003E)
            + Carpooled: (B08301_004E)
                + In 2-person carpool (B08301_005E)
                + In 3-person carpool (B08301_006E)
                + In 4-person carpool (B08301_007E)
                + In 5- or 6-person carpool (B08301_008E)
                + In 7-or-more-person carpool (B08301_009E)
        + Public transportation (excluding taxicab): (B08301_010E)
            + Bus (B08301_011E)
            + Subway or elevated rail (B08301_012E)
            + Long-distance train or commuter rail (B08301_013E)
            + Light rail, streetcar or trolley (carro público in Puerto Rico) (B08301_014E)
            + Ferryboat (B08301_015E)
        + Taxicab (B08301_016E)
        + Motorcycle (B08301_017E)
        + Bicycle (B08301_018E)
        + Walked (B08301_019E)
        + Other means (B08301_020E)
        + Worked from home (B08301_021E)
+ Geography 

In [144]:
variables = {
    "NAME": "name",
    "B08301_001E": "working_pop",
    "B08301_010E": "public_transport",
    "B25071_001E": "percent_rent"
}

In [154]:
from censusdis.states import ALL_STATES_AND_DC, MN

In [None]:
# let's plot commutes from the two counties in Minneapolis, Hennepin and Ramsey

In [166]:
# what's the fips?
counties = (
    ced.download(
        "acs/acs5",
        2023,
        ["NAME"],
        state = MN,
        county = "*"
    )
)

counties.query("NAME.str.contains('Hennepin|Ramsey', case=False)")

Unnamed: 0,STATE,COUNTY,NAME
26,27,53,"Hennepin County, Minnesota"
61,27,123,"Ramsey County, Minnesota"


In [167]:
COUNTIES = ["053", "123"]

In [176]:
(
    ced
    .download(
        dataset = "acs/acs5",
        vintage = 2023,
        download_variables = variables.keys(),
        state = MN,
        county=COUNTIES,
        tract = "*",
    )
    .rename(
        columns = variables
    )
    .assign(
        percent_public_transport = lambda x: x["public_transport"] / x["working_pop"] * 100
    )
)

Unnamed: 0,STATE,COUNTY,TRACT,name,working_pop,public_transport,percent_rent,percent_public_transport
0,27,053,000101,Census Tract 1.01; Hennepin County; Minnesota,1622,116,17.6,7.151665
1,27,053,000102,Census Tract 1.02; Hennepin County; Minnesota,2601,81,34.8,3.114187
2,27,053,000300,Census Tract 3; Hennepin County; Minnesota,1628,42,33.2,2.579853
3,27,053,000601,Census Tract 6.01; Hennepin County; Minnesota,2543,175,23.5,6.881636
4,27,053,000603,Census Tract 6.03; Hennepin County; Minnesota,2293,12,19.5,0.523332
...,...,...,...,...,...,...,...,...
467,27,123,042800,Census Tract 428; Ramsey County; Minnesota,1023,105,31.1,10.263930
468,27,123,042900,Census Tract 429; Ramsey County; Minnesota,2697,148,24.4,5.487579
469,27,123,043001,Census Tract 430.01; Ramsey County; Minnesota,2342,148,33.8,6.319385
470,27,123,043002,Census Tract 430.02; Ramsey County; Minnesota,1114,44,22.4,3.949731
