# Census Data Tools

In [1]:
from morpc.logs import config_logs

config_logs('./temp_data/morpc-census-demo.log', level='info')

2025-11-05 09:25:48,441 | INFO | morpc.logs.config_logs: Set up logging save to file ./temp_data/morpc-census-demo.log


In [15]:
from morpc.census.api import get_api_request

req = get_api_request('acs/acs5', 2023, 'B01001', 'region15', scale='block group', variables=['B01001_001E'])

req

2025-11-05 11:45:43,159 | INFO | morpc.census.api.valid_survey_table: acs/acs5 is valid and implemented.
2025-11-05 11:45:43,162 | INFO | morpc.census.api.valid_vintage: 2023 is valid vintage for acs/acs5
2025-11-05 11:45:43,164 | INFO | morpc.census.api.valid_survey_table: acs/acs5 is valid and implemented.
2025-11-05 11:45:43,165 | INFO | morpc.census.api.valid_vintage: 2023 is valid vintage for acs/acs5
2025-11-05 11:45:43,166 | INFO | morpc.req.get_json_safely: Getting data from https://api.census.gov/data/2023/acs/acs5/groups.json with parameters None.
2025-11-05 11:45:43,642 | INFO | morpc.census.api.valid_group: Group B01001 valid group for 2023 acs/acs5.
2025-11-05 11:45:43,643 | ERROR | morpc.census.api.valid_survey_table: survey and table 2023 combination not available or not yet implemented.


UnboundLocalError: cannot access local variable 'json' where it is not associated with a value

In [11]:
from morpc.req import get_json_safely
import pandas as pd

pd.DataFrame.from_records(get_json_safely(**req))

2025-11-05 09:29:40,249 | INFO | morpc.req.get_json_safely: Getting data from https://api.census.gov/data/2023/acs/acs5? with parameters {'get': 'NAME,GEO_ID,group(B12001)', 'ucgid': 'pseudo(0500000US39041$1500000,0500000US39045$1500000,0500000US39047$1500000,0500000US39049$1500000,0500000US39073$1500000,0500000US39083$1500000,0500000US39089$1500000,0500000US39091$1500000,0500000US39097$1500000,0500000US39101$1500000,0500000US39117$1500000,0500000US39127$1500000,0500000US39129$1500000,0500000US39141$1500000,0500000US39159$1500000)'}.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,71,72,73,74,75,76,77,78,79,80
0,NAME,GEO_ID,B12001_001E,B12001_001EA,B12001_001M,B12001_001MA,B12001_002E,B12001_002EA,B12001_002M,B12001_002MA,...,B12001_018EA,B12001_018M,B12001_018MA,B12001_019E,B12001_019EA,B12001_019M,B12001_019MA,GEO_ID,NAME,ucgid
1,Block Group 3; Census Tract 87.20; Franklin Co...,1500000US390490087203,846,,382,,552,,303,,...,,13,,20,,24,,1500000US390490087203,Block Group 3; Census Tract 87.20; Franklin Co...,1500000US390490087203
2,Block Group 2; Census Tract 105.02; Franklin C...,1500000US390490105022,1368,,331,,605,,147,,...,,32,,84,,55,,1500000US390490105022,Block Group 2; Census Tract 105.02; Franklin C...,1500000US390490105022
3,Block Group 2; Census Tract 38; Logan County; ...,1500000US390910038002,632,,150,,333,,80,,...,,13,,38,,34,,1500000US390910038002,Block Group 2; Census Tract 38; Logan County; ...,1500000US390910038002
4,Block Group 1; Census Tract 83.50; Franklin Co...,1500000US390490083501,2346,,776,,1115,,504,,...,,69,,281,,182,,1500000US390490083501,Block Group 1; Census Tract 83.50; Franklin Co...,1500000US390490083501
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1701,Block Group 1; Census Tract 7565; Licking Coun...,1500000US390897565001,1869,,326,,881,,194,,...,,26,,147,,104,,1500000US390897565001,Block Group 1; Census Tract 7565; Licking Coun...,1500000US390897565001
1702,Block Group 1; Census Tract 63.72; Franklin Co...,1500000US390490063721,1590,,473,,779,,285,,...,,131,,123,,117,,1500000US390490063721,Block Group 1; Census Tract 63.72; Franklin Co...,1500000US390490063721
1703,Block Group 1; Census Tract 81.69; Franklin Co...,1500000US390490081691,2371,,443,,1195,,220,,...,,47,,93,,84,,1500000US390490081691,Block Group 1; Census Tract 81.69; Franklin Co...,1500000US390490081691
1704,Block Group 1; Census Tract 93.11; Franklin Co...,1500000US390490093111,783,,230,,313,,116,,...,,63,,70,,51,,1500000US390490093111,Block Group 1; Census Tract 93.11; Franklin Co...,1500000US390490093111


### Getting all available datasets in the api.

MORPC works regularly with census data, including but not limited to ACS 5 and 1-year, Decennial Census, PEP, and geographies. The following module is useful for gathering and organizing census data for processes in various workflow. Those workflows are linked when appropriate. 

## API functions and variables

api_get() is a low-level wrapper for Census API requests that returns the results as a pandas dataframe. If necessary, it splits the request into several smaller requests to bypass the 50-variable limit imposed by the API.  

The resulting dataframe is indexed by GEOID (regardless of whether it was requested) and omits other fields that are not requested but which are returned automatically with each API request (e.g. "state", "county") 

In [None]:
url = 'https://api.census.gov/data/2022/acs/acs1'
params = {
    "get": "GEO_ID,NAME,B01001_001E",
    "for": "county:049,041",
    "in": "state:39"
}

In [None]:
api = morpc.census.api_get(url, params)

In [None]:
api

## Geography tools

In [None]:
from morpc.census import geos

In [None]:
geos.SCOPES['region15']

In [None]:
for_params=geos.param_from_scope(scope='region15')


In [None]:
for_params[1], 

In [None]:
geos.geoids_from_params(for_params=for_params[1], in_params=for_params[0])

## American Community Survey (ACS) Data Class

When using ACS data, generally we will be digesting data produded using the [morpc-censusacs-fetch](https://github.com/morpc/morpc-censusacs-fetch) workflow. The data that is produced from that script is by default saved in its output_data folders ./morpc-censusacs-fetch/output_data/

The Census ACS Fetch script leverages the `acs_data` class form `morpc.census`


### Create an initial object which represents a variable in the ACS data api.

The class takes 3 arguments:

1. variable group number
2. the year
3. the type of survey (1 or 5 year estimates)

In [None]:
from morpc.census import ACS

import morpc

morpc.logs.config_logs('./temp_data/morpc-census-demo.log', level='debug')

In [None]:
acs = ACS('B01001', '2023', 'acs5')

The initial call creates queries the Census for the variable definitions and returns a dictionary of the available variables in the group. see `acs.VARS`

In [None]:
acs.VARS

The initial call alse fetchs a list of dimensions from a cached json file in ./morpc/census/acs_variable_group.json and is stored in morpc.census.ACS_VAR_GROUPS.

#### Manual verfication for variable dimension names. 

The list of dimensions are automatically created from the Census Variable labels and need verified before being used. If the dimesion names have not be verified, the will not be stored. Navigate to the JSON and check to make sure that there are the correct number of dimension and that they are in the correct order. Change the verfication field to `true`.

In [None]:
acs.DIMENSIONS

### Query the API for the deisred variables and geography

The `.query()` method queries the API and caches the data in memory under `acs.DATA`. At the same time it creates a frictionless schema that corrosponds with the data. 


In [None]:
acs = acs.query(for_param='county:*', in_param='state:39')


#### scope:
These are pre-defined sumlevels and scopes for commonly queried geographies. see `morpc.census.SCOPES`.

In [None]:
acs = acs.scope(scale="block group", scope='region15')

### For custom queries, use for and in parameters to pass to api query. 

#### for_param:
(optional) The geographies for which to call the the query "state:*" represents all states. "state:39" represent Ohio.

#### in_param:
(optional) A filter for the for parameter. In combinations this allows you do call for small geograhpies inside larger ones. 

> Examples: for_param="county:\*", in_param="state:39" would get all counties in Ohio.
> for_param="tract:\*", in_param='state:39,county:041,049' gets all census tracts in Delaware and Franklin Counties.

### Filter the variables using the get parameter

#### get_param:
(Optional) If you want to return a subset of variables, they can be passed here as a list.

### Dimension Tables

When the query is called the class makes table with the dimensions included that can be used to get summaries of the data. 

This can be used to get quick queries for summaries. 

In [None]:
acs.DIM_TABLE.LONG

In [None]:
acs.DIM_TABLE.WIDE

In [None]:
acs.DIM_TABLE.PERCENT

### Save raw data (not dim table) as a frictionless resource with schema

After querying the data, save the data as a frictionless resource with reasonable descriptors. 

In [None]:
acs.save(output_dir='./temp_data/')

In [None]:
acs.SCHEMA

In [None]:
acs.RESOURCE

## Load data from cached file

In [None]:
import morpc

In [None]:
acs = morpc.census.ACS('B25010', '2023', '5').load(scope='region15-tracts', dirname='./temp_data/')

## Georeference the data to map

Add geometries by joining GEOS to DATA.

In [None]:
acs.GEOS

In [None]:
import geopandas as gpd
acs.DATA = gpd.GeoDataFrame(acs.DATA.join(acs.GEOS), geometry='geometry')

In [None]:
acs.DATA.plot(column='B01001_002E')

## Use the built in .explore() method to view a map of all the columns in data

In [None]:
acs.explore(table='PERCENTS')

In [None]:
acs.MAP

## Using the rest_api module to fetch geometry data from Census API

In [None]:
import morpc.rest_api as rest_api
import morpc.census as census

In [None]:

url =  rest_api.get_layer_url(2024, 'county subdivisions', survey='ACS')

query = "STATE = '39' and COUNTY = '049'"

resource = rest_api.resource(
    name = 'morpc-franklin-tracts',
    url = url,
    where = query,
    max_record_count=500
)

In [None]:
gdf = rest_api.gdf_from_resource(resource)

## Below should still be functional, but hoping to implement into ACS class

#### Load the data using frictionless.load_data()

In [None]:
data, resource, schema = morpc.frictionless.load_data('./temp_data/morpc-acs5-2023-state-B01001.resource.yaml', verbose=False)

#### Using ACS_ID_FIELDS to get the fields ids

In [None]:
morpc.census.acs_generate_universe_table(data.set_index("GEO_ID"), "B01001_001")

#### Create a dimension table with the data and the dimension names

In [None]:
dim_table = morpc.census.acs_generate_dimension_table(data.set_index("GEO_ID"), schema, idFields=idFields, dimensionNames=["Sex", "Age group"])

In [None]:
dim_table.loc[dim_table['Variable type'] == 'Estimate'].head()

### Build ACS Variable Group JSON for Dimension names