In [124]:
import requests
import json
import pandas as pd
import geopandas as gpd
import os
from dotenv import load_dotenv
from tqdm import tqdm

## Identify the variables to call from the ACS

### All Possible ACS Vars

In [89]:
## get a list of all the possible variables

url = "https://api.census.gov/data/2018/acs/acs5/variables.json"

# Bound the wait so an unreachable API cannot hang the kernel, and fail loudly on
# an HTTP error instead of tripping over a confusing JSON/KeyError further down.
response = requests.get(url, timeout=120)
response.raise_for_status()
variables_data = response.json()

variables_list = []

# One record per variable code; metadata fields absent from the payload become "".
for variable, info in variables_data['variables'].items():
    variables_list.append({
        'Variable': variable,
        'Label': info.get('label', ''),
        'Concept': info.get('concept', ''),
        'Group': info.get('group', '')
    })

# Convert the list of dictionaries into a DataFrame
acs_all_variables = pd.DataFrame(variables_list)

In [None]:
# save for future reference without calling api again

# acs_all_variables.to_csv("./data/acs/variables/acs5_2018_all_vars.csv", index=False)

### Manually Compiled Vars

In [None]:
# Read only the "Variable Code" column of the hand-compiled variable sheet
# (the sheet probably contains some errors).
variables = pd.read_csv(
    "./data/acs/variables/compiled_acs_vars.csv",
    usecols=["Variable Code"],
)


### Merge 
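Merge the complete Census variable list with my manually compiled one to pull each variable's details directly from the Census and to identify errors: a compiled code with no match in the Census list comes back with empty `Variable`, `Label`, `Concept`, and `Group` fields.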

In [96]:
# Left join: every hand-compiled code is kept, and codes that have no match in
# the Census list end up with missing values in the Census columns.
merged_variables = variables.merge(
    acs_all_variables,
    how="left",
    left_on="Variable Code",
    right_on="Variable",
)

merged_variables

Unnamed: 0,Variable Code,Variable,Label,Concept,Group
0,B03002_003E,B03002_003E,Estimate!!Total!!Not Hispanic or Latino!!White...,HISPANIC OR LATINO ORIGIN BY RACE,B03002
1,B01001_002E,B01001_002E,Estimate!!Total!!Male,SEX BY AGE,B01001
2,B01001_003E,B01001_003E,Estimate!!Total!!Male!!Under 5 years,SEX BY AGE,B01001
3,B19013_001E,B19013_001E,Estimate!!Median household income in the past ...,MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS ...,B19013
4,B17001_002E,B17001_002E,Estimate!!Total!!Income in the past 12 months ...,POVERTY STATUS IN THE PAST 12 MONTHS BY SEX BY...,B17001
...,...,...,...,...,...
310,B25083_001E,B25083_001E,Estimate!!Median value (dollars),MEDIAN VALUE (DOLLARS) FOR MOBILE HOMES,B25083
311,B25084_001E,,,,
312,B25086_001E,B25086_001E,Estimate!!Aggregate price asked (dollars),AGGREGATE PRICE ASKED (DOLLARS),B25086
313,B25087_001E,B25087_001E,Estimate!!Total,MORTGAGE STATUS AND SELECTED MONTHLY OWNER COSTS,B25087


In [110]:
# Clean the merged table in a single chained pass.
merged_variables = (
    merged_variables
    # drop incorrect variables (codes with no match in the Census list)
    .dropna(subset=["Variable"])
    # drop duplicated variables
    .drop_duplicates()
    # drop duplicate column; errors="ignore" keeps this cell re-runnable after
    # the column has already been removed by a previous execution
    .drop(columns="Variable Code", errors="ignore")
)

In [111]:
merged_variables

Unnamed: 0,Variable,Label,Concept,Group
0,B03002_003E,Estimate!!Total!!Not Hispanic or Latino!!White...,HISPANIC OR LATINO ORIGIN BY RACE,B03002
1,B01001_002E,Estimate!!Total!!Male,SEX BY AGE,B01001
2,B01001_003E,Estimate!!Total!!Male!!Under 5 years,SEX BY AGE,B01001
3,B19013_001E,Estimate!!Median household income in the past ...,MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS ...,B19013
4,B17001_002E,Estimate!!Total!!Income in the past 12 months ...,POVERTY STATUS IN THE PAST 12 MONTHS BY SEX BY...,B17001
...,...,...,...,...
309,B25082_001E,Estimate!!Aggregate value (dollars),AGGREGATE VALUE (DOLLARS) BY MORTGAGE STATUS,B25082
310,B25083_001E,Estimate!!Median value (dollars),MEDIAN VALUE (DOLLARS) FOR MOBILE HOMES,B25083
312,B25086_001E,Estimate!!Aggregate price asked (dollars),AGGREGATE PRICE ASKED (DOLLARS),B25086
313,B25087_001E,Estimate!!Total,MORTGAGE STATUS AND SELECTED MONTHLY OWNER COSTS,B25087


In [112]:
# Persist the cleaned variable table (without the index column) for later reuse.
merged_variables.to_csv(
    "./data/acs/variables/acs5_2018_selected_vars.csv",
    index=False,
)

## Use API to call data for each state

In [None]:
load_dotenv()

api_key = os.getenv('CENSUS_API')

# Fail fast: without this check a missing key would be interpolated into every
# request URL as the literal text "None".
if not api_key:
    raise RuntimeError(
        "CENSUS_API is not set; define it in the environment or in a .env file"
    )

# Variable codes to request from the API, in table order.
variable_list = list(merged_variables["Variable"])

In [None]:
####################################################################################################

def fetch_vars(year, variables, state, api_key, timeout=300):
    """Request ACS 5-year estimates for every block group in one state.

    Parameters
    ----------
    year : int or str
        Survey year placed in the API path.
    variables : str
        Comma-separated variable codes placed in the ``get=`` query parameter.
    state : str
        State code placed in the ``in=state:`` clause.
    api_key : str
        Census API key appended to the query string.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection cannot block forever.

    Returns
    -------
    The decoded JSON body when the server answers with HTTP 200, otherwise
    ``None`` (non-200 status or a ``requests`` exception; both are printed).
    The caller in this notebook treats the body as a list of rows whose first
    row holds the column names.
    """

    url = f"https://api.census.gov/data/{year}/acs/acs5?get={variables}&for=block%20group:*&in=state:{state}%20county:*%20tract:*&key={api_key}"

    try:
        response = requests.get(url, timeout=timeout)

        if response.status_code == 200:

            print("Response Code 200")
            return response.json()

        else:

            # Include the status code so a failure can be diagnosed from the output.
            print(f"Failed batch request (HTTP {response.status_code})")
            return None

    except requests.exceptions.RequestException as e:
        # The exception text can echo the request URL, which embeds the API key;
        # scrub it before it lands in the notebook output.
        message = str(e)
        if api_key:
            message = message.replace(str(api_key), "<redacted>")
        print(f"Request error for {variables}: {message}")
        return None

####################################################################################################

def merge_dataframes(df_list):
    """Join a sequence of DataFrames on their shared geography columns.

    Each frame is merged onto the running result with ``pd.merge`` (default
    inner join) using the keys ``state``, ``county``, ``tract`` and
    ``block group``.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames to combine; every frame must contain the four key columns.

    Returns
    -------
    pandas.DataFrame
        The first frame itself when only one is given, otherwise the merged result.

    Raises
    ------
    ValueError
        If ``df_list`` is empty (for example when every batch request failed).
    """

    if len(df_list) == 0:
        raise ValueError("merge_dataframes needs at least one DataFrame; df_list is empty")

    final_df = df_list[0]

    for df in df_list[1:]:

        final_df = pd.merge(final_df, df, on=["state","county","tract","block group"])

    return final_df

####################################################################################################

def make_geoid(df):
    """Add a ``GEOID`` column to ``df`` in place.

    The value is the concatenation of ``state`` zero-padded to 2 characters,
    ``county`` to 3, ``tract`` to 6, followed by ``block group`` as-is.
    All four columns are accessed through the ``.str`` accessor or string
    concatenation, so they are expected to hold strings. Returns ``None``.
    """
    geoid = None
    for column, width in (("state", 2), ("county", 3), ("tract", 6)):
        padded = df[column].str.zfill(width)
        geoid = padded if geoid is None else geoid + padded

    # The block-group part is appended without any padding.
    df["GEOID"] = geoid + df["block group"]


In [180]:
# Split the variable codes into consecutive chunks of at most `batch_size` items.
# NOTE(review): 50 is presumably the per-request variable cap of the Census API — confirm.
batch_size = 50
batches = []
for start in range(0, len(variable_list), batch_size):
    batches.append(variable_list[start:start + batch_size])

len(batches)

6

In [181]:
md_results = []
# Batches whose request failed, kept so they can be inspected or retried
# instead of silently going missing from the merged table.
md_failed_batches = []

for batch in tqdm(batches):

    # Named `batch_vars` rather than `variables` so the DataFrame of compiled
    # codes loaded earlier in the notebook is not overwritten by a string.
    batch_vars = ",".join(batch)

    data = fetch_vars(year=2018, variables=batch_vars, state="24", api_key=api_key)

    if data:
        # First row of the payload holds the column names, the rest are records.
        df_batch = pd.DataFrame(data[1:], columns=data[0])
        md_results.append(df_batch)

    else:
        print("batch failed")
        md_failed_batches.append(batch)

 17%|█▋        | 1/6 [01:20<06:40, 80.03s/it]

Response Code 200


 33%|███▎      | 2/6 [02:38<05:17, 79.33s/it]

Response Code 200


 50%|█████     | 3/6 [03:59<04:00, 80.10s/it]

Response Code 200


 67%|██████▋   | 4/6 [05:17<02:38, 79.22s/it]

Response Code 200


 83%|████████▎ | 5/6 [06:38<01:19, 79.65s/it]

Response Code 200


100%|██████████| 6/6 [07:59<00:00, 79.89s/it]

Response Code 200





In [182]:
# Join the per-batch tables on state / county / tract / block group.
md_final_df = merge_dataframes(df_list=md_results)

# Adds the "GEOID" column to md_final_df in place.
make_geoid(df=md_final_df)

In [None]:
# Write the merged table for state 24 (MD) to disk without the index column.
md_final_df.to_csv(
    "./data/acs/acs5_2018_selected_vars_MD.csv",
    index=False,
)