######### https://api.census.gov/data/2020/acs/acs5/variables.html

### Import Necessary Libraries

In [3]:
# Load necessary libraries
%load_ext autotime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
from census import Census
from us import states
import os
from dotenv import load_dotenv
import requests

time: 1.98 s (started: 2024-09-24 00:47:05 -04:00)


In [4]:
# Load environment variables from the local key.env file
load_dotenv(dotenv_path='key.env')

# Retrieve the API key from the environment
api_key = os.getenv('API_KEY')

# Fail fast when the key is missing: os.getenv returns None in that case, and
# None would otherwise be formatted into the request URL as the text "None".
if not api_key:
    raise RuntimeError(
        "API_KEY is not set; define it in key.env or in the process environment"
    )


time: 0 ns (started: 2024-09-24 00:47:07 -04:00)


### Fetch and Parse Census Variables Information

In [6]:
# URL of the variables page for the 2009 ACS 5-year dataset
variables_url = 'https://api.census.gov/data/2009/acs/acs5/variables.html'

# Dict mapping variable codes to their labels. It is created before any
# network call so the name always exists for the cells that read it later.
variable_label_mapping = {}

# Send a request to the URL; the timeout keeps the cell from hanging
# indefinitely if the server stops responding.
response = requests.get(variables_url, timeout=60)

# Raise on an HTTP error status instead of silently skipping the parse,
# which would leave the mapping empty without any visible failure.
response.raise_for_status()

# Parse the content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Find the first table in the parsed HTML
table = soup.find('table')
if table is None:
    raise ValueError(f'No <table> element found at {variables_url}')

# Extract all table rows
rows = table.find_all('tr')

# Loop through each row starting from the second; the first row is the header
for row in rows[1:]:
    # Extract the cells of this row
    cols = row.find_all('td')
    # Rows with fewer than two cells carry no code/label pair and are skipped
    if len(cols) >= 2:
        # First cell is used as the variable code, second as its label
        variable_code = cols[0].text.strip()
        variable_label = cols[1].text.strip()

        # Store the mapping of variable code to label
        variable_label_mapping[variable_code] = variable_label


    

time: 8.77 s (started: 2024-09-24 00:47:07 -04:00)


### Define Race Variables and Build API Request

In [8]:
# Race-related variable codes to request, one per line
race_variable_codes = [
    'B02001_001E',
    'B02001_002E',
    'B02001_003E',
    'B02001_004E',
    'B02001_005E',
    'B02001_006E',
    'B02001_007E',
    'B02001_008E',
]

# Assemble the Census API request URL piece by piece: dataset endpoint,
# requested fields (NAME plus the race codes), geography filter, and API key.
url = (
    'https://api.census.gov/data/2009/acs/acs5'
    f'?get=NAME,{",".join(race_variable_codes)}'
    '&for=state:11'
    f'&key={api_key}'
)


time: 0 ns (started: 2024-09-24 00:47:16 -04:00)


### Fetch and Process Census Data

In [10]:
# Send the request to the Census API to get the race data; the timeout keeps
# the cell from hanging indefinitely if the server stops responding.
data_response = requests.get(url, timeout=60)

# Raise on an HTTP error status so a failed request stops here with a clear
# error instead of leaving `df` undefined for the cells that follow.
data_response.raise_for_status()

# Parse the JSON response
data = data_response.json()

# The first row contains the column names; the remaining rows are the data
columns = data[0]
df = pd.DataFrame(data[1:], columns=columns)

# Resolve each requested code to its label once, falling back to the raw code
# when no label is known, so the rename and the column selection below always
# agree (a missing label can no longer raise KeyError during selection).
race_column_labels = [
    variable_label_mapping.get(code, code) for code in race_variable_codes
]

# Rename the code columns to their labels (returns a new frame, no inplace)
df = df.rename(columns=dict(zip(race_variable_codes, race_column_labels)))

# Select only the relevant columns (name plus the race-related data)
df = df[['NAME'] + race_column_labels]

time: 625 ms (started: 2024-09-24 00:47:16 -04:00)


In [11]:
# Preview up to ten rows of the DataFrame as a sanity check on the result
df.head(n=10)

Unnamed: 0,NAME,Estimate!!Total,Estimate!!Total!!White alone,Estimate!!Total!!Black or African American alone,Estimate!!Total!!American Indian and Alaska Native alone,Estimate!!Total!!Asian alone,Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone,Estimate!!Total!!Some other race alone,Estimate!!Total!!Two or more races
0,District of Columbia,588433,210983,324631,1573,17439,426,23935,9446


time: 47 ms (started: 2024-09-24 00:47:17 -04:00)


In [12]:
# Write the DataFrame to a CSV file, leaving out the row index
df.to_csv(path_or_buf='census_data_dc_2009.csv', index=False)

time: 0 ns (started: 2024-09-24 00:47:17 -04:00)
