# Fetch and Merge ACS Variables

This notebook fetches selected variables from the ACS Census API for specified years and merges them with preprocessed life expectancy data. Each final dataset includes the target variable `mean_life_expectancy` and the fetched ACS variable for further analysis.

In [7]:
import os

import pandas as pd
import requests

In [9]:
def fetch_and_merge_acs_variable(variable_code, variable_name, year, api_key, timeout=60):
    """
    Fetches an ACS 5-year variable from the Census API and merges it with the preprocessed DataFrame.

    The variable is requested for every county in every state from the
    `/data/{year}/acs/acs5` endpoint, left-joined onto the preprocessed life
    expectancy table on (State_FIPS, County_FIPS), and the merged table is
    written to `../data/processed/final_dataset/dataset_with_{variable_name}_{year}.csv`.

    Parameters:
    variable_code (str): ACS variable code to fetch (e.g., 'B19083_001E' for Gini Index).
    variable_name (str): Descriptive name for the variable (e.g., 'Gini_Index').
    year (int): Year of the ACS data (e.g., 2011).
    api_key (str): API key for accessing the Census API.
    timeout (float): Seconds to wait for the Census API before giving up (default 60).

    Returns:
    pd.DataFrame: Final merged DataFrame with the ACS variable and life expectancy column added.

    Raises:
    ValueError: If the preprocessed data has no 'mean_life_expectancy' column.
    RuntimeError: If the Census API answers with a status code other than 200.
    """
    # Paths for input and output
    preprocessed_path = f'../data/processed/preprocessed_fips_life_expectancy/preprocessed_life_fips_{year}.csv'
    output_path = f'../data/processed/final_dataset/dataset_with_{variable_name}_{year}.csv'

    # Load preprocessed life expectancy and FIPS DataFrame.
    # FIPS codes are identifiers, not numbers: read them as text so that a
    # missing value cannot turn the whole column into floats ("1.0").
    print(f"Loading preprocessed data for {year}...")
    preprocessed_df = pd.read_csv(
        preprocessed_path,
        dtype={'State_FIPS': str, 'County_FIPS': str}
    )

    # Fail fast, before spending a network request, if the target column is absent.
    # The left merge below keeps every preprocessed column, so checking here is
    # enough to guarantee the column is present in the final dataset.
    if 'mean_life_expectancy' not in preprocessed_df.columns:
        raise ValueError("'mean_life_expectancy' column is missing in the final merged dataset.")

    # API Endpoint and Parameters
    print(f"Fetching {variable_name} ({variable_code}) from the Census API for {year}...")
    acs_endpoint = f'https://api.census.gov/data/{year}/acs/acs5'
    params = {
        'get': variable_code,
        'for': 'county:*',  # All counties
        'in': 'state:*',    # All states
        'key': api_key      # Authentication
    }

    # Make the GET request; the timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(acs_endpoint, params=params, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch {variable_name}. Status code: {response.status_code}")

    # Parse the API Response: the first row holds the column names, the rest are data rows.
    acs_data = response.json()
    acs_df = pd.DataFrame(columns=acs_data[0], data=acs_data[1:])
    acs_df = acs_df.rename(columns={
        variable_code: variable_name,
        'state': 'State_FIPS',
        'county': 'County_FIPS'
    })

    # Normalise the preprocessed FIPS codes to zero-padded strings. A trailing
    # ".0" (left behind when the CSV was written from a float column) is
    # stripped first; otherwise "1.0" would never match the API's "01".
    preprocessed_df['State_FIPS'] = (
        preprocessed_df['State_FIPS'].astype(str).str.replace(r'\.0$', '', regex=True).str.zfill(2)
    )
    preprocessed_df['County_FIPS'] = (
        preprocessed_df['County_FIPS'].astype(str).str.replace(r'\.0$', '', regex=True).str.zfill(3)
    )

    # str.zfill() ensures that the State_FIPS and County_FIPS columns are properly formatted with leading zeros
    acs_df['State_FIPS'] = acs_df['State_FIPS'].str.zfill(2)
    acs_df['County_FIPS'] = acs_df['County_FIPS'].str.zfill(3)
    # Values arrive as strings; anything non-numeric becomes NaN.
    acs_df[variable_name] = pd.to_numeric(acs_df[variable_name], errors='coerce')

    # Merge ACS Data with Preprocessed Data (left join keeps every preprocessed row)
    final_df = pd.merge(
        preprocessed_df,
        acs_df[['State_FIPS', 'County_FIPS', variable_name]],
        on=['State_FIPS', 'County_FIPS'],
        how='left'
    )

    # Save the Merged Dataset, creating the output folder on first use
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    final_df.to_csv(output_path, index=False)

    print(f"Task completed. Final dataset saved: {output_path}")
    return final_df

**Importing the variables that come from the ACS subject tables. Notice that all of these variable codes start with the letter "S". The function is rewritten below with a different ACS endpoint (`/acs/acs5/subject`) to accommodate this change.**

In [12]:
def fetch_and_merge_acs_variable_summary(variable_code, variable_name, year, api_key, timeout=60):
    """
    Fetches an ACS 5-year subject-table variable from the Census API and merges it with the preprocessed DataFrame.

    The variable is requested for every county in every state from the
    `/data/{year}/acs/acs5/subject` endpoint, left-joined onto the preprocessed
    life expectancy table on (State_FIPS, County_FIPS), and the merged table is
    written to `../data/processed/final_dataset/dataset_with_{variable_name}_{year}.csv`.

    Parameters:
    variable_code (str): ACS subject-table variable code to fetch (e.g., 'S1701_C03_001E').
    variable_name (str): Descriptive name for the variable (e.g., 'poverty_rate').
    year (int): Year of the ACS data (e.g., 2012).
    api_key (str): API key for accessing the Census API.
    timeout (float): Seconds to wait for the Census API before giving up (default 60).

    Returns:
    pd.DataFrame: Final merged DataFrame with the ACS variable and life expectancy column added.

    Raises:
    ValueError: If the preprocessed data has no 'mean_life_expectancy' column.
    RuntimeError: If the Census API answers with a status code other than 200.
    """
    # Paths for input and output
    preprocessed_path = f'../data/processed/preprocessed_fips_life_expectancy/preprocessed_life_fips_{year}.csv'
    output_path = f'../data/processed/final_dataset/dataset_with_{variable_name}_{year}.csv'

    # Load preprocessed life expectancy and FIPS DataFrame.
    # FIPS codes are identifiers, not numbers: read them as text so that a
    # missing value cannot turn the whole column into floats ("1.0").
    print(f"Loading preprocessed data for {year}...")
    preprocessed_df = pd.read_csv(
        preprocessed_path,
        dtype={'State_FIPS': str, 'County_FIPS': str}
    )

    # Fail fast, before spending a network request, if the target column is absent.
    # The left merge below keeps every preprocessed column, so checking here is
    # enough to guarantee the column is present in the final dataset.
    if 'mean_life_expectancy' not in preprocessed_df.columns:
        raise ValueError("'mean_life_expectancy' column is missing in the final merged dataset.")

    # API Endpoint and Parameters (subject tables live under /subject)
    print(f"Fetching {variable_name} ({variable_code}) from the Census API for {year}...")
    acs_endpoint = f'https://api.census.gov/data/{year}/acs/acs5/subject'
    params = {
        'get': variable_code,
        'for': 'county:*',  # All counties
        'in': 'state:*',    # All states
        'key': api_key      # Authentication
    }

    # Make the GET request; the timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(acs_endpoint, params=params, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch {variable_name}. Status code: {response.status_code}")

    # Parse the API Response: the first row holds the column names, the rest are data rows.
    acs_data = response.json()
    acs_df = pd.DataFrame(columns=acs_data[0], data=acs_data[1:])
    acs_df = acs_df.rename(columns={
        variable_code: variable_name,
        'state': 'State_FIPS',
        'county': 'County_FIPS'
    })

    # Normalise the preprocessed FIPS codes to zero-padded strings. A trailing
    # ".0" (left behind when the CSV was written from a float column) is
    # stripped first; otherwise "1.0" would never match the API's "01".
    preprocessed_df['State_FIPS'] = (
        preprocessed_df['State_FIPS'].astype(str).str.replace(r'\.0$', '', regex=True).str.zfill(2)
    )
    preprocessed_df['County_FIPS'] = (
        preprocessed_df['County_FIPS'].astype(str).str.replace(r'\.0$', '', regex=True).str.zfill(3)
    )

    # str.zfill() ensures that the State_FIPS and County_FIPS columns are properly formatted with leading zeros
    acs_df['State_FIPS'] = acs_df['State_FIPS'].str.zfill(2)
    acs_df['County_FIPS'] = acs_df['County_FIPS'].str.zfill(3)
    # Values arrive as strings; anything non-numeric becomes NaN.
    acs_df[variable_name] = pd.to_numeric(acs_df[variable_name], errors='coerce')

    # Merge ACS Data with Preprocessed Data (left join keeps every preprocessed row)
    final_df = pd.merge(
        preprocessed_df,
        acs_df[['State_FIPS', 'County_FIPS', variable_name]],
        on=['State_FIPS', 'County_FIPS'],
        how='left'
    )

    # Save the Merged Dataset, creating the output folder on first use
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    final_df.to_csv(output_path, index=False)

    print(f"Task completed. Final dataset saved: {output_path}")
    return final_df

**The code below imports all the single-variable features, i.e. those that can be used as-is without further arithmetic such as summing or computing percentages.**

In [15]:
# Housing and income features (ACS detailed tables)
variables = [
    {'code': 'B19013_001E', 'name': 'median_household_income'},
    {'code': 'B01003_001E', 'name': 'total_population'},
    {'code': 'B19083_001E', 'name': 'gini_index'},
    {'code': 'B25064_001E', 'name': 'median_gross_rent'},
    {'code': 'B25077_001E', 'name': 'median_home_value'},
    # NOTE(review): B25070_007E looks like a single bracket of the "gross rent as a
    # percentage of household income" table rather than an overall burden measure —
    # confirm against the B25070 group listing before interpreting this column.
    {'code': 'B25070_007E', 'name': 'housing_cost_burden'},
    {'code': 'B19301_001E', 'name': 'per_capita_income'},
]

# Read the Census API key from the environment instead of committing it to the
# notebook (raises KeyError if CENSUS_API_KEY is not set). Any key that was
# previously hardcoded here should be treated as exposed and rotated.
census_api_key = os.environ['CENSUS_API_KEY']

# One output CSV is written per (year, variable) pair for 2012 through 2019.
for year in range(2012, 2020):
    for var in variables:
        final_df = fetch_and_merge_acs_variable(
            variable_code=var['code'],
            variable_name=var['name'],
            year=year,
            api_key=census_api_key
        )

Loading preprocessed data for 2012...
Fetching median_household_income (B19013_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_median_household_income_2012.csv
Loading preprocessed data for 2012...
Fetching total_population (B01003_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_total_population_2012.csv
Loading preprocessed data for 2012...
Fetching gini_index (B19083_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_gini_index_2012.csv
Loading preprocessed data for 2012...
Fetching median_gross_rent (B25064_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_median_gross_rent_2012.csv
Loading preprocessed data for 2012...
Fetching median_home_value (B25077_001E) from the Census API for 2012...
Task complete

**Now the variables that are related to race. Check https://api.census.gov/data/2015/acs/acs5/groups/B02001.html for more information.**

In [18]:
# Race features (ACS detailed table B02001)

variables = [
    {'code': 'B02001_001E', 'name': 'total_population_race'},
    {'code': 'B02001_003E', 'name': 'black_population'},
    {'code': 'B02001_002E', 'name': 'white_population'},
    {'code': 'B02001_005E', 'name': 'asian_population'},
    {'code': 'B02001_004E', 'name': 'native_american_population'},
]

# Read the Census API key from the environment instead of committing it to the
# notebook (raises KeyError if CENSUS_API_KEY is not set). Any key that was
# previously hardcoded here should be treated as exposed and rotated.
census_api_key = os.environ['CENSUS_API_KEY']

# One output CSV is written per (year, variable) pair for 2012 through 2019.
for year in range(2012, 2020):
    for var in variables:
        fetch_and_merge_acs_variable(
            variable_code=var['code'],
            variable_name=var['name'],
            year=year,
            api_key=census_api_key
        )

Loading preprocessed data for 2012...
Fetching total_population_race (B02001_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_total_population_race_2012.csv
Loading preprocessed data for 2012...
Fetching black_population (B02001_003E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_black_population_2012.csv
Loading preprocessed data for 2012...
Fetching white_population (B02001_002E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_white_population_2012.csv
Loading preprocessed data for 2012...
Fetching asian_population (B02001_005E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_asian_population_2012.csv
Loading preprocessed data for 2012...
Fetching native_american_population (B02001_004E) from the Census API for 2012..

In [20]:
# Subject ("S") table features

# NOTE(review): the same subject-table code is requested for every year. The
# column layout of some subject tables appears to have changed between ACS
# releases (e.g. S2701 "C03" may be percent uninsured in earlier years and
# percent insured in later ones) — confirm each code against that year's
# variable list before comparing values across years.
variables = [
    {'code': 'S1701_C03_001E', 'name': 'poverty_rate'},
    {'code': 'S2701_C03_001E', 'name': 'health_insurance_rate'},
    {'code': 'S2301_C04_001E', 'name': 'unemployment_rate'},
    {'code': 'S1810_C03_001E', 'name': 'disability_rate'},
]

# Read the Census API key from the environment instead of committing it to the
# notebook (raises KeyError if CENSUS_API_KEY is not set). Any key that was
# previously hardcoded here should be treated as exposed and rotated.
census_api_key = os.environ['CENSUS_API_KEY']

# One output CSV is written per (year, variable) pair for 2012 through 2019.
for year in range(2012, 2020):
    for var in variables:
        fetch_and_merge_acs_variable_summary(
            variable_code=var['code'],
            variable_name=var['name'],
            year=year,
            api_key=census_api_key
        )

Loading preprocessed data for 2012...
Fetching poverty_rate (S1701_C03_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_poverty_rate_2012.csv
Loading preprocessed data for 2012...
Fetching health_insurance_rate (S2701_C03_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_health_insurance_rate_2012.csv
Loading preprocessed data for 2012...
Fetching unemployment_rate (S2301_C04_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_unemployment_rate_2012.csv
Loading preprocessed data for 2012...
Fetching disability_rate (S1810_C03_001E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_disability_rate_2012.csv
Loading preprocessed data for 2013...
Fetching poverty_rate (S1701_C03_001E) from the Census API for 2013...
Task 

**Now importing age variables. Check https://api.census.gov/data/2019/acs/acs5/subject/groups/S0101.html for more info.**

In [22]:
# Age features (ACS subject table S0101)

# NOTE(review): these codes were picked from the 2019 S0101 listing but are
# requested for 2012-2019. The S0101 column/row layout may differ in earlier
# releases, so the same code might not mean the same thing in every year —
# confirm against each year's variable list.
# NOTE(review): 'age_60_and_over' lacks the '_percentage' suffix used by the other
# two names, and 'age_18_and_under' may really be "under 18" — names are kept
# unchanged because they determine the output file names.
variables = [
    {'code': 'S0101_C02_022E', 'name': 'age_18_and_under_percentage'},
    {'code': 'S0101_C02_024E', 'name': 'age_15_to_44_percentage'},
    {'code': 'S0101_C02_028E', 'name': 'age_60_and_over'},
]

# Read the Census API key from the environment instead of committing it to the
# notebook (raises KeyError if CENSUS_API_KEY is not set). Any key that was
# previously hardcoded here should be treated as exposed and rotated.
census_api_key = os.environ['CENSUS_API_KEY']

# One output CSV is written per (year, variable) pair for 2012 through 2019.
for year in range(2012, 2020):
    for var in variables:
        fetch_and_merge_acs_variable_summary(
            variable_code=var['code'],
            variable_name=var['name'],
            year=year,
            api_key=census_api_key
        )

Loading preprocessed data for 2012...
Fetching age_18_and_under_percentage (S0101_C02_022E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_age_18_and_under_percentage_2012.csv
Loading preprocessed data for 2012...
Fetching age_15_to_44_percentage (S0101_C02_024E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_age_15_to_44_percentage_2012.csv
Loading preprocessed data for 2012...
Fetching age_60_and_over (S0101_C02_028E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_age_60_and_over_2012.csv
Loading preprocessed data for 2013...
Fetching age_18_and_under_percentage (S0101_C02_022E) from the Census API for 2013...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_age_18_and_under_percentage_2013.csv
Loading preprocessed data for 2013...
Fetching age_15_to_44_

**The code below fetches all the variables that are related to educational attainment.**

**Visit https://api.census.gov/data/2019/acs/acs5/subject/groups/S1501.html for more information about the variables.**

In [24]:
# Education features (ACS subject table S1501)

# NOTE(review): these codes were picked from the 2019 S1501 listing but are
# requested for 2012-2019. The S1501 column layout may differ in earlier
# releases, so "C02" might not be the percentage column in every year —
# confirm against each year's variable list.
variables = [
    {'code': 'S1501_C02_015E', 'name': 'bachelors_and_above_percentage'},
    {'code': 'S1501_C02_007E', 'name': 'less_than_9th_grade_percentage'},
    {'code': 'S1501_C02_009E', 'name': 'high_school_only_percentage'},
]

# Read the Census API key from the environment instead of committing it to the
# notebook (raises KeyError if CENSUS_API_KEY is not set). Any key that was
# previously hardcoded here should be treated as exposed and rotated.
census_api_key = os.environ['CENSUS_API_KEY']

# One output CSV is written per (year, variable) pair for 2012 through 2019.
for year in range(2012, 2020):
    for var in variables:
        fetch_and_merge_acs_variable_summary(
            variable_code=var['code'],
            variable_name=var['name'],
            year=year,
            api_key=census_api_key
        )

Loading preprocessed data for 2012...
Fetching bachelors_and_above_percentage (S1501_C02_015E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_bachelors_and_above_percentage_2012.csv
Loading preprocessed data for 2012...
Fetching less_than_9th_grade_percentage (S1501_C02_007E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_less_than_9th_grade_percentage_2012.csv
Loading preprocessed data for 2012...
Fetching high_school_only_percentage (S1501_C02_009E) from the Census API for 2012...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_high_school_only_percentage_2012.csv
Loading preprocessed data for 2013...
Fetching bachelors_and_above_percentage (S1501_C02_015E) from the Census API for 2013...
Task completed. Final dataset saved: ../data/processed/final_dataset/dataset_with_bachelors_and_above_percentage_2013.csv
Loading pr