# CENSUS - ACS 5-year data

In [128]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests

In [129]:
# Census API key, read from the environment so the secret is never stored in the
# notebook. When the variable is unset the value is None; requests leaves
# None-valued query parameters out of the URL, so calls are then sent without a key.
# NOTE(review): the key that used to be written here literally should be treated
# as exposed and rotated.
CENSUS_API_KEY = os.environ.get("CENSUS_API_KEY")

# Pieces of the endpoint URL: <base>/<year>/<dataset>.
CENSUS_URL = "https://api.census.gov/data"
CENSUS_YEAR = "2015"  # "2019"
CENSUS_DATASET = "acs/acs5"
CENSUS_GEOGRAPHY = "county:*"  # every county
URL = f"{CENSUS_URL}/{CENSUS_YEAR}/{CENSUS_DATASET}"

Example request (replace `api_key` with your own Census API key): `https://api.census.gov/data/2023/acs/acs5?get=NAME,B01001_001E&for=county:*&key=api_key`

## One request

In [130]:
# Comma-separated "get" list for the single test request: the geography name
# plus one estimate code.
CENSUS_VARIABLES = ",".join(["NAME", "B16001_002E"])

In [131]:
# Query parameters for the single request.
params = {
    "get": CENSUS_VARIABLES,   # geography name plus the requested estimate code
    "for": CENSUS_GEOGRAPHY,   # which geographies to return
    "key": CENSUS_API_KEY,     # API key
}

# Send the GET request; the timeout keeps a stalled connection from hanging the kernel.
response = requests.get(URL, params=params, timeout=60)

# Stop here on any non-200 reply: continuing would leave `df` undefined (or
# holding a value from an earlier run) for the cells below.
if response.status_code != 200:
    raise RuntimeError(f"Error en la petición: {response.status_code}\n{response.text}")

# The payload is a list of rows whose first row holds the column names.
data = response.json()
df = pd.DataFrame(data[1:], columns=data[0])

In [132]:
# Show the frame built from the single request (rich display of the cell's last expression).
df

Unnamed: 0,NAME,B16001_002E,state,county
0,"Childress County, Texas",5845,48,075
1,"Comal County, Texas",92947,48,091
2,"Houston County, Texas",20111,48,225
3,"Navarro County, Texas",34838,48,349
4,"Scurry County, Texas",11606,48,415
...,...,...,...,...
3215,"Charlotte County, Virginia",11492,51,037
3216,"Burleson County, Texas",13939,48,051
3217,"Greensville County, Virginia",10633,51,081
3218,"Thurston County, Washington",218908,53,067


## Create Data

In [133]:
# Catalogue of output columns and the ACS estimate codes that feed each one.
# Code list: https://api.census.gov/data/2023/acs/acs5/variables.html
# A column backed by several codes is meant to be the sum of those codes.
def _code_range(table, first, last):
    """Return the estimate codes ``<table>_<NNN>E`` for NNN from first to last inclusive."""
    return [f"{table}_{number:03d}E" for number in range(first, last + 1)]


# Column names without the year suffix, in the order they are requested.
_acs_codes_by_name = {
    # POPULATION
    "pop_total": ["B01001_001E"],
    "pop_total_male": ["B01001_002E"],
    "pop_total_female": ["B01001_026E"],
    "pop_18_39_male": _code_range("B01001", 7, 13),
    "pop_18_39_female": _code_range("B01001", 31, 37),
    "pop_40_64_male": _code_range("B01001", 14, 19),
    "pop_40_64_female": _code_range("B01001", 38, 43),
    "pop_over_65_male": _code_range("B01001", 20, 25),
    "pop_over_65_female": _code_range("B01001", 44, 49),
    "median_age": ["B01002_001E"],
    "veterans": ["B21001_002E"],
    "inmigrants": ["B05002_013E"],
    "labor_force": ["B23025_003E"],

    # RACE
    "white": ["B02001_002E"],
    "black": ["B02001_003E"],
    "native": ["B02001_004E"],
    "asian": ["B02001_005E"],
    "pacific": ["B02001_006E"],
    "other": ["B02001_007E"],
    "hispanic": ["B03001_003E"],
    "two_more_races": ["B02001_008E"],

    # EDUCATION
    "high_school": ["B15003_017E"],
    "bachelors": ["B15003_022E"],

    # INCOME
    "median_income": ["B19013_001E"],
    "poverty": ["B17001_002E"],

    # HOUSING
    "households_median_value": ["B25077_001E"],
    "households_avg_size": ["B25010_001E"],
    "households_renter": ["B25003_003E"],
    "households_owner": ["B25003_002E"],
    "households_total": ["B25003_001E"],
    "households_median_gross_rent": ["B25064_001E"],
    # Disabled entries:
    # "households_limited_english": ["C16002_004E", "C16002_007E", "C16002_010E", "C16002_013E"],
    # "households_no_internet": ["B28002_013E"],

    # EMPLOYMENT
    "unemployment": ["B23025_005E"],
    "public_transport": ["B08301_010E"],
    # NOTE(review): B08303_001E looks like the table total (a count of workers),
    # not a mean: the saved output shows 23675 for a county of 55221 people.
    # A mean would presumably need aggregate minutes (B08013_001E) divided by
    # this count; confirm against the ACS variable list before using this column.
    "mean_travel_time": ["B08303_001E"],

    # HEALTH
    "no_health_insurance": ["B27010_017E", "B27010_033E", "B27010_050E", "B27010_066E"],
}

# Final mapping used by the download loop: every column name carries the survey year.
variables = {
    f"{name}_{CENSUS_YEAR}": codes for name, codes in _acs_codes_by_name.items()
}

In [134]:
# Placeholder codes the ACS API can return in an estimate field when no estimate
# is available; they must not be summed or averaged as if they were real values.
ACS_MISSING_SENTINELS = [
    -999999999, -888888888, -666666666, -555555555, -333333333, -222222222,
]
REQUEST_TIMEOUT_SECONDS = 60


def fetch_acs_variable(key, codes):
    """Download one output column for every county.

    Parameters
    ----------
    key : str
        Name of the output column.
    codes : list of str
        ACS estimate codes; their row-wise sum becomes ``key``.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``state``, ``county`` and ``key``.

    Raises
    ------
    RuntimeError
        If the API answers with anything other than HTTP 200, so a failed
        request cannot silently leave a column out of the final frame.
    """
    params = {
        "get": ",".join(codes),   # only the codes needed for this column
        "for": CENSUS_GEOGRAPHY,  # one row per county
        "key": CENSUS_API_KEY,
    }
    response = requests.get(URL, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        raise RuntimeError(
            f"Error al obtener {key}: {response.status_code}\n{response.text}"
        )

    # The payload is a list of rows whose first row holds the column names.
    data = response.json()
    temp_df = pd.DataFrame(data[1:], columns=data[0])

    # Values arrive as strings; anything unparseable becomes NaN, and the ACS
    # placeholder codes are blanked out as well.
    estimates = temp_df[codes].apply(pd.to_numeric, errors="coerce")
    estimates = estimates.mask(estimates.isin(ACS_MISSING_SENTINELS))

    # Row-wise sum of the codes. min_count makes the result NaN when any
    # component is missing instead of reporting a partial sum as the total;
    # with a single code this simply keeps that column's values.
    temp_df[key] = estimates.sum(axis=1, min_count=len(codes))

    return temp_df[["state", "county", key]]


# Start from an empty frame and add one column per request.
df = pd.DataFrame()

for key, codes in variables.items():
    print(f"Obteniendo datos para: {key}")
    temp_df = fetch_acs_variable(key, codes)

    # The first result seeds the frame; later ones are joined on the county key.
    if df.empty:
        df = temp_df
    else:
        df = df.merge(temp_df, on=["state", "county"], how="outer")

df

Obteniendo datos para: pop_total_2015
Obteniendo datos para: pop_total_male_2015
Obteniendo datos para: pop_total_female_2015
Obteniendo datos para: pop_18_39_male_2015
Obteniendo datos para: pop_18_39_female_2015
Obteniendo datos para: pop_40_64_male_2015
Obteniendo datos para: pop_40_64_female_2015
Obteniendo datos para: pop_over_65_male_2015
Obteniendo datos para: pop_over_65_female_2015
Obteniendo datos para: median_age_2015
Obteniendo datos para: veterans_2015
Obteniendo datos para: inmigrants_2015
Obteniendo datos para: labor_force_2015
Obteniendo datos para: white_2015
Obteniendo datos para: black_2015
Obteniendo datos para: native_2015
Obteniendo datos para: asian_2015
Obteniendo datos para: pacific_2015
Obteniendo datos para: other_2015
Obteniendo datos para: hispanic_2015
Obteniendo datos para: two_more_races_2015
Obteniendo datos para: high_school_2015
Obteniendo datos para: bachelors_2015
Obteniendo datos para: median_income_2015
Obteniendo datos para: poverty_2015
Obtenien

Unnamed: 0,state,county,pop_total_2015,pop_total_male_2015,pop_total_female_2015,pop_18_39_male_2015,pop_18_39_female_2015,pop_40_64_male_2015,pop_40_64_female_2015,pop_over_65_male_2015,...,households_median_value_2015,households_avg_size_2015,households_renter_2015,households_owner_2015,households_total_2015,households_median_gross_rent_2015,unemployment_2015,public_transport_2015,mean_travel_time_2015,no_health_insurance_2015
0,01,001,55221,26745,28476,7695,7955,8801,9248,3214,...,141300.0,2.68,5319,15077,20396,883.0,1984,24,23675,5508
1,01,003,195121,95314,99807,24274,23989,32386,35455,16465,...,169300.0,2.60,21107,52997,74104,879.0,6972,116,81184,24987
2,01,005,26932,14497,12435,5038,3169,4601,4061,1862,...,92200.0,2.61,3358,5864,9222,579.0,1842,35,8329,3732
3,01,007,22604,12073,10531,3894,2789,4193,3554,1454,...,102700.0,2.95,1749,5278,7027,651.0,752,44,8089,2007
4,01,009,57710,28512,29198,7502,7072,9846,10094,4236,...,119800.0,2.74,4393,16423,20816,601.0,1843,86,21450,6659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,72,145,56858,27379,29479,7888,8326,8557,9706,4152,...,116200.0,3.27,2952,14357,17309,549.0,2447,120,13430,3976
3216,72,147,9130,4585,4545,1003,1077,1713,1574,788,...,112700.0,3.29,456,2317,2773,590.0,398,27,2769,741
3217,72,149,24685,12086,12599,3730,3675,3692,4108,1474,...,95800.0,3.06,1913,6090,8003,329.0,2374,7,6270,812
3218,72,151,36279,17648,18631,4864,5170,5919,6419,2596,...,91600.0,2.97,3484,8687,12171,419.0,2592,0,7420,1705


### Convert to percentage

In [135]:
# Convert counts to percentages.
# Each entry maps a count column (numerator) to the column it is divided by
# (denominator); names are written without the year suffix. Insertion order is
# the order in which the rate columns are appended to the frame.
# Note that most shares, including education, poverty, public transport and
# health insurance, are taken relative to the total population.
_rate_denominator_by_count = {
    # POPULATION
    "pop_total_male": "pop_total",
    "pop_total_female": "pop_total",
    "pop_18_39_male": "pop_total",
    "pop_18_39_female": "pop_total",
    "pop_40_64_male": "pop_total",
    "pop_40_64_female": "pop_total",
    "pop_over_65_male": "pop_total",
    "pop_over_65_female": "pop_total",
    "veterans": "pop_total",
    "inmigrants": "pop_total",

    # RACE
    "white": "pop_total",
    "black": "pop_total",
    "native": "pop_total",
    "asian": "pop_total",
    "pacific": "pop_total",
    "other": "pop_total",
    "hispanic": "pop_total",
    "two_more_races": "pop_total",

    # EDUCATION
    "high_school": "pop_total",
    "bachelors": "pop_total",

    # INCOME
    "poverty": "pop_total",

    # HOUSING
    "households_renter": "households_total",
    "households_owner": "households_total",
    # Disabled entries:
    # "households_limited_english": "households_total",
    # "households_no_internet": "households_total",

    # EMPLOYMENT
    "unemployment": "labor_force",
    "public_transport": "pop_total",

    # HEALTH
    "no_health_insurance": "pop_total",
}

for _count_name, _base_name in _rate_denominator_by_count.items():
    _share = df[f"{_count_name}_{CENSUS_YEAR}"] / df[f"{_base_name}_{CENSUS_YEAR}"]
    df[f"{_count_name}_rate_{CENSUS_YEAR}"] = _share * 100

In [136]:
# Round every column whose name ends in "_rate_<year>" to two decimals.
_rate_suffix = f"_rate_{CENSUS_YEAR}"
_rate_columns = [name for name in df.columns if name.endswith(_rate_suffix)]
df[_rate_columns] = df[_rate_columns].round(2)

### Drop the count columns that have been converted to percentages

In [137]:
# Count columns that now have a rate counterpart, plus labor_force, which is
# only used as a denominator. Names are written without the year suffix.
_converted_counts = [
    # POPULATION
    "pop_total_male",
    "pop_total_female",
    "pop_18_39_male",
    "pop_18_39_female",
    "pop_40_64_male",
    "pop_40_64_female",
    "pop_over_65_male",
    "pop_over_65_female",
    "veterans",
    "inmigrants",
    "labor_force",
    # RACE
    "white",
    "black",
    "native",
    "asian",
    "pacific",
    "other",
    "hispanic",
    "two_more_races",
    # EDUCATION
    "high_school",
    "bachelors",
    # INCOME
    "poverty",
    # HOUSING
    "households_renter",
    "households_owner",
    # Disabled entries: "households_limited_english", "households_no_internet"
    # EMPLOYMENT
    "unemployment",
    "public_transport",
    # HEALTH
    "no_health_insurance",
]
columns_to_remove = [f"{name}_{CENSUS_YEAR}" for name in _converted_counts]

# Remove the original count columns; names missing from the frame are skipped.
df.drop(columns=columns_to_remove, inplace=True, errors="ignore")

In [138]:
# Inspect the frame after the converted count columns were dropped.
df

Unnamed: 0,state,county,pop_total_2015,median_age_2015,median_income_2015,households_median_value_2015,households_avg_size_2015,households_total_2015,households_median_gross_rent_2015,mean_travel_time_2015,...,hispanic_rate_2015,two_more_races_rate_2015,high_school_rate_2015,bachelors_rate_2015,poverty_rate_2015,households_renter_rate_2015,households_owner_rate_2015,unemployment_rate_2015,public_transport_rate_2015,no_health_insurance_rate_2015
0,01,001,55221,37.7,51281.0,141300.0,2.68,20396,883.0,23675,...,2.61,1.57,17.86,8.87,12.78,26.08,73.92,7.64,0.04,9.97
1,01,003,195121,42.2,50254.0,169300.0,2.60,74104,879.0,81184,...,4.50,1.74,16.53,13.52,13.22,28.48,71.52,7.50,0.06,12.81
2,01,005,26932,38.8,32964.0,92200.0,2.61,9222,579.0,8329,...,4.61,1.33,18.29,5.24,23.84,36.41,63.59,17.65,0.13,13.86
3,01,007,22604,38.9,38678.0,102700.0,2.95,7027,651.0,8089,...,2.22,1.37,23.59,4.19,15.43,24.89,75.11,8.31,0.19,8.88
4,01,009,57710,40.7,45813.0,119800.0,2.74,20816,601.0,21450,...,8.63,1.70,18.75,5.88,16.55,21.10,78.90,7.67,0.15,11.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,72,145,56858,38.5,16948.0,116200.0,3.27,17309,549.0,13430,...,96.39,1.39,6.87,10.37,48.12,17.05,82.95,15.19,0.21,6.99
3216,72,147,9130,42.8,18104.0,112700.0,3.29,2773,590.0,2769,...,96.70,1.12,21.85,7.79,39.09,16.44,83.56,12.22,0.30,8.12
3217,72,149,24685,35.9,17818.0,95800.0,3.06,8003,329.0,6270,...,99.68,27.83,20.87,8.76,53.57,23.90,76.10,25.89,0.03,3.29
3218,72,151,36279,39.6,15627.0,91600.0,2.97,12171,419.0,7420,...,99.78,2.44,16.01,7.86,52.09,28.63,71.37,24.28,0.00,4.70


### Create county FIPS

In [139]:
# County FIPS code = 2-digit state code + 3-digit county code. Both parts are
# left-padded with zeros so the result keeps its width even if a column was
# parsed as a number and lost its leading zeros.
df["county_fips"] = df["state"].astype(str).str.zfill(2) + df["county"].astype(str).str.zfill(3)

### Save to CSV

In [140]:
# Create the output folder if needed; to_csv does not create missing directories.
os.makedirs("data", exist_ok=True)

# Write one file per survey year, without the row index.
# NOTE(review): state, county and county_fips are zero-padded strings here;
# read them back as strings (e.g. dtype=str) or the leading zeros are lost.
df.to_csv(f"data/census_data_acs_{CENSUS_YEAR}.csv", index=False)
df

Unnamed: 0,state,county,pop_total_2015,median_age_2015,median_income_2015,households_median_value_2015,households_avg_size_2015,households_total_2015,households_median_gross_rent_2015,mean_travel_time_2015,...,two_more_races_rate_2015,high_school_rate_2015,bachelors_rate_2015,poverty_rate_2015,households_renter_rate_2015,households_owner_rate_2015,unemployment_rate_2015,public_transport_rate_2015,no_health_insurance_rate_2015,county_fips
0,01,001,55221,37.7,51281.0,141300.0,2.68,20396,883.0,23675,...,1.57,17.86,8.87,12.78,26.08,73.92,7.64,0.04,9.97,01001
1,01,003,195121,42.2,50254.0,169300.0,2.60,74104,879.0,81184,...,1.74,16.53,13.52,13.22,28.48,71.52,7.50,0.06,12.81,01003
2,01,005,26932,38.8,32964.0,92200.0,2.61,9222,579.0,8329,...,1.33,18.29,5.24,23.84,36.41,63.59,17.65,0.13,13.86,01005
3,01,007,22604,38.9,38678.0,102700.0,2.95,7027,651.0,8089,...,1.37,23.59,4.19,15.43,24.89,75.11,8.31,0.19,8.88,01007
4,01,009,57710,40.7,45813.0,119800.0,2.74,20816,601.0,21450,...,1.70,18.75,5.88,16.55,21.10,78.90,7.67,0.15,11.54,01009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,72,145,56858,38.5,16948.0,116200.0,3.27,17309,549.0,13430,...,1.39,6.87,10.37,48.12,17.05,82.95,15.19,0.21,6.99,72145
3216,72,147,9130,42.8,18104.0,112700.0,3.29,2773,590.0,2769,...,1.12,21.85,7.79,39.09,16.44,83.56,12.22,0.30,8.12,72147
3217,72,149,24685,35.9,17818.0,95800.0,3.06,8003,329.0,6270,...,27.83,20.87,8.76,53.57,23.90,76.10,25.89,0.03,3.29,72149
3218,72,151,36279,39.6,15627.0,91600.0,2.97,12171,419.0,7420,...,2.44,16.01,7.86,52.09,28.63,71.37,24.28,0.00,4.70,72151
