In [1]:
import requests
import csv
import os 
import pandas as pd
import re

In [2]:
from uscensus_functions import uscensus_importcsv

from uscensus_functions import uscensus_modify

In [None]:
# Appears to be a template for the per-topic cells below: copy it, replace the
# `name_` prefix with a topic name, and fill in real Census variable codes.
name_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S_": "Population",  # looks like a placeholder code; replace before running
}

# Year of the data. Aligned with the "2023" in the output file name and with
# every other topic in this notebook (the previous value "2000" disagreed with
# the file name).
name_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
name_output_file_name = f"name_county_{name_year}.csv"

# uscensus_importcsv returns two values, kept here as the output file path and
# the list of specific variables.
name_output_file_path, name_specific_variables = uscensus_importcsv(
    name_dictionary,
    name_year,
    name_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(name_output_file_path, name_specific_variables)

In [3]:
# Folder the downloaded files are meant to be saved in.
# NOTE(review): the recorded outputs in this notebook show files being saved
# under "datatest" -- confirm whether the imported helpers read this value.
data_folder = "data_uscensus"

# Text file in which the US Census API key is stored
api_file = "apikey_uscensus.txt"

# Year of the data
year = "2023"

# Table-code prefix -> suffix of the API endpoint for that data series.
# The recorded requests for S tables below go to ".../acs/acs5/subject".
# Table codes: https://censusreporter.org/topics/table-codes/
# Check the API link of the specific table when adding a new series.
series_dictionary = dict(B="", S="/subject", DP="/profile")

## Demographics

Link: 
- https://data.census.gov/table/ACSDP5Y2023.DP05?q=county&y=2023

Variables: 
- median_age:
    - Median Age across the total population.
    - "DP05_0018E","Estimate!!SEX AND AGE!!Total population!!Median age (years)"
- sex_ratio
    - Number of males per 100 females.
    - "DP05_0004E","Estimate!!SEX AND AGE!!Total population!!Sex ratio (males per 100 females)"

In [None]:
# Census variable code -> column name used in the output file
demographics_dictionary = {
    "DP05_0018E": "median_age",  # median age (years) of the total population
    "DP05_0004E": "sex_ratio"    # males per 100 females
}

# Year of the data requested
demographics_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
demographics_output_file_name = f"demographics_county_{demographics_year}.csv"

In [None]:
# Import the demographics variables; the helper returns two values, kept here
# as the output file path and the list of specific variables.
(
    demographics_output_file_path,
    demographics_specific_variables,
) = uscensus_importcsv(
    demographics_dictionary,
    demographics_year,
    demographics_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    demographics_output_file_path,
    demographics_specific_variables,
)

In [None]:
# Read the processed file back from the path returned by uscensus_importcsv
# instead of a hard-coded "./datatest/..." location, so this cell keeps working
# if the output folder or file name changes.
# "FIPS Code" is read as a string (presumably to keep leading zeros -- confirm).
demographics_df = pd.read_csv(demographics_output_file_path, dtype={"FIPS Code": str})

demographics_df.head()

## Education

Link: 
- https://data.census.gov/table/ACSST5Y2023.S1501?q=county+education&y=2023 

Variables: 
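- highschool_or_higher_pct:
    - Percentage of the Population (aged 25 years or older) that is a high school graduate or higher.
    - highschool_or_higher_pct = pop25_highschool_or_higher / pop25
    - "S1501_C01_014E","Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!High school graduate or higher"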
- bachelors_or_higher_pct:
    - Percentage of the Population (aged 25 years or older) that has a bachelor's degree or higher.
    - bachelors_or_higher_pct = pop25_bachelor_or_higher / pop25
    - "S1501_C01_015E","Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Bachelor's degree or higher"
    - "S1501_C01_006E","Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over"

Variables:
- pop25 


In [None]:
# NOTE(review): this cell repeats the Demographics configuration defined
# earlier in the notebook; it looks like a copy-paste leftover under the
# Education heading.
# Census variable code -> column name used in the output file
demographics_dictionary = {
    "DP05_0018E": "median_age",  # median age (years) of the total population
    "DP05_0004E": "sex_ratio"    # males per 100 females
}

# Year of the data requested
demographics_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
demographics_output_file_name = f"demographics_county_{demographics_year}.csv"

In [None]:
# NOTE(review): repeats the demographics import that already appears earlier
# in the notebook.
# The helper returns two values, kept here as the output file path and the
# list of specific variables.
(
    demographics_output_file_path,
    demographics_specific_variables,
) = uscensus_importcsv(
    demographics_dictionary,
    demographics_year,
    demographics_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    demographics_output_file_path,
    demographics_specific_variables,
)

In [None]:
# Read the processed file back from the path returned by uscensus_importcsv
# instead of a hard-coded "./datatest/..." location, so this cell keeps working
# if the output folder or file name changes.
# "FIPS Code" is read as a string (presumably to keep leading zeros -- confirm).
demographics_df = pd.read_csv(demographics_output_file_path, dtype={"FIPS Code": str})

demographics_df.head()

In [None]:
# Census variable code -> column name used in the output file
education_dictionary = {
    "S1501_C01_006E": "pop25",
    "S1501_C01_014E": "pop25_highschool_or_higher",
    "S1501_C01_015E": "pop25_bachelor_or_higher"
}

# Year of the data requested. The import cell that follows passes
# `education_year`, which was never defined, so a fresh-kernel run raised
# NameError; it is defined here like the year of every other topic.
education_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
education_output_file_name = f"education_county_{education_year}.csv"

In [None]:
# Import the education variables; the helper returns two values, kept here as
# the output file path and the list of specific variables.
(
    education_output_file_path,
    education_specific_variables,
) = uscensus_importcsv(
    education_dictionary,
    education_year,
    education_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    education_output_file_path,
    education_specific_variables,
)

In [None]:
# Census variable code -> column name used in the output file
internetaccess_dictionary = {
    # "USCensusVariable": "Desired Name"
    "B28011_001E": "Total Households",
    "B28011_002E": "Households!With an Internet Subscription"
}

# Year of the data requested
internetaccess_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
internetaccess_output_file_name = f"internetaccess_county_{internetaccess_year}.csv"

In [None]:
# Import the internet-access variables; the helper returns two values, kept
# here as the output file path and the list of specific variables.
(
    internetaccess_output_file_path,
    internetaccess_specific_variables,
) = uscensus_importcsv(
    internetaccess_dictionary,
    internetaccess_year,
    internetaccess_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    internetaccess_output_file_path,
    internetaccess_specific_variables,
)

In [None]:
# Census variable code -> column name used in the output file
householdsize_dictionary = {
    # "USCensusVariable": "Desired Name"
    "DP02_0001E": "Total Households",
    "DP02_0016E": "Households!Average Household Size"
}

# Year of the data requested
householdsize_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
householdsize_output_file_name = f"householdsize_county_{householdsize_year}.csv"

In [None]:
# Import the household-size variables; the helper returns two values, kept
# here as the output file path and the list of specific variables.
(
    householdsize_output_file_path,
    householdsize_specific_variables,
) = uscensus_importcsv(
    householdsize_dictionary,
    householdsize_year,
    householdsize_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    householdsize_output_file_path,
    householdsize_specific_variables,
)

In [None]:
# Census variable code -> column name used in the output file
meanincome_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S1901_C01_001E": "Total Households",
    "S1901_C01_013E": "Households!Mean Income"
}

# Year of the data requested
meanincome_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
meanincome_output_file_name = f"meanincome_county_{meanincome_year}.csv"

In [None]:
# Import the mean-income variables; the helper returns two values, kept here
# as the output file path and the list of specific variables.
(
    meanincome_output_file_path,
    meanincome_specific_variables,
) = uscensus_importcsv(
    meanincome_dictionary,
    meanincome_year,
    meanincome_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    meanincome_output_file_path,
    meanincome_specific_variables,
)

In [None]:
# Census variable code -> column name used in the output file
industrycomposition_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S2405_C01_001E": "Total 16+ Employed",
    "S2405_C01_002E": "16+ Employed!Agriculture, forestry, fishing and hunting, and mining",
    "S2405_C01_003E": "16+ Employed!Construction",
    "S2405_C01_004E": "16+ Employed!Manufacturing",
    "S2405_C01_005E": "16+ Employed!Wholesale Trade",
    "S2405_C01_006E": "16+ Employed!Retail Trade",
    "S2405_C01_007E": "16+ Employed!Transportation and warehousing, and utilities",
    "S2405_C01_008E": "16+ Employed!Information",
    "S2405_C01_009E": "16+ Employed!Finance and insurance, and real estate and rental and leasing",
    "S2405_C01_010E": "16+ Employed!Professional, scientific, and management, and administrative and waste management services",
    "S2405_C01_011E": "16+ Employed!Educational services, and health care and social assistance",
    "S2405_C01_012E": "16+ Employed!Arts, entertainment, and recreation, and accommodation and food services",
    "S2405_C01_013E": "16+ Employed!Other services, except public administration",
    "S2405_C01_014E": "16+ Employed!Public administration"
}

# Year of the data requested
industrycomposition_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
industrycomposition_output_file_name = f"industrycomposition_county_{industrycomposition_year}.csv"

In [None]:
# Import the industry-composition variables; the helper returns two values,
# kept here as the output file path and the list of specific variables.
(
    industrycomposition_output_file_path,
    industrycomposition_specific_variables,
) = uscensus_importcsv(
    industrycomposition_dictionary,
    industrycomposition_year,
    industrycomposition_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    industrycomposition_output_file_path,
    industrycomposition_specific_variables,
)

In [None]:
# Census variable code -> column name used in the output file
fertilityrate_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S1301_C01_001E": "Total Women 15 to 50 years",
    "S1301_C02_001E": "Women 15 to 50 years!Women with births in the last 12 months",
}

# Year of the data requested
fertilityrate_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
fertilityrate_output_file_name = f"fertilityrate_county_{fertilityrate_year}.csv"

In [None]:
# Import the fertility-rate variables; the helper returns two values, kept
# here as the output file path and the list of specific variables.
(
    fertilityrate_output_file_path,
    fertilityrate_specific_variables,
) = uscensus_importcsv(
    fertilityrate_dictionary,
    fertilityrate_year,
    fertilityrate_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    fertilityrate_output_file_path,
    fertilityrate_specific_variables,
)

### Total Population (compare to existing)
- Median Age
- Sex Ratio
- https://data.census.gov/table/ACSDP5Y2023.DP05?q=county&y=2023
- "DP05_0001E","Estimate!!SEX AND AGE!!Total population"
- "DP05_0018E","Estimate!!SEX AND AGE!!Total population!!Median age (years)"
- "DP05_0004E","Estimate!!SEX AND AGE!!Total population!!Sex ratio (males per 100 females)"


### Limited English Speaking Households Percentage 
- https://data.census.gov/table/ACSST5Y2023.S1602?q=county&y=2023
- "S1602_C04_001E","Estimate!!Percent limited English-speaking households!!All households"

In [5]:
# Census variable code -> column name used in the output file
limitedenglish_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S1602_C04_001E": "Households!LimitedEnglishSpeaking!Percent"
}

# Year of the data requested
limitedenglish_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
limitedenglish_output_file_name = f"limitedenglish_county_{limitedenglish_year}.csv"

In [8]:
# Import the limited-English variable; the helper returns two values, kept
# here as the output file path and the list of specific variables.
(
    limitedenglish_output_file_path,
    limitedenglish_specific_variables,
) = uscensus_importcsv(
    limitedenglish_dictionary,
    limitedenglish_year,
    limitedenglish_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    limitedenglish_output_file_path,
    limitedenglish_specific_variables,
)

NAME,S1602_C04_001E
All variables belong to S series, API link will be adjusted accordingly.
https://api.census.gov/data/2023/acs/acs5/subject?get=NAME,S1602_C04_001E&for=county:*&key=<REDACTED>
['NAME', 'S1602_C04_001E', 'state', 'county']
['State', 'County', 'Name', 'Households!LimitedEnglishSpeaking!Percent']
Imported Variables:['Households!LimitedEnglishSpeaking!Percent']
Data saved to datatest\limitedenglish_county_2023.csv
datatest\limitedenglish_county_2023.csv has been modified and saved


### Median Household Income
- https://data.census.gov/table/ACSST5Y2023.S1903?q=county&y=2023
- "S1903_C03_001E","Estimate!!Median income (dollars)!!HOUSEHOLD INCOME BY RACE AND HISPANIC OR LATINO ORIGIN OF HOUSEHOLDER!!Households"

In [9]:
# Census variable code -> column name used in the output file
medianincome_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S1903_C03_001E": "Households!MedianIncome"
}

# Year of the data requested
medianincome_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
medianincome_output_file_name = f"medianincome_county_{medianincome_year}.csv"

In [10]:
# Import the median-income variable; the helper returns two values, kept here
# as the output file path and the list of specific variables.
(
    medianincome_output_file_path,
    medianincome_specific_variables,
) = uscensus_importcsv(
    medianincome_dictionary,
    medianincome_year,
    medianincome_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    medianincome_output_file_path,
    medianincome_specific_variables,
)

NAME,S1903_C03_001E
All variables belong to S series, API link will be adjusted accordingly.
https://api.census.gov/data/2023/acs/acs5/subject?get=NAME,S1903_C03_001E&for=county:*&key=<REDACTED>
['NAME', 'S1903_C03_001E', 'state', 'county']
['State', 'County', 'Name', 'Households!MedianIncome']
Imported Variables:['Households!MedianIncome']
Data saved to datatest\medianincome_county_2023.csv
datatest\medianincome_county_2023.csv has been modified and saved


### Labor Force Participation Rate
- https://data.census.gov/table/ACSST5Y2023.S2301?q=county&y=2023
- "S2301_C02_001E","Estimate!!Labor Force Participation Rate!!Population 16 years and over"


In [11]:
# Census variable code -> column name used in the output file
laborforceparticipation_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S2301_C02_001E": "LaborForceParticipation Rate"
}

# Year of the data requested
laborforceparticipation_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
laborforceparticipation_output_file_name = f"laborforceparticipation_county_{laborforceparticipation_year}.csv"

In [12]:
# Import the labor-force-participation variable; the helper returns two
# values, kept here as the output file path and the list of specific variables.
(
    laborforceparticipation_output_file_path,
    laborforceparticipation_specific_variables,
) = uscensus_importcsv(
    laborforceparticipation_dictionary,
    laborforceparticipation_year,
    laborforceparticipation_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    laborforceparticipation_output_file_path,
    laborforceparticipation_specific_variables,
)

NAME,S2301_C02_001E
All variables belong to S series, API link will be adjusted accordingly.
https://api.census.gov/data/2023/acs/acs5/subject?get=NAME,S2301_C02_001E&for=county:*&key=<REDACTED>
['NAME', 'S2301_C02_001E', 'state', 'county']
['State', 'County', 'Name', 'LaborForceParticipation Rate']
Imported Variables:['LaborForceParticipation Rate']
Data saved to datatest\laborforceparticipation_county_2023.csv
datatest\laborforceparticipation_county_2023.csv has been modified and saved


### Monthly Housing Cost
- https://data.census.gov/table/ACSST5Y2023.S2503?q=county&y=2023
- "S2503_C01_024E","Estimate!!Occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS!!Median (dollars)"
- "S2503_C03_024E","Estimate!!Owner-occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS!!Median (dollars)"
- "S2503_C05_024E","Estimate!!Renter-occupied housing units!!Occupied housing units!!MONTHLY HOUSING COSTS!!Median (dollars)"

In [15]:
# Census variable code -> column name used in the output file
housingcost_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S2503_C01_024E": "Median MonthlyHousingCost",
    "S2503_C03_024E": "OwnerOccupied!Median MonthlyHousingCost",
    "S2503_C05_024E": "RenterOccupied!Median MonthlyHousingCost"
}

# Year of the data requested
housingcost_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
housingcost_output_file_name = f"housingcost_county_{housingcost_year}.csv"

In [16]:
# Import the housing-cost variables; the helper returns two values, kept here
# as the output file path and the list of specific variables.
(
    housingcost_output_file_path,
    housingcost_specific_variables,
) = uscensus_importcsv(
    housingcost_dictionary,
    housingcost_year,
    housingcost_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    housingcost_output_file_path,
    housingcost_specific_variables,
)

NAME,S2503_C01_024E,S2503_C03_024E,S2503_C05_024E
All variables belong to S series, API link will be adjusted accordingly.
https://api.census.gov/data/2023/acs/acs5/subject?get=NAME,S2503_C01_024E,S2503_C03_024E,S2503_C05_024E&for=county:*&key=<REDACTED>
['NAME', 'S2503_C01_024E', 'S2503_C03_024E', 'S2503_C05_024E', 'state', 'county']
['State', 'County', 'Name', 'Median MonthlyHousingCost', 'OwnerOccupied!Median MonthlyHousingCost', 'RenterOccupied!Median MonthlyHousingCost']
Imported Variables:['Median MonthlyHousingCost', 'OwnerOccupied!Median MonthlyHousingCost', 'RenterOccupied!Median MonthlyHousingCost']
Data saved to datatest\housingcost_county_2023.csv
datatest\housingcost_county_2023.csv has been modified and saved


### Percentage of Households that rent or own their home:
- https://data.census.gov/table/ACSST5Y2023.S1101?q=county&y=2023
- "S1101_C01_001E","Estimate!!Total!!HOUSEHOLDS!!Total households"
- "S1101_C01_018E","Estimate!!Total!!Total households!!HOUSING TENURE!!Owner-occupied housing units"
- "S1101_C01_019E","Estimate!!Total!!Total households!!HOUSING TENURE!!Renter-occupied housing units"

In [19]:
# Census variable code -> column name used in the output file
housingtype_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S1101_C01_018E": "Households!OwnerOccupiedUnits!Percent",
    "S1101_C01_019E": "Households!RenterOccupiedUnits!Percent"
}

# Year of the data requested
housingtype_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
housingtype_output_file_name = f"housingtype_county_{housingtype_year}.csv"

In [20]:
# Import the housing-tenure variables; the helper returns two values, kept
# here as the output file path and the list of specific variables.
(
    housingtype_output_file_path,
    housingtype_specific_variables,
) = uscensus_importcsv(
    housingtype_dictionary,
    housingtype_year,
    housingtype_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    housingtype_output_file_path,
    housingtype_specific_variables,
)

NAME,S1101_C01_018E,S1101_C01_019E
All variables belong to S series, API link will be adjusted accordingly.
https://api.census.gov/data/2023/acs/acs5/subject?get=NAME,S1101_C01_018E,S1101_C01_019E&for=county:*&key=<REDACTED>
Existing file found, removing...
['NAME', 'S1101_C01_018E', 'S1101_C01_019E', 'state', 'county']
['State', 'County', 'Name', 'Households!OwnerOccupiedUnits!Percent', 'Households!RenterOccupiedUnits!Percent']
Imported Variables:['Households!OwnerOccupiedUnits!Percent', 'Households!RenterOccupiedUnits!Percent']
Data saved to datatest\housingtype_county_2023.csv
datatest\housingtype_county_2023.csv has been modified and saved


### Health Insurance Percentage
- https://data.census.gov/table/ACSST5Y2023.S2701?q=county&y=2023
- "S2701_C03_001E","Estimate!!Percent Insured!!Civilian noninstitutionalized population"

In [21]:
# Census variable code -> column name used in the output file
healthinsurance_dictionary = {
    # "USCensusVariable": "Desired Name"
    "S2701_C03_001E": "PopulationNonInstitutionalized!Insured!Percent"
}

# Year of the data requested
healthinsurance_year = "2023"

# CSV file name, built from the year so the two cannot drift apart
healthinsurance_output_file_name = f"healthinsurance_county_{healthinsurance_year}.csv"

In [22]:
# Import the health-insurance variable; the helper returns two values, kept
# here as the output file path and the list of specific variables.
(
    healthinsurance_output_file_path,
    healthinsurance_specific_variables,
) = uscensus_importcsv(
    healthinsurance_dictionary,
    healthinsurance_year,
    healthinsurance_output_file_name,
)

# Post-process the downloaded file using that path and variable list
uscensus_modify(
    healthinsurance_output_file_path,
    healthinsurance_specific_variables,
)

NAME,S2701_C03_001E
All variables belong to S series, API link will be adjusted accordingly.
https://api.census.gov/data/2023/acs/acs5/subject?get=NAME,S2701_C03_001E&for=county:*&key=<REDACTED>
['NAME', 'S2701_C03_001E', 'state', 'county']
['State', 'County', 'Name', 'PopulationNonInstitutionalized!Insured!Percent']
Imported Variables:['PopulationNonInstitutionalized!Insured!Percent']
Data saved to datatest\healthinsurance_county_2023.csv
datatest\healthinsurance_county_2023.csv has been modified and saved
