In [2]:
import requests
import pandas as pd
import os

### helpful links
### https://api.census.gov/data/2019/acs/acs5/variables.html
### https://api.census.gov/data/2019/acs/acs5/examples.html

In [3]:
# Directory the notebook is running from (used later when building the CSV paths)
pwd = os.getcwd()

# Read the Census API key from a text file in the working directory and
# drop surrounding whitespace / the trailing newline
with open('census_api_key.txt') as key_file:
    api_key = key_file.read().strip()

In [41]:
# Base URL of the ACS 2019 5-year dataset that the tables below are pulled from.
# (Alternative kept for reference: https://api.census.gov/data/2010/dec/sf1)
ac_url = 'https://api.census.gov/data/2019/acs/acs5'

# State index numbers we are interested in: 41 through 56, without 43 and 52
state_ids = [
    41, 42, 44, 45, 46, 47, 48,
    49, 50, 51, 53, 54, 55, 56,
]

# Race table B02001: variable ID -> output column name.
# The labels are listed in variable order, so the n-th label (counting from 1)
# belongs to variable B02001_00nE.
_race_labels = (
    'Total',
    'Total White alone',
    'Total Black or African American alone',
    'Total American Indian and Alaska native alone',
    'Total Asian alone',
    'Total Native Hawaiian and Other Pacific Islander alone',
    'Total Some Other Race Alone',
    'Total Two or More Races',
)

# `columns` is bound to the same dict object as `race_colm`
race_colm = columns = {
    f'B02001_{position:03d}E': label
    for position, label in enumerate(_race_labels, start=1)
}

# Sex-by-age table B01001: variable ID -> output column name.
# Variable order is: overall total, male total, the male age bands,
# female total, then the same age bands for females.
_age_bands = (
    '<5_years',
    '5_to_9_years',
    '10_to_14_years',
    '15_to_17_years',
    '18_and_19_years',
    '20_years',
    '21_years',
    '22_to_24_years',
    '25_to_29_years',
    '30_to_34_years',
    '35_to_39_years',
    '40_to_44_years',
    '45_to_49_years',
    '50_to_54_years',
    '55_to_59_years',
    '60_and_61_years',
    '62_to_64_years',
    '65_and_66_years',
    '67_to_69_years',
    '70_to_74_years',
    '75_to_79_years',
    '80_to_84_years',
    '85_years_and_over',
)

_s_age_labels = (
    ['Total', 'M total']
    + [f'm_{band}' for band in _age_bands]
    + ['F total']
    + [f'f_{band}' for band in _age_bands]
)

# `columns` is bound to the same dict object as `s_age_colm`
s_age_colm = columns = {
    f'B01001_{position:03d}E': label
    for position, label in enumerate(_s_age_labels, start=1)
}

# Household income table B19001: variable ID -> output column name.
# Labels are listed in variable order (B19001_001E first).
_h_inc_labels = [
    'Total',
    'Less than $10,000',
    '$10,000 to $14,999',
    '$15,000 to $19,999',
    '$20,000 to $24,999',
    '$25,000 to $29,999',
    '$30,000 to $34,999',
    '$35,000 to $39,999',
    '$40,000 to $44,999',
    '$45,000 to $49,999',
    '$50,000 to $59,999',
    '$60,000 to $74,999',
    '$75,000 to $99,999',
    '$100,000 to $124,999',
    '$125,000 to $149,999',
    '$150,000 to $199,999',
    '$200,000 or more',
]
_h_inc_ids = [f'B19001_{number:03d}E' for number in range(1, len(_h_inc_labels) + 1)]

# `columns` is bound to the same dict object as `h_inc_colm`
h_inc_colm = columns = dict(zip(_h_inc_ids, _h_inc_labels))


#dictionary of sex by education group ID's and column names
#every variable ID must appear exactly once: a repeated key in a dict literal
#silently replaces the earlier entry, which drops that variable from the request
#`columns` is bound to the same dict object as `s_edu_colm`
s_edu_colm = columns={'B15002_001E' : 'Estimate Total',
                   'B15002_002E' : 'Estimate Total Male',
                   'B15002_003E' : 'Estimate Total Male, No schooling completed',
                   'B15002_004E' : 'Estimate Total Male, Nursery to 4th grade',
                   'B15002_005E' : 'Estimate Total Male, 5th and 6th grade',
                   'B15002_006E' : 'Estimate Total Male, 7th and 8th grade',
                   'B15002_007E' : 'Estimate Total Male, 9th grade',
                   'B15002_008E' : 'Estimate Total Male, 10th grade',
                   'B15002_009E' : 'Estimate Total Male, 11th grade',
                   'B15002_010E' : 'Estimate Total Male, 12th grade no diploma',
                   'B15002_011E' : 'Estimate Total Male, Highschool graduate (includes equivalency)',
                   'B15002_012E' : 'Estimate Total Male, Some college, less than 1 year',
                   'B15002_013E' : 'Estimate Total Male, Some college, 1 or more years no degree',
                   'B15002_014E' : 'Estimate Total Male, Associates degree',
                   'B15002_015E' : 'Estimate Total Male, Bachelors degree',
                   'B15002_016E' : 'Estimate Total Male, Masters degree',
                   'B15002_017E' : 'Estimate Total Male, Professional school degree',
                   'B15002_018E' : 'Estimate Total Male, Doctorate degree',
                   'B15002_019E' : 'Estimate Total Female',
                   'B15002_020E' : 'Estimate Total Female, No schooling completed',
                   'B15002_021E' : 'Estimate Total Female, Nursery to 4th grade',
                   'B15002_022E' : 'Estimate Total Female, 5th and 6th grade',
                   #was a second 'B15002_022E', which overwrote the 5th/6th grade entry
                   'B15002_023E' : 'Estimate Total Female, 7th and 8th grade',
                   'B15002_024E' : 'Estimate Total Female, 9th grade',
                   'B15002_025E' : 'Estimate Total Female, 10th grade',
                   'B15002_026E' : 'Estimate Total Female, 11th grade',
                   'B15002_027E' : 'Estimate Total Female, 12th grade no diploma',
                   'B15002_028E' : 'Estimate Total Female, Highschool graduate (includes equivalency)',
                   'B15002_029E' : 'Estimate Total Female, Some college, less than 1 year',
                   'B15002_030E' : 'Estimate Total Female, Some college, 1 or more years no degree',
                   'B15002_031E' : 'Estimate Total Female, Associates degree',
                   'B15002_032E' : 'Estimate Total Female, Bachelors degree',
                   'B15002_033E' : 'Estimate Total Female, Masters degree',
                   'B15002_034E' : 'Estimate Total Female, Professional school degree',
                   'B15002_035E' : 'Estimate Total Female, Doctorate degree'}

# Educational attainment table B15003: variable ID -> output column name.
# Labels are listed in variable order (B15003_001E first).
_edu_labels = [
    'Estimate Total',
    'No schooling completed',
    'Nursery School',
    'Kindergarten',
    '1st Grade',
    '2nd Grade',
    '3rd Grade',
    '4th Grade',
    '5th Grade',
    '6th Grade',
    '7th Grade',
    '8th Grade',
    '9th grade',
    '10th grade',
    '11th grade',
    '12th grade no diploma',
    'Regular high school diploma',
    'GED or alternative credential',
    'Some college, less than 1 year',
    'Some college, 1 or more years no degree',
    'Associates degree',
    'Bachelors degree',
    'Masters degree',
    'Professional school degree',
    'Doctorate degree',
]
_edu_ids = [f'B15003_{number:03d}E' for number in range(1, len(_edu_labels) + 1)]

# `columns` is bound to the same dict object as `edu_colm`
edu_colm = columns = dict(zip(_edu_ids, _edu_labels))

def census_scraper(base_url, api_key, state_ids, census_colm, timeout=60):
    """Download block-group level estimates for several states as one DataFrame.

    One request is sent per state, asking for every block group in every
    tract of every county of that state. Each response is turned into a
    DataFrame (first JSON row = header, remaining rows = data), the variable-ID
    columns are renamed with ``census_colm``, and the per-state frames are
    concatenated.

    Parameters
    ----------
    base_url : str
        Dataset endpoint, e.g. 'https://api.census.gov/data/2019/acs/acs5'.
    api_key : str
        Census API key, appended to every request URL.
    state_ids : iterable of int or str
        State codes to query. Each code is zero-padded to two digits, so
        ``6`` and ``'06'`` both produce ``state:06``.
    census_colm : dict
        Maps variable ID (requested column) -> column name used in the result.
    timeout : float, optional
        Seconds handed to ``requests.get`` so a stalled connection cannot
        hang the notebook forever. Defaults to 60.

    Returns
    -------
    pandas.DataFrame
        All states stacked in the order given. Columns found in
        ``census_colm`` carry their mapped names; any other column in the
        response keeps the name the API gave it. No type conversion is applied
        to the cell values. The row index is not reset, so it restarts for
        each state. If ``state_ids`` is empty, an empty frame with the mapped
        column names is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status. NOTE: the error message
        contains the request URL, which includes the API key.
    """
    # Comma-separated list of the variable IDs to request
    variables = ','.join(census_colm)

    frames = []

    for state in state_ids:
        # Pad to two digits so single-digit codes become e.g. '06'
        state_code = str(state).zfill(2)
        data_url = (
            f'{base_url}?get={variables}&for=block%20group:*'
            f'&in=state:{state_code}&in=county:*&in=tract:*&key={api_key}'
        )

        response = requests.get(data_url, timeout=timeout)
        # Fail here with a clear HTTP error instead of a JSON decoding error below
        response.raise_for_status()
        data = response.json()

        # First row holds the column names, the rest are the records
        state_df = pd.DataFrame(data[1:], columns=data[0]).rename(columns=census_colm)
        frames.append(state_df)

    if not frames:
        # pd.concat raises on an empty list; return an empty table instead
        return pd.DataFrame(columns=list(census_colm.values()))

    return pd.concat(frames)

In [42]:
# Download each table and save it to the working directory as a CSV.
# os.path.join builds the path with the right separator for the current OS
# (a hard-coded '\\' only produces a file inside `pwd` on Windows).
# All files are written without the row index.

#create race csv
race_df = census_scraper(ac_url, api_key, state_ids, race_colm)
race_df.to_csv(os.path.join(pwd, 'race_raw.csv'), index=False)

#create household income csv
h_income_df = census_scraper(ac_url, api_key, state_ids, h_inc_colm)
h_income_df.to_csv(os.path.join(pwd, 'h_inc_raw.csv'), index=False)

#create sex by age csv
s_age_df = census_scraper(ac_url, api_key, state_ids, s_age_colm)
s_age_df.to_csv(os.path.join(pwd, 's_age_raw.csv'), index=False)

#create sex by education csv
s_edu_df = census_scraper(ac_url, api_key, state_ids, s_edu_colm)
s_edu_df.to_csv(os.path.join(pwd, 's_edu_raw.csv'), index=False)

#create education csv
edu_df = census_scraper(ac_url, api_key, state_ids, edu_colm)
# index=False added to match the other exports (the index is not a data column)
edu_df.to_csv(os.path.join(pwd, 'edu_raw.csv'), index=False)