# Setup

In [3]:
!pip install fsspec



In [17]:
import os
import pathlib

import pandas as pd
import requests

In [5]:
# Read the Census API key from the environment so the secret is never stored
# in the notebook itself (keys are issued at
# https://api.census.gov/data/key_signup.html)
census_api_key = os.environ.get("CENSUS_API_KEY")
if not census_api_key:
    raise RuntimeError(
        "Set the CENSUS_API_KEY environment variable before running this notebook."
    )

In [18]:
# Absolute path of the "data" folder located in the current working directory
DATA_PATH = pathlib.Path(".").resolve().joinpath("data")

# Call API

## Set up API Call

In [6]:
# Pieces of the Census API request URL, defined in the order in which they
# are later concatenated.

# Base endpoint, followed by the survey year as a path segment
host = "https://api.census.gov/data"
year = "/2018"

# Dataset path: the data comes from the 2018 ACS
dataset_acronym = "/acs/acs1/subject/variables"

# Opening of the query string; the variable list is appended right after it
g = "?get="

# Geography filter: every county in state code 09 (Connecticut).
# To request all states instead, use '&for=state:*'
location = "&for=county:*&in=state:09"

# Query parameter carrying the user's API key
usr_key = "&key={}".format(census_api_key)

## Set the variables to extract

In [23]:
# Variable reference: https://api.census.gov/data/2018/acs/acs1/subject/groups/S1901.html
# Each code is the series prefix + a zero-padded three-digit number (001..013) + "E"
series = "S1901_C01_"
all_categories = [f"{series}{number:03d}E" for number in range(1, 14)]

In [24]:
# Collapse the variable codes into the comma-separated list that goes into
# the query string, then display it for a quick visual check
variables = ",".join(all_categories)
variables

'S1901_C01_001E,S1901_C01_002E,S1901_C01_003E,S1901_C01_004E,S1901_C01_005E,S1901_C01_006E,S1901_C01_007E,S1901_C01_008E,S1901_C01_009E,S1901_C01_010E,S1901_C01_011E,S1901_C01_012E,S1901_C01_013E'

In [25]:
# Human-readable names for the S1901 variable codes, used to rename the
# dataframe columns. The income brackets follow the table definition at
# https://api.census.gov/data/2018/acs/acs1/subject/groups/S1901.html
# (brackets are contiguous: ... 25,000-34,999, 35,000-49,999, 50,000-74,999 ...)
var_names = {
    "S1901_C01_001E": "Households - Total",
    "S1901_C01_002E": "Less than 10,000",
    "S1901_C01_003E": "10,000 to 14,999",
    "S1901_C01_004E": "15,000 to 24,999",
    "S1901_C01_005E": "25,000 to 34,999",
    "S1901_C01_006E": "35,000 to 49,999",
    "S1901_C01_007E": "50,000 to 74,999",
    "S1901_C01_008E": "75,000 to 99,999",
    "S1901_C01_009E": "100,000 to 149,999",
    "S1901_C01_010E": "150,000 to 199,999",
    "S1901_C01_011E": "200,000 or more",
    "S1901_C01_012E": "Median",
    "S1901_C01_013E": "Mean",
}

## Create the call

In [26]:
# Assemble the full request URL by concatenating the components in order:
# endpoint, year, dataset, "?get=", variable list, geography, API key
url_parts = [host, year, dataset_acronym, g, variables, location, usr_key]
query_url = "".join(url_parts)

In [27]:
# Check the query url
# NOTE: the displayed URL contains the API key (it is part of usr_key), so
# clear this cell's output before sharing or committing the notebook
query_url

'https://api.census.gov/data/2018/acs/acs1/subject/variables?get=S1901_C01_001E,S1901_C01_002E,S1901_C01_003E,S1901_C01_004E,S1901_C01_005E,S1901_C01_006E,S1901_C01_007E,S1901_C01_008E,S1901_C01_009E,S1901_C01_010E,S1901_C01_011E,S1901_C01_012E,S1901_C01_013E&for=county:*&in=state:09&key=529389300296df4951e09f06b7628da596dd8eeb'

In [28]:
# Use requests package to call out to the API.
# A timeout keeps the cell from hanging indefinitely if the server does not
# answer, and raise_for_status() stops the notebook on an HTTP error instead
# of passing an error body on to the dataframe conversion.
response = requests.get(query_url, timeout=30)
response.raise_for_status()

In [13]:
# Display the response object to confirm the HTTP status of the request
response

<Response [200]>

In [14]:
# Inspect the raw response body as text before parsing it
response.text

'[["S1901_C01_001E","S1901_C01_002E","S1901_C01_003E","S1901_C01_004E","S1901_C01_005E","S1901_C01_006E","S1901_C01_007E","S1901_C01_008E","S1901_C01_009E","S1901_C01_010E","S1901_C01_011E","state","county"],\n["335539","5.8","5.0","7.6","8.1","11.3","16.4","12.7","15.4","8.4","9.3","09","009"],\n["348049","5.7","4.1","8.0","7.7","10.2","15.1","12.4","17.3","8.8","10.7","09","003"],\n["73598","3.4","2.9","7.6","9.7","9.2","16.8","13.0","20.8","8.3","8.4","09","005"],\n["44571","6.8","3.6","8.7","5.8","11.2","21.6","14.4","15.8","7.1","4.9","09","015"],\n["108098","4.5","4.4","8.1","6.2","10.9","20.0","13.0","17.1","8.9","7.0","09","011"],\n["66983","3.5","2.3","6.2","6.9","9.3","14.7","12.6","20.6","11.6","12.2","09","007"],\n["55619","4.3","3.7","5.5","4.9","7.7","12.5","15.0","24.0","11.2","11.2","09","013"],\n["345634","5.4","3.0","6.0","6.1","9.0","13.3","10.5","15.2","10.6","20.8","09","001"]]'

In [29]:
# Convert the JSON response into a dataframe.
# The body is a list of rows whose first row holds the column names, so the
# header is split off and passed as `columns` directly. Parsing with
# response.json() avoids handing a literal JSON string to pd.read_json, which
# is deprecated in recent pandas releases, and removes the manual
# "promote first row / drop it / reset index" steps.
header, *records = response.json()
df = pd.DataFrame(records, columns=header)

# Replace the variable codes with readable names; columns without an entry in
# var_names (e.g. "state", "county") keep the names returned by the API
df = df.rename(columns=var_names)

In [30]:
# Save the table as CSV. The output directory is created first so the write
# does not fail when the "data" folder is not there yet.
DATA_PATH.mkdir(parents=True, exist_ok=True)
df.to_csv(DATA_PATH / "S1901_CT.csv")