# Setup

In [3]:
!pip install fsspec



In [1]:
import requests
import pandas as pd
import pathlib

In [5]:
census_api_key = '529389300296df4951e09f06b7628da596dd8eeb'

In [2]:
DATA_PATH = pathlib.Path().resolve() / "data"

# Call API

## Set up API Call

In [6]:
# Set Census API URL and year
host = 'https://api.census.gov/data'
year = '/2018'

# We are using data from the 2018 ACS
dataset_acronym = '/acs/acs1/subject/variables'

# Set the action to get
g = '?get='

# Set the state to get
# location = '&for=state:*'
# Try with codes - setting for all counties in connecticut
location = '&for=county:*&in=state:09'

# Set the user key
usr_key = f"&key={census_api_key}"

## Set the variables to extract

In [23]:
# See here https://api.census.gov/data/2018/acs/acs1/subject/groups/S1901.html
series = "S1901_C01_"
all_categories = [series+f'{x:03}'+"E" for x in range(1,14)]

In [24]:
variables = ','.join(all_categories)
variables

'S1901_C01_001E,S1901_C01_002E,S1901_C01_003E,S1901_C01_004E,S1901_C01_005E,S1901_C01_006E,S1901_C01_007E,S1901_C01_008E,S1901_C01_009E,S1901_C01_010E,S1901_C01_011E,S1901_C01_012E,S1901_C01_013E'

In [25]:
# Let's also name them using a dictionary
var_names = {
    "S1901_C01_001E" : "Households - Total",
    "S1901_C01_002E" : "Less than 10,000",
    "S1901_C01_003E" : "10,000 to 14,999",
    "S1901_C01_004E" : "15,000 to 24,999",
    "S1901_C01_005E" : "25,000 to 34,999",
    "S1901_C01_006E" : "34,000 to 49,999",
    "S1901_C01_007E" : "50,000 to 75,999",
    "S1901_C01_008E" : "75,000 to 99,999", 
    "S1901_C01_009E" : "100,000 to 149,999",
    "S1901_C01_010E" : "150,000 to 199,999",
    "S1901_C01_011E" : "200,000 or more",
    "S1901_C01_012E" : "Median",
    "S1901_C01_013E" : "Mean"
}

## Create the call

In [26]:
#Put it all together in one f-string:
query_url = f"{host}{year}{dataset_acronym}{g}{variables}{location}{usr_key}"

In [27]:
# Check the query url
query_url

'https://api.census.gov/data/2018/acs/acs1/subject/variables?get=S1901_C01_001E,S1901_C01_002E,S1901_C01_003E,S1901_C01_004E,S1901_C01_005E,S1901_C01_006E,S1901_C01_007E,S1901_C01_008E,S1901_C01_009E,S1901_C01_010E,S1901_C01_011E,S1901_C01_012E,S1901_C01_013E&for=county:*&in=state:09&key=529389300296df4951e09f06b7628da596dd8eeb'

In [28]:
# Use requests package to call out to the API
response = requests.get(query_url)

In [13]:
response

<Response [200]>

In [29]:
# Convert JSON response into dataframe
df = pd.read_json(response.text)

# Set the first row as the column names
df. columns = df.iloc[0] 

# Now get rid of the first row, that is now duplicated
df = df[1:]

# Now reset the index
df = df.reset_index(drop=True)

# Set the column names
df.rename(columns=var_names, inplace=True)

In [30]:
# Save
df.to_csv(DATA_PATH / "S1901_CT.csv")

# Direct import of income by ZIP

In [7]:
income_by_zip = pd.read_csv(DATA_PATH /"income_by_zip"/ "ACSST5Y2018.S1901-Data.csv", header = [0,1])

In [35]:
income_by_zip_small = income_by_zip[["GEO_ID", "NAME", "S1901_C01_012E"]]
income_by_zip_small["ZIP"] = income_by_zip_small["NAME"]["Geographic Area Name"].str[-6:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  income_by_zip_small["ZIP"] = income_by_zip_small["NAME"]["Geographic Area Name"].str[-6:]


In [37]:
income_by_zip_small.to_csv(DATA_PATH / "S1901_CT_income_by_zip.csv")

In [10]:
list(income_by_zip.columns)

[('GEO_ID', 'Geography'),
 ('NAME', 'Geographic Area Name'),
 ('S1901_C01_001E', 'Estimate!!Households!!Total'),
 ('S1901_C01_001EA', 'Annotation of Estimate!!Households!!Total'),
 ('S1901_C01_001M', 'Margin of Error!!Households MOE!!Total'),
 ('S1901_C01_001MA', 'Annotation of Margin of Error!!Households MOE!!Total'),
 ('S1901_C01_002E', 'Estimate!!Households!!Total!!Less than $10,000'),
 ('S1901_C01_002EA',
  'Annotation of Estimate!!Households!!Total!!Less than $10,000'),
 ('S1901_C01_002M',
  'Margin of Error!!Households MOE!!Total!!Less than $10,000'),
 ('S1901_C01_002MA',
  'Annotation of Margin of Error!!Households MOE!!Total!!Less than $10,000'),
 ('S1901_C01_003E', 'Estimate!!Households!!Total!!$10,000 to $14,999'),
 ('S1901_C01_003M',
  'Margin of Error!!Households MOE!!Total!!$10,000 to $14,999'),
 ('S1901_C01_003MA',
  'Annotation of Margin of Error!!Households MOE!!Total!!$10,000 to $14,999'),
 ('S1901_C01_003EA',
  'Annotation of Estimate!!Households!!Total!!$10,000 to $1