In [1]:
from MyCreds.mycreds import USCensusAPI
import requests
import pandas as pd

In [27]:
# API Reference: https://www.census.gov/data/developers/guidance/api-user-guide.Example_API_Queries.html
host = 'https://api.census.gov/data'
year = '/2019'
# Data Dictionary: https://api.census.gov/data.html
dataset_acronym = '/acs/acs1'
g = '?get='
# Variables for the ACS: https://api.census.gov/data/2005/acs/acs1/variables.html
variables = 'NAME,B01001_001E'
location = '&for=us:*'
usr_key = f"&key={USCensusAPI.api_key}"

query_url = f"{host}{year}{dataset_acronym}{g}{variables}{location}{usr_key}"

In [28]:
response = requests.get(query_url)

In [29]:
print(response.text)

[["NAME","B01001_001E","us"],
["United States","328239523","1"]]


B01001_001E is Estimated Total: Sex by Age without delineation.  In other words, this query is basically just returning 328,239,523 which is the total estimated US population in 2019.

Rather than going through and copying all the variable names from the reference table, I'm going to try and make things easier on myself and see if I can't just read that table in with pandas and extract the variable names.

In [31]:
variable_table_url = 'https://api.census.gov/data/2019/acs/acs1/variables.html'
v_table = pd.read_table(variable_table_url, skiprows=59)
v_table

  if (await self.run_code(code, result,  async_=asy)):


Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,text-align: right;
0,,,,,,,padding-right: 1em;
1,,,,,,,width: 3em;
2,,,,,,,//font-size: smaller;
3,,,,,,},
4,.tagquery tr td { vertical...,,,,,,
...,...,...,...,...,...,...,...
178279,</tr>,,,,,,
178280,</tbody>,,,,,,
178281,</table>,,,,,,
178282,</body>,,,,,,


Well, line 59 threw an error so I skipped it but the results aren't good. Because I'm too tired and lazy right now to figure out how to make that work properly, I'm going to give read_html a shot really quick.

In [32]:
variable_table_url = 'https://api.census.gov/data/2019/acs/acs1/variables.html'
v_table = pd.read_html(variable_table_url)
v_table

[                  Name                                       Label  \
 0               AIANHH                                   Geography   
 1                 ANRC                                   Geography   
 2          B01001_001E                            Estimate!!Total:   
 3          B01001_002E                     Estimate!!Total:!!Male:   
 4          B01001_003E      Estimate!!Total:!!Male:!!Under 5 years   
 ...                ...                                         ...   
 35551            STATE                                   Geography   
 35552         SUMLEVEL                          Summary Level code   
 35553               UA                                   Geography   
 35554            ucgid  Uniform Census Geography Identifier clause   
 35555  35555 variables                             35555 variables   
 
                                   Concept         Required  \
 0                                     NaN     not required   
 1                  

In [33]:
type(v_table)

list

In [37]:
variable_df = pd.DataFrame(v_table[0])
variable_df

Unnamed: 0,Name,Label,Concept,Required,Attributes,Limit,Predicate Type,Group,Unnamed: 8
0,AIANHH,Geography,,not required,,0,(not a predicate),,
1,ANRC,Geography,,not required,,0,(not a predicate),,
2,B01001_001E,Estimate!!Total:,SEX BY AGE,not required,"B01001_001EA, B01001_001M, B01001_001MA",0,int,B01001,
3,B01001_002E,Estimate!!Total:!!Male:,SEX BY AGE,not required,"B01001_002EA, B01001_002M, B01001_002MA",0,int,B01001,
4,B01001_003E,Estimate!!Total:!!Male:!!Under 5 years,SEX BY AGE,not required,"B01001_003EA, B01001_003M, B01001_003MA",0,int,B01001,
...,...,...,...,...,...,...,...,...,...
35551,STATE,Geography,,not required,,0,(not a predicate),,
35552,SUMLEVEL,Summary Level code,,not required,,0,string,,
35553,UA,Geography,,not required,,0,(not a predicate),,
35554,ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification,predicate-only,,0,ucgid,,


That's more like it!  This will make it easier to automate pulling out multiple variables and giving them more appropriate names than 'B01001_001E', for instance.

In [45]:
total_male_by_age_variables = ",".join(variable_df.iloc[4:26]['Name'].values)
total_male_by_age_variables

'B01001_003E,B01001_004E,B01001_005E,B01001_006E,B01001_007E,B01001_008E,B01001_009E,B01001_010E,B01001_011E,B01001_012E,B01001_013E,B01001_014E,B01001_015E,B01001_016E,B01001_017E,B01001_018E,B01001_019E,B01001_020E,B01001_021E,B01001_022E,B01001_023E,B01001_024E'

Ok, that gets me a string representation of all the variable names for the male population by age. I just picked those because they were at the top of the list.  I'm going to insert those into the API query and see what we get here.

In [51]:
# Only thing changing here is the variables which are substituted in under total_male_by_age_variables
m_query_url = f"{host}{year}{dataset_acronym}{g}{total_male_by_age_variables}{location}{usr_key}"

m_response = requests.get(m_query_url)

In [52]:
m_response.text

'[["B01001_003E","B01001_004E","B01001_005E","B01001_006E","B01001_007E","B01001_008E","B01001_009E","B01001_010E","B01001_011E","B01001_012E","B01001_013E","B01001_014E","B01001_015E","B01001_016E","B01001_017E","B01001_018E","B01001_019E","B01001_020E","B01001_021E","B01001_022E","B01001_023E","B01001_024E","us"],\n["9938937","10033518","10987313","6361859","4541794","2318283","2257008","6439169","11817829","11281470","10892040","10028675","10079567","10075795","10440265","4168435","5882735","3538792","4652319","6529918","4367764","2671396","1"]]'