
### Prerequisites for the module

* install the `census` module before getting started. To do this, run the following command from the command line: 
    * **`pip install census`**
    
* install the `us` module (it provides the `states` lookup imported below) before getting started. To do this, run the following command from the command line: 
   * **`pip install us`**

* update the config file (located in the `src_files` folder) with your Census API key


### Documentation
* [Documentation for the subject tables in the ACS 1-year surveys](https://api.census.gov/data/2018/acs/acs1/subject/)
* [Census API Docs](https://www.census.gov/data/developers/data-sets.html)

##  import dependencies 





In [3]:
import pandas as pd
from census import Census #<-- Python wrapper for census API
from us import states
import requests
import os
from pprint import pprint


## create session with Census API

* import api key value 
* Create a session for Census api

In [4]:

# Census API key, read from the local config module
from config import api_key

# provide the api key to establish a session with the Census API wrapper
c = Census(api_key)

# Allow up to 300 characters to print in each column.
# The fully qualified option name is used on purpose: pandas resolves
# abbreviated keys by pattern matching, which can become ambiguous (and raise)
# if a future pandas release adds another option containing "max_colwidth".
pd.set_option('display.max_colwidth', 300)

## retrieve the household income statistics for Texas from Census API


In [5]:
 
# Columns requested from the income subject table (S1901).
# The available variables are listed here :- https://api.census.gov/data/2018/acs/acs1/subject/variables.json
# 'NAME' is followed by the sequentially numbered estimates S1901_C01_001E .. S1901_C01_013E.
incomeAPIColumnName = ['NAME'] + [
    f'S1901_C01_{variableNumber:03d}E' for variableNumber in range(1, 14)
]

# Output table column names, in the positional order used when renaming the
# numeric DataFrame columns later on.
tableColumnName = [
    'CityName',
    'HouseholdsEstimateTotal',
    'HouseholdsEstimate Less than $10,000',
    'HouseholdsEstimate $10,000 to $14,999',
    'HouseholdsEstimate $15,000 to $24,999',
    'HouseholdsEstimate $25,000 to $34,999',
    'HouseholdsEstimate $35,000 to $49,999',
    'HouseholdsEstimate $50,000 to $74,999',
    'HouseholdsEstimate $75,000 to $99,999',
    'HouseholdsEstimate $100,000 to $149,999',
    'HouseholdsEstimate $150,000 to $199,999',
    'HouseholdsEstimate $200,000 or more',
    'HouseholdsEstimateMedian income (dollars)',
    'HouseholdsEstimateMean income (dollars)',
    'State',
    'Geo_ID',
]

# Survey years to retrieve (2012 through 2019, kept as strings for the URL).
years = [str(surveyYear) for surveyYear in range(2012, 2020)]

# Empty frame that accumulates the API responses.
incomeCensusDF = pd.DataFrame()

#*********** build queryParameter *************

# Comma-separated variable list for the "get=" part of the request URL.
queryParameter = ",".join(incomeAPIColumnName)

#*********** complete build queryParameter *************



#*************** build geography parameters *************

# The geography parameter for the state is (48) for Texas. The dataset for the states can be viewed here :- https://api.census.gov/data/2016/acs/acs5/profile?get=NAME&for=state:*
# Cities are queried by place; place:* indicates that all the cities of the state should be retrieved.
# The link for viewing the places :- https://api.census.gov/data/2016/acs/acs5/profile?get=NAME&for=place:*&in=state:48

geoUSParameter = '&for=us:*'
geoTexasCityParameter = '&for=place:*&in=state:48'
friendswoodZipCode = '&for=zip code tabulation area:77546'
#*************** complete build geography parameters *******************


# ************** iterate over the years and retrieve the information from the ACS1 Income Survey API *****************

# Upper bound (seconds) for a single API call so a stalled connection cannot
# hang the whole notebook.
requestTimeoutSeconds = 60

# Failures that mean "this request produced no usable data": network/HTTP
# errors raised by requests, and ValueError for a body that is not the
# expected JSON list.
fetchErrors = (requests.exceptions.RequestException, ValueError)


def _fetchCensusRows(url):
    """Call a Census API endpoint and return its data rows as a DataFrame.

    The payload is expected to be a JSON list whose first element is the
    header row; that element is dropped and the remaining rows are loaded
    with positional (0, 1, 2, ...) column labels.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts or a non-success HTTP status.
        ValueError: if the body is not valid JSON or is not a JSON list.
    """
    response = requests.get(url, timeout=requestTimeoutSeconds)
    response.raise_for_status()
    payload = response.json()
    if not isinstance(payload, list):
        raise ValueError(f"unexpected response payload of type {type(payload).__name__}")
    return pd.DataFrame(payload[1:])


# one combined frame per year; merged into incomeCensusDF once after the loop
# instead of growing the main frame on every iteration
yearlyFrames = []

for year in years:

    # Parts collected for this year only, so a failed request can never leak
    # rows from a previous year into the current one.
    framesForYear = []

    try:
        # build url to pull national averages
        usaUrl = f"https://api.census.gov/data/{year}/acs/acs1/subject?get={queryParameter}{geoUSParameter}"
        usDF = _fetchCensusRows(usaUrl)

        # assign default code for usa for future merge
        usDF[15] = '0001'
        framesForYear.append(usDF)

    except fetchErrors as ex:
        print(f"Error While parsing the US data for the year {year}: {ex}")

    try:
        # build url to pull city values in texas.
        placeUrl = f"https://api.census.gov/data/{year}/acs/acs1/subject?get={queryParameter}{geoTexasCityParameter}"
        framesForYear.append(_fetchCensusRows(placeUrl))

    except fetchErrors as ex:
        print(f"Error While parsing the regional data for the year {year}: {ex}")

    # nothing could be retrieved for this year: skip it
    if not framesForYear:
        continue

    # national row first, then the city rows (whichever were retrieved)
    yearDF = pd.concat(framesForYear)

    # update year
    yearDF['Year'] = year

    yearlyFrames.append(yearDF)

# add the per-year frames to the main DataFrame; the (initially empty) main
# frame is only included when it already holds rows
framesToMerge = [] if incomeCensusDF.empty else [incomeCensusDF]
framesToMerge.extend(yearlyFrames)
if framesToMerge:
    incomeCensusDF = pd.concat(framesToMerge)

        
        
        
#*************** completed retrieving data from the income subject table in the census api *******************


# rename the columns and reorder the column values

# Map each positional column label (0, 1, 2, ...) to the descriptive name at
# the same position in tableColumnName.
positionToName = dict(enumerate(tableColumnName))
incomeCensusDF = incomeCensusDF.rename(columns=positionToName)

# Final column order: identifiers first, then the income estimates, then State.
newTableColumnName = [
    'Geo_ID',
    'CityName',
    'Year',
    'HouseholdsEstimateTotal',
    'HouseholdsEstimate Less than $10,000',
    'HouseholdsEstimate $10,000 to $14,999',
    'HouseholdsEstimate $15,000 to $24,999',
    'HouseholdsEstimate $25,000 to $34,999',
    'HouseholdsEstimate $35,000 to $49,999',
    'HouseholdsEstimate $50,000 to $74,999',
    'HouseholdsEstimate $75,000 to $99,999',
    'HouseholdsEstimate $100,000 to $149,999',
    'HouseholdsEstimate $150,000 to $199,999',
    'HouseholdsEstimate $200,000 or more',
    'HouseholdsEstimateMedian income (dollars)',
    'HouseholdsEstimateMean income (dollars)',
    'State',
]

incomeCensusDF = incomeCensusDF.reindex(columns=newTableColumnName)

# update the column values for state from 48 to Texas
# NOTE(review): this overwrites State on every row, including the national
# rows tagged with Geo_ID '0001' during retrieval - confirm that is intended.
incomeCensusDF['State'] = 'Texas'

incomeCensusDF.head()

## save the output to csv file    
    

In [4]:
# Location of the output file: ../Resources/incomeCensus.csv, relative to the
# current working directory.
output_dir = os.path.join("..", "Resources")
output_path = os.path.join(output_dir, "incomeCensus.csv")

# write the combined income table without the DataFrame index column
incomeCensusDF.to_csv(output_path, index=False)
 