In [1]:
# Dependencies
import os
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from us import states

In [2]:
# Use this function to retrieve the contents of a file.
# Mainly used for getting API keys from a local file.
def get_file_contents(filename):
    """Return the whitespace-stripped text of ``filename``.

    The file is expected to hold a single line (an API key), but whatever
    it contains is read in full and stripped of leading/trailing whitespace.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    str or None
        The stripped file contents, or ``None`` when the file does not
        exist (a notice is printed in that case).
    """
    try:
        handle = open(filename, 'r')
    except FileNotFoundError:
        # Best effort: report the missing file and hand back None
        print("'%s' file not found" % filename)
        return None

    with handle:
        return handle.read().strip()

In [3]:
# Load the Census API key from a local file instead of hard-coding it here.
# get_file_contents prints a notice and returns None if the file is missing.
census_api_key = get_file_contents('.census_api_key')

In [4]:
# Create a Census API client using the key stored in census_api_key.
# NOTE(review): the retrieval loop below rebinds `c` to a year-specific
# client, so this instance looks unused — confirm before removing.
c = Census(census_api_key)

In [5]:
# Retrieve ACS 5-year census data for each year from 2011 through 2016
# (range() excludes its end value, so 2017 itself is not requested).
# For every year: download the data for all zip code tabulation areas, keep
# the San Francisco zip codes, add a poverty rate and export one CSV per year.
years = list(range(2011, 2017))

# Candidate SF zipcodes, 94102 through 94188, as strings.
# Built once up front because the list is the same for every year.
sf_zipcodes = [str(x) for x in range(94102, 94189)]

# Make sure the export folder exists before spending time on the API calls
os.makedirs('data', exist_ok=True)

for each in years:
    print('--------%s--------' % each)

    # Client bound to the year being processed
    c = Census(census_api_key, year=each)

    # Get data using API
    print('Retrieving Data: %s' % datetime.now())
    census_data = c.acs5.get(('B01003_001E', 'B19301_001E', 'B19013_001E', 'B17001_002E'),
                             {'for': 'zip code tabulation area:*'})

    print('Making DataFrame: %s' % datetime.now())
    # Create and clean dataframe
    census_df = pd.DataFrame(census_data)
    census_df = census_df.rename(columns={'zip code tabulation area': 'zipcode',
                                          'B19301_001E': 'per_capita_income',
                                          'B19013_001E': 'household_income',
                                          'B01003_001E': 'population',
                                          'B17001_002E': 'poverty_count'})

    # Reformat zipcode to string *before* filtering, so the membership test
    # below always compares strings with strings
    census_df['zipcode'] = census_df['zipcode'].astype(str)

    # Keep only the rows with SF zipcodes; reset_index returns a new frame
    # with a fresh 0..n-1 index instead of mutating the filtered one in place
    census_df = census_df[census_df['zipcode'].isin(sf_zipcodes)].reset_index(drop=True)

    # Add in Poverty Rate (Poverty Count / Population), as a percentage.
    # errors='coerce' turns missing or non-numeric values into NaN, so a
    # single bad cell gives a NaN rate instead of aborting the whole run.
    poverty_count = pd.to_numeric(census_df['poverty_count'], errors='coerce')
    population = pd.to_numeric(census_df['population'], errors='coerce')
    census_df['poverty_rate'] = 100 * poverty_count / population

    print('Exporting: %s' % datetime.now())
    # Export file for use
    census_df.to_csv('data/census-%s.csv' % each, index=False)

--------2011--------
Retrieving Data: 2018-01-17 18:48:07.410747
Making DataFrame: 2018-01-17 18:48:11.971226
Exporting: 2018-01-17 18:48:12.059433
--------2012--------
Retrieving Data: 2018-01-17 18:48:12.061439
Making DataFrame: 2018-01-17 18:48:28.978471
Exporting: 2018-01-17 18:48:29.053667
--------2013--------
Retrieving Data: 2018-01-17 18:48:29.054667
Making DataFrame: 2018-01-17 18:48:33.824359
Exporting: 2018-01-17 18:48:33.901558
--------2014--------
Retrieving Data: 2018-01-17 18:48:33.908542
Making DataFrame: 2018-01-17 18:48:49.088338
Exporting: 2018-01-17 18:48:49.164540
--------2015--------
Retrieving Data: 2018-01-17 18:48:49.165550
Making DataFrame: 2018-01-17 18:49:07.473203
Exporting: 2018-01-17 18:49:07.549443
--------2016--------
Retrieving Data: 2018-01-17 18:49:07.552414
Making DataFrame: 2018-01-17 18:49:22.093086
Exporting: 2018-01-17 18:49:22.170326


In [6]:
# Display the frame left over from the retrieval loop. The loop reassigns
# census_df on every iteration, so this holds only the last year processed.
census_df

NameError: name 'census_dfq' is not defined