In [None]:
## Now we want to fill in the zipcodes and states that are still missing. Let's see if we can find the locations using OpenStreetMap.
def lookup_missing_us_states_and_zips(df):
    """
    Fill in missing US states and zipcodes by geocoding with OpenStreetMap (Nominatim).

    Takes a dataframe with "iso2c", "us.state" and "us.zipcode" columns. Every row
    whose "iso2c" is "US" and whose "us.state" is null is looked up, using the row's
    index label as the search term (one geocoder request per such row).

    When the geocoder returns an address, the comma-separated part just before the
    country is stored in "us.zipcode" if it starts with five digits, and the part
    before that is translated through ``state_to_abbrev`` and stored in "us.state"
    if it is a known state name.

    The dataframe is modified in place and also returned. A short summary of what
    was found, and of what is still missing, is printed.
    """
    import re
    from geopy.geocoders import Nominatim

    geolocator = Nominatim(user_agent="openstreetmap")
    zip_pattern = re.compile(r"[0-9]{5}")

    is_us = df["iso2c"] == "US"
    # Snapshot the labels to look up before df is modified in the loop.
    # Note that the zoos with a missing US state are also those with a missing zipcode.
    zoos_missing_state = df.index[is_us & df["us.state"].isnull()]

    zipcodes_added = 0
    us_states_added = 0
    for zoo in zoos_missing_state:
        location = geolocator.geocode(zoo)
        if not location:
            continue

        # other_info holds every part of the address before the last comma;
        # the part after the last comma is the country.
        *other_info, _country = location.address.split(', ')

        # Guard the indexing: a very short address has no zipcode/state parts.
        if other_info and zip_pattern.match(other_info[-1]):
            zipcodes_added += 1
            # Keep only the 5-digit prefix (drops any trailing "-1234" style suffix).
            df.loc[zoo, "us.zipcode"] = other_info[-1][:5]

        if len(other_info) >= 2:
            state = state_to_abbrev.get(other_info[-2])
            if state:
                us_states_added += 1
                df.loc[zoo, "us.state"] = state

    print(us_states_added, "US states found.")
    print(zipcodes_added, "zipcodes found.")

    # Report on the dataframe that was passed in, not on a notebook-level global.
    print("There are", len(df[(df["us.state"].isnull()) & is_us]), "zoos that still have missing US state values.")
    print("There are", len(df[(df["us.zipcode"].isnull()) & is_us]), "US zoos that still have missing zipcodes.")

    return df


#singledf = lookup_missing_us_states_and_zips(singledf)

In [None]:
def get_zipcode_level_census_info(zipcodes, indicator, indicator_common_name, censuskey):

    """Generates a table indexed by zipcode for a given census variable. The set of indicators
    are available at https://api.census.gov/data/2018/acs/acs5/profile/variables.html

    Inputs are the set of zipcodes, the indicator name, and the common name for the variable which will become the column name.
    Also requires a census API key as an input

    Each zipcode is converted to a string. 4-character values get a leading "0" restored,
    values of 3 characters or fewer are reported and skipped, and values longer than
    5 characters are skipped silently. One API request is made per remaining zipcode.

    Returns a dataframe indexed by "zip code tabulation area" with the indicator column
    renamed to the common name. If no zipcode produced any data, an empty dataframe with
    that index name and column is returned.
    """

    index_name = "zip code tabulation area"
    c = Census(censuskey)

    # Collect the per-zipcode results and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the table on every call.
    frames = []
    for zipcode in zipcodes:
        zipcode = str(zipcode)
        if len(zipcode) <= 3:
            print("A zipcode input is 3 characters or less. Function not built to accomodate.")
            continue
        if len(zipcode) == 4:
            # Presumably a zipcode that lost its leading zero when stored as a number.
            zipcode = "0" + zipcode
        if len(zipcode) != 5:
            continue

        query_result = c.acs5.zipcode(indicator, zipcode)
        if query_result:
            frames.append(pd.DataFrame(query_result))

    if not frames:
        # Nothing to concatenate; return an empty table of the expected shape
        # instead of failing on the missing index column.
        return pd.DataFrame(columns=[indicator_common_name],
                            index=pd.Index([], name=index_name))

    zip_info = pd.concat(frames)
    zip_info = zip_info.rename(columns={indicator: indicator_common_name})
    zip_info = zip_info.set_index(index_name)

    return zip_info

In [None]:
def get_state_level_census_info(indicator, indicator_common_name, censuskey):

    """Generates a state level table indexed by state abbreviation for a given census variable. The set of indicators
    are available at https://api.census.gov/data/2018/acs/acs5/profile/variables.html

    Inputs are the indicator name and the common name for the variable which will become the column name.
    Also requires a census API key as an input

    One API request is made for every entry in ``states.STATES``. The returned dataframe
    is indexed by "State", holding the abbreviation that ``state_to_abbrev`` gives for the
    name the API returned, with the indicator column renamed to the common name.
    """

    c = Census(censuskey)

    # Build one frame per state and concatenate once:
    # DataFrame.append was removed in pandas 2.0 and re-copied the table on every call.
    frames = [
        pd.DataFrame(c.acs5.get(('NAME', indicator),
                                {'for': 'state:{}'.format(state.fips)}))
        for state in states.STATES
    ]
    state_info = pd.concat(frames)

    # Drop the FIPS code column from the response; the state is identified by name instead.
    state_info = state_info.drop("state", axis=1)
    state_info = state_info.rename(columns={"NAME": "State"})
    state_info = state_info.rename(columns={indicator: indicator_common_name})

    # Translate full state names to abbreviations before using them as the index.
    state_info["State"] = state_info["State"].map(state_to_abbrev)
    state_info = state_info.set_index("State")

    return state_info