In [1]:
# !pip install geocoder
# !pip install plotly-geo
# !pip3 install pickle5

In [2]:
import numpy as np
import pandas as pd
import requests
import urllib
import time
from datetime import datetime, timezone
import pytz
from tzwhere import tzwhere

In [3]:
def get_FIPS_api(lat, lon, timeout=30):
    """Look up the county for a coordinate pair via the FCC census block API.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude sent as the ``latitude`` / ``longitude`` query
        parameters.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up. Without a
        timeout a stalled connection would block the caller indefinitely.

    Returns
    -------
    tuple
        ``(county_fips, county_name)`` taken from the ``County`` object of the
        JSON response; ``("ERROR", "ERROR")`` when the request fails or the
        body is not valid JSON (callers treat this as "retry later");
        ``(None, None)`` when the response is valid JSON but carries no
        county information.
    """
    url = "https://geo.fcc.gov/api/census/block/find"
    query = {"latitude": lat, "longitude": lon, "format": "json"}

    try:
        response = requests.get(url, params=query, timeout=timeout)
        data = response.json()
    # Only network / decoding failures are expected here. A bare ``except``
    # would also swallow KeyboardInterrupt and make the run impossible to stop.
    except (requests.RequestException, ValueError):
        print("The API probably blocked us.")
        return "ERROR", "ERROR"

    # A syntactically valid answer may still lack county data; retrying will
    # not change that, so report "not found" instead of raising KeyError.
    county = data.get("County") if isinstance(data, dict) else None
    if not isinstance(county, dict):
        return None, None
    return county.get("FIPS"), county.get("name")

def get_FIPS(s, max_attempts=5, retry_delay=60):
    """Resolve the county FIPS code and county name for one sighting row.

    The module-level ``counties_df`` table is consulted first, matching on
    ``city`` / ``state_id``. Only when it has no match is ``get_FIPS_api``
    queried with the row's coordinates.

    Parameters
    ----------
    s : row-like
        Object exposing ``City``, ``State``, ``Lat`` and ``Lon`` attributes
        (e.g. a row passed by ``DataFrame.apply(..., axis=1)``).
    max_attempts : int, optional
        Maximum number of API calls before giving up.
    retry_delay : float, optional
        Seconds to sleep between failed API calls.

    Returns
    -------
    tuple
        ``(county_fips, county_name)``, or ``(None, None)`` when every API
        attempt reported an error.
    """
    matches = counties_df.loc[
        (counties_df.city == s.City) & (counties_df.state_id == s.State),
        ["county_name", "county_fips"],
    ]
    if len(matches) >= 1:
        first = matches.iloc[0]
        return first.county_fips, first.county_name

    # Every failed call counts as an attempt. Previously the error path skipped
    # the counter, so a persistent API failure retried forever.
    for attempt in range(1, max_attempts + 1):
        fips, county = get_FIPS_api(s.Lat, s.Lon)
        if fips != "ERROR":
            return fips, county
        # No point in sleeping after the final attempt.
        if attempt < max_attempts:
            time.sleep(retry_delay)

    return None, None

def fill_spaces(fips):
    """Left-pad a numeric county FIPS code with zeros to five characters.

    Parameters
    ----------
    fips : str or int
        FIPS code, possibly missing its leading zeros (e.g. ``"1001"``).

    Returns
    -------
    str or original value
        The zero-padded code as a string when ``fips`` consists only of
        digits; otherwise ``fips`` is returned unchanged.
    """
    # Convert before measuring: calling len() on an int raises TypeError.
    code = str(fips)
    # Placeholder text such as "None", "nan" or "ERROR" is not a FIPS code and
    # must not be turned into "0None" / "00nan".
    if not code.isdigit():
        return fips
    return code.zfill(5)

def fix_date(x):
    """Move timestamps dated after 2021 back by exactly one century.

    A value whose ``year`` is greater than 2021 is rebuilt 100 years earlier
    as a UTC-aware ``datetime`` (keeping month, day and time-of-day fields);
    any other value is returned untouched.
    Presumably this repairs dates that were parsed into the 2000s instead of
    the 1900s -- confirm against the upstream parsing step.
    """
    # Kept as a negated ">" test so that any value for which the comparison is
    # False falls through unchanged.
    if not (x.year > 2021):
        return x

    century_earlier = x.year - 100
    return datetime(
        century_earlier,
        x.month,
        x.day,
        x.hour,
        x.minute,
        x.second,
        x.microsecond,
        tzinfo=timezone.utc,
    )

In [4]:
%%script false --no-raise-error

# Enrichment pipeline: load the cleaned sightings, attach a county FIPS code
# and county name to every row, merge in the UTC timestamps, repair the
# century of out-of-range dates and persist the result.
# NOTE: the `%%script false --no-raise-error` magic at the top of this cell
# hands the cell body to the `false` program, so this code is skipped on
# "Run All"; remove the magic to actually re-run the pipeline.

# NOTE(review): unpickling can execute arbitrary code -- only load files that
# were produced by this project.
file_name = "sightings_cleaned_us.pkl"
sightings_cleaned = pd.read_pickle(file_name)

# City -> county lookup table; get_FIPS reads it as the global `counties_df`.
counties_df = pd.read_csv("resources/uscities.csv")
counties_df = counties_df[["city", "state_id", "county_name", "county_fips", "population"]]

# get_FIPS yields a (FIPS, county name) pair per row; result_type='expand'
# spreads that pair over two columns, which are named on the next line.
sightings_county = sightings_cleaned[["Lat", "Lon", "City", "State"]].apply(get_FIPS, axis=1, result_type='expand')
sightings_county.columns = ["FIPS", "County"]

# Append the two new columns side by side with the original data.
sightings_cleaned = pd.concat([sightings_cleaned, sightings_county], axis='columns')

# Keep only the first occurrence of each column name (presumably guards
# against the pickle already containing FIPS/County columns -- confirm).
sightings_cleaned = sightings_cleaned.loc[:,~sightings_cleaned.columns.duplicated()]
# NOTE(review): casting to str presumably turns failed lookups (None) into
# the text "None" before padding -- verify how those rows should be handled.
sightings_cleaned["FIPS"] = sightings_cleaned.FIPS.astype(str)

# Restore leading zeros so every code is five characters wide.
sightings_cleaned["FIPS"] = sightings_cleaned["FIPS"].apply(fill_spaces)

# Index-aligned merge (pandas' default inner join) with the UTC timestamps.
# NOTE(review): `sightings_add_UTC` is not defined in any cell visible here;
# it has to come from earlier kernel state -- confirm where it is built.
sightings_merged = pd.merge(sightings_cleaned, sightings_add_UTC[["timezone_str", "Time_UTC"]], left_index=True, right_index=True)
# Discard rows that have no UTC timestamp.
sightings_merged.drop(sightings_merged[sightings_merged.Time_UTC.isnull()].index, axis=0, inplace=True)
sightings_cleaned = sightings_merged

# Shift dates later than 2021 back one century, then normalise the column to
# timezone-aware UTC datetimes.
sightings_cleaned["Time_UTC"] = sightings_cleaned["Time_UTC"].apply(fix_date)
sightings_cleaned["Time_UTC"] = pd.to_datetime(sightings_cleaned["Time_UTC"], utc=True)

# Persist the enriched table for downstream use.
sightings_cleaned.to_pickle("sightings_with_counties.pkl")