In [1]:
# Dependencies
import os

import googlemaps
import pandas as pd

In [None]:
# Create the Google Maps client used by the geocoding cells below. The API key is
# read from the GOOGLE_MAPS_API_KEY environment variable so that it is never
# stored in (or committed with) the notebook; a KeyError here means it is unset.
gmaps = googlemaps.Client(key=os.environ["GOOGLE_MAPS_API_KEY"])

In [None]:
# Load the report data that the rest of the notebook geocodes and exports.
bigfoot_df = pd.read_json(path_or_buf='../data/filtered_years_clean.json')

In [None]:
def geocode_with_fallback(row):
    """Geocode one report row, preferring the nearest town over the county.

    The module-level ``gmaps`` client is queried with "<nearest town>, <state>"
    first and, when that gives no result, with "<county>, <state>".

    Parameters
    ----------
    row : mapping (for example a DataFrame row)
        Must provide the keys 'nearest town', 'county' and 'state'.

    Returns
    -------
    tuple
        ``(lat, lng)`` taken from the first geocoding result, or
        ``(None, None)`` when no query returns a result or an error occurs.
        Errors are printed, not raised.
    """
    try:
        state = row['state']
        # Most specific place first; the county is only a fallback.
        for field in ('nearest town', 'county'):
            place = row[field]
            # A missing value would be interpolated as "nan, <state>" or
            # "None, <state>": a wasted request that may match a wrong place.
            if pd.isna(place) or not str(place).strip():
                continue
            location = gmaps.geocode(f"{place}, {state}")
            if location:
                coords = location[0]['geometry']['location']
                return coords['lat'], coords['lng']

        # If neither works, return None
        return None, None
    except Exception as e:
        # Best-effort on purpose: one failing row must not abort a whole-frame apply.
        print(f"Error geocoding {row['nearest town']}, {row['state']} or {row['county']}, {row['state']}: {e}")
        return None, None

In [None]:
# First ten reports only (presumably a small sample for trial runs; it is not
# used by the other cells shown here).
bigfoot_stub = bigfoot_df.head(10)

In [None]:
# Geocode every report and store the coordinates on the frame. Each row triggers
# one or two geocoding requests. Both column names are lower-case because the
# later cells read 'latitude' and 'longitude'.
bigfoot_df[['latitude', 'longitude']] = bigfoot_df.apply(
    lambda row: pd.Series(geocode_with_fallback(row)), axis=1
)

In [None]:
# Remove the 'Unnamed: 0' column (presumably a leftover index column). The cell
# overwrites its own input, so errors='ignore' keeps it re-runnable once the
# column is already gone.
bigfoot_df = bigfoot_df.drop(columns='Unnamed: 0', errors='ignore')
bigfoot_df.columns

In [19]:
# Now we search for specific coordinates from the observed data.
# The pattern captures "<lat>, <long>": an unsigned latitude of 0-90, a comma, and
# an optionally negative longitude of 0-180.
# NOTE(review): plain number pairs such as "2, 3" also match and then override the
# geocoded coordinates - confirm this is acceptable.
coordinate_pattern = r"\b((?:[0-8]?\d(?:\.\d+)?|90(?:\.0+)?)),\s*(-?(?:1[0-7]\d(?:\.\d+)?|0?\d{1,2}(?:\.\d+)?|180(?:\.0+)?))\b"

# Extract new latitude and longitude (first match per report, NaN when there is none)
extracted_coords = bigfoot_df['observed'].str.extract(coordinate_pattern, expand=True)
extracted_coords.columns = ['new_lat', 'new_long']

# Convert extracted values
extracted_coords = extracted_coords.astype(float)

# Update lat and long if there are specified coords: use the extracted value where
# one was found, otherwise keep the existing value. fillna aligns on the index,
# which extracted_coords shares with bigfoot_df.
bigfoot_df['latitude'] = extracted_coords['new_lat'].fillna(bigfoot_df['latitude'])
bigfoot_df['longitude'] = extracted_coords['new_long'].fillna(bigfoot_df['longitude'])

# Drop longitudes that would be outside of the USA / Canada. Rows whose longitude
# is still missing fail both comparisons and are dropped as well.
# NOTE(review): -152 also excludes western Alaska - confirm that is intended.
filtered_coords = bigfoot_df[(bigfoot_df['longitude'] >= -152) & (bigfoot_df['longitude'] <= -67)]

In [20]:
# Write the complete frame as JSON records; the longitude filter is not applied
# to this export.
bigfoot_df.to_json(path_or_buf='../data/bigfoot_coords_df.json', orient='records')


In [21]:
# Columns kept in the trimmed export, in output order.
final_columns = [
    'report_number',
    'report_class',
    'state',
    'county',
    'latitude',
    'longitude',
    'season',
    'month',
    'observed',
]

# Restrict the longitude-filtered reports to those columns and write them out as
# JSON records.
clean_coords_list = filtered_coords.loc[:, final_columns]
clean_coords_list.to_json(
    path_or_buf='../data/bigfoot_coordinates_clean_cols.json',
    orient='records',
)