In [1]:
import googlemaps, os
import pandas as pd
from dotenv import load_dotenv

## Google Maps API Setup
Before running this notebook, install the `googlemaps` and `python-dotenv` packages and place your Google Maps API key in a `.env` file as `GMAPS_API_KEY=<your key>`.

In [10]:
# Load the API key from the .env file (or the surrounding environment).
load_dotenv()
GMAPS_API_KEY = os.getenv("GMAPS_API_KEY")

# os.getenv returns None when the variable is missing; stop here with an
# actionable message instead of failing later when the client is created.
if not GMAPS_API_KEY:
    raise RuntimeError(
        "GMAPS_API_KEY is not set. Add GMAPS_API_KEY=<your key> to a .env file "
        "or export it in the environment before running this notebook."
    )

In [11]:
# Create the Google Maps client using the API key read from the environment;
# geocode() sends its lookups through this client.
gmaps = googlemaps.Client(key=GMAPS_API_KEY)

## Geocode Function
Reference: https://www.natasshaselvaraj.com/a-step-by-step-guide-on-geocoding-in-python/

In [15]:
def geocode(location):
    """Look up the latitude and longitude of a location string.

    Args:
        location: Free-form address text such as "Ocala,FL,USA".

    Returns:
        A ``(lat, lng)`` tuple taken from the first geocoding result, or
        ``None`` when the input is not a non-empty string, the service
        returns no results, or the lookup fails.
    """
    # Missing values (None/NaN) and blank strings cannot be geocoded, so do
    # not spend an API request on them.
    if not isinstance(location, str) or not location.strip():
        return None

    try:
        results = gmaps.geocode(location)
        if not results:
            # An empty result list means the service found no match.
            return None
        point = results[0]["geometry"]["location"]
        return (point["lat"], point["lng"])
    except Exception as exc:
        # Best effort: one failed lookup must not abort a whole batch, so the
        # failure is reported and mapped to None. Catching Exception (not a
        # bare except) lets KeyboardInterrupt/SystemExit still stop a long run.
        import logging

        logging.getLogger(__name__).warning(
            "Geocoding failed for %r: %s", location, exc
        )
        return None

## Load CSV file

In [13]:
# Path to the cleaned UFO data CSV, relative to the notebook's working directory.
PATH = './data/cleaned_ufo_data/data_cleaned.csv'
df = pd.read_csv(PATH)
# NOTE(review): the displayed frame contains an "Unnamed: 0" column, which
# suggests the CSV was saved with its index — consider index_col=0 if that
# column is not needed downstream.
df

Unnamed: 0,DateTime,City,State,Country,location
0,2021-09-30 22:50:00,Ocala,FL,USA,"Ocala,FL,USA"
1,2021-09-30 22:49:00,Atlanta,GA,USA,"Atlanta,GA,USA"
2,2021-09-30 21:45:00,Lakeland,GA,USA,"Lakeland,GA,USA"
3,2021-09-30 21:25:00,Grand Haven,MI,USA,"Grand Haven,MI,USA"
4,2021-09-30 20:59:00,Lewis Center,OH,USA,"Lewis Center,OH,USA"
...,...,...,...,...,...
14304,2021-02-01 18:50:00,Frisco,TX,USA,"Frisco,TX,USA"
14305,2021-02-01 17:02:00,Jonesboro,AR,USA,"Jonesboro,AR,USA"
14306,2021-02-01 15:36:00,Springfield,MO,USA,"Springfield,MO,USA"
14307,2021-02-01 15:25:00,Elizabeth,CO,USA,"Elizabeth,CO,USA"


## Geocoding

In [16]:
# Geocode each distinct location once and reuse the result for every row that
# shares it, so repeated cities do not trigger repeated API requests.
unique_locations = df['location'].dropna().unique()
geocode_by_location = {place: geocode(place) for place in unique_locations}

# dict.get yields None for locations absent from the lookup (missing values),
# the same "not found" marker geocode() returns.
df['geocode'] = df['location'].map(geocode_by_location.get)

## Adding lat & long columns (rows that could not be geocoded are left empty, not removed)

In [103]:
def _coordinate(position):
    """Build an extractor for one element of a geocode pair.

    The returned function gives ``pair[position]`` for a truthy pair and
    ``None`` otherwise (i.e. when geocoding produced no result).
    """
    def extract(pair):
        if not pair:
            return None
        return pair[position]
    return extract


# Split the (lat, lng) pairs into separate columns; no rows are dropped here,
# failed lookups simply carry None in both columns.
df['lat'] = df['geocode'].apply(_coordinate(0))
df['long'] = df['geocode'].apply(_coordinate(1))

In [105]:
# Display the frame to check the newly added geocode, lat and long columns.
df

Unnamed: 0,DateTime,City,State,Country,location,geocode,lat,long
0,2021-09-30 22:50:00,Ocala,FL,USA,"Ocala,FL,USA","(29.1871986, -82.14009229999999)",29.187199,-82.140092
1,2021-09-30 22:49:00,Atlanta,GA,USA,"Atlanta,GA,USA","(33.748752, -84.38768449999999)",33.748752,-84.387684
2,2021-09-30 21:45:00,Lakeland,GA,USA,"Lakeland,GA,USA","(31.0410373, -83.0751471)",31.041037,-83.075147
3,2021-09-30 21:25:00,Grand Haven,MI,USA,"Grand Haven,MI,USA","(43.0630734, -86.22838639999999)",43.063073,-86.228386
4,2021-09-30 20:59:00,Lewis Center,OH,USA,"Lewis Center,OH,USA","(40.1983884, -83.0100987)",40.198388,-83.010099
...,...,...,...,...,...,...,...,...
14304,2021-02-01 18:50:00,Frisco,TX,USA,"Frisco,TX,USA","(33.1506744, -96.82361159999999)",33.150674,-96.823612
14305,2021-02-01 17:02:00,Jonesboro,AR,USA,"Jonesboro,AR,USA","(35.835696, -90.7051346)",35.835696,-90.705135
14306,2021-02-01 15:36:00,Springfield,MO,USA,"Springfield,MO,USA","(37.2089572, -93.29229889999999)",37.208957,-93.292299
14307,2021-02-01 15:25:00,Elizabeth,CO,USA,"Elizabeth,CO,USA","(39.3602662, -104.596915)",39.360266,-104.596915


In [107]:
# Write the geocoded frame to the same directory as the input CSV.
OUTPUT_PATH = './data/cleaned_ufo_data/data_geocoded.csv'
# index=False keeps the DataFrame's row index out of the written file.
df.to_csv(OUTPUT_PATH, index=False)