In [3]:
pip install pandas requests

Note: you may need to restart the kernel to use updated packages.


In [2]:
import datetime
import os

import pandas as pd
import requests

In [4]:
# our dataset
data = {"addresses": ['Newport Beach, California', 'New York City', 'London, England', 10001, 'Sydney, Au']}

In [6]:
# Calling DataFrame constructor on addresses list 
df = pd.DataFrame(data) 
df 

Unnamed: 0,addresses
0,"Newport Beach, California"
1,New York City
2,"London, England"
3,10001
4,"Sydney, Au"


In [8]:
# create a throw-away dataframe
df_throwaway = df.copy()

def apply_this_function(passed_row):
    passed_row['new_col'] = True
    passed_row['added'] = datetime.datetime.now()
    return passed_row

df_throwaway.apply(apply_this_function, axis=1) # axis=1 is important to use the row itself

Unnamed: 0,addresses,new_col,added
0,"Newport Beach, California",True,2024-11-07 09:54:12.924285
1,New York City,True,2024-11-07 09:54:12.928273
2,"London, England",True,2024-11-07 09:54:12.929271
3,10001,True,2024-11-07 09:54:12.930345
4,"Sydney, Au",True,2024-11-07 09:54:12.931344


In [10]:
# create another throw-away dataframe
df_example_2 = df.copy()

def apply_this_other_function(row):
    column_name = 'addresses'
    address_value = row[column_name]
    if isinstance(address_value, int):
        row[column_name] = address_value * 2
    return row

df_example_2.apply(apply_this_other_function, axis=1) # axis=1 is important to use the row itself

Unnamed: 0,addresses
0,"Newport Beach, California"
1,New York City
2,"London, England"
3,20002
4,"Sydney, Au"


In [15]:
# create a working example. I like using a copy of the source data in case we make mistakes
rest_api_df = df.copy()
GOOGLE_API_KEY = 'AIzaSyCo0hFmUGHF3IqSq1s3C-U7tfIg0FnAgJk' #get your API

def extract_lat_long_via_address(address_or_zipcode):
    lat, lng = None, None
    api_key = GOOGLE_API_KEY
    base_url = "https://maps.googleapis.com/maps/api/geocode/json"
    endpoint = f"{base_url}?address={address_or_zipcode}&key={api_key}"
    # see how our endpoint includes our API key? Yes this is yet another reason to restrict the key
    r = requests.get(endpoint)
    if r.status_code not in range(200, 299):
        return None, None
    try:
        '''
        This try block incase any of our inputs are invalid. This is done instead
        of actually writing out handlers for all kinds of responses.
        '''
        results = r.json()['results'][0]
        lat = results['geometry']['location']['lat']
        lng = results['geometry']['location']['lng']
    except:
        pass
    return lat, lng
    
def enrich_with_geocoding_api(row):
    column_name = 'addresses'
    address_value = row[column_name]
    address_lat, address_lng = extract_lat_long_via_address(address_value)
    row['lat'] = address_lat
    row['lng'] = address_lng
    return row

rest_api_df.apply(enrich_with_geocoding_api, axis=1) # axis=1 is important to use the row itself

Unnamed: 0,addresses,lat,lng
0,"Newport Beach, California",,
1,New York City,,
2,"London, England",,
3,10001,,
4,"Sydney, Au",,
