### Description
Author: T. Majidzadeh

Date Created: March 4, 2025

Date Updated: March 4, 2025

Purpose: For each address, query OSM's Nominatim service for a geocode and save the resulting latitude and longitude.

In [2]:
import getpass
import pandas as pd
import numpy as np
import re
import time
import os
from geopy.adapters import AioHTTPAdapter
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import requests

In [3]:
# The e-mail address is prompted for (input hidden) instead of being
# hard-coded, and is sent to Nominatim as the identifying user agent.
email = getpass.getpass("Enter your email.")
nominatim_service = Nominatim(user_agent=email)

# Bound geocode method: call as geolocator(query) to look up one address.
geolocator = nominatim_service.geocode

Enter your email. ········


In [4]:
# Build the path with os.path.join so the notebook runs on any OS; a literal
# "..\\data\\..." string only resolves correctly on Windows.
# Every column is cast to str, so missing values become the literal "nan".
pm_data = pd.read_csv(os.path.join("..", "data", "pm_osm_addresses.csv")).astype(str)

In [5]:
addresses = []
retry_attempts = 3  # Max retries before skipping
request_delay = 1.1  # Seconds to pause after every request (Nominatim's limit is 1 request/second)


def geocode_with_retry(query):
    """Geocode one query string with ``geolocator``, retrying on errors.

    Parameters
    ----------
    query : str
        Address text to send to the geocoder.

    Returns
    -------
    The geocoder's result, or ``None`` when there is no match or when all
    ``retry_attempts`` attempts raised.

    Notes
    -----
    Only exceptions are retried. A "no match" answer is returned right away,
    because repeating the same query would just spend extra requests on the
    same answer. Every request is followed by a pause so consecutive calls
    stay within the rate limit.
    """
    for attempt in range(retry_attempts):
        try:
            result = geolocator(query)
        except Exception as e:
            # Deliberately broad: one bad row must not abort a long batch run.
            print(f"Error on attempt {attempt + 1}: {type(e).__name__} - {e}")
            time.sleep(request_delay * 2 ** attempt)  # Exponential backoff: 1.1s, 2.2s, 4.4s
        else:
            time.sleep(request_delay)  # Respect Nominatim's rate limit
            return result
    return None


for i, (addr, raw_addr) in enumerate(zip(pm_data['osm_address'], pm_data['raw_address'])):
    print(f"Processing {i}: {addr}")

    if addr == "nan":  # Only query the known successes.
        addresses.append(None)
        continue

    geocode = None
    # Try the OSM-formatted address first, then fall back to the raw address.
    for query in (addr, raw_addr):
        if query == "nan":
            # Missing values were stringified to "nan" on load; sending that
            # text as a query could match an unrelated place.
            continue
        geocode = geocode_with_retry(query)
        if geocode:
            break  # Stop at the first query that produced a result

    if not geocode:  # If everything fails, append an empty value
        print("Failed to geocode, using missing value")
    addresses.append(geocode if geocode else None)

print("Geocoding completed.")


Processing 0: Onelife Fitness, 4000, Wisconsin Avenue Northwest, City Ridge, Ward 3, Washington, District of Columbia, 20016, United States
Processing 1: Ravel and Royale at Strathmore Hall, 10511, Strathmore Hall Street, Parkside, Pooks Hill, Montgomery County, Maryland, 20852, United States
Processing 2: Hauser Boulevard, Park La Brea, Fairfax, Los Angeles, Los Angeles County, California, 90036, United States
Processing 3: Flats 8300, 8300, Wisconsin Avenue, Wisconsin North, East Bethesda, Montgomery County, Maryland, 20814, United States
Processing 4: 901, East Phillips Lane, Township Residences, Centennial, Arapahoe County, Colorado, 80122, United States
Processing 5: Sumiao Hunan Kitchen, 270, Third Street, East Cambridge, Cambridge, Middlesex County, Massachusetts, 02142, United States
Processing 6: 3645, Habersham Road Northeast, Atlanta, Fulton County, Georgia, 30305, United States
Processing 7: Axiom Apartments, 33, Rogers Street, East Cambridge, Cambridge, Middlesex County, M

In [12]:
# The geocoding loop appends exactly one entry per row. A mismatch means the
# loop was interrupted or run against a different frame, so fail with a clear
# message before assigning columns.
if len(addresses) != len(pm_data):
    raise ValueError(
        f"Expected {len(pm_data)} geocoding results but found {len(addresses)}; "
        "re-run the geocoding cell to completion before exporting."
    )

# Rows that were skipped or could not be geocoded hold None and stay missing.
latitudes = [address.latitude if address else None for address in addresses]
longitudes = [address.longitude if address else None for address in addresses]

pm_data['osm_latitude'] = latitudes
pm_data['osm_longitude'] = longitudes

# Build the path with os.path.join so the export works on any OS; a literal
# "..\\data\\..." string only resolves correctly on Windows.
# NOTE: to_csv keeps its default index=True, so the row index is written as
# the first (unnamed) column, as before.
pm_data.to_csv(os.path.join("..", "data", "pm_osm_addresses_geocodes.csv"))