# Geocoding with Geopy
Loading the Excel file into a pandas DataFrame and geocoding it with Geopy and Nominatim.
Nominatim is a geocoding service provided by OpenStreetMap. It converts addresses into geographic coordinates and vice versa.
Requests are limited to 1 per second, so we can use a sleep function to add a 1-second delay between requests. If a read timeout error occurs, the script pauses for 10 seconds and retries, giving up after 3 failed attempts.

In [None]:
import time

import pandas as pd
from geopy.exc import GeocoderTimedOut
from geopy.exc import GeocoderUnavailable
from geopy.geocoders import Nominatim
from requests.exceptions import ReadTimeout

def get_coordinates(city_or_county, state):
    """
    Return the (latitude, longitude) of a city or county in a state using Nominatim.

    The place is first looked up as "<city_or_county>, <state>". If Nominatim
    finds nothing, it is looked up again as "<city_or_county> County, <state>".
    Both lookups go through the same retry handling.

    Args:
        city_or_county: Name of the city or county to geocode.
        state: Name of the state the place belongs to.

    Returns:
        A (latitude, longitude) tuple, or (None, None) when neither lookup
        yields a result.

    Raises:
        ReadTimeout: If a lookup still fails with a timeout or an unavailable
            service after 3 attempts.
    """
    max_attempts = 3
    retry_delay = 10   # seconds to wait after a failed attempt
    request_delay = 1  # seconds between requests (Nominatim allows at most 1 per second)

    geolocator = Nominatim(user_agent="script_geocoding with geopy")

    # Try the plain name first, then the same name treated as a county
    candidates = (
        f"{city_or_county}, {state}",
        f"{city_or_county} County, {state}",
    )

    for address in candidates:
        location = None
        for attempt in range(1, max_attempts + 1):
            # Throttle every request, including retries and the county fallback
            time.sleep(request_delay)
            try:
                location = geolocator.geocode(address, timeout=10)
            except (GeocoderTimedOut, GeocoderUnavailable, ReadTimeout) as error:
                if attempt == max_attempts:
                    raise ReadTimeout("Exited due to ReadTimeOutError") from error
                print(f"Geocoding failed for {address}. Retrying in {retry_delay} seconds... ({attempt}/{max_attempts})")
                time.sleep(retry_delay)
            else:
                break

        if location:
            return location.latitude, location.longitude

    return None, None

# Load the list of cities and counties from the Excel workbook
cities_df = pd.read_excel("Path/to/input.xlsx", engine='openpyxl')

# Start with empty coordinate columns; rows are filled in one at a time below
cities_df['Latitude'] = None
cities_df['Longitude'] = None

# Geocode every row and store the returned pair directly in the two columns
for index, row in cities_df.iterrows():
    cities_df.at[index, 'Latitude'], cities_df.at[index, 'Longitude'] = get_coordinates(
        row['City Or County'], row['State']
    )

# Save the DataFrame, now including coordinates, as a CSV file without the index
cities_df.to_csv('Path/to/output.csv', index=False)


## Tryout - Using USZipCode library to get the county from a latitude and longitude

In [2]:
from uszipcode import SearchEngine

# Coordinates to look up
lat, lon = 37.7749, -122.4194

# Open the uszipcode search engine
search = SearchEngine()

# Query by coordinates, limited to one result, and read that result's county
result = search.by_coordinates(lat=lat, lng=lon, returns=1)
county = result[0].county

# Show the county name
print(county)

# Close the search engine now that the lookup is done
search.close()

San Francisco County


Using USZipCode library to get the county from a latitude and longitude

# Using the SearchEngine.by_coordinates() Method with a Retry Loop to retrieve the County

In [None]:
import csv
import time
from uszipcode import SearchEngine
from geopy.exc import GeocoderUnavailable
from requests.exceptions import ReadTimeoutError

# Set the file paths. Both are raw strings so the Windows-style backslashes stay
# literal; without the r prefix, "\t" and "\f" are read as tab and form-feed escapes.
input_file = r"Path\to\input\file.csv"
output_file = r"Path\to\output\file.csv"

# Create one module-level uszipcode SearchEngine; get_county() reads this
# global for its lookups, and it is closed at the end of the script.
search = SearchEngine()

# Create a function to get the county from a latitude and longitude
def get_county(lat, lon):
    """
    Return the county reported by uszipcode for a latitude/longitude pair.

    Args:
        lat: Latitude as a number or a numeric string (e.g. a CSV cell).
        lon: Longitude as a number or a numeric string (e.g. a CSV cell).

    Returns:
        The county of the first result returned by the module-level `search`
        engine, or None when the coordinates cannot be converted to numbers
        or the lookup returns no result.

    Raises:
        Exception: If the lookup fails with GeocoderUnavailable or
            ReadTimeoutError on all 3 attempts.
    """
    # Values read from a CSV are strings; convert them before the lookup
    try:
        lat, lon = float(lat), float(lon)
    except (TypeError, ValueError):
        return None

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            result = search.by_coordinates(lat=lat, lng=lon, returns=1)
        except (GeocoderUnavailable, ReadTimeoutError):
            print(f"Attempt {attempt}: Failed to get county. Retrying after 10 seconds.")
            time.sleep(10)
        else:
            # An empty result means no match was found for these coordinates
            return result[0].county if result else None

    raise Exception("Exited due to ReadTimeOutError")

# Only the first rows of the input are processed; the loop stops once this many are done
max_rows = 5

try:
    # Read the semicolon-delimited input CSV and look up a county for each row
    with open(input_file, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=';')

        # Column names of the input file; None when the file is empty
        header = reader.fieldnames

        # Rows extended with the new "county" value
        updated_rows = []

        for i, row in enumerate(reader):
            if i >= max_rows:
                break
            # Look up the county for this row's coordinates and store it on the row
            row['county'] = get_county(row['Latitude'], row['Longitude'])
            updated_rows.append(row)

    # Build the output header from the input header so an input without data
    # rows still produces a valid file, then add the new column if needed
    fieldnames = list(header or [])
    if 'county' not in fieldnames:
        fieldnames.append('county')

    # Write the updated rows to the output file with the new "county" column
    with open(output_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=';')
        writer.writeheader()
        writer.writerows(updated_rows)
finally:
    # Close the SearchEngine object even if reading, lookup, or writing failed
    search.close()

