# Geolocating Using GeoPy

In order to map locations in R, the longitude and latitude must be recorded.

In the Detroit Open Portal, the longitude and latitude of the shooting locations are already provided, so those locations are taken care of. The hospital locations and zip code areas, on the other hand, are given only as addresses.

To geolocate these, we use GeoPy, a third-party Python library that calls the Nominatim API, which in turn uses the OpenStreetMap database to look up locations.

In [3]:
import time

import pandas as pd
from geopy.exc import GeocoderUnavailable
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim


# Visible confirmation that the import cell ran to completion.
print("Essential Libraries ✅")


Essential Libraries ✅


In [None]:
print("Zipcode Areas Dataset")
try:
    # The CSV has a header row, so pandas takes the column names from it.
    df_zipcodeAreas = pd.read_csv("Datasets/zipcode_Areas.csv")
except FileNotFoundError:
    print("⚠️ ERROR: Make sure 'zipcode_Areas.csv' is in your 'Datasets' folder!")
    # Re-raise so the cell stops here: everything below needs the DataFrame
    # and would otherwise fail later with a confusing NameError.
    raise
else:
    # Only report success (and preview the data) when the read worked.
    print("✅ Loaded zipcode_Areas.csv")
    print("Original Data:")
    print(df_zipcodeAreas.head())


print("\n--- Initializing Geocoder ---")

# Nominatim client; each request may wait up to 20 seconds for a response
# before it times out.
geolocator = Nominatim(
    user_agent="wayne_county_gis_project_v4",
    timeout=20,
)

# Rate-limited wrapper around the lookup: calls are spaced at least 1 second
# apart, failed calls are retried (max_retries=3, 10 seconds between tries),
# and errors that persist are raised instead of being swallowed.
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
    error_wait_seconds=10,
    max_retries=3,
    swallow_exceptions=False,
)
print("✅ Geocoder Initialized")


def geocode_zipcode(zipcode, country_codes="us"):
    """Geocode a 5-digit US ZIP code into a ``(latitude, longitude)`` pair.

    Args:
        zipcode: ZIP code as an int, float or numeric string. Missing values
            (``None``/NaN) and blank strings are skipped.
        country_codes: Country filter forwarded to the geocoder so that a
            bare number is not matched against postal codes of other
            countries. Defaults to ``"us"``.

    Returns:
        ``(latitude, longitude)`` on success, otherwise ``(None, None)``.
        Failures are reported with ``print`` and never raised, so the
        function can be used directly with ``Series.apply``.
    """
    if pd.isna(zipcode) or str(zipcode).strip() == "":
        return None, None

    # Normalise outside the network ``try`` so malformed values never reach
    # the geocoder. ``zfill`` restores leading zeros that are lost when the
    # value was parsed as a number (e.g. 2134 -> "02134").
    try:
        query = str(int(zipcode)).zfill(5)
    except (TypeError, ValueError, OverflowError) as e:
        print(f"  An unexpected error occurred for zip code '{zipcode}': {e}")
        return None, None

    try:
        location_data = geocode(query, country_codes=country_codes)
    except GeocoderUnavailable:
        # Nothing is retried here: the module-level ``geocode`` wrapper is
        # what performs retries, so by now the lookup has failed for good.
        print(f"  Network Error: Geocoder unavailable for {query}. Skipping.")
        return None, None
    except Exception as e:
        # Best-effort: one bad lookup must not abort the whole batch.
        print(f"  An unexpected error occurred for zip code '{zipcode}': {e}")
        return None, None

    if location_data:
        print(f"  Successfully geocoded: {query}")
        return location_data.latitude, location_data.longitude

    print(f"  Failed to geocode (not found): {query}")
    return None, None

print("\n--- Starting Geocoding Process (this may take a minute) ---")

# Work on a copy so the raw zip code table stays unchanged.
df_zipcodes_geocoded = df_zipcodeAreas.copy()

# Each lookup returns a (latitude, longitude) pair. Naming the columns
# explicitly keeps the frame two columns wide even when the dataset is
# empty, where the assignment below would otherwise raise a ValueError.
zipcode_coordinates = pd.DataFrame(
    df_zipcodes_geocoded['Zip Code'].apply(geocode_zipcode).tolist(),
    index=df_zipcodes_geocoded.index,
    columns=['latitude', 'longitude'],
)
df_zipcodes_geocoded[['latitude', 'longitude']] = zipcode_coordinates

print("\n✅ Final Geocoded Zip Code Data:")
# Display the first few rows with the new columns
print(df_zipcodes_geocoded[['Zip Code', 'Shape Area', 'latitude', 'longitude']].head())

# Export the final DataFrame to a new CSV file (written to the working
# directory, without the index column).
output_filename = "zipcode_Areas_geocoded.csv"
df_zipcodes_geocoded.to_csv(output_filename, index=False)

print(f"\n✅ DataFrame successfully saved to '{output_filename}'")

In [13]:
print("Additional Hospital Locations")
try:
    # The CSV has a header row, so pandas takes the column names from it.
    df_additional_hospitals = pd.read_csv("Datasets/additional_hospital_locations.csv")
except FileNotFoundError:
    print("⚠️ ERROR: Make sure 'additional_hospital_locations.csv' is in your 'Datasets' folder!")
    # Re-raise so the cell stops here: everything below needs the DataFrame
    # and would otherwise fail later with a confusing NameError.
    raise
else:
    # Only report success (and preview the data) when the read worked.
    print("✅ Loaded additional_hospital_locations.csv")
    print("Original Data:")
    print(df_additional_hospitals.head())

print("\n--- Initializing Geocoder ---")

# Nominatim client; each request may wait up to 20 seconds for a response
# before it times out.
geolocator = Nominatim(
    user_agent="wayne_county_gis_project_v5",
    timeout=20,
)

# Rate-limited wrapper around the lookup: calls are spaced at least 1 second
# apart, failed calls are retried (max_retries=3, 10 seconds between tries),
# and errors that persist are raised instead of being swallowed.
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,
    error_wait_seconds=10,
    max_retries=3,
    swallow_exceptions=False,
)
print("✅ Geocoder Initialized")


def geocode_location(address):
    """Geocode a full address string into a ``(latitude, longitude)`` pair.

    Args:
        address: Free-form address text. Missing values (``None``/NaN) and
            blank strings are skipped without calling the geocoder.

    Returns:
        ``(latitude, longitude)`` on success, otherwise ``(None, None)``.
        Failures are reported with ``print`` and never raised, so the
        function can be used directly with ``Series.apply``.
    """
    if pd.isna(address) or str(address).strip() == "":
        return None, None

    # Keep the ``try`` limited to the network call itself.
    try:
        location_data = geocode(address)
    except GeocoderUnavailable:
        # Nothing is retried here: the module-level ``geocode`` wrapper is
        # what performs retries, so by now the lookup has failed for good.
        print(f"  Network Error: Geocoder unavailable for {address}. Skipping.")
        return None, None
    except Exception as e:
        # Best-effort: one bad lookup must not abort the whole batch.
        print(f"  An unexpected error occurred for address '{address}': {e}")
        return None, None

    if location_data:
        print(f"  Successfully geocoded: {address}")
        return location_data.latitude, location_data.longitude

    print(f"  Failed to geocode (not found): {address}")
    return None, None

print("\n--- Starting Geocoding Process (this may take a few moments) ---")

# Work on a copy so the raw hospital table stays unchanged.
df_additional_hospitals_geocoded = df_additional_hospitals.copy()

# Build the query string "<street>, <city>, <state> <zip>". The column is
# rebuilt even if the file already contains one, so it always has this form.
df_additional_hospitals_geocoded['full_address'] = (
    df_additional_hospitals_geocoded['Street Address'] + ', '
    + df_additional_hospitals_geocoded['City'] + ', '
    + df_additional_hospitals_geocoded['State'] + ' '
    + df_additional_hospitals_geocoded['Zip Code'].astype(str)
)

# Each lookup returns a (latitude, longitude) pair. Naming the columns
# explicitly keeps the frame two columns wide even when the dataset is
# empty, where the assignment below would otherwise raise a ValueError.
hospital_coordinates = pd.DataFrame(
    df_additional_hospitals_geocoded['full_address'].apply(geocode_location).tolist(),
    index=df_additional_hospitals_geocoded.index,
    columns=['latitude', 'longitude'],
)
df_additional_hospitals_geocoded[['latitude', 'longitude']] = hospital_coordinates

print("\n\n--- Geocoding Complete! ---")
print("\n✅ Final Geocoded Additional Hospital Data:")
# Display the first few rows with the new columns
print(df_additional_hospitals_geocoded[['Hospital Name', 'full_address', 'latitude', 'longitude']].head())

# Export the final DataFrame to a new CSV file (written to the working
# directory, without the index column).
output_filename = "additional_hospital_locations_geocoded.csv"
df_additional_hospitals_geocoded.to_csv(output_filename, index=False)

print(f"\n✅ DataFrame successfully saved to '{output_filename}'")


--- Loading Additional Hospital Dataset ---
✅ Loaded additional_hospital_locations.csv
Original Data:
      CCN                                Hospital Name      Street Address  \
0  NEW001                       Karmanos Cancer Center      4100 John R St   
1  NEW002                         Kresge Eye Institute      4717 John R St   
2  NEW003  Select Specialty Hospital-Northwest Detroit     6071 W Outer Dr   
3  NEW004                 Henry Ford St. John Hospital     22101 Moross Rd   
4  NEW005          Select Specialty Hospital-Downriver  10000 Telegraph Rd   

      City State  Zip Code  Telephone Number Hospital Type  \
0  DETROIT    MI     48201               NaN   Specialized   
1  DETROIT    MI     48201               NaN   Specialized   
2  DETROIT    MI     48235               NaN     Long Term   
3  DETROIT    MI     48236               NaN    Short Term   
4   TAYLOR    MI     48180               NaN     Long Term   

   Effective Date of Certification                     