<a href="https://colab.research.google.com/github/nxumalo210/GIS_Macy-s_Landfill_Project/blob/main/Geocoding_NYS_Landfill_Addresses.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Add geocoded longitude and latitude features to the NYS Active Landfills CSV file

import os
import numpy as np
import matplotlib as mlp
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
from google.colab import files


In [None]:
# Prompt for a file upload, then load the first uploaded file into a DataFrame.
uploaded = files.upload()

# files.upload() hands back a mapping keyed by file name; take the first key.
first_uploaded_name = list(uploaded)[0]
df = pd.read_csv(first_uploaded_name)

# Quick look at the first rows to confirm the file parsed as expected.
print(df.head())

Saving NYS_Active_Landfills.csv to NYS_Active_Landfills (5).csv
   OBJECTID  Region                              Facility Name  \
0        49       1  110 Sand Company Clean Fill Disposal Site   
1        50       1          Blydenburgh Road Landfill Complex   
2        51       1       Brookhaven Waste Management Facility   
3        52       1          Port Jefferson Village Clean Fill   
4        53       4                         Albany (City) SWMF   

        Location Addrss            City State  ZIP Code  \
0    136 Spagnolli Road        Melville    NY   11747.0   
1  440 Blydenburgh Road       Hauppauge    NY   11787.0   
2   350 Horseblock Road         Yaphank    NY   11719.0   
3    Old Homestead Road  Port Jefferson    NY   11777.0   
4         525 Rapp Road          Albany    NY   12205.0   

                   Facility Owner Activity Description  Activity Number  \
0            Broad Hollow Estates  Long Island Landfill        52LR0351   
1  Islip Resource Recovery Agency 

In [None]:
# Show the exact column headers of the loaded CSV (some carry trailing spaces or typos).
print(df.columns)

Index(['OBJECTID', 'Region', 'Facility Name', 'Location Addrss', 'City',
       'State', 'ZIP Code', 'Facility Owner', 'Activity Description ',
       'Activity Number', 'EAST_COORDINATE', 'NORTH_COORDINATE',
       'Regulatory Status ', 'AUTHORIZATION_NUMBER',
       'AUTHORIZATION_ISSUE_DATE', 'EXPIRATION_DATE', 'Prior Year Reports ',
       'CURRENT_ANNUAL_REPORT', 'x', 'y'],
      dtype='object')


In [None]:
# Cast the ZIP Code column from float to pandas' nullable integer dtype so the
# values print as 11747 rather than 11747.0 while missing entries stay missing.
zip_as_nullable_int = df["ZIP Code"].astype(pd.Int64Dtype())
df["ZIP Code"] = zip_as_nullable_int

In [None]:
# Build a single "Full Address" string per row for the geocoder:
#   "<street>, <city>, <state>, <zip>"
# Missing components are left out instead of being stringified, because
# .astype(str) would otherwise inject the literal text "nan" / "<NA>" into the
# query sent to the geocoding service.
address_columns = ["Location Addrss", "City", "State", "ZIP Code"]

df["Full Address"] = [
    ", ".join(str(part) for part in row if pd.notna(part))
    for row in df[address_columns].itertuples(index=False, name=None)
]


In [None]:
# Give the generic x / y columns descriptive coordinate names.
coordinate_column_names = {
    "x": "Longitude",
    "y": "Latitude",
}
df = df.rename(columns=coordinate_column_names)

# Preview the frame after the rename.
print(df.head())

   OBJECTID  Region                              Facility Name  \
0        49       1  110 Sand Company Clean Fill Disposal Site   
1        50       1          Blydenburgh Road Landfill Complex   
2        51       1       Brookhaven Waste Management Facility   
3        52       1          Port Jefferson Village Clean Fill   
4        53       4                         Albany (City) SWMF   

        Location Addrss            City State  ZIP Code  \
0    136 Spagnolli Road        Melville    NY     11747   
1  440 Blydenburgh Road       Hauppauge    NY     11787   
2   350 Horseblock Road         Yaphank    NY     11719   
3    Old Homestead Road  Port Jefferson    NY     11777   
4         525 Rapp Road          Albany    NY     12205   

                   Facility Owner Activity Description  Activity Number  ...  \
0            Broad Hollow Estates  Long Island Landfill        52LR0351  ...   
1  Islip Resource Recovery Agency  Long Island Landfill            0409  ...   
2       

In [None]:
# Re-list the column headers to confirm the Longitude / Latitude rename and the new "Full Address" column.
print(df.columns)

Index(['OBJECTID', 'Region', 'Facility Name', 'Location Addrss', 'City',
       'State', 'ZIP Code', 'Facility Owner', 'Activity Description ',
       'Activity Number', 'EAST_COORDINATE', 'NORTH_COORDINATE',
       'Regulatory Status ', 'AUTHORIZATION_NUMBER',
       'AUTHORIZATION_ISSUE_DATE', 'EXPIRATION_DATE', 'Prior Year Reports ',
       'CURRENT_ANNUAL_REPORT', 'Longitude', 'Latitude', 'Full Address'],
      dtype='object')


In [None]:
# Blank out both coordinate columns; the geocoded coordinates are written into
# them afterwards.
for coordinate_column in ("Longitude", "Latitude"):
  df[coordinate_column] = ''

In [None]:
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
from geopy.extra.rate_limiter import RateLimiter
import time
import numpy as np

# Geocode every "Full Address" with Nominatim and store the coordinates in the
# Latitude / Longitude columns. Addresses that cannot be resolved get NaN.
geolocator = Nominatim(user_agent = "nys_landfill_geocoder", timeout = 10)
# RateLimiter already spaces successive requests at least 1.5 s apart, so no
# extra time.sleep() is needed between lookups.
# NOTE(review): RateLimiter's defaults may retry and swallow service errors
# (returning None), in which case those failures are reported through the
# "not found" branch below - confirm against the geopy documentation.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds = 1.5)

# address -> (latitude, longitude); ensures a repeated address is only queried once.
coordinates_by_address = {}

latitudes = []
longitudes = []

for address in df["Full Address"]:
  if address not in coordinates_by_address:
    # Default for every failure path: no coordinates.
    coordinates = (np.nan, np.nan)
    try:
      location = geocode(address)
      if location:
        print(f"Coordinates for '{address}': '{location.latitude}','{location.longitude}'")
        coordinates = (location.latitude, location.longitude)
      else:
        print(f"Address for '{address}' not found.")
    except GeocoderTimedOut:
      # No retry is attempted here, so the message must not promise one.
      print(f"Geocoding service timed out for: {address}. Skipping.")
    except GeocoderServiceError as e:
      print(f"Geocoding service error for: {address}. Error: {e}")
    except Exception as e:
      # Keep the run going for the remaining addresses, but surface the error.
      print(f"An unexpected error occurred for: {address}. Error: {e}")
    coordinates_by_address[address] = coordinates

  latitude, longitude = coordinates_by_address[address]
  latitudes.append(latitude)
  longitudes.append(longitude)

# One entry was appended per row, so the lists line up with the DataFrame rows.
df['Longitude'] = longitudes
df['Latitude'] = latitudes

print(df.head())


Address for '136 Spagnolli Road, Melville, NY, 11747' not found.
Coordinates for '440 Blydenburgh Road, Hauppauge, NY, 11787': '40.8140901','-73.1808861'
Address for '350 Horseblock Road, Yaphank, NY, 11719' not found.
Coordinates for 'Old Homestead Road, Port Jefferson, NY, 11777': '40.9581121','-73.0532991'
Coordinates for '525 Rapp Road, Albany, NY, 12205': '42.7033613','-73.8496348'
Coordinates for '4 Arrowhead Lane, Colonie, NY, 12047': '42.8018552','-73.7307096'
Address for '1916 State Route 9W, Coeymans, NY, 12143' not found.
Coordinates for '41155 State Highway 10, Delhi, NY, 13753': '42.2570758','-74.9582601'
Address for '32230 NYS Route 10, Walton, NY, 13856' not found.
Address for '32230 NYS Route 10, Walton, NY, 13856' not found.
Coordinates for '209 Partition Street Extension, Rensselaer, NY, 12144': '42.6442237','-73.729143'
Coordinates for '286 Sand Road, Morrisonville, NY, 12962': '44.690873','-73.5965655'
Coordinates for '74 Recycle Circle Lane, Lake Placid, NY, 12946'

In [None]:
# Destination for the geocoded table (presumably the Colab session's /content directory - confirm when running elsewhere).
output_file_path = '/content/NYS_Landfills_with_Coordinates.csv'

# index = False keeps the DataFrame's row index out of the written CSV.
df.to_csv(output_file_path, index = False)

# Confirm where the file was written.
print(f"New Landfill File with Coordinates successfully saved to '{output_file_path}'")

New Landfill File with Coordinates successfully saved to '/content/NYS_Landfills_with_Coordinates.csv'
