## Use Google and Geopy to add Lat and Long

In [None]:
#!pip install googlemaps

In [None]:
#!pip install geopy

In [1]:
import googlemaps
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import time
import pandas as pd

In [2]:
# Initialize Google Maps client and Nominatim geolocator
gmaps = googlemaps.Client(key='YOUR_KEY')
geolocator = Nominatim(user_agent="home_price_model")

In [9]:
# Load your CSV file
data_path = 'csv_File_Path'
data = pd.read_csv(data_path)
#data = data.head(20)

In [10]:
# Counter for tracking progress
global_counter = 0

# Define the geocoding function with fallback
def geocode_address(row):
    global global_counter  # Reference the global counter
    
    # Check if ZIP Code is NaN and handle it
    zip_code = str(int(row['ZIP Code'])) if pd.notnull(row['ZIP Code']) else ''
    
    # Create the full address string
    full_address = f"{row['street']}, {row['citi']}, {zip_code}"
    
    # Attempt Google Maps geocoding
    try:
        geocode_result = gmaps.geocode(full_address)
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            result = (location['lat'], location['lng'])
        else:
            result = (None, None)
    except Exception as e:
        print(f"Google Maps error for address {full_address}: {e}")
        result = None

    # If Google Maps fails, attempt Nominatim geocoding
    if result is None:
        try:
            address_components = [
                row['street'],
                row['citi'],
                zip_code  # Use the processed zip code here
            ]
            fallback_address = ', '.join([comp for comp in address_components if comp])
            
            location = geolocator.geocode(fallback_address)
            if location:
                result = (location.latitude, location.longitude)
            else:
                result = (None, None)
        except GeocoderTimedOut:
            time.sleep(1)
            return geocode_address(row)  # Retry on timeout
        except Exception as e:
            print(f"Nominatim error for address {full_address}: {e}")
            result = (None, None)
    
    # Update the global counter and print progress every 500 records
    global_counter += 1
    if global_counter % 500 == 0:
        print(f"{global_counter} records updated.")
    
    return result

# Apply the geocoding function
data[['latitude', 'longitude']] = data.apply(geocode_address, axis=1, result_type='expand')

500 records updated.
1000 records updated.
1500 records updated.
2000 records updated.
2500 records updated.
3000 records updated.
3500 records updated.
4000 records updated.
4500 records updated.
5000 records updated.
5500 records updated.
6000 records updated.
6500 records updated.
7000 records updated.
7500 records updated.
8000 records updated.
8500 records updated.
9000 records updated.
9500 records updated.
10000 records updated.
10500 records updated.
11000 records updated.
11500 records updated.
12000 records updated.
12500 records updated.
13000 records updated.
13500 records updated.
14000 records updated.
14500 records updated.
15000 records updated.


In [12]:
# Count records with nulls in either latitude or longitude
null_count = data['latitude'].isnull().sum() + data['longitude'].isnull().sum()

# Alternatively, count rows where both latitude and longitude are null
both_null_count = data[['latitude', 'longitude']].isnull().all(axis=1).sum()

print(f"Records with null latitude or longitude: {null_count}")
print(f"Records with both latitude and longitude null: {both_null_count}")


Records with null latitude or longitude: 4
Records with both latitude and longitude null: 2


In [13]:
csv_file_path = 'Your_Csv_path)
data.to_csv(csv_file_path, index=False)