In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
import time
import openpyxl

In [2]:
# Name of the input Excel workbook.
INPUT_FILE = "FinalDataset.xlsx"
# Path of the CSV file the geocoded dataset is written to.
OUTPUT_FILE = "FinalDataset_Geocoded.csv"

In [3]:
# Shared Nominatim client used by geocode_place(); it identifies this
# application to the service through a custom user_agent string.
geolocator = Nominatim(user_agent="real_estate_analysis_app")

# --- Geocoding Function ---
def geocode_place(place):
    """
    Attempts to geocode a single place name with the module-level geolocator.

    Missing or blank inputs (None, NaN, empty/whitespace-only strings, and the
    placeholder strings "nan", "none" and "<na>" that result from stringifying
    missing values) are skipped without contacting the service and without
    sleeping.

    Every real lookup is preceded by a 1-second delay to respect Nominatim's
    rate limits. If the first attempt times out, it is retried once after
    5 seconds with a longer timeout.

    Args:
        place: Place name to look up. Non-string values are converted with
            str() and surrounding whitespace is stripped.

    Returns:
        tuple: (latitude, longitude) on success, otherwise (None, None).
        Errors are reported with print() and never raised to the caller.
    """
    # None and float NaN (NaN is the only value that is not equal to itself).
    if place is None or (isinstance(place, float) and place != place):
        print("Skipping missing place value.")
        return (None, None)

    # Clean the input string by stripping leading/trailing whitespace
    place = str(place).strip()

    # Blank names and stringified missing values would only waste a
    # rate-limited request and could match an unrelated location.
    if place.lower() in ("", "nan", "none", "<na>"):
        print("Skipping missing place value.")
        return (None, None)

    # Introduce a delay to avoid hitting rate limits
    time.sleep(1)

    try:
        location = geolocator.geocode(place, timeout=10)
    except GeocoderTimedOut:
        print(f"Geocoding timed out for: {place}. Retrying after 5 seconds...")
        time.sleep(5)
        try:
            location = geolocator.geocode(place, timeout=20)
        except Exception as e:
            # Best effort: a failed retry is reported, not raised.
            print(f"Retry failed for {place}: {e}")
            return (None, None)
    except GeocoderServiceError as e:
        print(f"Service error for {place}: {e}")
        return (None, None)
    except Exception as e:
        print(f"An unexpected error occurred for {place}: {e}")
        return (None, None)

    # Shared reporting for both the first attempt and the retry.
    if location:
        print(f"Successfully geocoded: {place} -> ({location.latitude}, {location.longitude})")
        return (location.latitude, location.longitude)

    print(f"Could not find coordinates for: {place}")
    return (None, None)

In [4]:
import os

# Directory that holds the input workbook. The default is the original
# author's local path; set the PROPERTY_TAX_DIR environment variable to run
# the notebook on another machine without editing this cell.
PROJECT_DIR = os.environ.get(
    'PROPERTY_TAX_DIR', '/Users/nidamaryam/Desktop/Projects/PropertyTax'
)

# Only switch directories when the target exists, so a fresh run elsewhere
# does not abort here with FileNotFoundError.
if os.path.isdir(PROJECT_DIR):
    os.chdir(PROJECT_DIR)
else:
    print(f"Warning: project directory '{PROJECT_DIR}' not found; staying in '{os.getcwd()}'.")
os.getcwd()

'/Users/nidamaryam/Desktop/Projects/PropertyTax'

In [5]:
# --- Main Logic ---
def run_geocoding(input_file=None, output_file=None):
    """Loads data, geocodes places, and saves the new DataFrame.

    Each distinct, non-missing value of the 'Place' column is geocoded once
    with geocode_place(); the coordinates are then joined back onto every row
    as 'Latitude' and 'Longitude' columns and the result is written as CSV.
    Rows whose 'Place' is missing or blank keep empty coordinates.

    Args:
        input_file: Excel workbook to read. Defaults to INPUT_FILE.
        output_file: CSV path to write. Defaults to OUTPUT_FILE.

    Returns:
        None. Progress and errors are reported with print(); exceptions are
        not propagated to the caller.
    """
    # Resolve defaults at call time so edits to the config cell are honoured.
    input_file = INPUT_FILE if input_file is None else input_file
    output_file = OUTPUT_FILE if output_file is None else output_file

    try:
        # 1. Load the dataset
        df = pd.read_excel(input_file)
        print(f"Original data loaded successfully. Shape: {df.shape}")

        if 'Place' not in df.columns:
            print(f"Error: Input file '{input_file}' has no 'Place' column.")
            return

        # 2. Clean 'Place' and identify unique locations. Missing and blank
        #    values stay missing instead of becoming the string "nan".
        raw_places = df['Place']
        place_clean = raw_places.astype(str).str.strip()
        place_clean = place_clean.where(raw_places.notna() & (place_clean != ''))
        df = df.assign(Place_Clean=place_clean)
        unique_places = place_clean.dropna().unique()
        print(f"Found {len(unique_places)} unique places to geocode.")

        # 3-5. Geocode each unique place once (avoids redundant API calls).
        #      Explicit column names keep this valid when there are no places.
        geo_rows = [(place, *geocode_place(place)) for place in unique_places]
        geo_df = pd.DataFrame(geo_rows, columns=['Place_Clean', 'Latitude', 'Longitude'])

        # 6. Merge coordinates back; geo_df keys are unique, so the left join
        #    preserves the original row count.
        final_df = pd.merge(df, geo_df, on='Place_Clean', how='left')

        # 7. Drop the temporary clean column
        final_df = final_df.drop(columns=['Place_Clean'])

        # 8. Save the final geocoded dataset
        final_df.to_csv(output_file, index=False)
        print("\n" + "="*50)
        print(f"Geocoding complete! The new file is saved as '{output_file}'.")
        print("Final DataFrame head with coordinates:")
        print(final_df[['Place', 'Latitude', 'Longitude']].head())
        print("="*50)

    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found. Please ensure the file is in the correct directory.")
    except Exception as e:
        print(f"An error occurred during the main process: {e}")

In [6]:
# Run the full geocoding pipeline when this code executes as the main module.
if __name__ == "__main__":
    run_geocoding()

Original data loaded successfully. Shape: (2734, 4)
Found 316 unique places to geocode.
Successfully geocoded: Devanahalli, Bangalore -> (13.2483502, 77.7134377)
Successfully geocoded: Vidyaranyapura, Bangalore -> (13.0766407, 77.5577315)
Successfully geocoded: Sarjapur, Bangalore -> (12.9116225, 77.6388622)
Successfully geocoded: Whitefield, Bangalore -> (12.9963995, 77.7614229)
Successfully geocoded: Rajarajeshwari Nagar, Bangalore -> (12.9274413, 77.5155224)
Successfully geocoded: Yelahanka, Bangalore -> (13.1006982, 77.5963454)
Successfully geocoded: Kanakapura Road, Bangalore -> (12.8848574, 77.5526906)
Successfully geocoded: Sarjapur Road, Bangalore -> (12.9243414, 77.6448784)
Successfully geocoded: Marathahalli, Bangalore -> (12.9552572, 77.6984163)
Successfully geocoded: Soukya Road, Whitefield, Bangalore -> (12.9943352, 77.7963018)
Successfully geocoded: Varthur, Bangalore -> (12.9406508, 77.746988)
Successfully geocoded: Hoskote, Bangalore -> (13.0318443, 77.7618779)
Successf