In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Photon

import time


In [2]:
errors = []

In [3]:
def geocode_address(address, geolocator):
    """
    Geocodes a single address string to get latitude and longitude.

    Args:
        address (str): The address to geocode.
        geolocator (geopy.geocoders.Photon): The geolocator instance.

    Returns:
        tuple: A tuple containing (latitude, longitude), or (None, None) if not found.
    """
    if pd.isna(address):
        return None, None
    try:
        # We add a small delay to be respectful to the free API server
        time.sleep(0.1) 
        location = geolocator.geocode(address)
        if location:
            print(f"Correct{location.latitude, location.longitude} '{address} ': ", )
            return location.longitude, location.latitude
        else:
            return None, None
    except Exception as e:
        errors.append(address)
        print(f"Error geocoding '{address}': {e}")
        return None, None


In [4]:
def main():
    # --- Configuration ---
    input_filename = '../DATA/mp_new.csv'
    output_filename = '../DATA/mp_new_geolocations.csv'
    address_column_A = 'mf_address'
    address_column_B = 'tl_address'
    address_column_C = 'ha_address'
    # --- 1. Load the data ---
    
    df = pd.read_csv(input_filename)
    print(f"Successfully loaded '{input_filename}'.")

    # --- 2. Initialize the Geocoder ---
    # We use Nominatim, which is a free service based on OpenStreetMap data.
    # A custom user_agent is good practice.
    geolocator = Nominatim(user_agent="my-geocoder-app")

    # Use RateLimiter to avoid overwhelming the service and getting blocked.
    # This limits the geocoding to one address per second.
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
    
    print("Starting geocoding process... This may take a while for large files.")

    # --- 3. Geocode the address columns ---
    # Apply the geocoding function to column 'A'
    df['missing_from_latlong'] = df[address_column_A].apply(
        lambda addr: geocode_address(addr, geolocator)
    )
    print(f"Finished geocoding column '{address_column_A}'.")
    
    # Apply the geocoding function to column 'B'
    df['tl_latlong'] = df[address_column_B].apply(
        lambda addr: geocode_address(addr, geolocator)
    )
    
    df['home_latlong'] = df[address_column_C].apply(
        lambda addr: geocode_address(addr, geolocator)
    )
    print(f"Finished geocoding column '{address_column_B}'.")


    # --- 4. Save the results ---
    df.to_csv(output_filename, index=False)
    print(f"\nGeocoding complete!")
    print(f"Results saved to '{output_filename}'.")
    print("\nPreview of the final DataFrame:")
    print(df.head())


if __name__ == "__main__":
    # Before running, make sure you have the necessary libraries installed:
    # pip install pandas geopy
    main()


Successfully loaded '../DATA/mp_new.csv'.
Starting geocoding process... This may take a while for large files.
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road, Kilsyth, G65 9BE ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.9785333, -4.0673282) '23 Corrie Road Kilsyth Kilsyth G65 9NS ': 
Correct(55.860693, -4.212577) 'Whitehill Secondary School, 

In [5]:
errors

["East Glasgow Children's Home",
 "East Glasgow Children's Home",
 '8 Old School House Ln, Houston, Johnstone PA6 7JB',
 '8 Old School House Ln, Houston, Johnstone PA6 7JB',
 'On Number 30 Lothian Bus West Approach Road Edinburgh',
 "East Glasgow Children's Home",
 "East Glasgow Children's Home",
 '2a Ellis St, Airdrie ML6 6BU',
 "East Glasgow Children's Home",
 "East Glasgow Children's Home",
 "East Glasgow Children's Home",
 "Wishaw Children's Home",
 "East Glasgow Children's Home",
 "East Glasgow Children's Home",
 '30 Glasgow St, Glasgow G12 8JR',
 '5 Main St, Barrhead, Glasgow G78 1RE',
 '8 Old School House Ln, Houston, Johnstone PA6 7JB',
 '7 Irvine Dr, Paisley PA3 3TA',
 "East Glasgow Children's Home",
 "East Glasgow Children's Home"]

In [6]:
df_output_filename = pd.read_csv('../DATA/mp_new_geolocations.csv')
df_output_filename.rename(columns={'A_latlong': 'missing_from_latlong', 'B_latlong': 'tl_latlong'})
df_output_filename.to_csv('../DATA/mp_new_geolocations.csv', index=False)

In [7]:
df_output_filename = pd.read_csv('../DATA/mp_new_geolocations.csv')


In [8]:
df_output_filename

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,reportid,sno,label,misperid,initial_risk_level,current_final_risk_level,nominalpersionid,forenames,...,q_22_explanation,q_23,q_23_explanation,q_24,q_24_explanation,q_25,q_25_explanation,missing_from_latlong,tl_latlong,home_latlong
0,0.0,0.0,1240,,Adult,960,High,High,3671,Abigail,...,,0,,1,,0,,"(-4.0673282, 55.9785333)","(-4.0546078, 55.9791552)","(-4.0673282, 55.9785333)"
1,1.0,1.0,2361,,Child,960,High,Medium,3671,Abigail,...,,0,,0,,0,,"(None, None)","(-4.0813954, 55.9810406)","(-4.0673282, 55.9785333)"
2,2.0,2.0,4265,,Adult,960,Medium,Medium,3671,Abigail,...,,0,,0,,0,,"(-4.0673282, 55.9785333)","(None, None)","(-4.0673282, 55.9785333)"
3,3.0,3.0,5463,,Adult,960,High,Medium,3671,Abigail,...,,0,,1,,0,,"(-4.0673282, 55.9785333)","(None, None)","(-4.0673282, 55.9785333)"
4,4.0,4.0,6531,,Adult,960,High,High,3671,Abigail,...,,0,,1,,0,,"(-4.0673282, 55.9785333)","(None, None)","(-4.0673282, 55.9785333)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,,,38504,,Wanted/Absconder,20039,High,High,78847,Sophia,...,,0,,1,,0,,"(-3.4283068, 56.0820438)","(-3.4111303, 56.0811451)","(-3.4161094, 56.0756984)"
172,,,37688,,Wanted/Absconder,20039,High,High,78847,Sophia,...,,0,,1,,0,,"(-3.4283068, 56.0820438)","(-3.4285522, 56.0752937)","(-3.4161094, 56.0756984)"
173,,,89782,S65432/10R,Wanted/Absconder,38455,High,High,149833,Fraser,...,,0,,1,eiger,0,,"(-3.4135806, 56.4007574)","(None, None)","(-3.4369261, 56.391476)"
174,,,78187,S65432/10R,Adult,38455,High,High,149833,Fraser,...,,0,,1,,0,,"(-3.4135806, 56.4007574)","(-3.4412553, 56.3836772)","(-3.4369261, 56.391476)"
