In [3]:
from pathlib import Path
current_dir = Path.cwd()
parent_dir = current_dir.parent
school_data = parent_dir/'2_data_cleaning'/'cleaned_csv'/'Combined_schools_final.csv'
school_data_with_coords = parent_dir/'2_data_cleaning'/'cleaned_csv'/'Combined_schools_with_coords.csv'

In [None]:
import pandas as pd
import requests
from tqdm import tqdm

def get_coordinates_nominatim(school_name):
    """
    Fetch latitude and longitude for a given school name using Nominatim Geocoding API.

    Parameters:
        school_name (str): Name of the school to geocode.

    Returns:
        tuple: Latitude and Longitude as (lat, long). Returns (None, None) if an error occurs.
    """
    base_url = "https://nominatim.openstreetmap.org/search"
    params = {
        "q": school_name,
        "format": "json",
        "addressdetails": 1,
        "limit": 1
    }

    try:
        response = requests.get(base_url, params=params, timeout=10, headers={"User-Agent": "Mozilla/5.0"})
        response.raise_for_status()
        data = response.json()
        if data:
            return float(data[0]["lat"]), float(data[0]["lon"])
    except Exception as e:
        print(f"Error fetching coordinates for '{school_name}': {e}")
    return None, None

def add_coordinates_to_df_nominatim(input_csv, output_csv):
    """
    Add latitude and longitude columns to a DataFrame based on school names using Nominatim.

    Parameters:
        input_csv (str): Path to the input CSV file with a column named 'school_name'.
        output_csv (str): Path to save the updated CSV file.

    Returns:
        pd.DataFrame: The updated DataFrame with latitude and longitude columns added.
    """
    # Load the input CSV
    df = pd.read_csv(input_csv)

    # Add 'lat' and 'long' columns
    print("Fetching coordinates...")
    tqdm.pandas()  # Enable progress bar
    df[['lat', 'long']] = df['School Name'].progress_apply(
        lambda name: pd.Series(get_coordinates_nominatim(name))
    )

    # Save the updated DataFrame to CSV
    df.to_csv(output_csv, index=False)
    print(f"Updated DataFrame saved to {output_csv}")

    return df

# Example usage
input_csv = school_data
output_csv = school_data_with_coords

updated_df = add_coordinates_to_df_nominatim(input_csv, output_csv)
print(updated_df.head())


Fetching coordinates...


KeyError: 'school_name'