In [None]:
import requests
import pandas as pd
import time
import random

In [None]:
def get_coordinates_from_osm(province, timeout=10):
    """Look up the latitude/longitude of an Italian province via OSM Nominatim.

    Sends one search for "<province>, Italia" and uses the first result.
    Every call that reaches the network first sleeps for 1-2 seconds so that
    consecutive lookups are spaced out.

    Args:
        province: Province name to geocode.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A ``(latitude, longitude)`` tuple of floats, or ``None`` when the
        input is not a non-blank string, the request fails or times out, the
        server answers with a non-200 status, or the search has no results.
    """
    # Missing values (None/NaN) and blank names cannot be geocoded; skip them
    # without spending a request (NaN would otherwise be sent as "nan").
    if not isinstance(province, str) or not province.strip():
        return None

    # Construct the URL for OSM Nominatim
    base_url = "https://nominatim.openstreetmap.org/search"

    # Parameters for the search
    params = {
        'q': f"{province}, Italia",
        'format': 'json',
        'limit': 1
    }

    # Headers to identify our application
    headers = {
        'User-Agent': 'ProvinceMapVisualization/1.0 (md3934@columbia.edu)',
        'Accept-Language': 'it-IT,it'
    }

    try:
        # Add random delay between 1-2 seconds
        time.sleep(1 + random.random())

        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(base_url, params=params, headers=headers, timeout=timeout)

        if response.status_code != 200:
            print(f"Error getting coordinates for {province}: HTTP {response.status_code}")
            return None

        data = response.json()
        if not data:
            return None

        best_match = data[0]
        return float(best_match['lat']), float(best_match['lon'])
    except Exception as e:
        # Deliberately best-effort: any failure (network, timeout, malformed
        # payload) is reported and turned into "no coordinates".
        print(f"Error getting coordinates for {province}: {e}")
        return None

In [None]:
# Load the cleaned calls dataset and preview its first rows.
df = pd.read_csv(filepath_or_buffer='../data/calls_clean.csv')
df.head()

In [None]:
# Each distinct province is geocoded once. Missing values are dropped first so
# that NaN is never sent to the geocoder as the literal text "nan".
unique_provinces = df['provincia'].dropna().unique()

# Create a dictionary to store coordinates: province name -> (lat, lon)
coords_dict = {}

# Get coordinates for each province. The values are already unique, so no
# "already processed" check is needed.
print("Getting coordinates for provinces...")
for province in unique_provinces:
    print(f"Processing {province}...")
    coords = get_coordinates_from_osm(province)
    if coords:
        coords_dict[province] = coords
        print(f"Found coordinates for {province}: {coords}")
    else:
        # Provinces without a result are left out of coords_dict.
        print(f"Could not find coordinates for {province}")

In [None]:
# Add coordinates to DataFrame
# A province with no entry in coords_dict falls back to (None, None), so both
# of its coordinate values come out as None.
df['latitude'] = df['provincia'].map(lambda name: (coords_dict.get(name) or (None, None))[0])
df['longitude'] = df['provincia'].map(lambda name: (coords_dict.get(name) or (None, None))[1])

# Save the geocoded table next to the input data.
df.to_csv('../data/calls_latlon.csv', index=False)

### Some corrections are needed


In [None]:
# Reload the geocoded calls from disk; the manual corrections below start from this file.
df = pd.read_csv(filepath_or_buffer='../data/calls_latlon.csv')

In [None]:
# Make corrections
# Manual (latitude, longitude) overrides, keyed by the exact `provincia` value
# as it appears in the data.
corrections = {
    # Blank out the coordinates of the 'missing' placeholder. NaN (instead of
    # '') keeps latitude/longitude numeric; pandas still writes it to CSV as
    # an empty field.
    'missing': (float('nan'), float('nan')),
    'ForlÃ¬-Cesena': (44.22238317778976, 12.040962409019011),
    "Valle d'Aosta/VallÃ©e d'Aoste": (45.733184284216605, 7.298274149320562),
    # For some reason the provincia of Livorno was being located in the Isola d'Elba
    "Livorno": (43.54673711579884, 10.312234337127085),
}

In [None]:
# Apply corrections
# For every province with a manual override, overwrite the coordinates of all
# of its rows (latitude first, then longitude).
for province, coords in corrections.items():
    is_province = df['provincia'].eq(province)
    for column, value in zip(('latitude', 'longitude'), coords):
        df.loc[is_province, column] = value

# NOTE(review): this writes to the current working directory, whereas the file
# loaded for correction lives under '../data/' — confirm the destination.
df.to_csv('calls_latlon.csv', index=False)

# Verify the changes
# Print the coordinates of the first row of each corrected province that
# actually occurs in the data.
for province in corrections:
    matching_rows = df[df['provincia'] == province]
    sample = matching_rows.iloc[0] if len(matching_rows) > 0 else None
    if sample is None:
        continue
    print(f"{province}: ({sample['latitude']}, {sample['longitude']})")
