In [1]:
import requests
import pandas as pd
import time
import random

In [2]:
def get_coordinates_from_osm(province):
    
    # Construct the URL for OSM Nominatim
    base_url = "https://nominatim.openstreetmap.org/search"
    
    # Parameters for the search
    params = {
        'q': f"{province}, Italia",
        'format': 'json',
        'limit': 1
    }
    
    # Headers to identify our application
    headers = {
        'User-Agent': 'ProvinceMapVisualization/1.0 (md3934@columbia.edu)',
        'Accept-Language': 'it-IT,it'
    }
    
    try:
        # Add random delay between 1-2 seconds
        time.sleep(1 + random.random())
        
        response = requests.get(base_url, params=params, headers=headers)
        
        if response.status_code == 200:
            data = response.json()
            if data:
                return float(data[0]['lat']), float(data[0]['lon'])
        return None
    except Exception as e:
        print(f"Error getting coordinates for {province}: {e}")
        return None

In [3]:
df = pd.read_csv('../processed_data/calls_clean.csv')
df.head()

Unnamed: 0,year,week,date,provincia,calls
0,2024,2,2024-01-21,Agrigento,1
1,2024,3,2024-01-28,Agrigento,3
2,2024,4,2024-02-04,Agrigento,4
3,2024,5,2024-02-11,Agrigento,1
4,2024,6,2024-02-18,Agrigento,4


In [4]:
unique_provinces = df['provincia'].unique()

# Create a dictionary to store coordinates
coords_dict = {}

# Get coordinates for each province
print("Getting coordinates for provinces...")
for province in unique_provinces:
    if province not in coords_dict:
        print(f"Processing {province}...")
        coords = get_coordinates_from_osm(province)
        if coords:
            coords_dict[province] = coords
            print(f"Found coordinates for {province}: {coords}")
        else:
            print(f"Could not find coordinates for {province}")

Getting coordinates for provinces...
Processing Agrigento...
Found coordinates for Agrigento: (37.3122991, 13.57465)
Processing Alessandria...
Found coordinates for Alessandria: (44.83495335, 8.745030418605868)
Processing Ancona...
Found coordinates for Ancona: (43.4801189, 13.218727943815642)
Processing Arezzo...
Found coordinates for Arezzo: (43.51714415, 11.763928194732205)
Processing Ascoli Piceno...
Found coordinates for Ascoli Piceno: (42.8834204, 13.539593040686007)
Processing Asti...
Found coordinates for Asti: (44.826012649999996, 8.202686328987273)
Processing Avellino...
Found coordinates for Avellino: (40.9965446, 15.1405690365004)
Processing Bari...
Found coordinates for Bari: (41.1257843, 16.8620293)
Processing Barletta-Andria-Trani...
Found coordinates for Barletta-Andria-Trani: (41.180172, 16.1466408)
Processing Belluno...
Found coordinates for Belluno: (46.2805407, 12.078913722504204)
Processing Benevento...
Found coordinates for Benevento: (41.2476307, 14.7057053631465

In [5]:
# Add coordinates to DataFrame
df['latitude'] = df['provincia'].map(lambda x: coords_dict.get(x)[0] if coords_dict.get(x) else None)
df['longitude'] = df['provincia'].map(lambda x: coords_dict.get(x)[1] if coords_dict.get(x) else None)

df.to_csv('../processed_data/calls_latlon.csv', index=False)

### Some corrections are needed


In [6]:
df = pd.read_csv('../processed_data/calls_latlon.csv')

In [7]:
# Make corrections
corrections = {
    'missing': ('', ''),
    'ForlÃ¬-Cesena': (44.22238317778976, 12.040962409019011),
    "Valle d'Aosta/VallÃ©e d'Aoste": (45.733184284216605, 7.298274149320562),
    "Livorno": (43.54673711579884, 10.312234337127085), #For some reason the provincia of Livorno was being located in the Isola d'Elba
}

In [8]:
# Apply corrections
for province, (lat, lon) in corrections.items():
    mask = df['provincia'] == province
    df.loc[mask, 'latitude'] = lat
    df.loc[mask, 'longitude'] = lon

df.to_csv('../processed_data/calls_latlon.csv', index=False)

# Verify the changes
for province in corrections.keys():
    sample = df[df['provincia'] == province].iloc[0] if len(df[df['provincia'] == province]) > 0 else None
    if sample is not None:
        print(f"{province}: ({sample['latitude']}, {sample['longitude']})")


missing: (, )
Livorno: (43.54673711579884, 10.312234337127085)
