In [None]:
import pandas as pd
import time
import requests
import json

In [None]:
# Source CSV; later cells read its city, country, date and venue columns.
INPUT_CSV = "data/mayhem/mayhem_dates.csv"
# Destination for the same table with the looked-up lat/lon columns added.
OUTPUT_CSV = "data/mayhem/mayhem_dates_geocoded.csv"
# Destination for the GeoJSON FeatureCollection (one Point per row that has coordinates).
OUTPUT_GEOJSON = "data/mayhem/mayhem_dates_points.geojson"

# Sent verbatim as the User-Agent header of every geocoding request.
# NOTE(review): left blank here. Nominatim's usage policy appears to require a
# User-Agent that identifies the application — confirm and fill in before running.
USER_AGENT = ""

In [None]:
def geocode(city, country, timeout=10):
    """Look up the coordinates of a city with the Nominatim search API.

    Parameters
    ----------
    city, country
        Interpolated into the free-form query ``"{city}, {country}"``.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 10).
        Without a timeout ``requests`` waits indefinitely, so a single
        stalled connection would freeze the whole geocoding run.

    Returns
    -------
    tuple
        ``(lat, lon)`` as floats taken from the first search hit, or
        ``(None, None)`` when the search returned nothing or the lookup
        failed. Failures are printed and never raised.
    """
    query = f"{city}, {country}"
    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "q": query,
        "format": "json",
        "limit": 1,  # only the best match is used
    }
    headers = {"User-Agent": USER_AGENT}

    try:
        r = requests.get(url, params=params, headers=headers, timeout=timeout)
        r.raise_for_status()
        data = r.json()
        if not data:
            return None, None
        return float(data[0]["lat"]), float(data[0]["lon"])
    except Exception as e:
        # Deliberately broad: this is called once per row of a long batch,
        # and one bad lookup (network error, HTTP error, odd payload) must
        # not abort the rows that follow. The error is reported, not hidden.
        print("Error geocoding", query, "=>", e)
        return None, None


In [None]:
df = pd.read_csv(INPUT_CSV)

lats = []
lons = []

# Successful lookups keyed by (city, country). A place that appears on
# several rows is requested from the geocoder only once; failed lookups are
# not stored, so they are retried the next time the same place comes up.
coords_cache = {}

print("Starting geocoding… ")
for city, country in zip(df['city'], df['country']):
    key = (city, country)
    cached = coords_cache.get(key)

    if cached is not None:
        lat, lon = cached
    else:
        lat, lon = geocode(city, country)
        if lat is not None and lon is not None:
            coords_cache[key] = (lat, lon)

    lats.append(lat)
    lons.append(lon)

    print(f"{city}, {country} → {lat}, {lon}")

    if cached is None:
        # polite pause, keeps Nominatim happy; only needed after a real request
        time.sleep(1)

df['lat'] = lats
df['lon'] = lons

# save CSV
df.to_csv(OUTPUT_CSV, index=False)
print("Saved:", OUTPUT_CSV)


In [None]:
# Build one GeoJSON Point feature per row that has both coordinates.
features = []
for _, row in df.iterrows():
    if pd.notnull(row['lat']) and pd.notnull(row['lon']):
        # Missing cells are float NaN in pandas; json.dump would write them as
        # a bare NaN token, which is not valid JSON. Emit null instead.
        properties = {
            key: (None if pd.isna(row[key]) else row[key])
            for key in ("date", "city", "country", "venue")
        }
        feat = {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                # GeoJSON coordinate order is [longitude, latitude]
                "coordinates": [float(row['lon']), float(row['lat'])]
            },
            "properties": properties
        }
        features.append(feat)

geojson = {
    "type": "FeatureCollection",
    "features": features
}

with open(OUTPUT_GEOJSON, "w", encoding="utf8") as f:
    json.dump(geojson, f, indent=2)

print("Saved:", OUTPUT_GEOJSON)
print("Geocoding done. Slay.")
