# TomTom fetcher (notebook)

Use this notebook to geocode a place, fetch TomTom flow samples over a small grid,
create a GeoDataFrame (optional), and save the raw JSON/GeoJSON outputs into `data/tomtom/<region>/`.

Install prerequisites if needed:
```
pip install pandas requests
# optional: pip install geopandas
```

In [16]:
# Import and workspace setup
import sys
from pathlib import Path
import os

# Make the project root importable. The notebook is assumed to live one
# directory below the root (e.g. in notebooks/); adjust if it is placed elsewhere.
project_root = Path.cwd().parent
if not any(entry == str(project_root) for entry in sys.path):
    sys.path.append(str(project_root))

%load_ext autoreload
%autoreload 2

from ndw.tomtom_fetch import geocode_place, fetch_for_region, fetch_flow_grid

# Quick API-key check (either config.py or env var).
# Only reports whether a key source exists; the key value itself is never printed.
print('TOMTOM_API_KEY set in env:', bool(os.environ.get('TOMTOM_API_KEY')))
try:
    # `config` is expected to be importable from the project root added to sys.path.
    from config import api_key  # type: ignore
    print('Found config.api_key (project root)')
except Exception:
    # Any import problem is reported as "not found" so the notebook keeps running.
    print('No config.py found in project root — ensure TOMTOM_API_KEY is set')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
TOMTOM_API_KEY set in env: False
Found config.api_key (project root)


In [17]:
# Resolve the example place name to its bounding box using the project's geocoder.
# `place` is reused further down to name the output directory and files.
place = 'Eindhoven'
bbox = geocode_place(place)

# Echo both values so the run is self-describing.
print('Place:', place)
print('Bounding box (minLon, minLat, maxLon, maxLat):', bbox)


Place: Eindhoven
Bounding box (minLon, minLat, maxLon, maxLat): (5.3567184, 51.4000475, 5.5488528, 51.4970779)


In [18]:
# Fetch a small grid of flow samples for the place geocoded above.
# Keep the grid small (nx * ny requests) to avoid rate limits.
# `place` is reused so the fetched region always matches the name the save
# cell uses for its output directory.
# NOTE(review): `delay` is presumably the pause in seconds between requests — confirm in ndw.tomtom_fetch.
df, summary = fetch_for_region(place, nx=3, ny=3, delay=0.5)
print('Summary (kept in memory):', summary)

# Create a GeoDataFrame from the fetched DataFrame (if geopandas is available).
# `gdf` is set to None on any failure so later cells can test for it.
try:
    import geopandas as gpd
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs="EPSG:4326")
    print('GeoDataFrame created:')
    display(gdf.head())
except Exception as e:
    gdf = None
    print('geopandas not available or failed to construct GeoDataFrame:', e)
    # Still show the fetched samples so the cell has a visible result either way.
    display(df.head())


Summary (kept in memory): {'count_samples': 9, 'mean_speed_kmph': 66.66666666666667, 'min_speed_kmph': 32.0, 'p25_speed_kmph': 41.0, 'p50_speed_kmph': 60.0, 'p75_speed_kmph': 66.0}
GeoDataFrame created:


Unnamed: 0,lat,lon,currentSpeed,freeFlowSpeed,confidence,currentTravelTime,freeFlowTravelTime,geometry
0,51.400047,5.356718,59,59,1,125,125,POINT (5.35672 51.40005)
1,51.448563,5.356718,62,62,1,142,142,POINT (5.35672 51.44856)
2,51.497078,5.356718,122,122,1,107,107,POINT (5.35672 51.49708)
3,51.400047,5.452786,66,66,1,15,15,POINT (5.45279 51.40005)
4,51.448563,5.452786,32,32,1,67,67,POINT (5.45279 51.44856)


In [19]:
# Attach street names to each coordinate (TomTom reverse geocode if key available, else Nominatim)
import time
import requests
import os

def _get_key():
    """Return the TomTom API key, or None when no key is configured.

    A truthy ``api_key`` from an importable ``config`` module wins; otherwise
    the ``TOMTOM_API_KEY`` environment variable is returned (``None`` if unset).
    Any error while importing ``config`` is ignored on purpose so the
    environment variable can act as the fallback.
    """
    configured = None
    try:
        from config import api_key as configured  # type: ignore
    except Exception:
        configured = None
    return configured or os.environ.get('TOMTOM_API_KEY')

def reverse_geocode_tomtom(lat, lon, key):
    """Reverse-geocode one coordinate with the TomTom Search API.

    Args:
        lat: Latitude, interpolated into the request URL.
        lon: Longitude, interpolated into the request URL.
        key: TomTom API key, sent as the ``key`` query parameter.

    Returns:
        A ``(street, freeform)`` tuple taken from the first entry of the
        response's ``addresses`` list. ``street`` is the first truthy value of
        ``streetName``, ``municipalitySubdivision`` and ``freeformAddress``.
        ``(None, None)`` is returned when ``addresses`` is missing or empty.

    Raises:
        Whatever ``requests.get`` / ``raise_for_status`` raise, e.g. on a
        timeout (10 s) or a non-2xx response.
    """
    response = requests.get(
        f"https://api.tomtom.com/search/2/reverseGeocode/{lat},{lon}.json",
        params={'key': key, 'language': 'en-GB'},
        timeout=10,
    )
    response.raise_for_status()

    matches = response.json().get('addresses')
    if not matches:
        return None, None

    # Only the first (top-ranked by the service) match is used.
    address = matches[0].get('address', {})
    freeform = address.get('freeformAddress')
    street = address.get('streetName') or address.get('municipalitySubdivision') or freeform
    return street, freeform

def reverse_geocode_nominatim(lat, lon):
    """Reverse-geocode one coordinate with the public Nominatim service.

    Args:
        lat: Latitude, sent as the ``lat`` query parameter.
        lon: Longitude, sent as the ``lon`` query parameter.

    Returns:
        A ``(road, display_name)`` tuple. ``road`` is the first truthy value
        among the ``road``, ``pedestrian``, ``residential`` and ``footway``
        address fields (the ``footway`` value, possibly ``None``, when none is
        truthy). ``display_name`` is ``None`` when absent from the response.

    Raises:
        Whatever ``requests.get`` / ``raise_for_status`` raise, e.g. on a
        timeout (10 s) or a non-2xx response.
    """
    query = {'lat': lat, 'lon': lon, 'format': 'jsonv2', 'zoom': 18, 'addressdetails': 1}
    response = requests.get(
        'https://nominatim.openstreetmap.org/reverse',
        params=query,
        headers={'User-Agent': 'ndw-tomtom-fetch/1.0 (+https://example.org)'},
        timeout=10,
    )
    response.raise_for_status()

    payload = response.json()
    address = payload.get('address', {})

    # Walk the candidate fields in priority order and stop at the first truthy one.
    road = None
    for field in ('road', 'pedestrian', 'residential', 'footway'):
        road = address.get(field)
        if road:
            break
    return road, payload.get('display_name')

# Reverse-geocode every sample coordinate and attach the results as columns.
# TomTom is used when an API key is available, otherwise Nominatim.
# Lookups are best-effort: a failed request leaves (None, None) for that row
# instead of aborting the cell, but failures are counted and reported below
# so an empty street column is never silent.
key = _get_key()
cache = {}       # (lat, lon) rounded to 5 decimals -> (street, address)
streets = []
addresses = []
failures = []    # (lat, lon, exception type name) for lookups that raised
df = df.reset_index(drop=True)  # positional index so the new columns line up
total = len(df)
for i, row in df.iterrows():
    lat = float(row['lat'])
    lon = float(row['lon'])
    keycoord = (round(lat, 5), round(lon, 5))
    if keycoord not in cache:
        try:
            if key:
                cache[keycoord] = reverse_geocode_tomtom(lat, lon, key)
                time.sleep(0.2)  # short pause between TomTom requests
            else:
                cache[keycoord] = reverse_geocode_nominatim(lat, lon)
                time.sleep(1.0)  # longer pause for the shared public Nominatim service
        except Exception as exc:
            # Keep only the exception type: request errors can embed the full
            # URL, including the `key` query parameter, and must not end up
            # in notebook output.
            failures.append((lat, lon, type(exc).__name__))
            cache[keycoord] = (None, None)
    s, a = cache[keycoord]
    streets.append(s)
    addresses.append(a)
    if (i+1) % 10 == 0 or (i+1) == total:
        print(f'Processed {i+1}/{total} (lat={lat:.5f}, lon={lon:.5f})')

if failures:
    kinds = sorted({name for _, _, name in failures})
    print(f'Warning: {len(failures)} reverse-geocode lookup(s) failed '
          f'({", ".join(kinds)}); street/address left empty for those coordinates')

df['street'] = streets
df['address'] = addresses
# Mirror the new columns onto the GeoDataFrame when an earlier cell created one.
if 'gdf' in globals() and gdf is not None:
    gdf = gdf.reset_index(drop=True)
    gdf['street'] = streets
    gdf['address'] = addresses

print('Sample with street names:')
display(df[['lat','lon','street','address']].head())


Processed 9/9 (lat=51.49708, lon=5.54885)
Sample with street names:


Unnamed: 0,lat,lon,street,address
0,51.400047,5.356718,Veldhovenseweg,"Veldhovenseweg 3, 5511 KJ Knegsel"
1,51.448563,5.356718,Pullen,"Pullen 11, 5513 NP Wintelre"
2,51.497078,5.356718,Dijkpad,"Dijkpad 1, 5684 LL Best"
3,51.400047,5.452786,Heistraat,"Heistraat 43, 5581 VW Waalre"
4,51.448563,5.452786,Beukenlaan,"Beukenlaan 2A, 5651 CD Eindhoven"


In [20]:
# Save only JSON (and GeoJSON if gdf available) into data/tomtom/<region>/
from pathlib import Path
import datetime, json, re

def slugify(name):
    """Turn a place name into a lowercase, hyphen-separated ASCII slug.

    Accented letters are folded to their base letter (``'Zürich'`` ->
    ``'zurich'``) instead of being replaced by a hyphen. Every remaining run
    of characters outside ``a-z0-9`` collapses to a single ``-``, and leading
    or trailing hyphens are removed.

    Args:
        name: Place name (str).

    Returns:
        The slug, or ``'region'`` when nothing usable remains (empty or
        punctuation-only input).
    """
    import unicodedata  # local import keeps this cell self-contained

    # NFKD splits an accented letter into base letter + combining mark;
    # dropping the marks keeps the base letter. Characters with no such
    # decomposition still become '-' in the substitution below.
    decomposed = unicodedata.normalize('NFKD', name.strip().lower())
    base = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
    # The '+' quantifier already collapses runs, so one substitution suffices.
    slug = re.sub(r'[^a-z0-9]+', '-', base).strip('-')
    return slug or 'region'

# Output layout: <project_root>/data/tomtom/<region_slug>/<region_slug>_<UTC timestamp>.{json,geojson}
project_root = Path.cwd().parent  # adjust if notebook is elsewhere
base_data = project_root.joinpath('data', 'tomtom')
base_data.mkdir(parents=True, exist_ok=True)

region_slug = slugify(place)
out_dir = base_data.joinpath(region_slug)
out_dir.mkdir(parents=True, exist_ok=True)

# A UTC timestamp in the file name keeps successive runs from overwriting each other.
ts = datetime.datetime.now(tz=datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
prefix = f"{region_slug}_{ts}"

# Raw records as a JSON array; non-ASCII characters are written unescaped.
json_path = out_dir / f"{prefix}.json"
df.to_json(json_path, orient='records', force_ascii=False)
print('Saved JSON to:', json_path)

# GeoJSON is written only when an earlier cell produced a GeoDataFrame.
if 'gdf' in globals() and gdf is not None:
    try:
        geojson_path = out_dir / f"{prefix}.geojson"
        gdf.to_file(str(geojson_path), driver='GeoJSON')
        print('Saved GeoJSON to:', geojson_path)
    except Exception as e:
        print('Could not save GeoJSON (geopandas may be missing):', e)


Saved JSON to: /Users/Bruno/Library/CloudStorage/OneDrive-TUEindhoven/IGNITE/data/data/tomtom/eindhoven/eindhoven_20251202T140025Z.json
Saved GeoJSON to: /Users/Bruno/Library/CloudStorage/OneDrive-TUEindhoven/IGNITE/data/data/tomtom/eindhoven/eindhoven_20251202T140025Z.geojson


### Notes
- We now save only JSON (and GeoJSON if available) from the notebook, not CSV or a separate summary file.
- `summary` remains in memory for quick checks. If you want to persist stats, we can write them into the JSON metadata or a small manifest.
- Use `geopandas` to produce GeoJSON; install it via `conda` or `pip` if needed.