## Get Data

### Download Railway Dataset

In [2]:
!pip install -r requirements.txt

Collecting requests (from -r requirements.txt (line 1))
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting tqdm (from -r requirements.txt (line 2))
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting charset_normalizer<4,>=2 (from requests->-r requirements.txt (line 1))
  Using cached charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl.metadata (38 kB)
Collecting idna<4,>=2.5 (from requests->-r requirements.txt (line 1))
  Using cached idna-3.11-py3-none-any.whl.metadata (8.4 kB)
Collecting urllib3<3,>=1.21.1 (from requests->-r requirements.txt (line 1))
  Using cached urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests->-r requirements.txt (line 1))
  Downloading certifi-2025.11.12-py3-none-any.whl.metadata (2.5 kB)
Using cached requests-2.32.5-py3-none-any.whl (64 kB)
Using cached charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl (107 kB)
Using cached idna-3.11-py3-none-any.whl (71 kB)
Using cache

In [None]:
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import requests
from tqdm import tqdm

# Public Overpass API endpoint that every tile query is POSTed to.
OVERPASS_URL = "https://overpass-api.de/api/interpreter"

# Directory receiving one JSON file per successfully downloaded tile.
OUTPUT_DIR = Path("stations_world")
OUTPUT_DIR.mkdir(exist_ok=True)

# Grid configuration: the globe is split into ROWS x COLS equal-angle tiles
# (1.8 x 1.8 degrees with the values below).
ROWS = 100
COLS = 200

# Number of concurrent download threads. Keep this small: the endpoint is a
# shared public server, and flooding it with hundreds of parallel requests
# mostly produces rejected or timed-out responses (HTTP 429/504).
# NOTE(review): the exact per-client limit is set by the server -- confirm the
# current value at https://overpass-api.de/api/status before raising this.
WORKERS = 4

def make_query(s, w, n, e):
    """Build the Overpass QL query text for one bounding box.

    The query selects railway stations (nodes, ways and relations) and bus
    stations (nodes and ways) inside the box, and asks for JSON output.

    Args:
        s: southern latitude of the box.
        w: western longitude of the box.
        n: northern latitude of the box.
        e: eastern longitude of the box.

    Returns:
        str: the query text, ready to be sent as the request body.
    """
    # Every selector is restricted to the same (south,west,north,east) filter.
    bbox = f"({s},{w},{n},{e})"

    pieces = [
        "",
        "    [out:json][timeout:180];",
        "",
        "    (",
        f'      node["railway"="station"]{bbox};',
        f'      way["railway"="station"]{bbox};',
        f'      relation["railway"="station"]{bbox};',
        "",
        f'      node["amenity"="bus_station"]{bbox};',
        f'      way["amenity"="bus_station"]{bbox};',
        "    );",
        "",
        "    out body;",
        "    >;",
        "    out skel qt;",
        "    ",
    ]
    return "\n".join(pieces)

def download_tile(args, max_attempts=3, timeout=(10, 200)):
    """Download one tile from the Overpass API and save it as a JSON file.

    Args:
        args: tuple ``(tile_id, south, west, north, east)`` describing the tile.
        max_attempts: how many times the request is tried before giving up.
        timeout: ``(connect, read)`` timeout in seconds passed to ``requests``.
            The read timeout is a little above the 180 s server-side timeout
            requested in the query, so the server gets the chance to answer.

    Returns:
        bool: True if the tile was written to
        ``OUTPUT_DIR / stations_tile_<tile_id>.json``, False otherwise.
        Network errors are reported as False rather than raised.
    """
    tile_id, south, west, north, east = args
    query = make_query(south, west, north, east)

    # Statuses worth retrying: rate limiting and gateway/overload errors.
    retryable_statuses = (429, 502, 503, 504)

    for attempt in range(max_attempts):
        try:
            # Without a timeout a stalled connection would block this worker
            # thread forever.
            resp = requests.post(OVERPASS_URL, data=query, timeout=timeout)
        except requests.RequestException:
            resp = None  # connection error / timeout: treated as transient

        if resp is not None and resp.status_code == 200:
            out_file = OUTPUT_DIR / f"stations_tile_{tile_id}.json"
            with open(out_file, "w", encoding="utf-8") as f:
                f.write(resp.text)
            return True

        if resp is not None and resp.status_code not in retryable_statuses:
            # Permanent failure (e.g. a malformed query): retrying cannot help.
            return False

        if attempt + 1 < max_attempts:
            # Exponential backoff (5 s, 10 s, ...) before the next attempt.
            time.sleep(5 * 2 ** attempt)

    return False

def main():
    """Download every tile of the ROWS x COLS world grid in parallel.

    The globe (latitude -90..90, longitude -180..180) is divided into equal
    tiles that are numbered row by row starting at the south-west corner
    (``tile_id = row * COLS + col``). Each tile is fetched by
    ``download_tile`` on a pool of ``WORKERS`` threads.

    Returns:
        list: sorted ids of the tiles that could not be downloaded (empty when
        every tile succeeded). A summary line is printed when any tile failed.
    """
    lat_step = 180 / ROWS
    lon_step = 360 / COLS

    tasks = []
    for row in range(ROWS):
        south = -90 + row * lat_step
        north = -90 + (row + 1) * lat_step
        for col in range(COLS):
            west = -180 + col * lon_step
            east = -180 + (col + 1) * lon_step
            tasks.append((row * COLS + col, south, west, north, east))

    failed = []
    with ThreadPoolExecutor(max_workers=WORKERS) as executor:
        # Remember which tile each future belongs to so failures can be named.
        futures = {executor.submit(download_tile, task): task[0] for task in tasks}

        for future in tqdm(as_completed(futures), total=len(futures), desc="Downloading world tiles"):
            tile_id = futures[future]
            try:
                ok = future.result()
            except Exception:
                # An exception inside a worker would otherwise vanish with the
                # future; count the tile as failed instead.
                ok = False
            if not ok:
                failed.append(tile_id)

    failed.sort()
    if failed:
        print(f"{len(failed)} of {len(tasks)} tiles failed; first ids: {failed[:10]}")
    return failed

# Start the download when this cell (or the file, as a script) is executed;
# the guard keeps an import of this code from triggering the download.
if __name__ == "__main__":
    main()


Downloading world tiles:   0%|          | 0/20000 [00:00<?, ?it/s]

Tile 0: -90.0,-180.0,-88.2,-178.2
Saved stations_world\stations_tile_0.json


Downloading world tiles:   0%|          | 1/20000 [00:07<40:31:54,  7.30s/it]

Tile 1: -90.0,-178.2,-88.2,-176.4
Saved stations_world\stations_tile_1.json


Downloading world tiles:   0%|          | 2/20000 [00:12<34:46:17,  6.26s/it]

Tile 2: -90.0,-176.4,-88.2,-174.6
Error tile 2: HTTP 504


### Download OpenAddresses Dataset

1. Open https://batch.openaddresses.io/data#map=0/0/0
2. Create an account or log in
3. Download the "Global" file