<a href="https://colab.research.google.com/github/peckert659/course_app/blob/main/swissmobile_map_info.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install selenium webdriver-manager
!apt-get update
!apt install chromium-chromedriver

Collecting selenium
  Downloading selenium-4.34.0-py3-none-any.whl.metadata (7.5 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.30.0 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting python-dotenv (from webdriver-manager)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting outcome (from trio~=0.30.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.34.0-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.4/9.4 MB[0m [31m75.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading webdriver_manager-4.0.2-py2.py3

In [8]:
import re
import time
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

def setup_driver():
    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--window-size=1920,1080")
    return webdriver.Chrome(options=options)

def extract_coordinates(url):
    match = re.search(r"E=(\d+)&N=(\d+)", url)
    if match:
        return int(match.group(1)), int(match.group(2))
    return None, None

def get_canton_district(E, N, retries=2):
    url = "https://api3.geo.admin.ch/rest/services/api/MapServer/identify"
    layers = "ch.swisstopo.swissboundaries3d-kanton-flaeche.fill,ch.swisstopo.swissboundaries3d-bezirk-flaeche.fill"
    params = {
        "geometry": f"{E},{N}",
        "geometryFormat": "geojson",
        "geometryType": "esriGeometryPoint",
        "imageDisplay": "1920,1080,96",
        "mapExtent": f"{E-500},{N-500},{E+500},{N+500}",
        "sr": "2056",
        "lang": "fr",
        "layers": layers,
        "tolerance": 0,
        "returnGeometry": "false"
    }

    for attempt in range(retries + 1):
        try:
            r = requests.get(url, params=params, timeout=10)
            r.raise_for_status()
            results = r.json().get("results", [])
            canton = district = None
            for res in results:
                layer = res.get("layerName", "")
                attrs = res.get("attributes", {})
                if "kanton" in layer and not canton:
                    canton = attrs.get("kantonsname") or attrs.get("name")
                if "bezirk" in layer and not district:
                    district = attrs.get("beznam") or attrs.get("bezname") or attrs.get("name")
            return canton, district
        except requests.exceptions.HTTPError as e:
            if r.status_code == 500 and attempt < retries:
                # print(f"⚠️ Tentative {attempt+1} échouée (erreur 500), nouvelle tentative dans 2 sec…")
                time.sleep(2)
                continue
            print(f"❌ Erreur lors de l'appel à geo.admin.ch: {e}")
            break
        except Exception as e:
            print(f"❌ Exception générale: {e}")
            break
    return None, None

def extract_data(url):
    driver = setup_driver()
    data = {"url": url}

    E, N = extract_coordinates(url)
    if E and N:
        data["lv95_E"] = E
        data["lv95_N"] = N
        canton, district = get_canton_district(E, N)
        if canton: data["canton"] = canton
        if district: data["district"] = district

    try:
        driver.get(url)
        time.sleep(10)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        lines = driver.find_element(By.TAG_NAME, "body").text.split("\n")
        lines = [line.strip().replace("\u202f", " ") for line in lines if line.strip()]

        # print("\n--- Lignes extraites de la page ---")
        # for i, line in enumerate(lines):
        #    print(f"{i:02}: {line}")
        # print("--- Fin des lignes ---\n")

    finally:
        driver.quit()

    profil_idx = next((i for i, l in enumerate(lines) if "profil" in l.lower() and "long" in l.lower()), -1)
    if profil_idx == -1:
        print("❌ Aucun 'Profil en long' trouvé.")
        return data

    # print(f"✅ 'Profil en long' trouvé à la ligne {profil_idx}: {lines[profil_idx]}")

    profile_block = " ".join(lines[max(0, profil_idx - 6):profil_idx])
    # print(f"\n🧩 Bloc profil reconstruit : {profile_block}")

    title_lines = lines[max(0, profil_idx - 9):max(0, profil_idx - 6)]
    title_block = " ".join(title_lines)
    # print(f"🧩 Bloc titre reconstruit : {title_block}\n")

    date_match = re.search(r"(\d{1,2}\.\d{1,2}\.\d{4})", title_block)
    if date_match:
        data["date_creation"] = date_match.group(1)
        title_text = title_block.replace(data["date_creation"], "").replace("Parcours", "").strip()
        data["titre"] = title_text

    patterns = {
        'distance_km': r'(\d+[\.,]?\d*)\s*km',
        'duree': r'(\d+)\s*h\s*(\d+)\s*min',
    }

    # print("🔍 Recherche dans le bloc profil:")
    for key, pattern in patterns.items():
        if key in data:
            continue
        match = re.search(pattern, profile_block.lower())
        if match:
            if key == "duree":
                data["duree_h"] = match.group(1)
                data["duree_min"] = match.group(2)
                # print(f"  ✅ {key} : {match.group(1)}h {match.group(2)}min")
            else:
                value = match.group(1).replace(",", ".")
                data[key] = value
                # print(f"  ✅ {key} : {value}")

    values = re.findall(r"(\d+)\s*m", profile_block)
    if len(values) >= 4:
        data["denivele_montee_m"] = values[0]
        data["denivele_descente_m"] = values[1]
        data["altitude_max_m"] = max(values[-2], values[-1], key=int)
        data["altitude_min_m"] = min(values[-2], values[-1], key=int)
        #print(f"  ✅ Dénivelés : +{values[0]} m, -{values[1]} m")
        # print(f"  ✅ Altitudes : max={data['altitude_max_m']} m, min={data['altitude_min_m']} m")

    return data

def format_results(dct):
    print("\n" + "=" * 60)
    print("📊 RÉSULTATS EXTRAITS")
    print("=" * 60)
    for k, v in dct.items():
        print(f"{k:25s}: {v}")

def main():
    urls = [
        "https://schweizmobil.ch/fr/tour/788280957?layers=wanderland%2CtrackLayer&E=2550025&N=1147496&resolution=2.78&bgLayer=pk&detours=yes&shooting=yes&logos=yes&photos=yes&season=summer",
        "https://schweizmobil.ch/fr/tour/1974623420?layers=wanderland%2CtrackLayer&E=2546918&N=1148822&resolution=9.39&bgLayer=pk&detours=yes&shooting=yes&logos=yes&photos=yes&season=summer",
        "https://schweizmobil.ch/fr/tour/1795873806?layers=wanderland%2CtrackLayer&E=2573143&N=1100393&resolution=12.49&bgLayer=pk&detours=yes&shooting=yes&logos=yes&photos=yes&season=summer",
        "https://schweizmobil.ch/fr/tour/354342071?layers=wanderland%2CtrackLayer&E=2572664&N=1102584&resolution=4.08&bgLayer=pk&detours=yes&shooting=yes&logos=yes&photos=yes&season=summer",
        "https://schweizmobil.ch/fr/tour/1355335766"
    ]
    for url in urls:
        print("\n=======================")
        print(f"Extraction depuis : {url}")
        print("=======================")
        result = extract_data(url)
        format_results(result)

if __name__ == "__main__":
    main()



Extraction depuis : https://schweizmobil.ch/fr/tour/788280957?layers=wanderland%2CtrackLayer&E=2550025&N=1147496&resolution=2.78&bgLayer=pk&detours=yes&shooting=yes&logos=yes&photos=yes&season=summer
❌ Erreur lors de l'appel à geo.admin.ch: 500 Server Error: Internal Server Error for url: https://api3.geo.admin.ch/rest/services/api/MapServer/identify?geometry=2550025%2C1147496&geometryFormat=geojson&geometryType=esriGeometryPoint&imageDisplay=1920%2C1080%2C96&mapExtent=2549525%2C1146996%2C2550525%2C1147996&sr=2056&lang=fr&layers=ch.swisstopo.swissboundaries3d-kanton-flaeche.fill%2Cch.swisstopo.swissboundaries3d-bezirk-flaeche.fill&tolerance=0&returnGeometry=false

📊 RÉSULTATS EXTRAITS
url                      : https://schweizmobil.ch/fr/tour/788280957?layers=wanderland%2CtrackLayer&E=2550025&N=1147496&resolution=2.78&bgLayer=pk&detours=yes&shooting=yes&logos=yes&photos=yes&season=summer
lv95_E                   : 2550025
lv95_N                   : 1147496
date_creation            : 4