In [1]:
import time
import random
import re
import os
import undetected_chromedriver as uc

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains

TRANSPORTSTYRELSEN_URL = "https://fordon-fu-regnr.transportstyrelsen.se"
CHROME_PATH = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"
CHROME_VERSION = 125
CARS_FOLDER = "cars"

options = uc.ChromeOptions()
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--start-maximized")
options.binary_location = CHROME_PATH

driver = uc.Chrome(options=options, version_main=CHROME_VERSION)
wait = WebDriverWait(driver, 20)

def human_sleep(min_s=2, max_s=5):
    """Random short sleep to mimic human behavior."""
    time.sleep(random.uniform(min_s, max_s))

def expand_element(selector, label=""):
    """
    Utility to safely scroll and click an element (accordion heading, etc.)
    identified by CSS selector.
    """
    try:
        el = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", el)
        human_sleep(0.5, 1.5)
        el.click()
        if label:
            print(f"[INFO] Expanded '{label}'.")
        else:
            print(f"[INFO] Expanded element {selector}")
    except Exception as e:
        print(f"[WARNING] Could not expand {selector}: {e}")

def get_ts_data(regnum):
    """
    Loads the Transportstyrelsen page for regnum and tries to extract:
      1) Fuel usage (bränsleförbrukning)
      2) Koldiox (CO2 g/km)
      3) Skatt (yearly tax)
      4) Namn (Fabrikat)
      5) handelsbeteckning (Handelsbeteckning)

    Returns a dict with keys:
      {
        "fuel_usage": <float or 'N/A'>,
        "koldiox": <str or 'N/A'>,
        "skatt": <str or 'N/A'>,
        "namn": <str or 'N/A'>,
        "handelsbeteckning": <str or 'N/A'>
      }
    """
    data = {
        "fuel_usage": "N/A",
        "koldiox": "N/A",
        "skatt": "N/A",
        "namn": "N/A",
        "handelsbeteckning": "N/A"
    }

    try:
        # 1) Load site
        driver.get(TRANSPORTSTYRELSEN_URL)
        human_sleep()

        # 2) Enter regnum and click 'Sök'
        search_input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ts-regnr-sok")))
        search_input.clear()
        search_input.send_keys(regnum)
        print(f"[INFO] Entered regnum '{regnum}'")
        human_sleep()

        sok_btn = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btnSok")))
        ActionChains(driver).move_to_element(sok_btn).pause(random.uniform(0.3,1)).click().perform()
        print("[INFO] Clicked 'Sök' button.")
        human_sleep()

        # Expand multiple accordions we know we need
        expand_element("#ts-teknik-heading > span > a", "Teknisk information")
        expand_element("#accordion > div:nth-child(8)", "Accordion #8")
        expand_element("#ts-miljo-heading > span > a", "Miljö")
        expand_element("#ts-Bensin-heading > span > a", "Bensin (if present)")
        expand_element("#ts-skatt-heading > span > a", "Skatt")

        human_sleep()

        #
        # 1) Fuel usage
        #
        def parse_paragraphs_for_fuel(paragraphs):
            """Checks <p> for 'blandad körning' or 'viktad blandad körning' and extracts l/100km if found."""
            for p in paragraphs:
                txt = p.text.lower().strip()
                if ("blandad" in txt and "körning" in txt) or ("viktad" in txt and "blandad" in txt):
                    print(f"[INFO] Found paragraph (fuel): {p.text.strip()}")
                    match = re.search(r"(\d+[\.,]?\d*)\s*l/100km", txt, re.IGNORECASE)
                    if match:
                        return match.group(1).replace(",", ".")
            return "N/A"

        # paragraphs in Miljö
        miljo_paras = driver.find_elements(By.CSS_SELECTOR, "#ts-miljoCollapse p")
        usage_val = parse_paragraphs_for_fuel(miljo_paras)

        # if still N/A, paragraphs in Bensin
        if usage_val == "N/A":
            bensin_paras = driver.find_elements(By.CSS_SELECTOR, "#ts-BensinCollapse p")
            usage_val = parse_paragraphs_for_fuel(bensin_paras)

        data["fuel_usage"] = usage_val
        print(f"[INFO] Fuel usage for {regnum} = {usage_val} l/100km")

        #
        # 2) Koldiox (CO2 g/km)
        #
        try:
            co2_el = driver.find_element(
                By.CSS_SELECTOR,
                "#ts-miljoCollapse > div > div:nth-child(5) > div:nth-child(3) > p"
            )
            txt = co2_el.text.strip()
            match = re.search(r"(\d+)\s*g/km", txt, re.IGNORECASE)
            if match:
                data["koldiox"] = match.group(1)
                print(f"[INFO] Found koldiox: {data['koldiox']} g/km")
        except Exception as e:
            print(f"[WARNING] Could not find CO2 element: {e}")

        #
        # 3) Skatt (yearly tax)
        #
        try:
            skatt_p = driver.find_element(
                By.CSS_SELECTOR,
                "#ts-skattCollapse > div > div > div:nth-child(2) > p"
            )
            txt = skatt_p.text.strip()
            match = re.search(r"(\d+)", txt)
            if match:
                data["skatt"] = match.group(1)
                print(f"[INFO] Found skatt: {data['skatt']} kr")
        except Exception as e:
            print(f"[WARNING] Could not find Skatt element: {e}")

        #
        # 4) Namn (Fabrikat)
        #
        try:
            namn_p = driver.find_element(
                By.CSS_SELECTOR,
                "#ts-sammanfattningCollapse > div > div > div:nth-child(2) > p"
            )
            # Usually "Fabrikat ...\nRENAULT L"
            text_lines = [line.strip() for line in namn_p.text.split("\n") if line.strip()]
            if len(text_lines) > 1:
                data["namn"] = text_lines[-1]  # e.g. "RENAULT L"
                print(f"[INFO] Found namn: {data['namn']}")
        except Exception as e:
            print(f"[WARNING] Could not find Namn element: {e}")

        #
        # 5) handelsbeteckning (Handelsbeteckning)
        #
        try:
            handelsbeteckning_p = driver.find_element(
                By.CSS_SELECTOR,
                "#ts-sammanfattningCollapse > div > div > div:nth-child(3) > p"
            )
            # Typically includes "Handelsbeteckning" line + next line with the handelsbeteckning
            handelsbeteckning_lines = [line.strip() for line in handelsbeteckning_p.text.split("\n") if line.strip()]
            if len(handelsbeteckning_lines) > 1:
                # The last line should be the actual handelsbeteckning, e.g. "T-ROC"
                data["handelsbeteckning"] = handelsbeteckning_lines[-1]
                print(f"[INFO] Found handelsbeteckning: {data['handelsbeteckning']}")
        except Exception as e:
            print(f"[WARNING] Could not find handelsbeteckning element: {e}")

        human_sleep()

    except Exception as e:
        print(f"[ERROR] get_ts_data exception for {regnum}: {e}")

    return data

def remove_zero_brbr():
    """Removes any brbr.txt file that contains only '0'."""
    for folder in os.listdir(CARS_FOLDER):
        folder_path = os.path.join(CARS_FOLDER, folder)
        if not os.path.isdir(folder_path):
            continue
        fuel_file = os.path.join(folder_path, "brbr.txt")
        if os.path.exists(fuel_file):
            with open(fuel_file, "r", encoding="utf-8") as f:
                content = f.read().strip()
            if content == "0":
                os.remove(fuel_file)
                print(f"[INFO] Removed brbr.txt in '{folder}' because it only contained '0'.")

def process_fuel_usage_all():
    """
    Iterates subfolders in 'cars':
      - If brbr.txt doesn't exist, check if Bränsle.txt is exactly 'El'.
        If so, create brbr.txt with '0'.
        Otherwise, fetch from the TS website.
      - Also tries to fetch/save:
          * skatt.txt
          * namn.txt
          * koldiox.txt
          * handelsbeteckning.txt  <-- NEW
        if they do not exist yet.
    """
    for folder in os.listdir(CARS_FOLDER):
        folder_path = os.path.join(CARS_FOLDER, folder)
        if not os.path.isdir(folder_path):
            continue

        reg_file     = os.path.join(folder_path, "regnum.txt")
        fuel_file    = os.path.join(folder_path, "brbr.txt")
        bransle_file = os.path.join(folder_path, "Bränsle.txt")
        skatt_file   = os.path.join(folder_path, "skatt.txt")
        namn_file    = os.path.join(folder_path, "namn.txt")
        co2_file     = os.path.join(folder_path, "koldiox.txt")
        handelsbeteckning_file   = os.path.join(folder_path, "handelsbeteckning.txt")  # NEW

        if not os.path.exists(reg_file):
            print(f"[SKIP] No regnum.txt in '{folder}'")
            continue

        with open(reg_file, "r", encoding="utf-8") as f:
            regnum = f.read().strip().upper()
        if not regnum:
            print(f"[SKIP] Empty regnum.txt in '{folder}'")
            continue

        # We skip calling the website if all files are present (including handelsbeteckning_file).
        all_extra_exist = (
            os.path.exists(fuel_file) and
            os.path.exists(skatt_file) and
            os.path.exists(namn_file) and
            os.path.exists(co2_file) and
            os.path.exists(handelsbeteckning_file)
        )
        recheck_brbr = False
        if os.path.exists(fuel_file):
            with open(fuel_file, "r", encoding="utf-8") as fh:
                content = fh.read().strip()
                if content == "0":
                    recheck_brbr = True

        if all_extra_exist and not recheck_brbr:
            print(f"[SKIP] All data files exist in '{folder}', skipping.")
            continue

        # If brbr.txt missing or we need to re-check, see if it's an electric car
        if not os.path.exists(fuel_file) or recheck_brbr:
            if os.path.exists(bransle_file):
                with open(bransle_file, "r", encoding="utf-8") as f:
                    br_content = f.read().strip().lower()
                # If it is exactly "el", create brbr.txt with "0"
                if br_content == "el":
                    with open(fuel_file, "w", encoding="utf-8") as f_out:
                        f_out.write("0")
                    print(f"[INFO] Found only 'El' in Bränsle.txt. Wrote '0' to brbr.txt in '{folder}'")
                    # We'll still try to fetch the rest if needed below.

        # --- Now fetch from TS if any file is missing or we need to re-check brbr ---
        print(f"[INFO] Fetching from TS for '{regnum}'...")
        ts_data = get_ts_data(regnum)  # returns dict with usage, skatt, namn, koldiox, handelsbeteckning

        # 1) Save brbr.txt if missing or if it was '0'
        if not os.path.exists(fuel_file) or recheck_brbr:
            usage = ts_data["fuel_usage"]
            if usage != "N/A":
                with open(fuel_file, "w", encoding="utf-8") as f:
                    f.write(usage)
                print(f"[SAVED] {regnum} → brbr.txt = {usage} l/100km")
            else:
                print(f"[WARNING] No fuel usage found for {regnum}")

        # 2) Save skatt.txt if missing
        if not os.path.exists(skatt_file):
            skatt_val = ts_data["skatt"]
            if skatt_val != "N/A":
                with open(skatt_file, "w", encoding="utf-8") as f:
                    f.write(skatt_val)
                print(f"[SAVED] {regnum} → skatt.txt = {skatt_val}")

        # 3) Save namn.txt if missing
        if not os.path.exists(namn_file):
            namn_val = ts_data["namn"]
            if namn_val != "N/A":
                with open(namn_file, "w", encoding="utf-8") as f:
                    f.write(namn_val)
                print(f"[SAVED] {regnum} → namn.txt = {namn_val}")

        # 4) Save koldiox.txt if missing
        if not os.path.exists(co2_file):
            co2_val = ts_data["koldiox"]
            if co2_val != "N/A":
                with open(co2_file, "w", encoding="utf-8") as f:
                    f.write(co2_val)
                print(f"[SAVED] {regnum} → koldiox.txt = {co2_val}")

        # 5) Save handelsbeteckning.txt if missing
        if not os.path.exists(handelsbeteckning_file):
            handelsbeteckning_val = ts_data["handelsbeteckning"]
            if handelsbeteckning_val != "N/A":
                with open(handelsbeteckning_file, "w", encoding="utf-8") as f:
                    f.write(handelsbeteckning_val)
                print(f"[SAVED] {regnum} → handelsbeteckning.txt = {handelsbeteckning_val}")

        # Wait before next request (adjust range as you wish)
        print("[INFO] Waiting 10-15 seconds before next request...")
        time.sleep(random.uniform(10, 15))

# --- MAIN EXECUTION ---
try:
    process_fuel_usage_all()
finally:
    driver.quit()


[SKIP] All data files exist in 'Volkswagen', skipping.
[SKIP] All data files exist in 'Ford Kuga 2.0 TDCi AWD Powershift Trend Euro 6', skipping.
[SKIP] All data files exist in 'Audi A3 Sportback 1.0 TFSI Comfort Euro 6', skipping.
[SKIP] All data files exist in 'Citroën Berlingo Multispace 1.6 BlueHDi 100 8v ETG6 Euro 6', skipping.
[SKIP] All data files exist in 'Skoda Superb Kombi 2.0 TDI 4x4 Elegance Euro 5', skipping.
[SKIP] All data files exist in 'Volvo V90 Recharge T8 AWD Geartronic R-Design Euro6 Polestar', skipping.
[SKIP] All data files exist in 'BMW X1 xDrive20d Sport line Euro 5', skipping.
[INFO] Found only 'El' in Bränsle.txt. Wrote '0' to brbr.txt in 'Tesla Model 3 Long Range AWD_Autopilot, Dator för FSD'
[INFO] Fetching from TS for 'HHD98C'...
[INFO] Entered regnum 'HHD98C'
[INFO] Clicked 'Sök' button.
[INFO] Expanded 'Teknisk information'.
[INFO] Expanded 'Accordion #8'.
[INFO] Expanded 'Miljö'.
Stacktrace:
0   undetected_chromedriver             0x00000001030b46b8 und