In [4]:
import os
import re
import undetected_chromedriver as uc

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains

# --- CONSTANTS ---
# Root directory whose sub-folders each describe one car (see process_all_folders).
CARS_FOLDER = "cars"
# Site that is scraped for tax / CO₂ / vehicle name.
BILSKATT_URL = "https://bilskatt.nu"
# Chrome binary and its major version; the two are passed to uc.Chrome together.
CHROME_PATH = "/Applications/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"
CHROME_VERSION = 125

# --- BROWSER SETUP ---
# NOTE: this runs at module level, so a browser is launched as soon as the
# cell/module is executed.
options = uc.ChromeOptions()
options.binary_location = CHROME_PATH
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--start-maximized")

driver = uc.Chrome(version_main=CHROME_VERSION, options=options)
# Shared explicit wait (20 s timeout) used by all scraping helpers below.
wait = WebDriverWait(driver, timeout=20)

def expand_drivmedel_accordion():
    """Expand the "Drivmedel" accordion on the page currently loaded in ``driver``.

    Scans every button inside ``#more-info-container`` for one whose visible
    text contains "drivmedel" (case-insensitive), scrolls it into view, clicks
    it, and then waits for ``#fuel`` to become visible.

    Returns:
        bool: True if a matching button was found and a click was issued
        (even when ``#fuel`` did not become visible within the wait timeout);
        False if no matching button exists or the search itself failed.
    """
    try:
        accordion_buttons = driver.find_elements(By.CSS_SELECTOR, "#more-info-container button")
        for btn in accordion_buttons:
            if "drivmedel" not in btn.text.strip().lower():
                continue

            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", btn)

            # Prefer a native click; fall back to a JavaScript click if the
            # button never becomes clickable or the native click is rejected.
            # `except Exception` (rather than a bare except) lets
            # KeyboardInterrupt / SystemExit propagate.
            try:
                wait.until(EC.element_to_be_clickable(btn))
                btn.click()
            except Exception:
                driver.execute_script("arguments[0].click();", btn)

            # Best effort: the caller falls back to another table when #fuel
            # yields nothing, so a timeout here is reported but not fatal.
            try:
                wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#fuel")))
            except Exception as e:
                print(f"[WARNING] #fuel not visible after expanding Drivmedel: {type(e).__name__}")

            return True
        return False
    except Exception as e:
        print(f"Error expanding Drivmedel: {e}")
        return False

def _extract_co2_from_rows(rows):
    """Return the CO₂ digits from the first matching table row, or None.

    A row matches when it has at least two ``<td>`` cells, the first cell's
    text contains "koldioxidutsläpp" (case-insensitive; deliberately not
    requiring "NEDC" so that "Koldioxidutsläpp blandad (WLTP)" also matches),
    and the second cell contains at least one digit. All digit runs in the
    second cell are concatenated.
    """
    for row in rows:
        tds = row.find_elements(By.TAG_NAME, "td")
        if len(tds) < 2:
            continue
        if "koldioxidutsläpp" not in tds[0].text.lower():
            continue
        digits = re.findall(r"\d+", tds[1].text)
        if digits:
            return "".join(digits)
    return None


def get_tax_and_co2(regnum):
    """Search bilskatt.nu for *regnum* and scrape its tax and CO₂ values.

    Navigates ``driver`` to BILSKATT_URL, dismisses cookie banners if they
    show up, types *regnum* into the search box, opens the first autocomplete
    hit, and then reads the tax heading and the CO₂ table row.

    Args:
        regnum: Registration number typed into the site's search field.

    Returns:
        tuple[str, str]: ``(tax_digits, co2_digits)``. Each value is the
        concatenation of all digit runs found in the corresponding element's
        text, or ``"N/A"`` when it could not be read. ``("N/A", "N/A")`` is
        returned when navigation or search fails.
    """
    try:
        # 1) Go to bilskatt.nu
        driver.get(BILSKATT_URL)

        # 2) Accept cookies if the prompt appears. Absence of the banner is
        #    expected, so failures are ignored on purpose; `except Exception`
        #    still lets KeyboardInterrupt / SystemExit through.
        # NOTE(review): when no banner is shown this blocks for the full
        # timeout of the shared `wait` on every call — confirm that is acceptable.
        try:
            cookie_btn = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "#page-top button.fc-button.fc-cta-consent.fc-primary-button")
                )
            )
            cookie_btn.click()
        except Exception:
            pass

        # 3) Enter the registration number in the search box
        search_input = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#topSearch")))
        search_input.clear()
        search_input.send_keys(regnum)

        # 4) Select the first autocomplete result
        dropdown = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#autoComplete_result_0")))
        ActionChains(driver).move_to_element(dropdown).click().perform()

        # Accept any second cookie banner if it appears (shorter 5 s wait).
        try:
            WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "#page-top > div.fc-ab-root button")
                )
            ).click()
        except Exception:
            pass

        # --- Tax ---
        tax_digits = "N/A"
        try:
            tax_el = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "#mainBody > div:nth-child(3) > div > div > div > h3")
                )
            )
            extracted = re.findall(r"\d+", tax_el.text)
            if extracted:
                tax_digits = "".join(extracted)
        except Exception as e:
            print(f"[ERROR] Failed to read tax: {e}")

        # --- CO₂ ---
        co2_digits = "N/A"

        # 5) Expand Drivmedel and look through every table under #fuel
        #    (covers #fuel > div > table, #fuel table, etc.)
        if expand_drivmedel_accordion():
            try:
                for table in driver.find_elements(By.CSS_SELECTOR, "#fuel table"):
                    found = _extract_co2_from_rows(table.find_elements(By.TAG_NAME, "tr"))
                    if found is not None:
                        co2_digits = found
                        break
            except Exception as e:
                print(f"Error extracting CO2 under #fuel: {e}")

        # 6) If still "N/A", fall back to the main table in #more-info-container
        if co2_digits == "N/A":
            try:
                fallback_table = driver.find_element(
                    By.CSS_SELECTOR,
                    "#more-info-container > div.table-responsive table"
                )
                found = _extract_co2_from_rows(fallback_table.find_elements(By.TAG_NAME, "tr"))
                if found is not None:
                    co2_digits = found
            except Exception as e:
                print(f"Error extracting CO2 (fallback): {e}")

        return tax_digits, co2_digits

    except Exception as e:
        print(f"[ERROR] Unexpected error in get_tax_and_co2: {e}")
        return "N/A", "N/A"
# --- VEHICLE NAME (reads the page already loaded by get_tax_and_co2) ---
def get_vehicle_name():
    """Read the vehicle name from the "Information om fordonet" accordion.

    Operates on whatever page is currently loaded in ``driver``; it does not
    navigate. If ``#info` is not already displayed, the accordion button whose
    text contains "information om fordonet" (case-insensitive) is clicked
    first. The name is then taken from the right-aligned cell of the first
    row of the table inside ``#info``.

    Returns:
        str: The stripped cell text, or ``"N/A"`` when the text is empty or
        any step fails (the error is printed).
    """
    try:
        # The accordion button is a toggle: clicking it while the panel is
        # already open would presumably collapse it, so only click when #info
        # is not currently displayed.
        info_already_open = any(
            el.is_displayed() for el in driver.find_elements(By.CSS_SELECTOR, "#info")
        )

        if not info_already_open:
            accordion_buttons = driver.find_elements(By.CSS_SELECTOR, "#more-info-container button")
            for btn in accordion_buttons:
                if "information om fordonet" not in btn.text.strip().lower():
                    continue
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", btn)
                # Native click first, JavaScript click as fallback.
                # `except Exception` keeps Ctrl-C / SystemExit working.
                try:
                    wait.until(EC.element_to_be_clickable(btn))
                    btn.click()
                except Exception:
                    driver.execute_script("arguments[0].click();", btn)
                break  # Stop after clicking the matching accordion button

        # Wait until the #info panel is visible
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#info")))

        # Extract the vehicle name from the known table location
        name_element = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "#info > div > table > tbody > tr:nth-child(1) > td.text-right")
        ))
        vehicle_name = name_element.text.strip()

        return vehicle_name or "N/A"

    except Exception as e:
        print(f"[ERROR] Could not extract vehicle name: {e}")
        return "N/A"

def process_all_folders():
    """Fill in missing skatt.txt / koldiox.txt / namn.txt for each car folder.

    For every sub-directory of CARS_FOLDER that has a non-empty ``regnum.txt``,
    the registration number is read (stripped, upper-cased). If at least one
    of the three result files is missing, the site is scraped once and each
    missing file is written when its value was found; values that come back as
    ``"N/A"`` are reported with a warning instead. Existing result files are
    never overwritten.
    """
    for folder in os.listdir(CARS_FOLDER):
        car_dir = os.path.join(CARS_FOLDER, folder)
        if not os.path.isdir(car_dir):
            continue

        regnum_path, skatt_path, co2_path, namn_path = (
            os.path.join(car_dir, filename)
            for filename in ("regnum.txt", "skatt.txt", "koldiox.txt", "namn.txt")
        )

        if not os.path.exists(regnum_path):
            print(f"[SKIP] No regnum.txt in '{folder}'")
            continue

        with open(regnum_path, "r", encoding="utf-8") as src:
            regnum = src.read().strip().upper()
        if not regnum:
            print(f"[SKIP] Empty regnum.txt in '{folder}'")
            continue

        # Which of the three result files still have to be produced?
        skatt_missing, co2_missing, namn_missing = (
            not os.path.exists(path) for path in (skatt_path, co2_path, namn_path)
        )
        if not any((skatt_missing, co2_missing, namn_missing)):
            print(f"[SKIP] '{folder}' already has skatt.txt, koldiox.txt, and namn.txt.")
            continue

        print(f"[INFO] Gathering data for '{regnum}' in folder '{folder}'...")
        # get_vehicle_name() reads the page that get_tax_and_co2() navigated to,
        # so the call order matters.
        tax_value, co2_value = get_tax_and_co2(regnum)
        vehicle_name = get_vehicle_name()

        tax_found = tax_value != "N/A"
        co2_found = co2_value != "N/A"
        name_found = vehicle_name != "N/A"

        # --- Write whatever was both missing and found ---
        if skatt_missing and tax_found:
            with open(skatt_path, "w", encoding="utf-8") as out:
                out.write(tax_value)
            print(f"[SAVED] {regnum} → skatt.txt = {tax_value}")

        if co2_missing and co2_found:
            with open(co2_path, "w", encoding="utf-8") as out:
                out.write(co2_value)
            print(f"[SAVED] {regnum} → koldiox.txt = {co2_value}")

        if namn_missing and name_found:
            with open(namn_path, "w", encoding="utf-8") as out:
                out.write(vehicle_name)
            print(f"[SAVED] {regnum} → namn.txt = {vehicle_name}")

        # --- Report whatever was missing but could not be found ---
        if skatt_missing and not tax_found:
            print(f"[WARNING] No tax found for {regnum}")
        if co2_missing and not co2_found:
            print(f"[WARNING] No CO₂ found for {regnum}")
        if namn_missing and not name_found:
            print(f"[WARNING] No vehicle name found for {regnum}")


# --- MAIN EXECUTION ---
# Runs as soon as this cell/module is executed: process every car folder and
# always shut the browser down afterwards, even if processing raised.
try:
    process_all_folders()
finally:
    # Quit the module-level driver so no browser is left running.
    driver.quit()


[SKIP] 'Volkswagen' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'Ford Kuga 2.0 TDCi AWD Powershift Trend Euro 6' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'Audi A3 Sportback 1.0 TFSI Comfort Euro 6' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'Citroën Berlingo Multispace 1.6 BlueHDi 100 8v ETG6 Euro 6' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'Skoda Superb Kombi 2.0 TDI 4x4 Elegance Euro 5' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'Volvo V90 Recharge T8 AWD Geartronic R-Design Euro6 Polestar' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'BMW X1 xDrive20d Sport line Euro 5' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'Tesla Model 3 Long Range AWD_Autopilot, Dator för FSD' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'BMW 116 i 5-dörrars M Sport Euro 6' already has skatt.txt, koldiox.txt, and namn.txt.
[SKIP] 'BMW 318 d Sedan Advantage Euro 6' already has skatt.txt, koldiox.txt, 

EL Bilar