In [19]:
import time
import json
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager


# Listing page opened first by main(); profile links are collected from it.
START_URL = "https://fminside.net/players"
# Upper bound on how many profile URLs main() collects from the listing.
MAX_PLAYERS = 3
# File that main() writes the scraped player records to, as JSON.
OUT_FILE = Path("fminside_players.json")


def start_driver():
    """
    Launch Chrome through Selenium and return the resulting WebDriver.

    Chrome is started with the ``AutomationControlled`` Blink feature
    disabled and with the window maximized. The chromedriver path is
    whatever ``ChromeDriverManager().install()`` returns.
    """
    chrome_options = Options()
    for flag in (
        "--disable-blink-features=AutomationControlled",
        "--start-maximized",
    ):
        chrome_options.add_argument(flag)

    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)


def get_value_by_key(driver, key_name: str):
    """
    Return the text displayed next to a labelled entry, or None if not found.

    Finds: <span class="key">KEY</span><span class="value">VALUE</span>
    by locating the ``key`` span whose normalized text equals ``key_name``,
    stepping up to its parent ``<li>`` and reading the first ``value``
    element inside it. When the value element contains one or more ``card``
    elements, the first card's text is returned instead of the whole value.

    Args:
        driver: object exposing Selenium's ``find_element`` (a WebDriver).
        key_name: label text to look for; may contain quote characters.

    Returns:
        The stripped text, or None when any lookup fails.
    """
    # XPath 1.0 string literals have no escape syntax, so pick the quote
    # style that does not clash with the key, or fall back to concat().
    if "'" not in key_name:
        literal = f"'{key_name}'"
    elif '"' not in key_name:
        literal = f'"{key_name}"'
    else:
        pieces = (f"'{part}'" for part in key_name.split("'"))
        literal = "concat(" + ", \"'\", ".join(pieces) + ")"

    try:
        li = driver.find_element(
            By.XPATH,
            f"//span[@class='key' and normalize-space()={literal}]/parent::li",
        )
        value_el = li.find_element(By.CLASS_NAME, "value")

        cards = value_el.find_elements(By.CLASS_NAME, "card")
        if cards:
            return cards[0].text.strip()

        return value_el.text.strip()
    except Exception:
        # Best-effort lookup: a missing element yields None. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        return None


def scrape_player_profile(driver, url: str) -> dict:
    """
    Load a player profile page and collect its fields into a dict.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: address of the profile page.

    Returns:
        A dict with the keys ``profile_url``, ``name``, ``estimated_value``,
        ``positions``, ``age``, ``height``, ``caps_goals``, ``left_foot``,
        ``right_foot``, ``nationality``, ``contract_end``, ``wages``,
        ``sell_value`` and ``attributes``. Every field except ``name`` is
        best-effort: it is None (or empty) when the page lacks it.

    Raises:
        Whatever ``driver.find_element`` raises when the ``#player h1``
        heading is missing; the name is the only required field.
    """
    driver.get(url)
    time.sleep(2)  # fixed pause; presumably gives the page time to render

    data = {"profile_url": url}

    # ---------- NAME ----------
    data["name"] = driver.find_element(By.CSS_SELECTOR, "#player h1").text.strip()

    # ---------- META ----------
    try:
        data["estimated_value"] = driver.find_element(
            By.CSS_SELECTOR, "#player .player_value"
        ).text.strip()
    except Exception:
        data["estimated_value"] = None

    positions = []
    for position_el in driver.find_elements(By.CSS_SELECTOR, "#player .position"):
        label = position_el.text  # read once: each .text access queries the browser
        if label:
            positions.append(label)
    data["positions"] = positions

    # ---------- PLAYER INFO (KEY BASED) ----------
    for field, label in (
        ("age", "Age"),
        ("height", "Height"),
        ("caps_goals", "Caps / Goals"),
        ("left_foot", "Left foot"),
        ("right_foot", "Right foot"),
    ):
        data[field] = get_value_by_key(driver, label)

    # Nationality is a flag image inside the "Nationality" <li>; if that
    # entry cannot be found, fall back to the first flag under #player_info.
    try:
        li = driver.find_element(
            By.XPATH,
            "//span[@class='key' and normalize-space()='Nationality']/parent::li"
        )
        data["nationality"] = li.find_element(
            By.CSS_SELECTOR, "img.flag"
        ).get_attribute("code")
    except Exception:
        try:
            data["nationality"] = driver.find_element(
                By.CSS_SELECTOR, "#player_info img.flag"
            ).get_attribute("code")
        except Exception:
            data["nationality"] = None

    # ---------- CONTRACT (KEY BASED) ----------
    for field, label in (
        ("contract_end", "Contract end"),
        ("wages", "Wages"),
        ("sell_value", "Sell value"),
    ):
        data[field] = get_value_by_key(driver, label)

    # ---------- ATTRIBUTES ----------
    attributes = {}
    for row in driver.find_elements(By.CSS_SELECTOR, "#player_stats tr"):
        attr = row.get_attribute("id")
        if not attr:
            continue
        try:
            attributes[attr] = int(
                row.find_element(By.CLASS_NAME, "stat").text.strip()
            )
        except Exception:
            # Row without a stat cell, or with empty / non-numeric text:
            # skip it. Exception (not bare except) keeps Ctrl-C working.
            continue

    data["attributes"] = attributes

    return data


def main():
    """
    Scrape up to MAX_PLAYERS profiles from the listing page into OUT_FILE.

    Opens START_URL, waits for the user to press ENTER (the prompt suggests
    solving a Cloudflare challenge first), collects profile links from the
    ``ul.player`` rows, scrapes each profile and writes the results as JSON.
    The browser is always closed, even when scraping fails part-way.
    """
    driver = start_driver()
    try:
        driver.get(START_URL)

        print("Solve Cloudflare if needed.")
        input("Press ENTER when players are visible...")

        profile_urls = []
        for row in driver.find_elements(By.CSS_SELECTOR, "ul.player"):
            if len(profile_urls) >= MAX_PLAYERS:
                break
            try:
                href = row.find_element(
                    By.CSS_SELECTOR, "a[href^='/players/']"
                ).get_attribute("href")
            except Exception:
                # Row without a profile link: skip it. Exception (not a bare
                # except) so Ctrl-C still interrupts the run.
                continue
            if href:
                profile_urls.append(href)

        players = []
        for i, url in enumerate(profile_urls, 1):
            print(f"[{i}/{len(profile_urls)}] {url}")
            players.append(scrape_player_profile(driver, url))
    finally:
        # Without this, any exception above would leave Chrome running.
        driver.quit()

    # ensure_ascii=False keeps accented names readable in the UTF-8 file.
    OUT_FILE.write_text(
        json.dumps(players, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    print(f"Saved {len(players)} players to {OUT_FILE}")


# Entry point: run the scraper only when this code is the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Solve Cloudflare if needed.
[1/3] https://fminside.net/players/7-fm-26/85139014-kylian-mbappe
[2/3] https://fminside.net/players/7-fm-26/2000168055-mariona-caldentey
[3/3] https://fminside.net/players/7-fm-26/2000168051-aitana-bonmati
Saved 3 players to fminside_players.json


In [None]:
import time
import json

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager


# Profile page that main() loads and records as "profile_url" in its result.
URL = "https://fminside.net/players/7-fm-26/85139014-kylian-mbappe"


def start_driver():
    """
    Create and return a Selenium Chrome WebDriver.

    Two command-line switches are passed to Chrome: one disabling the
    ``AutomationControlled`` Blink feature and one starting the window
    maximized. The driver binary is resolved via ``ChromeDriverManager``.
    """
    browser_flags = [
        "--disable-blink-features=AutomationControlled",
        "--start-maximized",
    ]

    opts = Options()
    for switch in browser_flags:
        opts.add_argument(switch)

    driver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_path), options=opts)


def _read_key_values(column, prefer_card):
    """Collect {key text: value text} from every <li> inside ``column``."""
    pairs = {}
    for li in column.find_elements(By.CSS_SELECTOR, "li"):
        try:
            key = li.find_element(By.CLASS_NAME, "key").text.strip()
            value_el = li.find_element(By.CLASS_NAME, "value")
        except Exception:
            # Entry lacking a key or value element: skip it rather than
            # abort the whole extraction.
            continue

        # With prefer_card, a nested "card" element supplies the text.
        cards = value_el.find_elements(By.CLASS_NAME, "card") if prefer_card else []
        text_source = cards[0] if cards else value_el
        pairs[key] = text_source.text.strip()
    return pairs


def extract_player_info(driver):
    """
    Read the "Player info" and "Contract" columns of the current page.

    Columns are the ``#player_info .column`` elements, identified by the
    text of their ``<h2>``. Each ``<li>`` contributes one entry mapping its
    ``key`` text to its ``value`` text. In the "Player info" column, a value
    that contains ``card`` elements yields the first card's text instead.

    Args:
        driver: Selenium WebDriver positioned on a player profile page.

    Returns:
        dict mapping key text to value text; empty when neither column is
        found. Entries missing a key or value element are skipped.
    """
    player_info_column = None
    contract_column = None

    for col in driver.find_elements(By.CSS_SELECTOR, "#player_info .column"):
        try:
            title = col.find_element(By.TAG_NAME, "h2").text.strip()
        except Exception:
            # Column without a heading. Exception (not a bare except) so
            # Ctrl-C / SystemExit are not swallowed.
            continue
        if title == "Player info":
            player_info_column = col
        elif title == "Contract":
            contract_column = col

    data = {}
    if player_info_column is not None:
        data.update(_read_key_values(player_info_column, prefer_card=True))
    if contract_column is not None:
        # Contract values are read verbatim, without card handling.
        data.update(_read_key_values(contract_column, prefer_card=False))
    return data


def extract_meta(driver):
    """
    Read the header fields of the current player profile page.

    Args:
        driver: Selenium WebDriver positioned on a player profile page.

    Returns:
        dict with ``name``, ``estimated_value``, ``club``, ``nationality``
        (the flag image's ``code`` attribute) and ``positions`` (list of
        stripped texts). ``estimated_value``, ``club`` and ``nationality``
        are None when their element is not on the page.

    Raises:
        Whatever ``driver.find_element`` raises when the ``#player_info h1``
        heading is missing; the name is treated as required.
    """

    def first_or_none(selector):
        # Optional element lookup; Exception (not a bare except) so that
        # Ctrl-C / SystemExit still propagate.
        try:
            return driver.find_element(By.CSS_SELECTOR, selector)
        except Exception:
            return None

    meta = {}

    meta["name"] = driver.find_element(By.CSS_SELECTOR, "#player_info h1").text.strip()

    price_el = first_or_none(".player_value .price")
    meta["estimated_value"] = price_el.text.strip() if price_el is not None else None

    club_el = first_or_none(".meta ul li a span.value")
    meta["club"] = club_el.text.strip() if club_el is not None else None

    flag_el = first_or_none(".meta img.flag")
    meta["nationality"] = flag_el.get_attribute("code") if flag_el is not None else None

    meta["positions"] = [
        el.text.strip()
        for el in driver.find_elements(
            By.CSS_SELECTOR, ".meta .desktop_positions .position"
        )
    ]

    return meta

def extract_attributes(driver):
    """
    Build a mapping of attribute id -> integer rating from ``#player_stats``.

    Each ``<tr>`` with a non-empty ``id`` is considered. The text of its
    first ``stat`` element is used when it consists solely of digits; rows
    with no ``stat`` element or with empty / non-numeric text are left out.
    """
    ratings = {}

    for tr in driver.find_elements(By.CSS_SELECTOR, "#player_stats tr"):
        name = tr.get_attribute("id")
        # Only rows carrying an id are queried for their stat cells.
        cells = tr.find_elements(By.CLASS_NAME, "stat") if name else []
        if cells:
            raw = cells[0].text.strip()
            if raw.isdigit():
                ratings[name] = int(raw)

    return ratings

def main():
    """
    Scrape the single profile at URL and print it as JSON.

    Opens the page, waits for the user to press ENTER (the prompt suggests
    solving a Cloudflare challenge first), pauses two seconds, then combines
    the header fields, key/value info and attribute ratings into one record.
    The browser is always closed, even when extraction fails.
    """
    driver = start_driver()
    try:
        driver.get(URL)

        print("Solve Cloudflare if needed, then press ENTER")
        input()

        time.sleep(2)

        meta = extract_meta(driver)
        info = extract_player_info(driver)
        attributes = extract_attributes(driver)
    finally:
        # Without this, an exception above would leave Chrome running.
        driver.quit()

    result = {
        "profile_url": URL,
        "name": meta.get("name"),
        "estimated_value": meta.get("estimated_value"),
        "club": meta.get("club"),
        "nationality": meta.get("nationality"),
        "positions": meta.get("positions"),
        "age": info.get("Age"),
        "height": info.get("Height"),
        "caps_goals": info.get("Caps / Goals"),
        "left_foot": info.get("Left foot"),
        "right_foot": info.get("Right foot"),
        "contract_end": info.get("Contract end"),
        "wages": info.get("Wages"),
        "sell_value": info.get("Sell value"),
        "attributes": attributes,
    }

    print(json.dumps(result, indent=2, ensure_ascii=False))


# Entry point: run the single-profile scrape only when this code is the
# main program, not when it is imported as a module.
if __name__ == "__main__":
    main()


Solve Cloudflare if needed, then press ENTER


ValueError: invalid literal for int() with base 10: ''