In [1]:
# Import Library
import requests
import json

import time
import os

import sys

In [None]:
# Scrape Club ID dari Satu Liga

competition_id = "GB1" #EPL
season_id = 2024 # 2024-2025

url = f"https://transfermarkt-api.fly.dev/competitions/{competition_id}/clubs?season_id={season_id}"
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    
    # Cek struktur data
    if isinstance(data, dict) and "clubs" in data:
        clubs = data["clubs"]
    else:
        clubs = data  # fallback

    print(f"Ditemukan {len(clubs)} klub")
    for club in clubs:
        print(f"- {club['name']} (ID: {club['id']})")

    with open("clubs_from_competition.json", "w", encoding="utf-8") as f:
        json.dump(clubs, f, ensure_ascii=False, indent=2)
    print("Disimpan ke 'clubs_from_competition.json'")
else:
    print(f"Gagal: {response.status_code}")

In [3]:
# Scrape Player ID tiap TIM

# Load data klub dari file sebelumnya
with open("clubs_from_competition.json", "r", encoding="utf-8") as f:
    clubs = json.load(f)

season_id = 2024
all_players = []

for club in clubs:
    club_id = club['id']
    club_name = club['name']
    print(f"Mengambil pemain dari {club_name} (ID: {club_id})...")

    url = f"https://transfermarkt-api.fly.dev/clubs/{club_id}/players?season_id={season_id}"
    response = requests.get(url)

    if response.status_code == 200:
        try:
            data = response.json()
            players = data["players"] if isinstance(data, dict) and "players" in data else data

            for p in players:
                if isinstance(p, dict):
                    all_players.append({
                        "club_id": club_id,
                        "club_name": club_name,
                        "player_id": p.get("id"),
                        "name": p.get("name"),
                        "position": p.get("position"),
                        "age": p.get("age"),
                        "market_value": p.get("market_value")
                    })
        except Exception as e:
            print(f"Parsing error untuk {club_name}: {e}")
    else:
        print(f"Gagal ambil dari {club_name} - {response.status_code}")

# Simpan ke file
with open("epl_players_2024_basic.json", "w", encoding="utf-8") as f:
    json.dump(all_players, f, ensure_ascii=False, indent=2)

print(f"\nTotal pemain yang berhasil disimpan: {len(all_players)}")

📥 Mengambil pemain dari Manchester City (ID: 281)...
❌ Gagal ambil dari Manchester City - 503
📥 Mengambil pemain dari Arsenal FC (ID: 11)...
❌ Gagal ambil dari Arsenal FC - 503
📥 Mengambil pemain dari Liverpool FC (ID: 31)...
❌ Gagal ambil dari Liverpool FC - 503
📥 Mengambil pemain dari Chelsea FC (ID: 631)...
❌ Gagal ambil dari Chelsea FC - 503
📥 Mengambil pemain dari Tottenham Hotspur (ID: 148)...
❌ Gagal ambil dari Tottenham Hotspur - 503
📥 Mengambil pemain dari Manchester United (ID: 985)...
❌ Gagal ambil dari Manchester United - 503
📥 Mengambil pemain dari Newcastle United (ID: 762)...
❌ Gagal ambil dari Newcastle United - 503
📥 Mengambil pemain dari Aston Villa (ID: 405)...
❌ Gagal ambil dari Aston Villa - 503
📥 Mengambil pemain dari Brighton & Hove Albion (ID: 1237)...
❌ Gagal ambil dari Brighton & Hove Albion - 503
📥 Mengambil pemain dari West Ham United (ID: 379)...
❌ Gagal ambil dari West Ham United - 503
📥 Mengambil pemain dari Nottingham Forest (ID: 703)...
❌ Gagal ambil da

In [2]:
# Konfigurasi
INPUT_FILE = "epl_players_2024_basic.json"
OUTPUT_FILE = "epl_players_detailed_2024.json"
API_BASE = "https://transfermarkt-api.fly.dev"
HEADERS = {"accept": "application/json"}
DELAY = 2

# Semua endpoint
ENDPOINTS = [
    "profile", "market_value", "transfers",
    "jersey_numbers", "stats", "injuries", "achievements"
]

# Load data pemain dasar
with open(INPUT_FILE, "r", encoding="utf-8") as f:
    all_players = json.load(f)

# Load data yang sudah ada
if os.path.exists(OUTPUT_FILE):
    with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
        existing_data = json.load(f)
    done_ids = {p["player_id"] for p in existing_data}
else:
    existing_data = []
    done_ids = set()

# Daftar gagal baru
new_failed = []

# Proses scraping
for player in all_players:
    pid = str(player["player_id"])
    if pid in done_ids:
        print(f"Lewati {player['name']} (ID: {pid})")
        continue

    print(f"Scraping {player['name']} (ID: {pid})")

    player_obj = {
        "player_id": pid,
        "name": player["name"],
        "profile_url": f"https://www.transfermarkt.com/{player['name'].replace(' ', '-').lower()}/profil/spieler/{pid}",
        "club_id": player["club_id"],
        "club_name": player["club_name"],
        "competition_id": "GB1",
        "competition_name": "Premier League"
    }

    success = True

    for endpoint in ENDPOINTS:
        try:
            url = f"{API_BASE}/players/{pid}/{endpoint}"
            r = requests.get(url, headers=HEADERS)
            if r.status_code == 200:
                content = r.json()
                player_obj[endpoint] = content.get(endpoint) if endpoint in content else content
            else:
                print(f"{endpoint} gagal: status {r.status_code}")
                # Jika selain transfers yang gagal, anggap gagal total
                if endpoint not in ["transfers", "injuries"]:
                    success = False
                    break
        except Exception as e:
            print(f"{endpoint} error: {e}")
            if endpoint != "transfers":
                success = False
                break
        time.sleep(DELAY)

    if success:
        existing_data.append(player_obj)
        with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
            json.dump(existing_data, f, ensure_ascii=False, indent=2)
        print(f"Selesai {player['name']}")
    else:
        new_failed.append(player)
        print(f"Gagal scraping {player['name']}, simpan ke daftar gagal.")

print(f"\nScraping selesai. Tersimpan: {len(existing_data)} pemain | Gagal: {len(new_failed)} pemain.")

⏩ Lewati Ederson (ID: 238223)
⏩ Lewati Stefan Ortega (ID: 85941)
⏩ Lewati Scott Carson (ID: 14555)
⏩ Lewati Rúben Dias (ID: 258004)
⏩ Lewati Abdukodir Khusanov (ID: 763079)
⏩ Lewati Nathan Aké (ID: 177476)
⏩ Lewati Manuel Akanji (ID: 284730)
⏩ Lewati John Stones (ID: 186590)
⏩ Lewati Vitor Reis (ID: 1005575)
⏩ Lewati Josko Gvardiol (ID: 475959)
⏩ Lewati Rico Lewis (ID: 701057)
⏩ Lewati Rodri (ID: 357565)
⏩ Lewati Nico González (ID: 466805)
⏩ Lewati Matheus Nunes (ID: 601883)
⏩ Lewati Mateo Kovacic (ID: 51471)
⏩ Lewati İlkay Gündoğan (ID: 53622)
⏩ Lewati Bernardo Silva (ID: 241641)
⏩ Lewati Kevin De Bruyne (ID: 88755)
⏩ Lewati Claudio Echeverri (ID: 994536)
⏩ Lewati James McAtee (ID: 583199)
⏩ Lewati Nico O'Reilly (ID: 743413)
⏩ Lewati Jérémy Doku (ID: 486049)
⏩ Lewati Jack Grealish (ID: 203460)
⏩ Lewati Phil Foden (ID: 406635)
⏩ Lewati Savinho (ID: 743591)
⏩ Lewati Oscar Bobb (ID: 661207)
⏩ Lewati Erling Haaland (ID: 418560)
⏩ Lewati Omar Marmoush (ID: 445939)
⏩ Lewati David Raya (ID: 

In [4]:
# Cek Pemain Dengan Endpoint Kurang

# Konfigurasi
REQUIRED_ENDPOINTS = [
    "profile", "market_value", "transfers",
    "jersey_numbers", "stats", "injuries", "achievements"
]

# Load data
with open("epl_players_detailed_2024.json", "r", encoding="utf-8") as f:
    players = json.load(f)

# Cek pemain yang endpoint-nya tidak lengkap
incomplete_players = []

for player in players:
    missing = [ep for ep in REQUIRED_ENDPOINTS if ep not in player]
    if missing:
        incomplete_players.append({
            "player_id": player["player_id"],
            "name": player["name"],
            "club_name": player["club_name"],
            "missing_endpoints": missing
        })

# Print hasil
print(f"Total pemain dengan endpoint kurang: {len(incomplete_players)}\n")
for p in incomplete_players:
    print(f"- {p['name']} ({p['club_name']}) -> Missing: {', '.join(p['missing_endpoints'])}")

🛠️ Total pemain dengan endpoint kurang: 208

- Dominik Szoboszlai (Liverpool FC) -> Missing: transfers
- Cody Gakpo (Liverpool FC) -> Missing: transfers
- Mohamed Salah (Liverpool FC) -> Missing: transfers
- Federico Chiesa (Liverpool FC) -> Missing: transfers
- Darwin Núñez (Liverpool FC) -> Missing: transfers
- Robert Sánchez (Chelsea FC) -> Missing: transfers
- Filip Jørgensen (Chelsea FC) -> Missing: transfers
- Marcus Bettinelli (Chelsea FC) -> Missing: transfers
- Aarón Anselmino (Chelsea FC) -> Missing: transfers
- Cole Palmer (Chelsea FC) -> Missing: transfers
- Kevin Danso (Tottenham Hotspur) -> Missing: transfers
- Luke Shaw (Manchester United) -> Missing: transfers
- Harry Amass (Manchester United) -> Missing: transfers
- Noussair Mazraoui (Manchester United) -> Missing: transfers
- Manuel Ugarte (Manchester United) -> Missing: transfers
- Casemiro (Manchester United) -> Missing: transfers
- Toby Collyer (Manchester United) -> Missing: transfers
- Christian Eriksen (Manchest

In [3]:
# Retry per endpoints

# Konfigurasi
INPUT_PATH = "epl_players_detailed_2024.json"
OUTPUT_PATH = "epl_players_detailed_2024_filled.json"
API_BASE = "https://transfermarkt-api.fly.dev"
HEADERS = {"accept": "application/json"}
DELAY = 0.3
ENDPOINTS_ORDER = [
    "profile", "market_value", "transfers",
    "jersey_numbers", "stats", "injuries", "achievements"
]

# Load data pemain yang sudah ada
with open(INPUT_PATH, "r", encoding="utf-8") as f:
    players = json.load(f)

# Cek apakah file output sudah ada, untuk melanjutkan
if os.path.exists(OUTPUT_PATH):
    with open(OUTPUT_PATH, "r", encoding="utf-8") as f:
        saved_players = json.load(f)
    saved_ids = {p["player_id"] for p in saved_players}
    print(f"Melanjutkan dari {len(saved_ids)} pemain yang sudah tersimpan.")
else:
    saved_players = []
    saved_ids = set()
    print("Mulai simpan pemain satu per satu...")

# Proses tiap pemain
for player in players:
    pid = player["player_id"]
    if pid in saved_ids:
        print(f"Lewati {player['name']} (ID: {pid})")
        continue

    updated = False

    # Retry endpoint yang belum ada
    for endpoint in ENDPOINTS_ORDER:
        if endpoint not in player:
            print(f"{player['name']} - Retry {endpoint}...")
            try:
                url = f"{API_BASE}/players/{pid}/{endpoint}"
                r = requests.get(url, headers=HEADERS)
                if r.status_code == 200:
                    content = r.json()
                    player[endpoint] = content.get(endpoint) if endpoint in content else content
                    updated = True
                else:
                    print(f"Gagal {endpoint}: {r.status_code}")
                    sys.exit(f"Proses dihentikan karena status {r.status_code} pada {endpoint} - {player['name']}")
            except Exception as e:
                print(f"⚠️ Error {endpoint}: {e}")
                sys.exit(f"Proses dihentikan karena error pada {endpoint} - {player['name']}")
            time.sleep(DELAY)

    # Susun ulang field
    ordered_player = {
        "player_id": player["player_id"],
        "name": player["name"],
        "profile_url": player["profile_url"],
        "club_id": player["club_id"],
        "club_name": player["club_name"],
        "competition_id": player["competition_id"],
        "competition_name": player["competition_name"]
    }
    for endpoint in ENDPOINTS_ORDER:
        if endpoint in player:
            ordered_player[endpoint] = player[endpoint]

    # Simpan langsung satu per satu
    saved_players.append(ordered_player)
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(saved_players, f, ensure_ascii=False, indent=2)

    print(f"✅ Selesai {player['name']}")

print(f"\nSemua pemain telah disimpan ke '{OUTPUT_PATH}' sebanyak {len(saved_players)} pemain.")

♻️ Melanjutkan dari 543 pemain yang sudah tersimpan.

🎯 Semua pemain telah disimpan ke 'epl_players_detailed_2024_filled.json' sebanyak 543 pemain.
