In [None]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape pages 1-4 of the Transfermarkt Premier League transfer-record
# listing and combine them into a single DataFrame, ``df_all``, with one row
# per player.

# Sent with every request.
headers = {"User-Agent": "Mozilla/5.0"}
# Seconds to wait on the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30
all_dfs = []


def _strip_position(row):
    """Return the row's "Player" text with the "Position" text removed.

    The main row's "Player" cell is treated as holding the player name
    together with the position text, so the position is cut out of it.
    A missing position is skipped: it would otherwise be stringified to
    "nan" and delete that substring from names such as "Fernando".
    """
    name = str(row["Player"])
    position = row["Position"]
    if pd.isna(position):
        return name.strip()
    return name.replace(str(position), "").strip()


for page in range(1, 5):  # loop over pages 1-4
    url = f"https://www.transfermarkt.com/premier-league/transferrekorde/wettbewerb/GB1/plus/1?page={page}&saison_id=2025&zuab=zu"
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail on HTTP errors here rather than trying to parse an error page.
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")
    table = soup.find("table", {"class": "items"})
    if table is None:
        raise ValueError(f"No table with class 'items' found on page {page}: {url}")

    # Wrap the markup in StringIO: passing literal HTML strings to
    # read_html is deprecated in recent pandas versions.
    df = pd.read_html(StringIO(str(table)))[0]

    # Flatten headers: for tuple (multi-level) column labels keep the
    # second level.
    df.columns = [col[1] if isinstance(col, tuple) else col for col in df.columns]

    # The parsed table is read as repeating groups of 7 rows per player
    # (assumed layout of the page's nested tables; verify if the site
    # changes). Offset 0 is the main row; offsets 2-6 carry the detail
    # values in their "Player" column.
    df_main = df.iloc[::7].reset_index(drop=True)         # main row
    positions = df.iloc[2::7].reset_index(drop=True)      # position
    left_clubs = df.iloc[3::7].reset_index(drop=True)     # left club
    left_league = df.iloc[4::7].reset_index(drop=True)    # left league
    joined_club = df.iloc[5::7].reset_index(drop=True)    # joined club
    joined_league = df.iloc[6::7].reset_index(drop=True)  # joined league

    # Add them as new columns (aligned on the reset 0..n-1 index).
    df_main["Position"] = positions["Player"]
    df_main["Left Club"] = left_clubs["Player"]
    df_main["Left League"] = left_league["Player"]
    df_main["Joined Club"] = joined_club["Player"]
    df_main["Joined League"] = joined_league["Player"]

    # Remove the position text from the combined "Player" cell.
    df_main["Player"] = df_main.apply(_strip_position, axis=1)

    # Drop the combined 'Left' and 'Joined' columns (and 'Nat.');
    # errors="ignore" tolerates any of them being absent.
    df_main = df_main.drop(columns=["Left", "Joined", "Nat."], errors="ignore")

    all_dfs.append(df_main)

# Combine all pages
df_all = pd.concat(all_dfs, ignore_index=True)

print(df_all.head(10))   # first 10 players
print(len(df_all))       # original note expects 100 here
