In [3]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import time
import re

In [4]:
# Request headers for the scraper: present a desktop-browser User-Agent string.
headers = dict()
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"

# STEP 1: Get team URLs from league page
def get_team_urls(league_url, timeout=30):
    """Collect the club page URLs linked from a league overview page.

    Args:
        league_url: URL of the league page to download.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of absolute club URLs (every matched link whose href contains
        "/startseite/verein/"), de-duplicated and kept in the order in which
        they first appear on the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    res = requests.get(league_url, headers=headers, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page into
    # an empty result.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    team_links = soup.select('td.hauptlink.no-border-links a')

    team_urls = []
    for link in team_links:
        href = link.get("href")
        if href and "/startseite/verein/" in href:
            # Ensure full URL and keep the saison_id part of the path in it
            team_urls.append(f"https://www.transfermarkt.us{href}")

    # dict.fromkeys drops duplicates but, unlike set(), preserves first-seen
    # order, so repeated runs return the clubs in the same sequence.
    return list(dict.fromkeys(team_urls))

In [7]:
# Accumulator list; not referenced again in the cells visible here.
all_data = []

# League overview pages to crawl, keyed by a short competition label.
league_urls = dict(
    MLS="https://www.transfermarkt.us/major-league-soccer/startseite/wettbewerb/MLS1",
)

# Club page URLs discovered on the MLS overview page.
team_urls = get_team_urls(league_urls["MLS"])

In [8]:
# Show the collected club URLs as this cell's output.
team_urls

['https://www.transfermarkt.us/portland-timbers/startseite/verein/4291/saison_id/2024',
 'https://www.transfermarkt.us/colorado-rapids/startseite/verein/1247/saison_id/2024',
 'https://www.transfermarkt.us/new-york-city-fc/startseite/verein/40058/saison_id/2024',
 'https://www.transfermarkt.us/vancouver-whitecaps-fc/startseite/verein/6321/saison_id/2024',
 'https://www.transfermarkt.us/inter-miami-cf/startseite/verein/69261/saison_id/2024',
 'https://www.transfermarkt.us/atlanta-united-fc/startseite/verein/51663/saison_id/2024',
 'https://www.transfermarkt.us/real-salt-lake-city/startseite/verein/6643/saison_id/2024',
 'https://www.transfermarkt.us/new-york-red-bulls/startseite/verein/623/saison_id/2024',
 'https://www.transfermarkt.us/charlotte-fc/startseite/verein/78435/saison_id/2024',
 'https://www.transfermarkt.us/san-diego-fc/startseite/verein/114977/saison_id/2024',
 'https://www.transfermarkt.us/philadelphia-union/startseite/verein/25467/saison_id/2024',
 'https://www.transferm

In [13]:
# Request headers for the scraper: present a desktop-browser User-Agent string.
headers = dict()
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"

def extract_player_market_values(team_url, max_attempts=3, backoff=2.0, timeout=30):
    """Scrape player names, market values and profile links from a club page.

    The page is downloaded up to ``max_attempts`` times. An attempt counts as
    failed when the request raises or when the response contains no
    ``<table class="items">`` (the squad table this function parses).

    Args:
        team_url: URL of the club page to scrape.
        max_attempts: Maximum number of download attempts (at least one is
            always made).
        backoff: Base pause in seconds between attempts; the pause grows
            linearly with the attempt number.
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        A list of dicts with the keys ``player_name``, ``market_value`` (the
        text exactly as shown on the page) and ``player_link`` (absolute URL,
        or None when the name cell has no link). An empty list is returned
        when no squad table was found on any attempt.

    Raises:
        requests.RequestException: If the final attempt fails at the network
            level.
    """
    attempts = max(1, max_attempts)
    table = None

    for attempt in range(1, attempts + 1):
        try:
            res = requests.get(team_url, headers=headers, timeout=timeout)
        except requests.RequestException:
            # Keep the original failure mode once the retries are used up.
            if attempt == attempts:
                raise
        else:
            soup = BeautifulSoup(res.text, "html.parser")
            # Find the squad table that holds the player rows
            table = soup.find("table", class_="items")
            if table:
                break

        if attempt < attempts:
            time.sleep(backoff * attempt)

    if not table:
        # Say so explicitly: an empty result is otherwise indistinguishable
        # from a page that was never parsed.
        print(f"No squad table found for {team_url} after {attempts} attempt(s)")
        return []

    players = []
    for row in table.find_all("tr", class_=["odd", "even"]):
        name_tag = row.find("td", class_="hauptlink")
        value_tag = row.find("td", class_="rechts hauptlink")

        # Rows lacking either cell are not player rows; skip them.
        if not name_tag or not value_tag:
            continue

        # Optional: link to the player's detail page
        link_tag = name_tag.find("a")
        if link_tag and link_tag.has_attr("href"):
            player_link = "https://www.transfermarkt.us" + link_tag["href"]
        else:
            player_link = None

        players.append({
            "player_name": name_tag.get_text(strip=True),
            "market_value": value_tag.get_text(strip=True),
            "player_link": player_link
        })

    return players

# Scrape every club page in turn and collect the per-club results.
overall_data = []
for team_url in team_urls:
    players = extract_player_market_values(team_url)
    overall_data.append(dict(team_url=team_url, players=players))

    # Progress log, followed by the first scraped record as a sanity check
    print(
        f"Team URL: {team_url}",
        f"Number of players scraped: {len(players)}",
        sep="\n",
    )
    if len(players) > 0:
        print(players[0])

    # Optional: pause between requests to avoid overwhelming the server
    time.sleep(1)

Team URL: https://www.transfermarkt.us/portland-timbers/startseite/verein/4291/saison_id/2024
Number of players scraped: 25
{'player_name': 'Maxime Crépeau', 'market_value': '€3.50m', 'player_link': 'https://www.transfermarkt.us/maxime-crepeau/profil/spieler/189014'}
Team URL: https://www.transfermarkt.us/colorado-rapids/startseite/verein/1247/saison_id/2024
Number of players scraped: 26
{'player_name': 'Zack Steffen', 'market_value': '€2.00m', 'player_link': 'https://www.transfermarkt.us/zack-steffen/profil/spieler/221624'}
Team URL: https://www.transfermarkt.us/new-york-city-fc/startseite/verein/40058/saison_id/2024
Number of players scraped: 29
{'player_name': 'Matt Freese', 'market_value': '€600k', 'player_link': 'https://www.transfermarkt.us/matt-freese/profil/spieler/500309'}
Team URL: https://www.transfermarkt.us/vancouver-whitecaps-fc/startseite/verein/6321/saison_id/2024
Number of players scraped: 25
{'player_name': 'Yohei Takaoka', 'market_value': '€3.00m', 'player_link': 'ht

In [15]:
# One record per player, tagged with the club page it was scraped from.
flattened_data = [
    {"team_url": team["team_url"], **player}
    for team in overall_data
    for player in team["players"]
]

overall_df = pd.DataFrame(flattened_data)

In [16]:
# Show the flattened player table as this cell's output.
overall_df

Unnamed: 0,team_url,player_name,market_value,player_link
0,https://www.transfermarkt.us/portland-timbers/...,Maxime Crépeau,€3.50m,https://www.transfermarkt.us/maxime-crepeau/pr...
1,https://www.transfermarkt.us/portland-timbers/...,James Pantemis,€400k,https://www.transfermarkt.us/james-pantemis/pr...
2,https://www.transfermarkt.us/portland-timbers/...,Trey Muse,€200k,https://www.transfermarkt.us/trey-muse/profil/...
3,https://www.transfermarkt.us/portland-timbers/...,Kamal Miller,€2.80m,https://www.transfermarkt.us/kamal-miller/prof...
4,https://www.transfermarkt.us/portland-timbers/...,Dario Župarić,€1.00m,https://www.transfermarkt.us/dario-zuparic/pro...
...,...,...,...,...
408,https://www.transfermarkt.us/club-de-foot-mont...,Prince Owusu,€2.00m,https://www.transfermarkt.us/prince-owusu/prof...
409,https://www.transfermarkt.us/club-de-foot-mont...,Giacomo Vrioni,€1.50m,https://www.transfermarkt.us/giacomo-vrioni/pr...
410,https://www.transfermarkt.us/club-de-foot-mont...,Sunusi Ibrahim,€1.20m,https://www.transfermarkt.us/sunusi-ibrahim/pr...
411,https://www.transfermarkt.us/club-de-foot-mont...,Jules-Anthony Vilsaint,€500k,https://www.transfermarkt.us/jules-anthony-vil...


In [17]:
# Write the current overall_df to CSV, leaving out the DataFrame index column.
overall_df.to_csv("mls_market_values_1.csv", index=False)

In [18]:
# Clubs for which the previous pass returned no players are queued for another
# attempt. The list is derived from overall_data rather than pasted in from
# one particular run, so it stays correct when the notebook is re-executed.
not_scraped = [
    team["team_url"]
    for team in overall_data
    if not team["players"]
]

In [21]:
# Retry pass: scrape only the clubs listed in not_scraped.
overall_data = []
for team_url in not_scraped:
    players = extract_player_market_values(team_url)
    overall_data.append(dict(team_url=team_url, players=players))

    # Progress log, followed by the first scraped record as a sanity check
    print(
        f"Team URL: {team_url}",
        f"Number of players scraped: {len(players)}",
        sep="\n",
    )
    if len(players) > 0:
        print(players[0])

    # Optional: pause between requests to avoid overwhelming the server
    time.sleep(1)

Team URL: https://www.transfermarkt.us/san-diego-fc/startseite/verein/114977/saison_id/2024
Number of players scraped: 28
{'player_name': 'Pablo Sisniega', 'market_value': '€200k', 'player_link': 'https://www.transfermarkt.us/pablo-sisniega/profil/spieler/351477'}
Team URL: https://www.transfermarkt.us/austin-fc/startseite/verein/72309/saison_id/2024
Number of players scraped: 24
{'player_name': 'Brad Stuver', 'market_value': '€800k', 'player_link': 'https://www.transfermarkt.us/brad-stuver/profil/spieler/300600'}
Team URL: https://www.transfermarkt.us/st-louis-city-sc/startseite/verein/82686/saison_id/2024
Number of players scraped: 31
{'player_name': 'Roman Bürki', 'market_value': '€1.80m', 'player_link': 'https://www.transfermarkt.us/roman-burki/profil/spieler/59027'}
Team URL: https://www.transfermarkt.us/columbus-crew-sc/startseite/verein/813/saison_id/2024
Number of players scraped: 25
{'player_name': 'Patrick Schulte', 'market_value': '€3.50m', 'player_link': 'https://www.transf

In [23]:
# One record per player, tagged with the club page it was scraped from.
flattened_data = [
    {"team_url": team["team_url"], **player}
    for team in overall_data
    for player in team["players"]
]

# Print the resulting table, then save it without the index column.
overall_df = pd.DataFrame(flattened_data)
print(overall_df)
overall_df.to_csv("mls_market_values_2.csv", index=False)

                                              team_url        player_name  \
0    https://www.transfermarkt.us/san-diego-fc/star...     Pablo Sisniega   
1    https://www.transfermarkt.us/san-diego-fc/star...      CJ dos Santos   
2    https://www.transfermarkt.us/san-diego-fc/star...      Jacob Jackson   
3    https://www.transfermarkt.us/san-diego-fc/star...       Paddy McNair   
4    https://www.transfermarkt.us/san-diego-fc/star...       Andrés Reyes   
..                                                 ...                ...   
355  https://www.transfermarkt.us/orlando-city-sc/s...  Gustavo Caraballo   
356  https://www.transfermarkt.us/orlando-city-sc/s...     Duncan McGuire   
357  https://www.transfermarkt.us/orlando-city-sc/s...     Ramiro Enrique   
358  https://www.transfermarkt.us/orlando-city-sc/s...        Luis Muriel   
359  https://www.transfermarkt.us/orlando-city-sc/s...      Favian Loyola   

    market_value                                        player_link  
0    

In [24]:
# Clubs for which the previous pass still returned no players get one more
# attempt. The list is derived from overall_data rather than pasted in from
# one particular run, so it stays correct when the notebook is re-executed.
not_scraped = [
    team["team_url"]
    for team in overall_data
    if not team["players"]
]

# Retry pass: scrape only the clubs listed in not_scraped.
overall_data = []
for team_url in not_scraped:
    players = extract_player_market_values(team_url)
    overall_data.append(dict(team_url=team_url, players=players))

    # Progress log, followed by the first scraped record as a sanity check
    print(
        f"Team URL: {team_url}",
        f"Number of players scraped: {len(players)}",
        sep="\n",
    )
    if len(players) > 0:
        print(players[0])

    # Optional: pause between requests to avoid overwhelming the server
    time.sleep(1)

Team URL: https://www.transfermarkt.us/d-c-united/startseite/verein/2440/saison_id/2024
Number of players scraped: 27
{'player_name': 'Joon-hong Kim', 'market_value': '€550k', 'player_link': 'https://www.transfermarkt.us/joon-hong-kim/profil/spieler/709172'}
Team URL: https://www.transfermarkt.us/houston-dynamo-fc/startseite/verein/9168/saison_id/2024
Number of players scraped: 31
{'player_name': 'Jonathan Bond', 'market_value': '€500k', 'player_link': 'https://www.transfermarkt.us/jonathan-bond/profil/spieler/130768'}
Team URL: https://www.transfermarkt.us/new-england-revolution/startseite/verein/626/saison_id/2024
Number of players scraped: 27
{'player_name': 'Aljaž Ivačič', 'market_value': '€600k', 'player_link': 'https://www.transfermarkt.us/aljaz-ivacic/profil/spieler/231923'}


In [25]:
# One record per player, tagged with the club page it was scraped from.
flattened_data = [
    {"team_url": team["team_url"], **player}
    for team in overall_data
    for player in team["players"]
]

# Print the resulting table, then save it without the index column.
overall_df = pd.DataFrame(flattened_data)
print(overall_df)
overall_df.to_csv("mls_market_values_3.csv", index=False)

                                             team_url         player_name  \
0   https://www.transfermarkt.us/d-c-united/starts...       Joon-hong Kim   
1   https://www.transfermarkt.us/d-c-united/starts...        Luis Barraza   
2   https://www.transfermarkt.us/d-c-united/starts...         Jordan Farr   
3   https://www.transfermarkt.us/d-c-united/starts...   Lukas MacNaughton   
4   https://www.transfermarkt.us/d-c-united/starts...      Lucas Bartlett   
..                                                ...                 ...   
80  https://www.transfermarkt.us/new-england-revol...         Malcolm Fry   
81  https://www.transfermarkt.us/new-england-revol...    Leonardo Campana   
82  https://www.transfermarkt.us/new-england-revol...     Ignatius Ganago   
83  https://www.transfermarkt.us/new-england-revol...  Maximiliano Urruti   
84  https://www.transfermarkt.us/new-england-revol...         Marcos Dias   

   market_value                                        player_link  
0     