In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import time
import re

In [2]:
# Browser-like User-Agent header passed along with the HTTP requests below.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# STEP 1: Get team URLs from league page
def get_team_urls(league_url, timeout=30):
    """Collect the club start-page URLs linked from a league overview page.

    Parameters
    ----------
    league_url : str
        URL of the league page to download.
    timeout : float, optional
        Seconds to wait for the server before the request is aborted, so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    list of str
        Absolute team URLs with duplicates removed, in the order in which
        they first appear on the page (stable across runs).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Parsing an error page
        would otherwise silently yield an empty team list.
    """
    res = requests.get(league_url, headers=headers, timeout=timeout)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    team_urls = []
    for link in soup.select("td.hauptlink.no-border-links a"):
        href = link.get("href")
        # Keep only club start-page links; the href is site-relative and is
        # appended unchanged, so any season segment it carries is preserved.
        if href and "/startseite/verein/" in href:
            team_urls.append(f"https://www.transfermarkt.us{href}")

    # dict.fromkeys drops duplicates while keeping first-seen order, unlike
    # set(), whose iteration order for strings can differ between runs.
    return list(dict.fromkeys(team_urls))

In [3]:
# League overview pages, keyed by a short league code.
league_urls = dict(
    USLC="https://www.transfermarkt.us/usl-championship/startseite/wettbewerb/USL",
)

all_data = []

# Resolve the team pages linked from the USLC overview page.
team_urls = get_team_urls(league_urls["USLC"])

In [4]:
# Bare expression: shows the collected team URLs as this cell's output.
team_urls

['https://www.transfermarkt.us/north-carolina-fc/startseite/verein/11228/saison_id/2024',
 'https://www.transfermarkt.us/miami-fc/startseite/verein/42814/saison_id/2024',
 'https://www.transfermarkt.us/hartford-athletic/startseite/verein/67984/saison_id/2024',
 'https://www.transfermarkt.us/louisville-city-fc/startseite/verein/46316/saison_id/2024',
 'https://www.transfermarkt.us/phoenix-rising-fc/startseite/verein/33414/saison_id/2024',
 'https://www.transfermarkt.us/orange-county-sc/startseite/verein/31068/saison_id/2024',
 'https://www.transfermarkt.us/sacramento-republic-fc/startseite/verein/40835/saison_id/2024',
 'https://www.transfermarkt.us/birmingham-legion-fc/startseite/verein/64480/saison_id/2024',
 'https://www.transfermarkt.us/pittsburgh-riverhounds-sc/startseite/verein/4290/saison_id/2024',
 'https://www.transfermarkt.us/las-vegas-lights-fc/startseite/verein/62204/saison_id/2024',
 'https://www.transfermarkt.us/san-antonio-fc/startseite/verein/52910/saison_id/2024',
 'htt

In [5]:
# Re-declares the same User-Agent header mapping that the first code cell defines.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

def _parse_player_row(row):
    """Turn one squad-table row into a player dict, or None if a cell is missing."""
    name_tag = row.find("td", class_="hauptlink")
    value_tag = row.find("td", class_="rechts hauptlink")
    if name_tag is None or value_tag is None:
        return None

    # Optional: link to the player's detail page, made absolute.
    link_tag = name_tag.find("a")
    if link_tag is not None and link_tag.has_attr("href"):
        player_link = "https://www.transfermarkt.us" + link_tag["href"]
    else:
        player_link = None

    return {
        "player_name": name_tag.get_text(strip=True),
        "market_value": value_tag.get_text(strip=True),
        "player_link": player_link,
    }


def extract_player_market_values(team_url, retries=2, backoff=2.0, timeout=30):
    """Scrape player names, market values and profile links from a team page.

    The same team URL can come back without the squad table on one request
    and with it on a later one (presumably transient blocking or rate
    limiting -- not confirmed). The page is therefore re-fetched a bounded
    number of times before giving up.

    Parameters
    ----------
    team_url : str
        URL of the team page to download.
    retries : int, optional
        Extra attempts made when the squad table is missing from the response.
    backoff : float, optional
        Base pause in seconds between attempts; the pause grows linearly
        with the attempt number. Use 0 to retry immediately.
    timeout : float, optional
        Seconds to wait for the server before the request is aborted.

    Returns
    -------
    list of dict
        One dict per player with the keys ``player_name``, ``market_value``
        (raw text as shown on the page) and ``player_link`` (absolute URL or
        None). Empty if no squad table was found after all attempts.
    """
    table = None
    for attempt in range(retries + 1):
        res = requests.get(team_url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(res.text, "html.parser")

        # The squad is rendered as the table carrying the "items" class.
        table = soup.find("table", class_="items")
        if table is not None:
            break
        if attempt < retries and backoff > 0:
            time.sleep(backoff * (attempt + 1))

    if table is None:
        return []

    players = []
    for row in table.find_all("tr", class_=["odd", "even"]):
        player = _parse_player_row(row)
        # Rows lacking a name or value cell are skipped.
        if player is not None:
            players.append(player)
    return players

# Scrape every team page in turn, printing a short summary per team and
# collecting the raw results grouped by team.
overall_data = []
for team_url in team_urls:
    players = extract_player_market_values(team_url)

    print(f"Team URL: {team_url}")
    print(f"Number of players scraped: {len(players)}")
    if len(players) > 0:
        # Sanity check: show the first parsed player record.
        print(players[0])

    overall_data.append(dict(team_url=team_url, players=players))

    # Brief pause between requests to avoid overwhelming the server.
    time.sleep(1)

Team URL: https://www.transfermarkt.us/north-carolina-fc/startseite/verein/11228/saison_id/2024
Number of players scraped: 24
{'player_name': 'Jake McGuire', 'market_value': '€200k', 'player_link': 'https://www.transfermarkt.us/jake-mcguire/profil/spieler/482650'}
Team URL: https://www.transfermarkt.us/miami-fc/startseite/verein/42814/saison_id/2024
Number of players scraped: 28
{'player_name': 'Bill Hamid', 'market_value': '€100k', 'player_link': 'https://www.transfermarkt.us/bill-hamid/profil/spieler/77714'}
Team URL: https://www.transfermarkt.us/hartford-athletic/startseite/verein/67984/saison_id/2024
Number of players scraped: 21
{'player_name': 'Antony Siaha', 'market_value': '€100k', 'player_link': 'https://www.transfermarkt.us/antony-siaha/profil/spieler/863627'}
Team URL: https://www.transfermarkt.us/louisville-city-fc/startseite/verein/46316/saison_id/2024
Number of players scraped: 26
{'player_name': 'Damian Las', 'market_value': '€250k', 'player_link': 'https://www.transferm

In [6]:
# One record per player, each tagged with the team page it was scraped from.
flattened_data = [
    {"team_url": team["team_url"], **player}
    for team in overall_data
    for player in team["players"]
]

overall_df = pd.DataFrame(flattened_data)
overall_df

Unnamed: 0,team_url,player_name,market_value,player_link
0,https://www.transfermarkt.us/north-carolina-fc...,Jake McGuire,€200k,https://www.transfermarkt.us/jake-mcguire/prof...
1,https://www.transfermarkt.us/north-carolina-fc...,Akira Fitzgerald,€50k,https://www.transfermarkt.us/akira-fitzgerald/...
2,https://www.transfermarkt.us/north-carolina-fc...,Trevor Mulqueen,€25k,https://www.transfermarkt.us/trevor-mulqueen/p...
3,https://www.transfermarkt.us/north-carolina-fc...,Conor Donovan,€300k,https://www.transfermarkt.us/conor-donovan/pro...
4,https://www.transfermarkt.us/north-carolina-fc...,Bryce Washington,€175k,https://www.transfermarkt.us/bryce-washington/...
...,...,...,...,...
349,https://www.transfermarkt.us/monterey-bay-fc/s...,Anton Søjberg,€200k,https://www.transfermarkt.us/anton-sojberg/pro...
350,https://www.transfermarkt.us/monterey-bay-fc/s...,Mayele Malango,€175k,https://www.transfermarkt.us/mayele-malango/pr...
351,https://www.transfermarkt.us/monterey-bay-fc/s...,Luke Ivanovic,€175k,https://www.transfermarkt.us/luke-ivanovic/pro...
352,https://www.transfermarkt.us/monterey-bay-fc/s...,Diego Gutiérrez,€150k,https://www.transfermarkt.us/diego-gutierrez/p...


In [7]:
# Write the player table to disk, leaving out the DataFrame index column.
overall_df.to_csv(path_or_buf="uslc_market_values_1.csv", index=False)

In [8]:
# Hard-coded list of team pages to be handled in a separate scraping pass.
not_scraped = [
    "https://www.transfermarkt.us/oakland-roots-sc/startseite/verein/69044/saison_id/2024",
    "https://www.transfermarkt.us/tampa-bay-rowdies/startseite/verein/23565/saison_id/2024",
    "https://www.transfermarkt.us/new-mexico-united/startseite/verein/70056/saison_id/2024",
    "https://www.transfermarkt.us/charleston-battery/startseite/verein/4115/saison_id/2024",
    "https://www.transfermarkt.us/fc-tulsa/startseite/verein/24260/saison_id/2024",
    "https://www.transfermarkt.us/lexington-sc/startseite/verein/103004/saison_id/2024",
    "https://www.transfermarkt.us/el-paso-locomotive-fc/startseite/verein/67375/saison_id/2024",
    "https://www.transfermarkt.us/detroit-city-fc/startseite/verein/38647/saison_id/2024",
    "https://www.transfermarkt.us/loudoun-united-fc/startseite/verein/68031/saison_id/2024",
    "https://www.transfermarkt.us/indy-eleven/startseite/verein/39148/saison_id/2024",
]

In [9]:
# Start a fresh result list and run the scraper over the `not_scraped` pages.
overall_data = []
for team_url in not_scraped:
    players = extract_player_market_values(team_url)

    # Two-line summary per team (URL, then player count).
    print(
        f"Team URL: {team_url}",
        f"Number of players scraped: {len(players)}",
        sep="\n",
    )
    if players:
        print(players[0])  # first record as a sanity check

    overall_data.append({"team_url": team_url, "players": players})

    # Wait a second before the next request to go easy on the server.
    time.sleep(1)

Team URL: https://www.transfermarkt.us/oakland-roots-sc/startseite/verein/69044/saison_id/2024
Number of players scraped: 28
{'player_name': 'Kendall McIntosh', 'market_value': '€125k', 'player_link': 'https://www.transfermarkt.us/kendall-mcintosh/profil/spieler/189890'}
Team URL: https://www.transfermarkt.us/tampa-bay-rowdies/startseite/verein/23565/saison_id/2024
Number of players scraped: 23
{'player_name': 'Nicolás Campisi', 'market_value': '€300k', 'player_link': 'https://www.transfermarkt.us/nicolas-campisi/profil/spieler/745925'}
Team URL: https://www.transfermarkt.us/new-mexico-united/startseite/verein/70056/saison_id/2024
Number of players scraped: 27
{'player_name': 'Alexander Tambakis', 'market_value': '€150k', 'player_link': 'https://www.transfermarkt.us/alexander-tambakis/profil/spieler/68436'}
Team URL: https://www.transfermarkt.us/charleston-battery/startseite/verein/4115/saison_id/2024
Number of players scraped: 0
Team URL: https://www.transfermarkt.us/fc-tulsa/startsei

In [10]:
# Flatten the per-team results into one record per player.
flattened_data = []
for team in overall_data:
    flattened_data.extend(
        {"team_url": team["team_url"], **player} for player in team["players"]
    )

overall_df = pd.DataFrame(flattened_data)
print(overall_df)

# Save this pass to its own CSV file, without the index column.
overall_df.to_csv(path_or_buf="uslc_market_values_2.csv", index=False)

                                              team_url       player_name  \
0    https://www.transfermarkt.us/oakland-roots-sc/...  Kendall McIntosh   
1    https://www.transfermarkt.us/oakland-roots-sc/...   Raphael Spiegel   
2    https://www.transfermarkt.us/oakland-roots-sc/...     Timothy Syrel   
3    https://www.transfermarkt.us/oakland-roots-sc/...     Thomas Camier   
4    https://www.transfermarkt.us/oakland-roots-sc/...      Camden Riley   
..                                                 ...               ...   
195  https://www.transfermarkt.us/loudoun-united-fc...      Pedro Santos   
196  https://www.transfermarkt.us/loudoun-united-fc...         Zach Ryan   
197  https://www.transfermarkt.us/loudoun-united-fc...    Wesley Leggett   
198  https://www.transfermarkt.us/loudoun-united-fc...      Riley Bidois   
199  https://www.transfermarkt.us/loudoun-united-fc...       Uriel Koffi   

    market_value                                        player_link  
0          €125k 

In [11]:
# Hard-coded list of two team pages to scrape once more. The recorded output
# of the previous pass shows 0 players for the Charleston Battery page.
not_scraped = [
    "https://www.transfermarkt.us/charleston-battery/startseite/verein/4115/saison_id/2024",
    "https://www.transfermarkt.us/indy-eleven/startseite/verein/39148/saison_id/2024",
]

overall_data = []
for team_url in not_scraped:
    players = extract_player_market_values(team_url)

    print(f"Team URL: {team_url}")
    print(f"Number of players scraped: {len(players)}")
    if len(players) != 0:
        # Print a single player's data as a sanity check.
        print(players[0])

    overall_data.append(dict(team_url=team_url, players=players))

    # Short sleep between requests to avoid overwhelming the server.
    time.sleep(1)

Team URL: https://www.transfermarkt.us/charleston-battery/startseite/verein/4115/saison_id/2024
Number of players scraped: 25
{'player_name': 'Luis Zamudio', 'market_value': '€150k', 'player_link': 'https://www.transfermarkt.us/luis-zamudio/profil/spieler/564971'}
Team URL: https://www.transfermarkt.us/indy-eleven/startseite/verein/39148/saison_id/2024
Number of players scraped: 24
{'player_name': 'Hunter Sulte', 'market_value': '€150k', 'player_link': 'https://www.transfermarkt.us/hunter-sulte/profil/spieler/617576'}


In [12]:
# Build one flat record per player, carrying along the team page URL.
flattened_data = [
    {"team_url": team["team_url"], **player}
    for team in overall_data
    for player in team["players"]
]

overall_df = pd.DataFrame(flattened_data)
print(overall_df)

# Persist this pass as a separate CSV file, omitting the index column.
overall_df.to_csv(path_or_buf="uslc_market_values_3.csv", index=False)

                                             team_url          player_name  \
0   https://www.transfermarkt.us/charleston-batter...         Luis Zamudio   
1   https://www.transfermarkt.us/charleston-batter...     Christian Garner   
2   https://www.transfermarkt.us/charleston-batter...       Daniel Kuzemka   
3   https://www.transfermarkt.us/charleston-batter...            Enzo Mori   
4   https://www.transfermarkt.us/charleston-batter...         Graham Smith   
5   https://www.transfermarkt.us/charleston-batter...        Leland Archer   
6   https://www.transfermarkt.us/charleston-batter...      Michael Edwards   
7   https://www.transfermarkt.us/charleston-batter...        Joey Akpunonu   
8   https://www.transfermarkt.us/charleston-batter...     Nathan Dossantos   
9   https://www.transfermarkt.us/charleston-batter...  Langston Blackstock   
10  https://www.transfermarkt.us/charleston-batter...         Mark Segbers   
11  https://www.transfermarkt.us/charleston-batter...         Aa