# In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# Root of the site; prepended to relative links to build full player URLs.
BASE_URL = "https://www.unitedrugby.com"

# URL slug of every club whose "players" page is scraped.
_CLUB_SLUGS = (
    "benetton",
    "cardiff-rugby",
    "connacht",
    "dhl-stormers",
    "dragons",
    "edinburgh",
    "emirates-lions",
    "glasgow-warriors",
    "hollywoodbets-sharks",
    "leinster",
    "team-munster",
    "ospreys",
    "scarlets",
    "ulster",
    "vodacom-bulls",
    "zebre-parma",
)

# Full address of each club's "players" page, in the same order as the slugs.
club_urls = [f"{BASE_URL}/clubs/{slug}/players" for slug in _CLUB_SLUGS]

# Seconds to wait for a club page before giving up. Without a timeout,
# requests.get() can block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# Scrape every club's "players" page and record each player link found.
# Output: player_urls.csv with one row per player card: (club page URL, player URL).
# Opening with 'w' recreates the file on each run; newline='' lets the csv
# module control line endings itself.
with open("player_urls.csv", "w", newline='', encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["club", "player_url"])  # header row

    for club_page in club_urls:
        print(f"Scraping: {club_page}")

        # A network-level failure (DNS error, refused connection, timeout)
        # on one club is reported and skipped, the same way a non-200
        # response is, so the remaining clubs are still scraped.
        try:
            response = requests.get(club_page, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Failed to retrieve {club_page}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve {club_page}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        # Find all player-card divs.
        # Adjust the class name if a page's HTML uses different markup.
        player_cards = soup.find_all("div", class_="player-card")

        for card in player_cards:
            # Only the first <a> inside the card is used as the player link.
            a_tag = card.find("a")
            if a_tag is None or not a_tag.has_attr("href"):
                continue

            player_url = a_tag["href"].strip()
            if not player_url:
                # A blank href would only add an empty row to the CSV.
                continue

            # Site-relative links ("/...") are made absolute; any other
            # href is written unchanged.
            if player_url.startswith("/"):
                player_url = BASE_URL + player_url

            # Write the club page URL and the player link to the CSV.
            writer.writerow([club_page, player_url])

print("Done! Check 'player_urls.csv' for the results.")