In [None]:
#%% [code]
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

# Build the Chrome launch options: run without a visible window and pass the
# sandbox / shared-memory flags alongside it.
options = webdriver.ChromeOptions()
for _chrome_flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(_chrome_flag)

# Schools whose player stats should be scraped.
# Every entry must be lowercase: the scraper lowercases the school name read
# from the page before testing membership in this set, so an entry containing
# an uppercase letter would never match.
target_schools = {
    "abilene christian", "alabama", "alabama a&m", "alabama state", "albany", "alcorn state", "american", 
    "appalachian state", "arizona", "arizona state", "arkansas", "arkansas state", "army", "auburn", "austin peay", 
    "ball state", "baylor", "bellarmine", "belmont", "binghamton", "boise state", "boston college", "boston university", 
    "bowling green", "bradley", "brigham young", "brown", "bryant", "bucknell", "buffalo", "butler", "cal poly", 
    "cal state bakersfield", "cal state fullerton", "cal state northridge", "california", "campbell", "canisius", 
    "central arkansas", "central connecticut", "central florida", "central michigan", "charleston", "charleston southern", 
    "charlotte", "chattanooga", "chicago state", "cincinnati", "citadel", "clemson", "cleveland state", "coastal carolina", 
    "colgate", "colorado", "colorado state", "columbia", "connecticut", "coppin state", "cornell", "creighton", "dartmouth", 
    "davidson", "dayton", "delaware", "delaware state", "denver", "depaul", "detroit mercy", "drake", "drexel", "duke", 
    "duquesne", "east carolina", "east tennessee state", "eastern illinois", "eastern kentucky", "eastern michigan", 
    "eastern washington", "elon", "evansville", "fairfield", "fairleigh dickinson", "florida", "florida a&m", "florida atlantic", 
    "florida gulf coast", "florida international", "florida state", "fordham", "fresno state", "furman", "gardner-webb", 
    "george mason", "george washington", "georgetown", "georgia", "georgia southern", "georgia state", "georgia tech", 
    "gonzaga", "grambling state", "grand canyon", "green bay", "hampton", "hartford", "harvard", "hawaii", "high point", 
    "hofstra", "holy cross", "houston", "houston christian", "howard", "idaho", "idaho state", "illinois", "illinois chicago", 
    "illinois state", "incarnate word", "indiana", "indiana state", "iona", "iowa", "iowa state", "iupui", "jacksonville", 
    "jacksonville state", "james madison", "kansas", "kansas state", "kennesaw state", "kent state", "kentucky", "la salle", 
    "lafayette", "lamar", "lehigh", "liberty", "lipscomb", "little rock", "long beach state", "long island", "longwood", 
    "louisiana", "louisiana monroe", "louisiana tech", "louisville", "loyola chicago", "loyola maryland", "loyola marymount", 
    "maine", "manhattan", "marist", "marquette", "marshall", "maryland", "maryland eastern shore", "massachusetts", 
    "mcneese", "memphis", "mercer", "merrimack", "miami", "miami (oh)", "michigan", "michigan state", "middle tennessee", 
    "milwaukee", "minnesota", "mississippi state", "mississippi valley state", "missouri", "missouri state", 
    "missouri-kansas city", "monmouth", "montana", "montana state", "morehead state", "morgan state", "mount st. mary's", 
    "murray state", "navy", "nebraska", "nevada", "new hampshire", "new mexico", "new mexico state", "new orleans", 
    "niagara", "nicholls", "njit", "norfolk state", "north alabama", "north carolina", "north carolina a&t", 
    "north carolina central", "north dakota", "north dakota state", "north florida", "north texas", "northeastern", 
    "northern arizona", "northern colorado", "northern illinois", "northern iowa", "northwestern", "northwestern state", 
    "notre dame", "oakland", "ohio", "ohio state", "oklahoma", "oklahoma state", "old dominion", "omaha", "oral roberts", 
    "oregon", "oregon state", "pacific", "penn", "penn state", "pepperdine", "pittsburgh", "portland", "portland state", 
    "prairie view a&m", "presbyterian", "princeton", "providence", "purdue", "purdue fort wayne", "quinnipiac", 
    "radford", "rhode island", "rice", "richmond", "rider", "robert morris", "rutgers", "sacramento state", "sacred heart", 
    "saint francis (pa)", "saint joseph's", "saint louis", "saint mary's", "sam houston", "samford", "san diego", 
    "san diego state", "san francisco", "san jose state", "santa clara", "savannah state", "seattle", "seton hall", 
    "siena", "south alabama", "south carolina", "south carolina state", "south dakota", "south dakota state", 
    "south florida", "southeast missouri state", "southeastern louisiana", "southern illinois", "southern indiana", 
    "southern methodist", "southern mississippi", "southern university", "southern utah", "st. bonaventure", 
    "st. francis brooklyn", "st. john's", "st. peter's", "stanford", "stephen f. austin", "stetson", "stonehill", 
    "syracuse", "tarleton state", "temple", "tennessee", "tennessee state", "tennessee tech", "texas", "texas a&m", 
    "texas a&m-corpus christi", "texas christian", "texas southern", "texas state", "texas tech", "toledo", "towson", 
    "troy", "tulane", "tulsa", "uab", "uc davis", "uc irvine", "uc riverside", "uc san diego", "uc santa barbara", 
    "ucla", "umbc", "unc asheville", "unc guilford", "unc wilmington", "usc", "usc upstate", "utah", "utah state", 
    "utah tech", "utah valley", "utep", "ut martin", "utsa", "valparaiso", "vanderbilt", "vcu", "vermont", 
    "villanova", "virginia", "virginia military institute", "virginia tech", "wagner", "wake forest", "washington", 
    "washington state", "weber state", "west virginia", "western carolina", "western illinois", "western kentucky", 
    "western michigan", "wichita state", "william & mary", "winthrop", "wisconsin", "wofford", "wright state", 
    "wyoming", "xavier", "yale", "youngstown state"
}

# Visit the ESPN stats page for team IDs 1 to 2990 and, for every team whose
# name is in `target_schools`, save each player's name, minutes played and
# profile link to "espn_player_stats_<school name>.csv".

STATS_URL_TEMPLATE = "https://www.espn.com/mens-college-basketball/team/stats/_/id/{team_id}"
PAGE_LOAD_WAIT_SECONDS = 6  # fixed pause after navigation to allow the page to load

# XPaths for the school name in the page header, tried in order
# (primary first, then the fallback).
SCHOOL_NAME_XPATHS = (
    '//*[@id="fittPageContainer"]/div[2]/div/div[1]/div/div/div/div/h1/span/span[1]',
    '//*[@id="fittPageContainer"]/div[2]/div/div[1]/div/div/div/div/h1/span[2]/span[1]',
)

# Common XPath prefix of the Season Totals section. The player-name table and
# the minutes table both hang off this node and are indexed by the same row.
SEASON_TOTALS_XPATH = '//*[@id="fittPageContainer"]/div[2]/div/div[5]/div/div/section/div/div[6]/div[2]'

CSV_COLUMNS = ["Player Name", "Team", "Minutes Played", "Player Link"]


def _find_school_name(driver):
    """Return the stripped school name from the loaded page.

    Tries each XPath in SCHOOL_NAME_XPATHS in order and returns the text of
    the first element found (which may be an empty string). Returns None when
    none of the XPaths matches an element.
    """
    for xpath in SCHOOL_NAME_XPATHS:
        try:
            return driver.find_element(By.XPATH, xpath).text.strip()
        except NoSuchElementException:
            continue
    return None


def _scrape_player_rows(driver, school_name):
    """Collect one record per player row of the Season Totals table.

    Rows are read starting at row 1 until a row is missing, its player name
    is empty, or its player name is "Total" (case-insensitive). Returns a
    list of dicts keyed by the names in CSV_COLUMNS.
    """
    records = []
    row = 1
    while True:
        try:
            player_element = driver.find_element(
                By.XPATH, f"{SEASON_TOTALS_XPATH}/table/tbody/tr[{row}]/td/span/a"
            )
            player_name = player_element.text.strip()
            # An empty name or the "Total" summary row marks the end of the players.
            if not player_name or player_name.lower() == "total":
                break
            player_link = player_element.get_attribute("href")

            # Minutes played is the first column of the adjacent stats table.
            minutes = driver.find_element(
                By.XPATH,
                f"{SEASON_TOTALS_XPATH}/div/div[2]/table/tbody/tr[{row}]/td[1]/span",
            ).text.strip()
        except NoSuchElementException:
            # Ran past the last row (or the row has no matching minutes cell).
            break

        records.append({
            "Player Name": player_name,
            "Team": school_name,
            "Minutes Played": minutes,
            "Player Link": player_link,
        })
        row += 1
    return records


# One browser is shared by every request instead of launching Chrome once per
# team ID, and the `finally` guarantees it is shut down even when the run is
# interrupted (e.g. Ctrl-C) or an unexpected WebDriver error is raised.
driver = webdriver.Chrome(options=options)
try:
    for team_id in range(1, 2991):
        driver.get(STATS_URL_TEMPLATE.format(team_id=team_id))
        time.sleep(PAGE_LOAD_WAIT_SECONDS)

        school_name = _find_school_name(driver)
        if school_name is None:
            print(f"Team ID {team_id} not found (no school name).")
            continue

        print(f"Team ID {team_id}: {school_name}")

        # Case-insensitive match: target_schools holds lowercase names.
        if school_name.lower() not in target_schools:
            print(f"Skipping team {school_name}; not in target list.")
            continue

        print(f"Scraping data for {school_name}...")
        data = _scrape_player_rows(driver, school_name)
        if not data:
            print(f"No player data found for {school_name}.")
            continue

        df = pd.DataFrame(data, columns=CSV_COLUMNS)
        filename = f"espn_player_stats_{school_name}.csv"
        df.to_csv(filename, index=False)
        print(f"Data saved to {filename}")
finally:
    driver.quit()


Team ID 1: ALASKA ANCHORAGE
Skipping team ALASKA ANCHORAGE; not in target list.
Team ID 2: AUBURN
Scraping data for AUBURN...
Data saved to espn_player_stats_AUBURN.csv
Team ID 3: BIRMINGHAM-SOUTHERN
Skipping team BIRMINGHAM-SOUTHERN; not in target list.
Team ID 4: DESALES UNIVERSITY
Skipping team DESALES UNIVERSITY; not in target list.
Team ID 5: UAB
Scraping data for UAB...
Data saved to espn_player_stats_UAB.csv
Team ID 6: SOUTH ALABAMA
Scraping data for SOUTH ALABAMA...
Data saved to espn_player_stats_SOUTH ALABAMA.csv
Team ID 7: AMHERST
Skipping team AMHERST; not in target list.
Team ID 8: ARKANSAS
Scraping data for ARKANSAS...
Data saved to espn_player_stats_ARKANSAS.csv
Team ID 9: ARIZONA STATE
Scraping data for ARIZONA STATE...
Data saved to espn_player_stats_ARIZONA STATE.csv
Team ID 10 not found (no school name).
Team ID 11: COLORADO MESA
Skipping team COLORADO MESA; not in target list.
Team ID 12: ARIZONA
Scraping data for ARIZONA...
Data saved to espn_player_stats_ARIZONA.c

KeyboardInterrupt: 