In [None]:
#%% [code]
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

# File names
input_csv = "espn_player_stats__all_with_states.csv"
output_csv = "espn_player_stats_major_conferences_with_state2.csv"

# Seconds to sleep after each navigation so the page can finish rendering.
PAGE_LOAD_WAIT_SECONDS = 6

# List of candidate XPaths for the location element; they are tried in order.
xpath_candidates = [
    '//*[@id="fittPageContainer"]/div[2]/div/div[1]/div/div/div[1]/div[2]/div/ul/li[3]/div[2]/div',
    '//*[@id="fittPageContainer"]/div[2]/div/div[1]/div/div/div[1]/div[2]/div/ul/li[2]/div[2]/div',
    '//*[@id="fittPageContainer"]/div[2]/div/div[1]/div/div/div[1]/div[2]/div/ul/li[4]/div[2]/div'
]


def clean_state(value):
    """Return the stripped state string stored in a cell, or "" if it is missing.

    None, NaN and pd.NA count as missing, as does the literal text "nan"
    (which is what str() of a float NaN produces).
    """
    if pd.isna(value):
        return ""
    text = str(value).strip()
    return "" if text == "nan" else text


def extract_state(location_text):
    """Return the state part of a "City, ST" string, or "" if there is no comma.

    Only the first two characters of the last comma-separated part are kept.
    NOTE(review): for locations that do not end in a two-letter state code this
    truncates whatever follows the last comma (e.g. "City, Nigeria" -> "Ni").
    """
    parts = location_text.strip().split(',')
    if len(parts) < 2:
        return ""
    return parts[-1].strip()[:2]


def find_state(driver, xpaths):
    """Return the first state parsed from any of the candidate XPaths, else "".

    A candidate is skipped both when the element does not exist and when its
    text does not look like "City, ST", so that a non-location element matching
    an earlier XPath does not hide a later candidate that holds the location.
    """
    for xp in xpaths:
        try:
            elem = driver.find_element(By.XPATH, xp)
        except NoSuchElementException:
            continue
        state = extract_state(elem.text)  # e.g., "Farmington, MI" -> "MI"
        if state:
            return state
    return ""


def main():
    """Fill in the "Player State" column by scraping each player's page.

    Reads ``input_csv``, visits the "Player Link" of every row whose
    "Player State" is empty, and writes the frame to ``output_csv`` after each
    processed row and once more at the end.

    Returns:
        The updated DataFrame.
    """
    # Load CSV into a DataFrame
    df = pd.read_csv(input_csv)
    if "Player State" not in df.columns:
        df["Player State"] = ""
    # An all-empty column is parsed as float64; use object dtype so that
    # assigning strings below is a valid operation.
    df["Player State"] = df["Player State"].astype(object)

    # Set up Chrome options (headless)
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Initialize a single WebDriver instance
    driver = webdriver.Chrome(options=options)
    try:
        # Loop through each row and update the CSV after each player.
        for index, row in df.iterrows():
            # Skip this row if "Player State" is already non-empty.
            current_state = clean_state(row.get("Player State", ""))
            if current_state:
                print(f"Row {index}: {row['Player Name']} already has state '{current_state}'. Skipping.")
                continue

            player_link = row["Player Link"]
            try:
                driver.get(player_link)
                time.sleep(PAGE_LOAD_WAIT_SECONDS)  # Wait for page to load

                state_abbr = find_state(driver, xpath_candidates)
                df.at[index, "Player State"] = state_abbr
                print(f"Row {index}: {row['Player Name']} - State: {state_abbr}")
            except Exception as e:
                # Best effort: report the failure and move on to the next player.
                print(f"Row {index}: {row['Player Name']} - Error: {e}")
                df.at[index, "Player State"] = ""

            # Update the CSV file after processing each row so progress survives a crash.
            df.to_csv(output_csv, index=False)

        # Skipped rows never reach the per-row save above, so write once more;
        # otherwise no output file exists when every row already had a state.
        df.to_csv(output_csv, index=False)
    finally:
        # Always shut the browser down, even on an error or Ctrl-C.
        driver.quit()

    print("Finished processing. Updated CSV saved as", output_csv)
    return df


if __name__ == "__main__":
    # Bind the result at top level in case later notebook cells use `df`.
    df = main()


Row 0: Mark Sears already has state 'AL'. Skipping.
Row 1: Grant Nelson already has state 'ND'. Skipping.
Row 2: Aden Holloway already has state 'NC'. Skipping.
Row 3: Latrell Wrightsell Jr. already has state 'NE'. Skipping.
Row 4: Labaron Philon already has state 'AL'. Skipping.
Row 5: Chris Youngblood already has state 'AL'. Skipping.
Row 6: Clifford Omoruyi already has state 'Ni'. Skipping.
Row 7: Derrion Reid already has state 'GA'. Skipping.
Row 8: Mouhamed Dioubate already has state 'NY'. Skipping.
Row 9: Jarin Stevenson already has state 'NC'. Skipping.
Row 10: Houston Mallette already has state 'CA'. Skipping.
Row 11: Aiden Sherrell already has state 'MI'. Skipping.
Row 12: Max Scharnowski already has state 'IL'. Skipping.
Row 13: Adou Thiero already has state 'PA'. Skipping.
Row 14: Boogie Fland already has state 'NY'. Skipping.
Row 15: Johnell Davis already has state 'IN'. Skipping.
Row 16: D.J. Wagner already has state 'NJ'. Skipping.
Row 17: Zvonimir Ivisic already has stat