<a href="https://colab.research.google.com/github/tajaddini/basketball-reference/blob/main/dpoy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Chrome command-line switches for the scraping session, applied in the
# order listed (headless run with a fixed window size and a custom user agent).
_chrome_flags = (
    '--no-sandbox',
    '--disable-dev-shm-usage',
    '--disable-blink-features=AutomationControlled',
    '--disable-gpu',
    '--window-size=1920,1080',
    '--headless',
    '--log-level=3',
    'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
)

options = webdriver.ChromeOptions()
for _flag in _chrome_flags:
    options.add_argument(_flag)

# NOTE(review): presumably read by webdriver_manager to silence its logging;
# confirm against the installed version.
os.environ['WDM_LOG'] = '0'

# Excel column letter -> width for the exported sheet. The letters follow the
# key order of the dicts built in _parse_row (Player_ID, Season, Lg, Player,
# Age, Tm, STL, BLK).
_COLUMN_WIDTHS = {
    'A': 20,
    'B': 12,
    'C': 8,
    'D': 25,
    'E': 8,
    'F': 8,
    'G': 8,
    'H': 8,
}


def _cell_text(row, stat):
    """Return the stripped text of the row's ``td[data-stat=<stat>]`` cell.

    Returns "" when the cell is absent or cannot be read, so one missing
    column never discards the whole row.
    """
    try:
        cells = row.find_elements(By.CSS_SELECTOR, f'td[data-stat="{stat}"]')
        return cells[0].text.strip() if cells else ""
    except WebDriverException:
        return ""


def _parse_row(row):
    """Turn one ``<tr>`` of the award table into a record dict.

    Returns None for rows that carry no award entry: rows whose class
    contains "thead", rows without a season cell, rows whose season text is
    empty or the literal "Season", and rows without a player name.
    """
    # get_attribute may return None when the row has no class attribute.
    if 'thead' in (row.get_attribute('class') or ''):
        return None

    season_cells = row.find_elements(By.CSS_SELECTOR, 'th[data-stat="season"]')
    if not season_cells:
        return None

    season = season_cells[0].text.strip()
    if not season or season == 'Season':
        return None

    player_id = ""
    links = row.find_elements(By.CSS_SELECTOR, 'td[data-stat="player"] a')
    if links:
        player_name = links[0].text.strip()
        href = links[0].get_attribute('href')
        if href and '/players/' in href:
            # NOTE(review): keeps everything after '/players/' minus '.html'.
            # If hrefs look like /players/<letter>/<id>.html the ID includes
            # the letter directory; left unchanged so exported IDs stay
            # compatible with earlier runs -- confirm this is intended.
            player_id = href.split('/players/')[1].replace('.html', '')
    else:
        # No link in the player cell: fall back to the cell's plain text.
        player_name = _cell_text(row, 'player')

    if not player_name:
        return None

    return {
        'Player_ID': player_id,
        'Season': season,
        'Lg': _cell_text(row, 'lg_id'),
        'Player': player_name,
        'Age': _cell_text(row, 'age'),
        'Tm': _cell_text(row, 'team_id'),
        'STL': _cell_text(row, 'stl_per_g'),
        'BLK': _cell_text(row, 'blk_per_g'),
    }


# Scrape the DPOY award table and export it to basketball_dpoy.xlsx.
driver = None

try:
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )

    driver.set_page_load_timeout(120)

    driver.get('https://www.basketball-reference.com/awards/dpoy.html')

    # Only an explicit wait is used. Selenium's documentation warns against
    # mixing implicit and explicit waits, and an implicit wait would make
    # every lookup of an absent cell below block for its full timeout.
    WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.ID, "dpoy_NBA"))
    )

    data = []
    for row in driver.find_elements(By.CSS_SELECTOR, "table#dpoy_NBA tbody tr"):
        try:
            record = _parse_row(row)
        except WebDriverException as exc:
            # Best effort: one unreadable row must not abort the export,
            # but it is reported instead of being dropped silently.
            print(f"skipped row: {exc}")
            continue
        if record is not None:
            data.append(record)

    if not data:
        raise RuntimeError("no data")

    df = pd.DataFrame(data)

    output_file = 'basketball_dpoy.xlsx'

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='dpoy')
        worksheet = writer.sheets['dpoy']

        for column_letter, width in _COLUMN_WIDTHS.items():
            worksheet.column_dimensions[column_letter].width = width

    print("Done :)")

except Exception as e:
    # Top-level boundary of the script: report the failure instead of
    # surfacing a traceback; the browser is still closed below.
    print(f"error: {e}")

finally:
    if driver is not None:
        try:
            driver.quit()
        except Exception as exc:
            # Cleanup is best effort; a failed quit must not mask the result.
            print(f"error closing browser: {exc}")