In [82]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Open the target URL
def get_html_from_torvik_players(year):

    last_year = year - 1

    url = f"https://barttorvik.com/playerstat.php?link=y&sIndex=53&year={year}&minmin=5&start={last_year}1101&end={year}0501"

    driver = webdriver.Chrome()  # or webdriver.Firefox() if you prefer

    iters = 0

    driver.get(url)
    while True and iters < 40:
        try:
            # Wait up to 10 seconds for the "Show 100 more" element to be clickable
            expand_element = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, "expand"))
            )
            # Locate the <a> tag within the <th id="expand">
            show_more_button = expand_element.find_element(By.TAG_NAME, "a")
            
            # Click the button
            show_more_button.click()
            
            # Optionally wait a bit for the new content to load before the next click.
            time.sleep(2)
            iters += 1
            
        except Exception as e:
            # If the button is no longer found or clickable, exit the loop.
            print("No more 'Show 100 more' button found or an error occurred:", e)
            break

    # Once the loop is complete, you can scrape the loaded content.
    html_source = driver.page_source
    # For example, use BeautifulSoup to parse the html_source if needed.

    driver.quit()

    return html_source


In [70]:
from bs4 import BeautifulSoup
import pandas as pd


In [75]:
def get_data_from_html(html_source):

    # Parse the HTML
    soup = BeautifulSoup(html_source, "html.parser")

    # Locate the table. Here we search for the table by its style attribute.
    table = soup.find("table", {"style": "white-space:nowrap;margin:auto;table-layout:fixed"})
    if not table:
        raise ValueError("Table not found!")

    def extract_complete_row(row):
        to_use = [0,2,3,4,6,7,10,11,13,16, 18, 19, 20, 21, 22, 23, 24, 26,27,28]
        complete_row = [x for i,x in enumerate(row) if i in to_use]
        return complete_row
        

    data = []
    tbody = table.find("tbody")
    rows = tbody.find_all("tr") if tbody else table.find_all("tr")
    for row in rows:
        # Use both <td> and <th> in case some rows use header cells for data
        cells = row.find_all(["td", "th"])
        row_data = [cell.get_text(strip=True) for cell in cells]
        row_data = extract_complete_row(row_data)
        if row_data:
            data.append(row_data)

    headers = ["Rk", "Class", "Height", "Player", "Team", 
            "Conf", "Min%", 'PRPG!', 'BPM', 'ORTG', 
            "USG", "EFG", "TS", "OR", "DR", "AST", "TO",
            "BLK", "STL", "FTR"]

    df = pd.DataFrame(data, columns=headers)
    return df


In [85]:
years = [i for i in range(2008, 2024)]

In [88]:
df_2024["Season"] = 2024

In [89]:
all_dfs = [df_2024]

import tqdm

for year in tqdm.tqdm(years):
    html = get_html_from_torvik_players(year)
    tmp_df = get_data_from_html(html)
    tmp_df["Season"] = year
    all_dfs.append(tmp_df)


100%|██████████| 16/16 [37:21<00:00, 140.08s/it]


In [91]:
final_df = pd.concat(all_dfs, axis=0)

In [None]:
final_df.to_csv("torvik_player_data.csv")

In [78]:
df_2024.sort_values(by="PRPG!", ascending=False)

Unnamed: 0,Rk,Class,Height,Player,Team,Conf,Min%,PRPG!,BPM,ORTG,USG,EFG,TS,OR,DR,AST,TO,BLK,STL,FTR
1,2,Sr,7-4,Zach Edey,Purdue,B10,79.2,7.9,15.5,128.8,33.4,62.4,65.9,18.1,25.5,14.6,12.7,6.9,0.5,80.9
15,16,Sr,6-1,Mark Sears,Alabama,SEC,83.6,6.6,10.3,129.1,25.6,60.4,65.8,2.8,10.2,21.2,15.5,0.3,2.6,46.6
10,11,Sr,6-6,Terrence Shannon Jr.,Illinois,B10,70.6,6.6,10.6,124.8,27.9,55.9,62.1,2.7,10.0,13.2,12.1,2.3,1.7,59.9
30,31,Jr,6-2,KJ Simpson,Colorado,P12,86.8,6.0,9.2,122.6,26.4,55.2,60.6,2.8,16.4,27.1,13.9,0.2,2.7,36.3
6,7,Jr,6-10,DaRon Holmes II,Dayton,A10,80.7,5.9,11.5,119.1,31.2,58.4,62.6,6.8,23.7,19.0,13.7,7.2,1.8,72.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3325,3326,Sr,6-3,Artese Stapleton,UMKC,Sum,9.6,-0.0,-5.1,85.8,21.5,46.6,47.3,1.0,7.7,26.4,29.6,0.0,1.6,27.6
3537,3538,So,6-0,Broc Bidwell,Campbell,CAA,15.3,-0.0,-6.2,112.4,6.3,50.0,52.8,0.7,6.3,6.8,10.7,0.7,1.0,11.1
3306,3307,Fr,6-7,Nate Fouts,Le Moyne,NEC,20.2,-0.0,-5.0,90.5,21.1,53.5,51.8,2.4,17.4,19.1,22.5,3.0,1.0,15.5
3947,Show 100 more,,,,,,,,,,,,,,,,,,,


In [81]:
df_2024["Min%"].astype(float).min()

5.0

In [46]:
len(headers)

20

In [47]:
len(data[0])

20

In [None]:
import pandas as pd


Unnamed: 0,Rk,Class,Height,Player,Team,Conf,Min%,PRPG!,BPM,ORTG,USG,EFG,TS,OR,DR,AST,TO,BLK,STL,FTR
147,148,Sr,6-8,Eric Dixon,Villanova,BE,83.2,6.3,7.5,119.3,32.4,54.4,59.2,6.1,12.4,14.6,12.1,1.2,1.4,34.2
38,39,Jr,6-4,Bennett Stirtz,Drake,MVC,98.6,6.2,10.2,127.5,26.2,55.5,59.7,2.6,14.2,35.0,12.8,1.0,3.7,39.8
21,22,Sr,7-1,Ryan Kalkbrenner,Creighton,BE,80.6,6.2,12.5,133.4,21.4,68.9,70.1,8.8,18.0,10.0,10.0,7.9,1.2,45.6
8,9,Sr,6-10,Johni Broome,Auburn,SEC,67.2,6.1,15.1,123.7,31.2,53.5,55.1,15.2,26.9,23.5,8.5,9.3,1.4,34.9
79,80,Jr,6-2,Bruce Thornton,Ohio St.,B10,87.8,6.0,8.8,130.1,21.5,59.0,63.8,1.2,9.8,23.5,10.8,0.4,1.8,42.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1785,1786,Sr,7-1,Rick Issanza,Loyola Marymount,WCC,28.2,-0.0,-0.1,101.8,11.3,61.5,59.3,8.0,15.6,2.9,24.6,7.8,0.9,48.7
1797,1798,Sr,6-4,Caleb Robinson,Nicholls St.,Slnd,8.3,-0.0,-0.2,89.7,18.2,42.0,42.8,7.5,17.0,4.6,15.3,2.6,4.2,16.0
2242,2243,Fr,6-10,Halvine Dzellat,Cincinnati,B12,1.4,-0.0,-1.3,99.9,13.0,0.0,38.5,7.5,15.3,0.0,0.0,6.9,3.9,200.0
2550,Show 100 more,,,,,,,,,,,,,,,,,,,


In [13]:
print(html_source[1000000:1500000])

 rgb(249, 170, 172); display: none;">9.8</td><td class="mobileout _65" style="text-align:center;border-right:1px solid black;background-color:#FAD0D3">1.3</td><td class="mobileout _1920" style="text-align:center">1-4</td><td class="mobileout _21" style="text-align:center;background-color:#F9ADAF">.250</td></tr><tr><td title="Click to see comps for this player" class="_0 mobileout" id="270">199</td><td class="_45 mobileout" style="display: none;">-</td><td class="mobileout" style="white-space:nowrap !important"><span style="white-space: nowrap"><div style="float: left; text-align: left;"></div><div style="float: right; text-align: right">Fr</div></span></td><td class="mobileout _26">6-6</td><td style="text-align:left;"><a href="playerstat.php?year=2025&amp;p=Kasparas%20Jakucionis&amp;t=Illinois">Kasparas Jakucionis</a> </td><td class="_34" style="display: none;"><span title="RecruiT-Rank" style="font-size:8px">94</span></td><td class="mobileout _teamx" style="text-align:left"><a href="t