In [52]:
import requests
import pandas as pd

# Settings
year = 2023
headers = {"User-Agent": "Mozilla/5.0"}

# 1) Get all D-I teams and their IDs via ESPN teams API
teams_url = (
    "https://site.web.api.espn.com/apis/site/v2/"
    "sports/basketball/mens-college-basketball/teams?region=us&lang=en&limit=1000"
)
resp = requests.get(teams_url, headers=headers)
resp.raise_for_status()
teams_json = resp.json()['sports'][0]['leagues'][0]['teams']
team_list = [(t['team']['id'], t['team']['displayName']) for t in teams_json]
print(f"🏀 Found {len(team_list)} D-I teams via teams API.")

# 2) Function to fetch per-game stats + WinPct for one team
def fetch_team_stats(team_id, team_name, year):
    # Scrape per-game HTML tables
    stats_url = f"https://www.espn.com/mens-college-basketball/team/stats/_/id/{team_id}/year/{year}"
    r = requests.get(stats_url, headers=headers)
    r.raise_for_status()
    tables = pd.read_html(r.text)
    names_df, stats_df = tables[0], tables[1]
    stats_df.columns = [c if not isinstance(c, tuple) else c[1] for c in stats_df.columns]
    stats_df['Player'] = names_df['Name']
    stats_df['Team']   = team_name
    # Fetch WinPct from summary JSON
    sum_url = (
        "https://site.web.api.espn.com/apis/site/v2/"
        "sports/basketball/mens-college-basketball/summary"
        f"?team={team_id}&season={year}&seasontype=2"
    )
    j = requests.get(sum_url, headers=headers).json()
    win_pct = 0
    for rec in j.get('team', {}).get('record', {}).get('items', []):
        for s in rec.get('stats', []):
            if s.get('name') in ('winPct','WinPct'):
                win_pct = float(s.get('value'))
                break
        if win_pct:
            break
    stats_df['WinPct'] = win_pct
    return stats_df

# 3) Loop through all teams, collect stats, and track skips
all_stats = []
skipped = []
for tid, name in team_list:
    try:
        df_team = fetch_team_stats(tid, name, year)
        all_stats.append(df_team)
    except Exception as e:
        skipped.append(name)
print(f"Finished fetching; skipped {len(skipped)} teams.")
if skipped:
    print("Skipped teams:", skipped)

# 4) Combine into one DataFrame and check coverage
if not all_stats:
    raise RuntimeError("No team stats fetched – check API or year selection.")
raw = pd.concat(all_stats, ignore_index=True)
print(f"🏀 Loaded {len(raw)} player-rows for {year}")
expected = len(team_list)
scraped = raw['Team'].nunique()
print(f"Teams expected: {expected}, Teams scraped: {scraped}")
missing = set([n for _,n in team_list]) - set(raw['Team'].unique())
if missing:
    print(f"Missing teams (no data): {missing}")

# 5) Define stats for TalentScore
desired = ['GP','MIN','PTS','AST','REB','STL','BLK','TO','FG%','3P%','FT%','WinPct']
stats = [s for s in desired if s in raw.columns]
print("Using stats:", stats)

# 6) Clean & filter usage
df = raw.copy()
for s in stats:
    df[s] = pd.to_numeric(df[s], errors='coerce')
# Fill missing percentages and winPct
for p in ['FG%','3P%','FT%','WinPct']:
    if p in df.columns:
        df[p] = df[p].fillna(0)
# Usage filters
df = df[(df['MIN'] >= 15) & (df['GP'] >= 5)]

# 7) Advanced metrics: totals & per-40
for s in ['PTS','REB','AST','STL','BLK','TO']:
    df[f'Total_{s}'] = df[s] * df['GP']
    df[f'{s}_per40'] = df[s] / df['MIN'] * 40

# 8) Compute z-scores & invert turnovers
z = (df[stats] - df[stats].mean()) / df[stats].std()
if 'TO' in stats:
    z['TO'] = -z['TO']
for s in stats:
    df['z_' + s] = z[s]

# 9) TalentScore & Top20
df['TalentScore'] = z.sum(axis=1)
top20 = df.sort_values('TalentScore', ascending=False).head(20)
print(top20[['Player','Team','TalentScore']])

🏀 Found 361 D-I teams via teams API.
Finished fetching; skipped 0 teams.
🏀 Loaded 5437 player-rows for 2023
Teams expected: 361, Teams scraped: 361
Using stats: ['GP', 'MIN', 'PTS', 'AST', 'REB', 'STL', 'BLK', 'TO', 'FG%', '3P%', 'FT%', 'WinPct']
                   Player                              Team  TalentScore
4596    Yaxel Lendeborg F                       UAB Blazers    16.635357
930    Ryan Kalkbrenner C                Creighton Bluejays    14.347542
1094       Cooper Flagg F                  Duke Blue Devils    13.211843
1069     Bennett Stirtz G                    Drake Bulldogs    12.944422
4063        Oscar Cluff C    South Dakota State Jackrabbits    12.777409
3561       Braden Smith G               Purdue Boilermakers    12.257003
255        Johni Broome F                     Auburn Tigers    12.168544
481     Earl Timberlake G                   Bryant Bulldogs    11.768469
864      Nique Clifford G               Colorado State Rams    11.632213
3678  Alvaro Folgueiras

In [53]:
import pandas as pd

# 1) Allow pandas to print every row & column in the notebook
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# 2) Display the entire DataFrame
display(df)

# 3) Export to CSV for external review
df.to_csv('all_players.csv', index=False)
print("Wrote full table to all_players.csv")

Unnamed: 0,GP,MIN,PTS,REB,AST,STL,BLK,TO,FG%,FT%,3P%,Player,Team,WinPct,Total_PTS,PTS_per40,Total_REB,REB_per40,Total_AST,AST_per40,Total_STL,STL_per40,Total_BLK,BLK_per40,Total_TO,TO_per40,z_GP,z_MIN,z_PTS,z_AST,z_REB,z_STL,z_BLK,z_TO,z_FG%,z_3P%,z_FT%,z_WinPct,TalentScore
0,30,29.6,13.7,5.1,2.5,1.1,0.7,2.6,46.5,69.7,33.3,Quion Williams G,Abilene Christian Wildcats,0,411.0,18.513514,153.0,6.891892,75.0,3.378378,33.0,1.486486,21.0,0.945946,78.0,3.513514,-0.040245,0.801804,1.043878,0.612811,0.666156,0.548691,0.710455,-1.845051,0.180836,0.236611,-0.223459,,2.692486
1,32,25.8,13.1,3.9,1.2,1.0,0.4,1.9,52.4,70.3,11.1,Leonardo Bettiol F,Abilene Christian Wildcats,0,419.2,20.310078,124.8,6.046512,38.4,1.860465,32.0,1.550388,12.8,0.620155,60.8,2.945736,0.300523,0.129851,0.8945,-0.475064,-0.014376,0.316575,0.00476,-0.760525,0.937559,-1.613219,-0.1714,,-0.450815
2,31,23.7,10.2,5.6,0.8,1.4,0.2,1.5,49.6,84.0,42.6,Bradyn Hubbard F,Abilene Christian Wildcats,0,316.2,17.21519,173.6,9.451477,24.8,1.350211,43.4,2.362869,6.2,0.337553,46.5,2.531646,0.130139,-0.241492,0.172507,-0.809794,0.949711,1.245037,-0.465703,-0.140796,0.578436,1.011539,1.017276,,3.44686
3,32,27.6,8.9,2.3,2.1,1.3,0.1,2.0,34.1,91.3,30.4,Hunter Jack Madden G,Abilene Christian Wildcats,0,284.8,12.898551,73.6,3.333333,67.2,3.043478,41.6,1.884058,3.2,0.144928,64.0,2.898551,0.300523,0.448144,-0.151146,0.27808,-0.921752,1.012922,-0.700935,-0.915457,-1.409566,-0.005034,1.650658,,-0.413563
4,32,15.5,5.8,2.5,0.5,1.2,0.3,1.2,44.2,63.3,18.6,Dontrez Williams G,Abilene Christian Wildcats,0,185.6,14.967742,80.0,6.451613,16.0,1.290323,38.4,3.096774,9.6,0.774194,38.4,3.096774,0.300523,-1.691496,-0.922932,-1.060842,-0.80833,0.780806,-0.230472,0.324001,-0.114158,-0.988277,-0.778753,,-5.189929
5,31,21.6,5.2,2.7,3.2,1.4,0.4,1.7,50.5,63.8,0.0,Rich Smith G,Abilene Christian Wildcats,0,161.2,9.62963,83.7,5.0,99.2,5.925926,43.4,2.592593,12.4,0.740741,52.7,3.148148,0.130139,-0.612834,-1.07231,1.198589,-0.694908,1.245037,0.00476,-0.45066,0.693868,-2.538134,-0.73537,,-2.831823
7,32,18.1,3.7,1.6,1.3,1.3,0.1,1.5,36.2,74.1,17.2,Nasir DeGruy G,Abilene Christian Wildcats,0,118.4,8.176796,51.2,3.535912,41.6,2.872928,41.6,2.872928,3.2,0.220994,48.0,3.314917,0.300523,-1.231739,-1.445755,-0.391381,-1.318729,1.012922,-0.700935,-0.140796,-1.140224,-1.104933,0.158306,,-6.002741
10,8,19.6,1.6,4.0,0.5,0.9,0.1,1.0,40.0,41.7,0.0,Joseph Venzant G,Abilene Christian Wildcats,0,12.8,3.265306,32.0,8.163265,4.0,1.020408,7.2,1.836735,0.8,0.204082,8.0,2.040816,-3.788697,-0.966494,-1.968578,-1.060842,0.042335,0.08446,-0.700935,0.633866,-0.652843,-2.538134,-2.65287,,-13.568732
15,32,36.5,13.9,5.4,3.5,1.6,0.3,2.3,38.6,79.4,38.4,Ethan Taylor G,Air Force Falcons,0,444.8,15.232877,172.8,5.917808,112.0,3.835616,51.2,1.753425,9.6,0.328767,73.6,2.520548,0.300523,2.02193,1.093671,1.449637,0.836289,1.709268,-0.230472,-1.380254,-0.832404,0.661571,0.618158,,6.247918
16,32,26.6,8.9,2.7,0.9,0.7,0.1,1.2,38.9,83.9,32.7,Luke Kearney F,Air Force Falcons,0,284.8,13.383459,86.4,4.06015,28.8,1.353383,22.4,1.052632,3.2,0.150376,38.4,1.804511,0.300523,0.271315,-0.151146,-0.726112,-0.694908,-0.379771,-0.700935,0.324001,-0.793927,0.186615,1.008599,,-1.355745


Wrote full table to all_players.csv


In [56]:
import requests
import pandas as pd

# Settings
year = 2023
headers = {"User-Agent": "Mozilla/5.0"}

# 1) Get all D-I teams via ESPN teams API
teams_url = (
    "https://site.web.api.espn.com/apis/site/v2/"
    "sports/basketball/mens-college-basketball/teams?region=us&lang=en&limit=1000"
)
resp = requests.get(teams_url, headers=headers)
resp.raise_for_status()
teams_json = resp.json()['sports'][0]['leagues'][0]['teams']
team_list = [(t['team']['id'], t['team']['displayName']) for t in teams_json]
print(f"🏀 Found {len(team_list)} D-I teams via teams API.")

# 2) Fetch per-game stats + WinPct for one team
def fetch_team_stats(team_id, team_name, year):
    stats_url = f"https://www.espn.com/mens-college-basketball/team/stats/_/id/{team_id}/year/{year}"
    r = requests.get(stats_url, headers=headers)
    r.raise_for_status()
    tables = pd.read_html(r.text)
    names_df, stats_df = tables[0], tables[1]
    stats_df.columns = [c if not isinstance(c, tuple) else c[1] for c in stats_df.columns]
    stats_df['Player'] = names_df['Name']
    stats_df['Team']   = team_name
    # Get WinPct
    sum_url = (
        "https://site.web.api.espn.com/apis/site/v2/"
        "sports/basketball/mens-college-basketball/summary"
        f"?team={team_id}&season={year}&seasontype=2"
    )
    j = requests.get(sum_url, headers=headers).json()
    win_pct = 0
    for rec in j.get('team', {}).get('record', {}).get('items', []):
        for s in rec.get('stats', []):
            if s.get('name') in ('winPct','WinPct'):
                win_pct = float(s.get('value'))
                break
        if win_pct:
            break
    stats_df['WinPct'] = win_pct
    return stats_df

# 3) Loop through teams and collect
all_stats = []
skipped = []
for tid, name in team_list:
    try:
        df_team = fetch_team_stats(tid, name, year)
        all_stats.append(df_team)
    except Exception:
        skipped.append(name)
print(f"Finished fetching; skipped {len(skipped)} teams.")
if skipped:
    print("Skipped teams:", skipped)

# 4) Combine and verify coverage
raw = pd.concat(all_stats, ignore_index=True)
print(f"🏀 Loaded {len(raw)} player-rows for {year}")
print(f"Teams scraped: {raw['Team'].nunique()} of {len(team_list)}")
missing = set(n for _,n in team_list) - set(raw['Team'].unique())
if missing:
    print("Missing teams:", missing)

# 5) Define stats & clean/filter
desired = ['GP','MIN','PTS','AST','REB','STL','BLK','TO','FG%','3P%','FT%','WinPct']
stats = [s for s in desired if s in raw.columns]
print("Using stats:", stats)
df = raw.copy()
for s in stats:
    df[s] = pd.to_numeric(df[s], errors='coerce')
df[['FG%','3P%','FT%','WinPct']] = df[['FG%','3P%','FT%','WinPct']].fillna(0)
df = df[(df['MIN'] >= 15) & (df['GP'] >= 5)]

# 6) Advanced metrics: total & per-40
for s in ['PTS','REB','AST','STL','BLK','TO']:
    df[f'Total_{s}'] = df[s] * df['GP']
    df[f'{s}_per40'] = df[s] / df['MIN'] * 40

# 7) Compute z-scores, invert turnovers
z = (df[stats] - df[stats].mean()) / df[stats].std()
if 'TO' in stats:
    z['TO'] = -z['TO']
for s in stats:
    df['z_' + s] = z[s]

# 8) Calculate TalentScore
df['TalentScore'] = z.sum(axis=1)

# 9) Anchor-based NIL Worth calculation
# Fuzzy-match anchor names in the Player column
toppin_matches = df[df['Player'].str.contains('Toppin', case=False, na=False)]['Player'].unique()
agbim_matches = df[df['Player'].str.contains('Agbim', case=False, na=False)]['Player'].unique()
print('Toppin matches found:', toppin_matches)
print('Agbim matches found:', agbim_matches)
# Ensure exactly one match for each
if len(toppin_matches) != 1 or len(agbim_matches) != 1:
    raise RuntimeError('Could not uniquely identify JT Toppin or Obi Agbim. ' 
                       'Please adjust your substring filters.')
# Use the found exact names
jtt_name = toppin_matches[0]
obi_name = agbim_matches[0]
anchors = {
    jtt_name: 4000000,
    obi_name: 1000000
}
# Lookup their TalentScores
scores = {name: df.loc[df['Player'] == name, 'TalentScore'].iloc[0] for name in anchors}
# Unpack anchor points
a = (anchors[jtt_name] - anchors[obi_name]) / (scores[jtt_name] - scores[obi_name])
b = anchors[jtt_name] - a * scores[jtt_name]
# Compute NIL_Worth
df['NIL_Worth'] = (df['TalentScore'] * a + b).clip(lower=0).round().astype(int)

# 10) Show top 20 with NIL Worth


🏀 Found 361 D-I teams via teams API.
Finished fetching; skipped 0 teams.
🏀 Loaded 5437 player-rows for 2023
Teams scraped: 361 of 361
Using stats: ['GP', 'MIN', 'PTS', 'AST', 'REB', 'STL', 'BLK', 'TO', 'FG%', '3P%', 'FT%', 'WinPct']
Toppin matches found: ['JT Toppin F']
Agbim matches found: ['Obi Agbim G']


In [57]:
# 10) Show top 20 players with TalentScore and anchored NIL_Worth
top20 = df.sort_values('TalentScore', ascending=False).head(20)
print(top20[['Player', 'Team', 'TalentScore', 'NIL_Worth']])

                   Player                              Team  TalentScore  \
4596    Yaxel Lendeborg F                       UAB Blazers    16.635357   
930    Ryan Kalkbrenner C                Creighton Bluejays    14.347542   
1094       Cooper Flagg F                  Duke Blue Devils    13.211843   
1069     Bennett Stirtz G                    Drake Bulldogs    12.944422   
4063        Oscar Cluff C    South Dakota State Jackrabbits    12.777409   
3561       Braden Smith G               Purdue Boilermakers    12.257003   
255        Johni Broome F                     Auburn Tigers    12.168544   
481     Earl Timberlake G                   Bryant Bulldogs    11.768469   
864      Nique Clifford G               Colorado State Rams    11.632213   
3678  Alvaro Folgueiras F           Robert Morris Colonials    11.286147   
3591        Paul Otieno F                Quinnipiac Bobcats    11.194637   
3782    Rasheer Fleming F              Saint Joseph's Hawks    11.003312   
2335       K

In [59]:
# Cap NIL worth so JT Toppin remains the maximum
max_worth = anchors[jtt_name]
df['NIL_Worth'] = (df['TalentScore'] * a + b).clip(lower=0, upper=max_worth).round().astype(int)

In [60]:
# 10) Show top 20 players with TalentScore and anchored NIL_Worth
top20 = df.sort_values('TalentScore', ascending=False).head(20)
print(top20[['Player', 'Team', 'TalentScore', 'NIL_Worth']])

                   Player                              Team  TalentScore  \
4596    Yaxel Lendeborg F                       UAB Blazers    16.635357   
930    Ryan Kalkbrenner C                Creighton Bluejays    14.347542   
1094       Cooper Flagg F                  Duke Blue Devils    13.211843   
1069     Bennett Stirtz G                    Drake Bulldogs    12.944422   
4063        Oscar Cluff C    South Dakota State Jackrabbits    12.777409   
3561       Braden Smith G               Purdue Boilermakers    12.257003   
255        Johni Broome F                     Auburn Tigers    12.168544   
481     Earl Timberlake G                   Bryant Bulldogs    11.768469   
864      Nique Clifford G               Colorado State Rams    11.632213   
3678  Alvaro Folgueiras F           Robert Morris Colonials    11.286147   
3591        Paul Otieno F                Quinnipiac Bobcats    11.194637   
3782    Rasheer Fleming F              Saint Joseph's Hawks    11.003312   
2335       K

In [61]:
# 11) Show ALL players sorted by NIL_Worth
df_sorted = df.sort_values('NIL_Worth', ascending=False)
# Display in notebook
display(df_sorted[['Player','Team','TalentScore','NIL_Worth']])
# Export to CSV
output_file = 'players_by_NIL_Worth.csv'
df_sorted.to_csv(output_file, index=False)
print(f"Exported full sorted list to {output_file}")

Unnamed: 0,Player,Team,TalentScore,NIL_Worth
2584,Essam Mostafa F,Middle Tennessee Blue Raiders,9.305316,4000000
2335,Kellen Tynes G,Maine Black Bears,10.880949,4000000
4063,Oscar Cluff C,South Dakota State Jackrabbits,12.777409,4000000
3561,Braden Smith G,Purdue Boilermakers,12.257003,4000000
3365,Nate Bittle C,Oregon Ducks,9.998913,4000000
4090,Brendan Terry F,Southeast Missouri State Redhawks,10.061332,4000000
582,Keonte Jones F,Cal State Northridge Matadors,10.596577,4000000
4596,Yaxel Lendeborg F,UAB Blazers,16.635357,4000000
3590,Amarri Monroe F,Quinnipiac Bobcats,9.097905,4000000
3591,Paul Otieno F,Quinnipiac Bobcats,11.194637,4000000


Exported full sorted list to players_by_NIL_Worth.csv


In [70]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from IPython.display import display

# — Settings —
year = 2023
headers = {"User-Agent": "Mozilla/5.0"}

# — 1) Fetch list of all D-I teams —
teams_url = (
    "https://site.web.api.espn.com/apis/site/v2/"
    "sports/basketball/mens-college-basketball/teams?region=us&lang=en&limit=1000"
)
resp = requests.get(teams_url, headers=headers); resp.raise_for_status()
teams_json = resp.json()['sports'][0]['leagues'][0]['teams']
team_list  = [(t['team']['id'], t['team']['displayName']) for t in teams_json]

print(f"🏀 Found {len(team_list)} D-I teams")


def fetch_team_stats(team_id, team_name, year):
    """Fetch per-game stats + real Class from the roster page + team WinPct."""
    # a) per-game stats table
    stats_url = f"https://www.espn.com/mens-college-basketball/team/stats/_/id/{team_id}/year/{year}"
    r = requests.get(stats_url, headers=headers); r.raise_for_status()
    tabs = pd.read_html(r.text)
    names_df, stats_df = tabs[0], tabs[1]
    # flatten any multi-index
    names_df.columns  = [c if not isinstance(c, tuple) else c[1] for c in names_df.columns]
    stats_df.columns  = [c if not isinstance(c, tuple) else c[1] for c in stats_df.columns]
    stats_df['Player'] = names_df['Name']
    stats_df['Team']   = team_name

    # b) roster page (to get Class)
    roster_url = f"https://www.espn.com/mens-college-basketball/team/roster/_/id/{team_id}/year/{year}"
    rt = requests.get(roster_url, headers=headers); rt.raise_for_status()
    soup = BeautifulSoup(rt.text, 'html.parser')
    # find the <table> whose header row contains “Class”
    roster_table = None
    for tbl in soup.find_all('table'):
        hdrs = [th.get_text(strip=True).lower() for th in tbl.select('thead th')]
        if 'class' in hdrs:
            roster_table = tbl
            break
    if roster_table is None:
        raise RuntimeError(f"Roster table for {team_name} missing Class column")
    # build dataframe
    cols = [th.get_text(strip=True) for th in roster_table.select('thead th')]
    rows = []
    for tr in roster_table.select('tbody tr'):
        rows.append([td.get_text(strip=True) for td in tr.select('td')])
    roster_df = pd.DataFrame(rows, columns=cols)
    # map Name → Class
    stats_df['Class'] = stats_df['Player'].map(
        dict(zip(roster_df['Name'], roster_df['Class']))
    ).fillna('Unknown')

    # c) team WinPct
    summary_url = (
        "https://site.web.api.espn.com/apis/site/v2/"
        "sports/basketball/mens-college-basketball/summary"
        f"?team={team_id}&season={year}&seasontype=2"
    )
    j = requests.get(summary_url, headers=headers).json()
    win_pct = 0.0
    for rec in j.get('team',{}).get('record',{}).get('items',[]):
        for s in rec.get('stats',[]):
            if s.get('name','').lower() == 'winpct':
                win_pct = float(s.get('value',0))
                break
        if win_pct: break
    stats_df['WinPct'] = win_pct

    return stats_df

# — 2) Loop all teams —
all_stats, skipped = [], []
for tid, name in team_list:
    try:
        all_stats.append(fetch_team_stats(tid, name, year))
    except Exception as e:
        skipped.append((name, str(e)))

print(f"Finished; skipped {len(skipped)} teams")
if skipped:
    for nm, err in skipped:
        print(f" – {nm}: {err}")

# — 3) Combine & clean —
raw = pd.concat(all_stats, ignore_index=True)
print(f"Loaded {len(raw)} player-rows across {raw['Team'].nunique()} teams")

# define stats, filter by usage
desired = ['GP','MIN','PTS','AST','REB','STL','BLK','TO','FG%','3P%','FT%','WinPct']
stats   = [s for s in desired if s in raw.columns]
df = raw.copy()
for s in stats:
    df[s] = pd.to_numeric(df[s], errors='coerce')
df[['FG%','3P%','FT%','WinPct']] = df[['FG%','3P%','FT%','WinPct']].fillna(0)
df = df[(df['MIN']>=15)&(df['GP']>=5)]

# advanced totals & per-40
for s in ['PTS','REB','AST','STL','BLK','TO']:
    df[f'Total_{s}'] = df[s]*df['GP']
    df[f'{s}_per40']  = df[s]/df['MIN']*40

# z-scores & invert turnovers
z = (df[stats] - df[stats].mean())/df[stats].std()
if 'TO' in z: z['TO'] = -z['TO']
for s in stats:
    df['z_'+s] = z[s]

# TalentScore
df['TalentScore'] = z.sum(axis=1)

# anchor NIL worth
jtt = df[df['Player'].str.contains('Toppin',case=False)]['Player'].iloc[0]
obi = df[df['Player'].str.contains('Agbim',case=False)]['Player'].iloc[0]
anchors = {jtt:4_000_000, obi:1_000_000}
s1,s2 = df.loc[df['Player']==jtt,'TalentScore'].iloc[0], df.loc[df['Player']==obi,'TalentScore'].iloc[0]
v1,v2 = anchors[jtt], anchors[obi]
a = (v1-v2)/(s1-s2); b = v1 - a*s1
df['NIL_Worth'] = (df['TalentScore']*a + b).clip(0,v1).round().astype(int)

# age factor
def age_factor(c):
    cl = str(c).lower()
    return 1.2 if 'fr' in cl else 1.15 if 'so' in cl else 1.1 if 'jr' in cl else 1.05 if 'sr' in cl else 1.0
df['AgeFactor'] = df['Class'].apply(age_factor)
df['Weighted_NIL_Worth'] = (df['NIL_Worth']*df['AgeFactor']).round().astype(int)

# Show top 20
top20 = df.sort_values('Weighted_NIL_Worth', ascending=False).head(20)
print(top20[['Player','Class','Team','TalentScore','AgeFactor','Weighted_NIL_Worth']])

# Display/export full list
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)
display(df.sort_values('Weighted_NIL_Worth',ascending=False)[['Player','Class','Team','TalentScore','AgeFactor','Weighted_NIL_Worth']])
df.sort_values('Weighted_NIL_Worth',ascending=False)[['Player','Class','Team','TalentScore','AgeFactor','Weighted_NIL_Worth']].to_csv('players_by_weighted_NIL.csv',index=False)

🏀 Found 361 D-I teams
Finished; skipped 0 teams
Loaded 5437 player-rows across 361 teams
                    Player    Class                               Team  \
2584       Essam Mostafa F  Unknown      Middle Tennessee Blue Raiders   
2335        Kellen Tynes G  Unknown                  Maine Black Bears   
4063         Oscar Cluff C  Unknown     South Dakota State Jackrabbits   
3561        Braden Smith G  Unknown                Purdue Boilermakers   
3365         Nate Bittle C  Unknown                       Oregon Ducks   
4090       Brendan Terry F  Unknown  Southeast Missouri State Redhawks   
582         Keonte Jones F  Unknown      Cal State Northridge Matadors   
4596     Yaxel Lendeborg F  Unknown                        UAB Blazers   
3590       Amarri Monroe F  Unknown                 Quinnipiac Bobcats   
3591         Paul Otieno F  Unknown                 Quinnipiac Bobcats   
1069      Bennett Stirtz G  Unknown                     Drake Bulldogs   
4166  Melvin Council Jr

Unnamed: 0,Player,Class,Team,TalentScore,AgeFactor,Weighted_NIL_Worth
2584,Essam Mostafa F,Unknown,Middle Tennessee Blue Raiders,9.305316,1.0,4000000
2335,Kellen Tynes G,Unknown,Maine Black Bears,10.880949,1.0,4000000
4063,Oscar Cluff C,Unknown,South Dakota State Jackrabbits,12.777409,1.0,4000000
3561,Braden Smith G,Unknown,Purdue Boilermakers,12.257003,1.0,4000000
3365,Nate Bittle C,Unknown,Oregon Ducks,9.998913,1.0,4000000
4090,Brendan Terry F,Unknown,Southeast Missouri State Redhawks,10.061332,1.0,4000000
582,Keonte Jones F,Unknown,Cal State Northridge Matadors,10.596577,1.0,4000000
4596,Yaxel Lendeborg F,Unknown,UAB Blazers,16.635357,1.0,4000000
3590,Amarri Monroe F,Unknown,Quinnipiac Bobcats,9.097905,1.0,4000000
3591,Paul Otieno F,Unknown,Quinnipiac Bobcats,11.194637,1.0,4000000


In [84]:
# Build df_rosters dataframe from roster_list
if roster_list:
    df_rosters = pd.concat(roster_list, ignore_index=True)
else:
    df_rosters = pd.DataFrame(columns=['Player','Class','Team'])

In [92]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from IPython.display import display

# — 3) Fetch ESPN roster pages via pandas.read_html for Class —
roster_list = []
skipped = []
for team_id, team_name in team_list:
    url = f"https://www.espn.com/mens-college-basketball/team/roster/_/id/{team_id}"
    try:
        tables = pd.read_html(url)
        rf = None
        # Identify the correct table by presence of Name and Yr/Year columns
        for t in tables:
            cols = t.columns
            # Flatten multi-index columns
            flat_cols = []
            for c in cols:
                if isinstance(c, tuple):
                    flat_cols.append(c[1] or c[0])
                else:
                    flat_cols.append(c)
            t.columns = flat_cols
            if 'Name' in t.columns and ('Yr' in t.columns or 'Year' in t.columns):
                rf = t
                break
        if rf is None:
            raise RuntimeError(f"Roster table with Name+Yr/Year not found for {team_name}")
        # Rename to Player and Class
        rf = rf.rename(columns={'Name':'Player', 'Yr':'Class', 'Year':'Class'})
        rf = rf[['Player','Class']]
        rf['Team'] = team_name
        roster_list.append(rf)
    except Exception as e:
        skipped.append((team_name, str(e)))
print(f"Finished ESPN rosters; skipped {len(skipped)} teams.")
for nm, err in skipped[:5]:
    print(f" – {nm}: {err}")

# — 4) Clean & Filter Usage — —
desired = ['GP','MIN','PTS','AST','REB','STL','BLK','TO','FG%','3P%','FT%','WinPct']
stats = [s for s in desired if s in df.columns]
for s in stats:
    df[s] = pd.to_numeric(df[s], errors='coerce')
# usage threshold
df = df[(df['MIN']>=15) & (df['GP']>=5)].copy()

# — 5) Advanced Metrics & z-scores —
for s in ['PTS','REB','AST','STL','BLK','TO']:
    df[f'Total_{s}'] = df[s] * df['GP']
    df[f'{s}_per40'] = df[s] / df['MIN'] * 40
z = (df[stats] - df[stats].mean()) / df[stats].std()
if 'TO' in z: z['TO'] = -z['TO']
for s in stats:
    df['z_'+s] = z[s]

# — 6) TalentScore & NIL Worth —
df['TalentScore'] = z.sum(axis=1)
jtt = df[df['Player'].str.contains('Toppin',case=False)]['Player'].iloc[0]
obi = df[df['Player'].str.contains('Agbim',case=False)]['Player'].iloc[0]
anchors={jtt:4_000_000,obi:1_000_000}
s1,s2=df.loc[df['Player']==jtt,'TalentScore'].iloc[0],df.loc[df['Player']==obi,'TalentScore'].iloc[0]
v1,v2=anchors[jtt],anchors[obi]
a=(v1-v2)/(s1-s2);b=v1-a*s1
df['NIL_Worth'] = (df['TalentScore']*a + b).clip(0,v1).round().astype(int)

# — 7) Age Factor & Weighted NIL —
def age_factor(c):
    cl = str(c).lower()
    if 'fr' in cl: return 1.2
    if 'so' in cl: return 1.15
    if 'jr' in cl: return 1.1
    if 'sr' in cl: return 1.05
    return 1.0
df['AgeFactor'] = df['Class'].apply(age_factor)
df['Weighted_NIL_Worth'] = (df['NIL_Worth'] * df['AgeFactor']).round().astype(int)

# — 8) Show & Export —
top20 = df.sort_values('Weighted_NIL_Worth', ascending=False).head(20)
print(top20[['Player','Class','Team','TalentScore','AgeFactor','Weighted_NIL_Worth']])

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
display(df.sort_values('Weighted_NIL_Worth', ascending=False)[['Player','Class','Team','TalentScore','AgeFactor','Weighted_NIL_Worth']])
df.sort_values('Weighted_NIL_Worth', ascending=False)[['Player','Class','Team','TalentScore','AgeFactor','Weighted_NIL_Worth']].to_csv('players_by_weighted_NIL.csv', index=False)
print("Exported to players_by_weighted_NIL.csv")


Finished ESPN rosters; skipped 361 teams.
 – Abilene Christian Wildcats: Roster table with Name+Yr/Year not found for Abilene Christian Wildcats
 – Air Force Falcons: Roster table with Name+Yr/Year not found for Air Force Falcons
 – Akron Zips: Roster table with Name+Yr/Year not found for Akron Zips
 – Alabama A&M Bulldogs: Roster table with Name+Yr/Year not found for Alabama A&M Bulldogs
 – Alabama Crimson Tide: Roster table with Name+Yr/Year not found for Alabama Crimson Tide
                    Player    Class                               Team  \
2584       Essam Mostafa F  Unknown      Middle Tennessee Blue Raiders   
2335        Kellen Tynes G  Unknown                  Maine Black Bears   
4063         Oscar Cluff C  Unknown     South Dakota State Jackrabbits   
3561        Braden Smith G  Unknown                Purdue Boilermakers   
3365         Nate Bittle C  Unknown                       Oregon Ducks   
4090       Brendan Terry F  Unknown  Southeast Missouri State Redhawks   

Unnamed: 0,Player,Class,Team,TalentScore,AgeFactor,Weighted_NIL_Worth
2584,Essam Mostafa F,Unknown,Middle Tennessee Blue Raiders,9.305316,1.0,4000000
2335,Kellen Tynes G,Unknown,Maine Black Bears,10.880949,1.0,4000000
4063,Oscar Cluff C,Unknown,South Dakota State Jackrabbits,12.777409,1.0,4000000
3561,Braden Smith G,Unknown,Purdue Boilermakers,12.257003,1.0,4000000
3365,Nate Bittle C,Unknown,Oregon Ducks,9.998913,1.0,4000000
4090,Brendan Terry F,Unknown,Southeast Missouri State Redhawks,10.061332,1.0,4000000
582,Keonte Jones F,Unknown,Cal State Northridge Matadors,10.596577,1.0,4000000
4596,Yaxel Lendeborg F,Unknown,UAB Blazers,16.635357,1.0,4000000
3590,Amarri Monroe F,Unknown,Quinnipiac Bobcats,9.097905,1.0,4000000
3591,Paul Otieno F,Unknown,Quinnipiac Bobcats,11.194637,1.0,4000000


Exported to players_by_weighted_NIL.csv


In [93]:
pip install sportsipy

Note: you may need to restart the kernel to use updated packages.


In [99]:
from sportsipy.ncaab.teams import Teams
import pandas as pd

In [100]:
# Merge on Player + Team
df = pd.merge(df_stats, df_roster, on=['Player','Team'], how='left')

# How many still missing?
miss = df['Class'].isna().sum()
print(f"Players missing Class after merge: {miss}")

# (There should be very few, if any — mostly edge cases like transfers mid-season)
df['Class'] = df['Class'].fillna('Unknown')

Players missing Class after merge: 5437


In [110]:
pip install sportsipy

Note: you may need to restart the kernel to use updated packages.


In [None]:
import requests
import pandas as pd

# — 0) Pre‑reqs —
# Make sure you already have your `team_list = [(team_id, team_name), …]`
# and your `df_stats` ready.

headers = {"User-Agent": "Mozilla/5.0"}

def fetch_espn_roster_json(team_id, team_name):
    """
    Use ESPN’s common/v3 API to fetch the full roster JSON,
    then extract each player’s name and class year.
    """
    url = (
        "https://site.web.api.espn.com/apis/common/v3/sports/"
        "basketball/mens-college-basketball/teams/"
        f"{team_id}/roster?region=us&lang=en"
    )
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    j = r.json()

    out = []
    for athlete in j.get("athletes", []):
        name = athlete.get("displayName")
        # ESPN’s JSON gives group (e.g. "freshman", "senior", etc.)
        cls  = athlete.get("group", "").capitalize() or "Unknown"
        out.append({"Player": name, "Team": team_name, "Class": cls})
    return pd.DataFrame(out)


# — 1) Build the full roster DF —
roster_dfs = []
skipped = []
for tid, tname in team_list:
    try:
        df_r = fetch_espn_roster_json(tid, tname)
        roster_dfs.append(df_r)
    except Exception as e:
        skipped.append((tname, str(e)))

print(f"✅ Fetched rosters for {len(roster_dfs)} teams, skipped {len(skipped)} teams.")
if skipped:
    for n, err in skipped[:5]:
        print(" –", n, err)

df_roster = pd.concat(roster_dfs, ignore_index=True)
print("Roster columns:", df_roster.columns.tolist())
print("Unique teams:", df_roster['Team'].nunique())


# — 2) Merge into your stats DF —
df = pd.merge(df_stats, df_roster, on=['Player','Team'], how='left')
miss = df['Class'].isna().sum()
print(f"Players missing Class after merge: {miss}")
df['Class'] = df['Class'].fillna('Unknown')


# — 3) Age‐factor mapping & weighted NIL —
def age_factor(c):
    c = str(c).lower()
    if 'fresh' in c:    return 1.2
    if 'soph' in c:     return 1.15
    if 'junior' in c:   return 1.1
    if 'senior' in c:   return 1.05
    return 1.0

df['AgeFactor'] = df['Class'].apply(age_factor)

# Make sure NIL_Worth exists; if not, recompute it like before:
# df['NIL_Worth'] = ...

df['Weighted_NIL_Worth'] = (df['NIL_Worth'] * df['AgeFactor']).round().astype(int)


# — 4) Inspect —
top20 = df.sort_values('Weighted_NIL_Worth', ascending=False).head(20)
print(top20[['Player','Team','Class','TalentScore','AgeFactor','Weighted_NIL_Worth']])

In [123]:
import requests, pprint

# Pick a sample team
sample_tid, sample_name = team_list[0]
url = (
    "https://site.web.api.espn.com/apis/common/v3/sports/"
    "basketball/mens-college-basketball/teams/"
    f"{sample_tid}/roster?region=us&lang=en"
)
j = requests.get(url, headers=headers).json()

# Show the first positionGroup and its athlete structure
print("PositionGroups keys:", list(j['positionGroups'][0].keys()))
ath = j['positionGroups'][0].get('athletes', [None])[0]
print("Sample athlete keys:", list(ath.keys()) if ath else "No athletes here")
pprint.pprint(ath, width=100)

PositionGroups keys: ['type', 'displayName', 'athletes']
Sample athlete keys: ['id', 'alternateIds', 'guid', 'firstName', 'lastName', 'displayName', 'fullName', 'shortName', 'displayWeight', 'weight', 'displayHeight', 'height', 'links', 'birthPlace', 'jersey', 'headshot', 'position', 'statistics', 'experience', 'status', 'teams']
{'alternateIds': [{'sdr': '4593749'}],
 'birthPlace': {'city': 'Phoenix', 'country': 'USA', 'state': 'AZ'},
 'displayHeight': '6\' 4"',
 'displayName': 'Immanuel Allen',
 'displayWeight': '215 lbs',
 'experience': {'abbreviation': 'SR', 'displayValue': 'Senior', 'years': 4},
 'firstName': 'Immanuel',
 'fullName': 'Immanuel Allen',
 'guid': '7599ec20-8031-3ec2-8959-c18eb2df3885',
 'headshot': {'alt': 'Immanuel Allen',
              'href': 'https://a.espncdn.com/i/headshots/mens-college-basketball/players/full/4593749.png'},
 'height': 76,
 'id': '4593749',
 'jersey': '25',
 'lastName': 'Allen',
 'links': [{'href': 'https://www.espn.com/mens-college-basketball/

In [131]:
# After df_roster = pd.concat(roster_dfs, ignore_index=True)
replacements = {
    'Middle Tennessee Blue': 'Middle Tennessee Blue Raiders',
    'Wright State': 'Wright State Raiders',
    'Colgate': 'Colgate Raiders',
    'Texas Tech Red': 'Texas Tech Red Raiders'
}
df_roster['Team'] = df_roster['Team'].replace(replacements)

In [133]:
import requests
import pandas as pd

headers = {"User-Agent": "Mozilla/5.0"}

def fetch_espn_roster_json(team_id, team_name):
    """
    Use ESPN’s JSON API to fetch roster and pull class from the experience field.
    """
    url = (
        "https://site.web.api.espn.com/apis/common/v3/sports/"
        "basketball/mens-college-basketball/teams/"
        f"{team_id}/roster?region=us&lang=en"
    )
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    j = r.json()

    out = []
    for group in j.get("positionGroups", []):
        for athlete in group.get("athletes", []):
            name = athlete.get("displayName")
            exp  = athlete.get("experience", {}) or {}
            cls  = exp.get("displayValue", "Unknown")
            out.append({"Player": name, "Team": team_name, "Class": cls})
    return pd.DataFrame(out)


# Build the full roster DF —
roster_dfs = []
skipped = []
for tid, tname in team_list:
    try:
        df_r = fetch_espn_roster_json(tid, tname)
        roster_dfs.append(df_r)
    except Exception as e:
        skipped.append((tname, str(e)))

print(f"✅ Fetched rosters for {len(roster_dfs)} teams, skipped {len(skipped)} teams.")
df_roster = pd.concat(roster_dfs, ignore_index=True)
print("Roster columns:", df_roster.columns.tolist())
print("Unique teams:", df_roster['Team'].nunique())


# Merge into your stats DF —
df = pd.merge(df_stats, df_roster, on=['Player','Team'], how='left')
missing = df['Class'].isna().sum()
print(f"Players missing Class after merge: {missing}")
df['Class'] = df['Class'].fillna('Unknown')


# Age‑factor mapping & weighted NIL —
def age_factor(c):
    c = str(c).lower()
    if 'fresh' in c:    return 1.2
    if 'soph' in c:     return 1.15
    if 'junior' in c:   return 1.1
    if 'senior' in c:   return 1.05
    return 1.0

df['AgeFactor'] = df['Class'].apply(age_factor)
df['Weighted_NIL_Worth'] = (df['NIL_Worth'] * df['AgeFactor']).round().astype(int)


# Inspect top 20 —
top20 = df.sort_values('Weighted_NIL_Worth', ascending=False).head(20)
print(top20[['Player','Team','Class','TalentScore','AgeFactor','Weighted_NIL_Worth']])

✅ Fetched rosters for 361 teams, skipped 0 teams.
Roster columns: ['Player', 'Team', 'Class']
Unique teams: 361
Players missing Class after merge: 5437


KeyError: 'NIL_Worth'

In [135]:
# Which teams appear in stats but not in roster?
stats_teams  = set(df_stats['Team'].unique())
roster_teams = set(df_roster['Team'].unique())
print("Missing in roster:", stats_teams - roster_teams)
print("Extra in roster:", roster_teams - stats_teams)

Missing in roster: set()
Extra in roster: set()


In [137]:
# --- (Re)compute TalentScore & NIL_Worth ---

# 1) z‐score your features
features = ['PTS','AST','REB','STL','BLK','TO','FG%','3P%','FT%','WinPct']
for f in features:
    df[f] = pd.to_numeric(df[f], errors='coerce')
z = (df[features] - df[features].mean()) / df[features].std()
z['TO'] = -z['TO']

# 2) TalentScore
df['TalentScore'] = z.sum(axis=1)

# 3) Linear mapping to NIL_Worth using your two anchors
s_toppin = df.loc[df['Player'].str.contains('Toppin', case=False), 'TalentScore'].iat[0]
s_agbim   = df.loc[df['Player'].str.contains('Agbim',   case=False), 'TalentScore'].iat[0]
v_toppin, v_agbim = 4_000_000, 1_000_000
a = (v_toppin - v_agbim) / (s_toppin - s_agbim)
b = v_toppin - a * s_toppin
df['NIL_Worth'] = (df['TalentScore'] * a + b).clip(0, v_toppin).round().astype(int)

In [138]:
def age_factor(c):
    cl = str(c).lower()
    if 'fresh' in cl:  return 1.2
    if 'soph' in cl:   return 1.15
    if 'junior' in cl: return 1.1
    if 'senior' in cl: return 1.05
    return 1.0

df['AgeFactor'] = df['Class'].apply(age_factor)
df['Weighted_NIL_Worth'] = (df['NIL_Worth'] * df['AgeFactor']).round().astype(int)

# Check top prospects
top20 = df.sort_values('Weighted_NIL_Worth', ascending=False).head(20)
print(top20[['Player','Team','Class','TalentScore','NIL_Worth','AgeFactor','Weighted_NIL_Worth']])

                 Player                               Team    Class  \
5436              Total          Youngstown State Penguins  Unknown   
2493              Total                       Mercer Bears  Unknown   
4950  Jonnivius Smith F                   UTSA Roadrunners  Unknown   
1365      Renato Ruiz G     Florida International Panthers  Unknown   
565               Total  Cal State Bakersfield Roadrunners  Unknown   
1909              Total                Iowa State Cyclones  Unknown   
4943              Total                        UTEP Miners  Unknown   
4260              Total      Stephen F. Austin Lumberjacks  Unknown   
3782  Rasheer Fleming F               Saint Joseph's Hawks  Unknown   
579               Total         Cal State Fullerton Titans  Unknown   
4275              Total                    Stetson Hatters  Unknown   
582      Keonte Jones F      Cal State Northridge Matadors  Unknown   
3287              Total                   Oklahoma Sooners  Unknown   
3779  

In [139]:
# Show a handful of the players we couldn’t merge a Class for:
missing = df[df['Class']=='Unknown'][['Player','Team']].drop_duplicates().head(20)
print("Sample missing Player+Team combinations:")
print(missing.to_string(index=False))

Sample missing Player+Team combinations:
              Player                       Team
    Quion Williams G Abilene Christian Wildcats
  Leonardo Bettiol F Abilene Christian Wildcats
    Bradyn Hubbard F Abilene Christian Wildcats
Hunter Jack Madden G Abilene Christian Wildcats
  Dontrez Williams G Abilene Christian Wildcats
        Rich Smith G Abilene Christian Wildcats
  Christian Alston G Abilene Christian Wildcats
      Nasir DeGruy G Abilene Christian Wildcats
    Cade Hornecker F Abilene Christian Wildcats
     Yaniel Rivera G Abilene Christian Wildcats
    Joseph Venzant G Abilene Christian Wildcats
      Colby Tanner G Abilene Christian Wildcats
       Ma'Shy Hill F Abilene Christian Wildcats
       Jack Sawyer F Abilene Christian Wildcats
               Total Abilene Christian Wildcats
      Ethan Taylor G          Air Force Falcons
      Luke Kearney F          Air Force Falcons
Wesley Celichowski C          Air Force Falcons
     Kyle Marshall G          Air Force Falcons

In [140]:
# --- 1) Build & clean roster DataFrame ---
roster_dfs = []
skipped = []
for tid, tname in team_list:
    try:
        df_r = fetch_espn_roster_json(tid, tname)
        roster_dfs.append(df_r)
    except Exception as e:
        skipped.append((tname, str(e)))

df_roster = pd.concat(roster_dfs, ignore_index=True)

# Drop the “Total” aggregate row if present
df_roster = df_roster[df_roster['Player'] != 'Total']

# Fix those four short‑name vs full‑name mismatches:
team_map = {
    'Middle Tennessee Blue':     'Middle Tennessee Blue Raiders',
    'Wright State':              'Wright State Raiders',
    'Colgate':                   'Colgate Raiders',
    'Texas Tech Red':            'Texas Tech Red Raiders'
}
df_roster['Team'] = df_roster['Team'].replace(team_map)

# --- 2) Normalize stats names to drop trailing position letter ---
# (Your df_stats Player column currently has names like "Johni Broome F")
df_stats['Player'] = df_stats['Player'].str.replace(r'\s+[FGC]$', '', regex=True)

# --- 3) Merge roster Class into stats ---
df = pd.merge(df_stats, df_roster, on=['Player','Team'], how='left')
missing = df['Class'].isna().sum()
print(f"Players missing Class after merge: {missing}")
df['Class'] = df['Class'].fillna('Unknown')

# --- 4) (Re)compute TalentScore & NIL_Worth, then apply age factor ---
# — z‑score features —
features = ['PTS','AST','REB','STL','BLK','TO','FG%','3P%','FT%','WinPct']
for f in features:
    df[f] = pd.to_numeric(df[f], errors='coerce')
z = (df[features] - df[features].mean()) / df[features].std()
z['TO'] = -z['TO']
df['TalentScore'] = z.sum(axis=1)

# — map TalentScore to NIL_Worth using your anchors —
s1 = df.loc[df['Player'].str.contains('Toppin',   case=False), 'TalentScore'].iat[0]
s2 = df.loc[df['Player'].str.contains('Agbim',    case=False), 'TalentScore'].iat[0]
v1, v2 = 4_000_000, 1_000_000
a = (v1 - v2) / (s1 - s2)
b = v1 - a * s1
df['NIL_Worth'] = (df['TalentScore'] * a + b).clip(0, v1).round().astype(int)

# — age factor & weighted NIL —
def age_factor(c):
    c = str(c).lower()
    if 'fresh'  in c: return 1.2
    if 'soph'   in c: return 1.15
    if 'junior' in c: return 1.1
    if 'senior' in c: return 1.05
    return 1.0

df['AgeFactor'] = df['Class'].apply(age_factor)
df['Weighted_NIL_Worth'] = (df['NIL_Worth'] * df['AgeFactor']).round().astype(int)

# --- 5) Inspect the top 20 by weighted NIL worth ---
top20 = df.sort_values('Weighted_NIL_Worth', ascending=False).head(20)
print(top20[['Player','Team','Class','TalentScore','NIL_Worth','AgeFactor','Weighted_NIL_Worth']])

Players missing Class after merge: 385
                  Player                           Team      Class  \
3957          Eric Zheng             Seattle U Redhawks   Freshman   
4460        Jamie Vinson                Texas Longhorns   Freshman   
3274        Braylen Nash            Ohio State Buckeyes   Freshman   
4850         Makoi Mabor        UNC Wilmington Seahawks   Freshman   
2377  Antoni Vlogianitis               Marist Red Foxes   Freshman   
1094        Cooper Flagg               Duke Blue Devils   Freshman   
1771          Evan Otten            Idaho State Bengals   Freshman   
4461           Nic Codie                Texas Longhorns   Freshman   
2537          Jack Sauer            Miami (OH) RedHawks   Freshman   
3864         Corey Brown               Samford Bulldogs   Freshman   
4495           JT Toppin         Texas Tech Red Raiders  Sophomore   
3681   Alvaro Folgueiras        Robert Morris Colonials  Sophomore   
4336        Drew McElroy               TCU Horned F

In [145]:
# make sure the folder exists
import os
os.makedirs('data', exist_ok=True)

# export for the Next.js API to consume
df.to_json('data/players.json', orient='records')
print("✅ Wrote data/players.json with", len(df), "records")

✅ Wrote data/players.json with 5442 records


In [144]:
import os
print(os.getcwd())

/Users/aaronpearlstein


In [160]:
df.to_json('player.json', orient='records')

In [161]:
import os

# adjust this to the absolute path of your Next project
out_path = os.path.expanduser('~/projects/nil-scout-frontend/data/players.json')
df.to_json(out_path, orient='records')
print("Wrote", out_path)

Wrote /Users/aaronpearlstein/projects/nil-scout-frontend/data/players.json


In [162]:
# adjust this path to wherever your Next project lives:
output_path = '/Users/aaronpearlstein/projects/nil-scout-frontend/data/players.json'
df.to_json(output_path, orient='records')
print("Wrote", len(df), "records to", output_path)

Wrote 5442 records to /Users/aaronpearlstein/projects/nil-scout-frontend/data/players.json


In [163]:
cp /path/to/your/generated/players.json ~/projects/nil-scout-frontend/data/players.json

cp: /path/to/your/generated/players.json: No such file or directory


In [164]:
# app.py
import streamlit as st
import pandas as pd

@st.cache
def load_data():
    return pd.read_json("players.json")

df = load_data()

st.title("🏀 NIL Scout Dashboard")
search = st.text_input("Search player or team…")
filtered = df[df["Player"].str.contains(search, case=False) |
              df["Team"].str.contains(search, case=False)]

st.dataframe(filtered)

ModuleNotFoundError: No module named 'streamlit'