In [145]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from datetime import datetime, timedelta
import json
from time import sleep

from scrape import get_first_basket, getId

In [37]:
yst = datetime.today() - timedelta(days = 1)

url = f'https://www.basketball-reference.com/boxscores/?month={yst.month}&day={yst.day}&year={yst.year}'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')

In [52]:
def predict_first_basket() :
    ######### starting_lineups #########
    return np.random.randint(0, 10)

In [61]:
game_ids = [getId(x) for x in soup.find_all('a', href = True) if 'boxscores/pbp' in x['href']]
dfs = []
for gameId in game_ids :
    sleep(3)
    df, starting_lineups = get_first_basket(gameId, starting_lineups = True)
    idx = predict_first_basket()
    df['first_basket_pred'] = (starting_lineups[0] + starting_lineups[1])[idx]
    dfs.append(df[['game_id', 'Home', 'Away', 'first_basket', 'first_basket_tm', 'first_basket_pred']])

first_basket_df = pd.concat(dfs).set_index('game_id')
first_basket_df['Date'] = yst.date()
first_basket_df['correct_pred'] = (first_basket_df['first_basket'] == first_basket_df['first_basket_pred'])
first_basket_df[['Date', 'Home', 'Away', 'first_basket', 'first_basket_tm', 'first_basket_pred']]

Unnamed: 0,game_id,Date,Home,Away,first_basket,first_basket_tm,first_basket_pred
0,202411100DEN,2024-11-10,DEN,DAL,murraja01,DEN,watsope01
1,202411100DET,2024-11-10,DET,HOU,stewais01,DET,smithja05
2,202411100IND,2024-11-10,IND,NYK,hartjo01,NYK,townska01
3,202411100LAL,2024-11-10,LAL,TOR,dickgr01,TOR,quickim01
4,202411100MIL,2024-11-10,MIL,BOS,antetgi01,MIL,brownja02
5,202411100MIN,2024-11-10,MIN,MIA,edwaran01,MIN,mcdanja02
6,202411100OKC,2024-11-10,OKC,GSW,dortlu01,OKC,willija06
7,202411100ORL,2024-11-10,ORL,WAS,coulibi01,WAS,bitadgo01
8,202411100PHI,2024-11-10,PHI,CHO,ballla01,CHO,greenjo02
9,202411100PHO,2024-11-10,PHO,SAC,dunnry01,PHO,jonesty01


In [64]:
first_basket_df = first_basket_df[['game_id', 'Date', 'Home', 'Away', 'first_basket', 'first_basket_tm', 'first_basket_pred', 'correct_pred']]

In [69]:
acc = first_basket_df['correct_pred'].mean()
print(f'Accuracy: {round(100 * acc, 1)}%  [{first_basket_df["correct_pred"].sum()}/{first_basket_df.shape[0]}]')
first_basket_df

Accuracy: 0.0%  [0/11]


Unnamed: 0,game_id,Date,Home,Away,first_basket,first_basket_tm,first_basket_pred,correct_pred
0,202411100DEN,2024-11-10,DEN,DAL,murraja01,DEN,watsope01,False
1,202411100DET,2024-11-10,DET,HOU,stewais01,DET,smithja05,False
2,202411100IND,2024-11-10,IND,NYK,hartjo01,NYK,townska01,False
3,202411100LAL,2024-11-10,LAL,TOR,dickgr01,TOR,quickim01,False
4,202411100MIL,2024-11-10,MIL,BOS,antetgi01,MIL,brownja02,False
5,202411100MIN,2024-11-10,MIN,MIA,edwaran01,MIN,mcdanja02,False
6,202411100OKC,2024-11-10,OKC,GSW,dortlu01,OKC,willija06,False
7,202411100ORL,2024-11-10,ORL,WAS,coulibi01,WAS,bitadgo01,False
8,202411100PHI,2024-11-10,PHI,CHO,ballla01,CHO,greenjo02,False
9,202411100PHO,2024-11-10,PHO,SAC,dunnry01,PHO,jonesty01,False


In [8]:
from unidecode import unidecode
import string
from fuzzywuzzy import process



In [123]:
def normalize_name(x) :

    for suffix in [' Jr.', ' Sr.', ' III', ' II', ' IV', ' Jr', ' Sr'] :
        x = x.replace(suffix, '')
    x = x.translate(str.maketrans('', '', string.punctuation))
    x = unidecode(x).lower()

    return x

In [126]:
def get_ratings(year) :
    url = f'https://hoopshype.com/nba2k/{year-1}-{year}/'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html')
    table = soup.find('table')
    ratings = pd.read_html(str(table))[0]
    ratings.columns = ['drop', 'name', 'rating']
    ratings = ratings.drop(columns = 'drop')
    ratings['name_norm'] = ratings['name'].apply(normalize_name)
    ratings = ratings[['name_norm', 'rating']]
    return ratings

In [127]:
ratings = get_ratings(2025)

In [128]:
def get_players(year) :

    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'lxml')
    table = soup.find('table')
    while table.find_all('tr', class_ = 'thead') :
        table.find('tr', class_ = 'thead')
    try :
        table.find('tr', class_ = 'norank').decompose()
    except :
        pass

    players = pd.read_html(str(table))[0].rename(columns = {'Player': 'name'})
    players['name_norm'] = players['name'].apply(normalize_name)
    players['player_id'] = [getId(x) for x in table.find_all('a', href = True) if 'players' in x['href']]
    players = players.copy()[players['GS'] > 0]
    players = players[['name', 'name_norm', 'player_id']].drop_duplicates().reset_index(drop = True)

    return players


In [130]:
merged = pd.merge(players, ratings, on = 'name_norm', how = 'left')

In [150]:
name_map = {'dennis schroeder': 'dennis schroder',
 'santiago aldama': 'santi aldama',
 'scottie pippen': 'scotty pippen',
 'nicolas claxton': 'nic claxton',
 'herb jones': 'herbert jones'}

In [131]:
merged[merged['rating'].isna()]

Unnamed: 0,name,name_norm,player_id,rating
36,Domantas Sabonis,domantas sabonis,sabondo01,
44,Dennis Schröder,dennis schroder,schrode01,
50,Desmond Bane,desmond bane,banede01,
105,Santi Aldama,santi aldama,aldamsa01,
111,Scotty Pippen Jr.,scotty pippen,pippesc02,
112,Brandon Boston Jr.,brandon boston,bostobr01,
149,Nic Claxton,nic claxton,claxtni01,
152,Alex Sarr,alex sarr,sarral01,
158,Al Horford,al horford,horfoal01,
183,Herbert Jones,herbert jones,joneshe01,


In [121]:
normalize_name('Trey Murphy III')

'trey murphyi'

In [143]:
for name_norm in merged[merged['rating'].isna()]['name_norm'] :

    closest_match, match_score = process.extractOne(name_norm, ratings['name_norm'].to_list())

    print(name_norm, closest_match, match_score)

domantas sabonis matas buzelis 62
dennis schroder dennis schroeder 97
desmond bane emoni bates 70
santi aldama santiago aldama 89
scotty pippen scottie pippen 89
brandon boston brandon ingram 64
nic claxton nicolas claxton 85
alex sarr alex caruso 70
al horford jalen hoodschifino 63
herbert jones herb jones 87
andre jackson jaren jackson 85
doug mcdermott josh minott 48


In [149]:
with open('utils/name_map_2k.json', 'w') as f:
    json.dump(name_map, f)

with open('utils/name_map_2k.json', 'r') as f:
    loaded_data = json.load(f)

In [4]:
import json
import pandas as pd
import requests
from bs4 import BeautifulSoup

from unidecode import unidecode
import string
from fuzzywuzzy import process
from scrape import getId

In [9]:
def normalize_name(x) :

    for suffix in [' Jr.', ' Sr.', ' III', ' II', ' IV', ' Jr', ' Sr'] :
        x = x.replace(suffix, '')
    x = x.translate(str.maketrans('', '', string.punctuation))

    return unidecode(x).lower()

def get_ratings(year) :

    url = f'https://hoopshype.com/nba2k/{year-1}-{year}/'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html')
    table = soup.find('table')
    ratings = pd.read_html(str(table))[0]
    ratings.columns = ['drop', 'name', 'rating']
    ratings = ratings.drop(columns = 'drop')
    ratings['name_norm'] = ratings['name'].apply(normalize_name)
    ratings = ratings[['name_norm', 'rating']]

    return ratings

def get_players(year) :

    url = f'https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html'
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'lxml')
    table = soup.find('table')
    while table.find_all('tr', class_ = 'thead') :
        table.find('tr', class_ = 'thead')
    try :
        table.find('tr', class_ = 'norank').decompose()
    except :
        pass

    players = pd.read_html(str(table))[0].rename(columns = {'Player': 'name'})
    players['name_norm'] = players['name'].apply(normalize_name)
    players['player_id'] = [getId(x) for x in table.find_all('a', href = True) if 'players' in x['href']]
    players = players.copy()[players['GS'] > 0]
    players = players.drop_duplicates().reset_index(drop = True)

    return players

In [10]:
season = 2025

ratings = get_ratings(season)
players = get_players(season)

In [91]:
# Replace with name_map_2k
with open('utils/name_map_2k.json', 'r') as f:
    name_map = json.load(f)
ratings['name_norm'] = ratings['name_norm'].apply(lambda x: name_map[x] if x in name_map.keys() else x)

# Initial merge
players_ratings = pd.merge(players, ratings,
                  how = 'left',
                  on = 'name_norm')


# Check for duplicates
players_ratings['player_id'].loc[221] = 'antetgi01'
if players_ratings['player_id'].value_counts().max() > 1 :
    duplicated_ratings = players_ratings.copy()[players_ratings['player_id'].duplicated(keep = False)]
    duplicated_ids = duplicated_ratings['player_id'].unique().tolist()
    players_ratings['rating'].loc[duplicated_ratings.index] = np.nan
    print(f'!!!  playerId {", ".join(duplicated_ids)} duplicated, ratings set to NULL')

# Suggested additions to name_map_2k

# Hard-coded ratings (the Sabonis)
with open('utils/name_map_2k.json', 'r') as f:
    name_map = json.load(f)
ratings['name_norm'] = ratings['name_norm'].apply(lambda x: name_map[x] if x in name_map.keys() else x)

# Infer from kNN

!!!  playerId antetgi01 duplicated, ratings set to NULL


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_ratings['player_id'].loc[221] = 'antetgi01'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_ratings['rating'].loc[duplicated_ratings.index] = np.nan


In [93]:
players_ratings[players_ratings['rating'].isna()]

Unnamed: 0,Rk,name,Age,Team,Pos,G,GS,MP,FG,FGA,...,AST,STL,BLK,TOV,PF,PTS,Awards,name_norm,player_id,rating
0,1,Giannis Antetokounmpo,30,MIL,PF,9,9,34.8,12.9,21.2,...,5.2,0.4,0.9,2.7,3.6,31.6,,giannis antetokounmpo,antetgi01,
36,37,Domantas Sabonis,28,SAC,C,11,11,36.8,7.9,12.5,...,6.6,0.7,0.2,3.0,4.0,20.7,,domantas sabonis,sabondo01,
50,51,Desmond Bane,26,MEM,SG,5,5,26.4,7.0,14.4,...,2.8,1.0,0.6,1.8,3.0,18.8,,desmond bane,banede01,
112,121,Brandon Boston Jr.,23,NOP,SG,8,4,26.9,4.5,9.8,...,3.9,1.4,0.3,0.8,1.4,12.0,,brandon boston,bostobr01,
152,180,Alex Sarr,19,WAS,PF,9,9,25.3,3.2,9.8,...,2.2,0.6,2.4,1.0,2.9,9.2,,alex sarr,sarral01,
158,190,Al Horford,38,BOS,C,10,10,26.7,3.2,6.6,...,2.4,1.1,0.7,1.0,1.3,8.7,,al horford,horfoal01,
183,229,Herbert Jones,26,NOP,SF,4,4,28.3,3.0,6.3,...,1.0,1.8,0.3,1.3,2.3,6.8,,herbert jones,joneshe01,
218,335,Andre Jackson Jr.,23,MIL,SG,9,3,15.7,1.1,2.8,...,1.6,0.9,0.3,1.3,2.3,3.0,,andre jackson,jacksan01,
220,343,Doug McDermott,33,SAC,SF,7,1,7.6,0.9,2.9,...,0.0,0.1,0.0,0.0,0.7,2.9,,doug mcdermott,mcderdo01,
221,377,Xavier Tillman Sr.,26,BOS,PF,9,1,10.2,0.7,2.6,...,0.7,0.3,0.0,0.3,0.7,1.7,,xavier tillman,antetgi01,


In [79]:
players_ratings

Unnamed: 0,Rk,name,Age,Team,Pos,G,GS,MP,FG,FGA,...,AST,STL,BLK,TOV,PF,PTS,Awards,name_norm,player_id,rating
0,1,Giannis Antetokounmpo,30,MIL,PF,9,9,34.8,12.9,21.2,...,5.2,0.4,0.9,2.7,3.6,31.6,,giannis antetokounmpo,antetgi01,
1,2,Anthony Davis,31,LAL,PF,9,9,35.1,10.8,18.7,...,2.8,1.3,2.0,2.2,1.2,31.2,,anthony davis,davisan02,94.0
2,3,Jayson Tatum,26,BOS,SF,11,11,36.0,9.5,20.5,...,5.0,1.6,0.5,2.9,2.5,30.5,,jayson tatum,tatumja01,95.0
3,4,Nikola Jokić,29,DEN,C,10,10,38.1,10.8,19.2,...,11.7,1.7,1.0,4.1,2.0,29.7,,nikola jokic,jokicni01,97.0
4,5,LaMelo Ball,23,CHO,PG,10,10,33.4,10.2,23.0,...,6.2,1.5,0.3,4.7,4.1,29.4,,lamelo ball,ballla01,87.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,319,Isaiah Collier,20,UTA,PG,2,1,14.5,1.5,5.0,...,2.5,0.5,0.5,0.5,0.5,3.5,,isaiah collier,colliis01,70.0
218,335,Andre Jackson Jr.,23,MIL,SG,9,3,15.7,1.1,2.8,...,1.6,0.9,0.3,1.3,2.3,3.0,,andre jackson,jacksan01,
219,339,Tidjane Salaün,19,CHO,PF,8,2,16.8,1.1,4.3,...,1.0,0.4,0.0,0.9,0.8,3.0,,tidjane salaun,salauti01,71.0
220,343,Doug McDermott,33,SAC,SF,7,1,7.6,0.9,2.9,...,0.0,0.1,0.0,0.0,0.7,2.9,,doug mcdermott,mcderdo01,


In [60]:
players_ratings['player_id'].value_counts()

antetgi01    1
sarral01     1
drumman01    1
moodymo01    1
carrica01    1
            ..
anunoog01    1
georgke01    1
wiggian01    1
gordoaa01    1
tillmxa01    1
Name: player_id, Length: 222, dtype: int64

In [63]:
players_ratings

Unnamed: 0,Rk,name,Age,Team,Pos,G,GS,MP,FG,FGA,...,AST,STL,BLK,TOV,PF,PTS,Awards,name_norm,player_id,rating
0,1,Giannis Antetokounmpo,30,MIL,PF,9,9,34.8,12.9,21.2,...,5.2,0.4,0.9,2.7,3.6,31.6,,giannis antetokounmpo,antetgi01,97.0
1,2,Anthony Davis,31,LAL,PF,9,9,35.1,10.8,18.7,...,2.8,1.3,2.0,2.2,1.2,31.2,,anthony davis,davisan02,94.0
2,3,Jayson Tatum,26,BOS,SF,11,11,36.0,9.5,20.5,...,5.0,1.6,0.5,2.9,2.5,30.5,,jayson tatum,tatumja01,95.0
3,4,Nikola Jokić,29,DEN,C,10,10,38.1,10.8,19.2,...,11.7,1.7,1.0,4.1,2.0,29.7,,nikola jokic,jokicni01,97.0
4,5,LaMelo Ball,23,CHO,PG,10,10,33.4,10.2,23.0,...,6.2,1.5,0.3,4.7,4.1,29.4,,lamelo ball,ballla01,87.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,319,Isaiah Collier,20,UTA,PG,2,1,14.5,1.5,5.0,...,2.5,0.5,0.5,0.5,0.5,3.5,,isaiah collier,colliis01,70.0
218,335,Andre Jackson Jr.,23,MIL,SG,9,3,15.7,1.1,2.8,...,1.6,0.9,0.3,1.3,2.3,3.0,,andre jackson,jacksan01,
219,339,Tidjane Salaün,19,CHO,PF,8,2,16.8,1.1,4.3,...,1.0,0.4,0.0,0.9,0.8,3.0,,tidjane salaun,salauti01,71.0
220,343,Doug McDermott,33,SAC,SF,7,1,7.6,0.9,2.9,...,0.0,0.1,0.0,0.0,0.7,2.9,,doug mcdermott,mcderdo01,
