In [1]:
import pandas as pd
import json
import requests
import numpy as np
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo
import pytz
from bs4 import BeautifulSoup
import requests
import os

In [19]:
import string
from unidecode import unidecode

def normalize_name(x) :
    """Return a lower-case, punctuation-free, ASCII key for matching player names.

    Generational suffixes (Jr, Sr, II, III, IV, with or without trailing
    punctuation) are dropped only when they appear as a whole space-separated
    token after the first token. A plain substring replace would also eat the
    start of a following word, e.g. "Holiday, Jrue" -> "Holiday,ue".

    Parameters
    ----------
    x : str
        Player name as it appears in a data source.

    Returns
    -------
    str
        Name without suffix tokens or punctuation, transliterated with
        `unidecode` and lower-cased.
    """
    suffixes = {'Jr', 'Sr', 'II', 'III', 'IV'}

    # Split on single spaces (not arbitrary whitespace) so spacing in the rest of the name is preserved.
    first, *rest = x.split(' ')
    kept = [first] + [tok for tok in rest if tok.rstrip(string.punctuation) not in suffixes]
    x = ' '.join(kept)

    # Remove every ASCII punctuation character, e.g. "P.J." -> "PJ".
    x = x.translate(str.maketrans('', '', string.punctuation))

    return unidecode(x).lower()

In [26]:
# Distinct player names from the first-basket odds file.
odds = (
    pd.read_csv('../data/odds_first_basket.csv')
    .loc[:, ['name']]
    .drop_duplicates()
)

# Player lookup table restricted to the three columns used for matching.
metadata = (
    pd.read_csv('../data/player_metadata.csv')
    .loc[:, ['player_id', 'name', 'name_norm']]
)

In [27]:
# Add the normalized join key for every odds name.
odds['name_norm'] = odds['name'].map(normalize_name)

In [31]:
# Left-join odds names onto the player metadata via the normalized name and show the last 50 rows.
pd.merge(
    odds,
    metadata,
    how = 'left',
    on = 'name_norm',
    suffixes = ('_odds', '_md'),
).tail(50)

Unnamed: 0,name_odds,name_norm,player_id,name_md
30,Kyrie Irving,kyrie irving,irvinky01,Kyrie Irving
31,Brandon Clarke,brandon clarke,clarkbr01,Brandon Clarke
32,Dereck Lively II,dereck lively,livelde01,Dereck Lively II
33,P.J. Washington,pj washington,washipj01,P.J. Washington
34,Klay Thompson,klay thompson,thompkl01,Klay Thompson
35,Jaylen Wells,jaylen wells,wellsja01,Jaylen Wells
36,Daniel Gafford,daniel gafford,gaffoda01,Daniel Gafford
37,Nikola Jokic,nikola jokic,jokicni01,Nikola Jokić
38,Stephen Curry,stephen curry,curryst01,Stephen Curry
39,Andrew Wiggins,andrew wiggins,wiggian01,Andrew Wiggins


In [5]:
# Load the 2025 rosters CSV into `games`.
games =  pd.read_csv('../data/rosters.nosync/rosters_2025.csv')

In [7]:
# Starters only, for games whose id begins with 20241205.
games.loc[
    lambda df: df['game_id'].apply(lambda gid: gid.startswith('20241205')) & df['starter']
]

Unnamed: 0,game_id,Player,player_id,Team,MP,FG,FGA,FG%,3P,3PA,...,AST%,STL%,BLK%,TOV%,USG%,ORtg,DRtg,BPM,VORP,starter
7048,202412050CLE,Jarrett Allen,allenja01,CLE,35.366667,3,5,0.600,0,0,...,19.0,2.5,0.0,25.4,9.1,145.0,100.0,6.2,6.1,True
7049,202412050CLE,Darius Garland,garlada01,CLE,33.250000,9,20,0.450,3,10,...,17.0,0.0,0.0,4.5,27.3,115.0,111.0,-2.7,-0.5,True
7050,202412050CLE,Donovan Mitchell,mitchdo01,CLE,32.933333,11,24,0.458,6,10,...,28.2,2.7,4.0,17.2,35.8,102.0,101.0,4.9,4.8,True
7051,202412050CLE,Evan Mobley,mobleev01,CLE,32.250000,7,11,0.636,3,5,...,16.3,4.1,0.0,6.8,18.5,149.0,100.0,12.1,9.5,True
7052,202412050CLE,Isaac Okoro,okorois01,CLE,11.633333,1,2,0.500,1,2,...,19.2,0.0,0.0,0.0,7.0,171.0,111.0,0.2,0.5,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7208,202412050WAS,Luka Dončić,doncilu01,DAL,32.400000,7,17,0.412,3,10,...,39.4,2.9,2.5,17.2,29.4,120.0,93.0,3.3,3.6,True
7209,202412050WAS,Kyrie Irving,irvinky01,DAL,24.800000,10,16,0.625,3,5,...,20.3,3.8,0.0,10.4,32.0,138.0,98.0,7.6,5.0,True
7210,202412050WAS,P.J. Washington,washipj01,DAL,24.483333,2,7,0.286,2,4,...,8.9,0.0,6.6,0.0,14.7,148.0,99.0,3.2,2.6,True
7211,202412050WAS,Klay Thompson,thompkl01,DAL,21.300000,4,9,0.444,3,5,...,11.6,0.0,0.0,10.0,19.3,129.0,110.0,-1.3,0.3,True


In [5]:
from io import StringIO

season = 2025
today = datetime.now(ZoneInfo('America/New_York'))

# Monthly schedule page for the current Eastern-time month of the season.
url = f'https://www.basketball-reference.com/leagues/NBA_{season}_games-{today.strftime("%B").lower()}.html'
page = requests.get(url, timeout = 30)
page.raise_for_status()  # fail here instead of parsing an error page
soup = BeautifulSoup(page.content, 'lxml')
table = soup.find('table')
if table is None :
    raise ValueError(f'No schedule table found at {url}')

# Remove every <tr class="thead"> row before parsing so it is not read as a game
# (presumably these are repeated header rows inside the table body).
for header_row in table.find_all('tr', class_ = 'thead') :
    header_row.decompose()

# read_html expects a file-like object; passing literal HTML as a string is deprecated.
games = pd.read_html(StringIO(str(table)))[0].rename(columns = {'Start (ET)': 'Time'})

# Team codes come from the '/teams/<CODE>/...' links, which alternate away, home in document order.
team_codes = [a['href'].split('/')[2] for a in table.find_all('a', href = True) if 'teams' in a['href']]
games['Home'] = team_codes[1::2]
games['Away'] = team_codes[0::2]

# Keep only today's games.
games['Date'] = pd.to_datetime(games['Date'])
games = games[games['Date'] == pd.to_datetime(today.date())].reset_index(drop = True)

# Start times look like "7:00p"; upper-case and append "M" so they parse with %I:%M%p.
games['Time'] = pd.to_datetime(
    (games['Date'].dt.strftime('%Y-%m-%d') + ' ' + games['Time']).str.upper() + 'M',
    format = '%Y-%m-%d %I:%M%p'
)

# game_id format: YYYYMMDD + '0' + home team code.
games['game_id'] = games['Date'].dt.strftime('%Y%m%d') + '0' + games['Home']
games = games[['game_id', 'Date', 'Time', 'Home', 'Away']]

In [9]:
# Load the Odds API key: from the environment when running on GitHub Actions,
# otherwise from a local secrets file.
if os.getenv("GITHUB_ACTIONS") == "true" :
    api_key = os.getenv('ODDS_API_KEY')
else :
    with open('secrets/odds_api_key.txt') as f:
        # Strip surrounding whitespace: a trailing newline in the file would
        # otherwise be sent to the API as part of the key.
        api_key = f.read().strip()

In [22]:
# One day after `today`, converted to UTC.
tmrw_utc = (today + timedelta(days = 1)).astimezone(pytz.utc)

In [23]:
# Preview the timestamp in the "YYYY-MM-DDTHH:MM:SSZ" form that is sent as `commenceTimeTo`.
tmrw_utc.strftime('%Y-%m-%dT%H:%M:%SZ')

'2024-11-28T00:47:06Z'

In [24]:
# List NBA events that start no later than `tmrw_utc`.
events_response = requests.get('https://api.the-odds-api.com/v4/sports/basketball_nba/events',
                               params = {'apiKey': api_key,
                                         'commenceTimeTo': tmrw_utc.strftime('%Y-%m-%dT%H:%M:%SZ')},
                               timeout = 30)  # never hang indefinitely on a stalled connection
# Surface HTTP errors (bad key, quota, ...) here rather than when the payload is parsed.
events_response.raise_for_status()

In [43]:
# Events payload as a frame: `id` becomes `event_id`, `commence_time` is parsed to datetimes.
odds = (
    pd.DataFrame(events_response.json())
    .rename(columns = {'id': 'event_id'})
    .assign(commence_time = lambda df: pd.to_datetime(df['commence_time']))
)

In [44]:
# Keep only the first five events, then display them.
# NOTE(review): the cut-off of 5 is hard-coded; presumably it matched the number of games on
# the day this was run — confirm before relying on it.
odds = odds.head(5)
odds

Unnamed: 0,event_id,sport_key,sport_title,commence_time,home_team,away_team
0,8371571bb6059f03debc55b594422565,basketball_nba,NBA,2024-11-27 00:10:00+00:00,Washington Wizards,Chicago Bulls
1,979282582fedc5d942f4f0f2f22763ee,basketball_nba,NBA,2024-11-27 00:35:00+00:00,Miami Heat,Milwaukee Bucks
2,312350cf1c97873d6cb81224bac3873e,basketball_nba,NBA,2024-11-27 01:00:00+00:00,Minnesota Timberwolves,Houston Rockets
3,0a3002d2aaa7b94c5c9c4d3bf792ac7d,basketball_nba,NBA,2024-11-27 02:00:00+00:00,Utah Jazz,San Antonio Spurs
4,0ae85f4502bca2d13e62d2e35c94744c,basketball_nba,NBA,2024-11-27 03:00:00+00:00,Phoenix Suns,Los Angeles Lakers


In [45]:
# Mapping applied to the API's `home_team` names when building game ids.
with open('utils/odds_tm_map.json') as map_file :
    odds_tm_map = json.load(map_file)

In [46]:
# Build game_id (YYYYMMDD + '0' + home team code) from each event's own tip-off date in
# Eastern time. Using today's date for every row mislabels any event in the response that
# does not start today (the request window extends a full day ahead).
odds['game_id'] = (
    odds['commence_time'].dt.tz_convert('America/New_York').dt.strftime('%Y%m%d')
    + '0'
    + odds['home_team'].map(odds_tm_map)
)

In [51]:
# Attach each game's Odds API event id; games without a matching event keep a missing value.
pd.merge(
    games,
    odds.loc[:, ['game_id', 'event_id']],
    how = 'left',
    on = 'game_id',
)

Unnamed: 0,game_id,Date,Time,Home,Away,event_id
0,202411260WAS,2024-11-26,2024-11-26 19:00:00,WAS,CHI,8371571bb6059f03debc55b594422565
1,202411260MIA,2024-11-26,2024-11-26 19:30:00,MIA,MIL,979282582fedc5d942f4f0f2f22763ee
2,202411260MIN,2024-11-26,2024-11-26 20:00:00,MIN,HOU,312350cf1c97873d6cb81224bac3873e
3,202411260UTA,2024-11-26,2024-11-26 21:00:00,UTA,SAS,0a3002d2aaa7b94c5c9c4d3bf792ac7d
4,202411260PHO,2024-11-26,2024-11-26 22:00:00,PHO,LAL,0ae85f4502bca2d13e62d2e35c94744c


In [65]:
# Add an empty `event_id` column at position 5 of the stored games table.
gms = pd.read_csv('data/games.csv')
# DataFrame.insert raises if the column already exists, which happens on any re-run
# after the file has been written back with the new column.
if 'event_id' not in gms.columns :
    gms.insert(5, 'event_id', np.nan)

In [67]:
# Write the table back to the same file without the row index.
gms.to_csv('data/games.csv', index = False)

In [55]:
def datetime_to_cron_utc(t) :
    """Convert a timestamp into a cron expression ("minute hour day month *") in UTC.

    Parameters
    ----------
    t : str, datetime, numpy.datetime64 or pandas.Timestamp
        A single timestamp. Naive values are interpreted as US Eastern wall-clock
        time; timezone-aware values are converted from their own zone instead of
        having it overwritten.

    Returns
    -------
    str
        Cron fields for the same instant in UTC, with "*" for day-of-week.
    """
    ts = pd.to_datetime(t)
    if ts.tzinfo is None :
        # tz_localize picks the correct EST/EDT offset for the date. Ambiguous or
        # non-existent wall times around DST changes are resolved rather than raised.
        ts = ts.tz_localize('America/New_York', ambiguous = True, nonexistent = 'shift_forward')
    t_utc = ts.tz_convert('UTC')
    return f"{t_utc.minute} {t_utc.hour} {t_utc.day} {t_utc.month} *"

In [62]:
# Pair each distinct start time with the cron expression for 30 minutes before it.
[
    (tipoff, datetime_to_cron_utc(pd.to_datetime(tipoff) - timedelta(minutes = 30)))
    for tipoff in games['Time'].unique()
]

[(numpy.datetime64('2024-11-26T19:00:00.000000000'), '30 23 26 11 *'),
 (numpy.datetime64('2024-11-26T19:30:00.000000000'), '0 0 27 11 *'),
 (numpy.datetime64('2024-11-26T20:00:00.000000000'), '30 0 27 11 *'),
 (numpy.datetime64('2024-11-26T21:00:00.000000000'), '30 1 27 11 *'),
 (numpy.datetime64('2024-11-26T22:00:00.000000000'), '30 2 27 11 *')]

In [77]:
# Fetch first-basket player odds (US region, decimal format) for a single event.
# NOTE(review): `eventId` is not assigned in any cell visible in this notebook; it must hold
# an event id from the events listing before this cell runs — confirm where it is set.
odds_response = requests.get(f'https://api.the-odds-api.com/v4/sports/basketball_nba/events/{eventId}/odds',
                             params = {'apiKey': api_key,
                                       'regions': 'us',
                                       'markets': 'player_first_basket',
                                       'oddsFormat': 'decimal'},
                             timeout = 30)  # never hang indefinitely on a stalled connection
# Surface HTTP errors here rather than as a KeyError when the payload is parsed.
odds_response.raise_for_status()

In [78]:
# Pick the first bookmaker whose key contains 'fanduel' and take its first market
# (the request asks for the single market 'player_first_basket').
bookmakers = odds_response.json()['bookmakers']
fanduel_matches = [i for i, book in enumerate(bookmakers) if 'fanduel' in book['key']]
if not fanduel_matches :
    # argmax over an all-False list returns 0, which would silently select whichever
    # bookmaker happens to be listed first.
    raise LookupError('No FanDuel bookmaker found in the odds response')
fanduel_idx = fanduel_matches[0]
odds = bookmakers[fanduel_idx]['markets'][0]

In [85]:
# One row per outcome: drop the `name` field, then expose `description` as the player
# and `price` as the decimal odds.
odds_df = (
    pd.DataFrame(odds['outcomes'])
    .drop(columns = ['name'])
    .rename(columns = dict(description = 'player', price = 'decimal_odds'))
)

In [86]:
# Tag every outcome row with the event id held in `eventId`.
odds_df['event_id'] = eventId

Unnamed: 0,player,decimal_odds
0,Deandre Ayton,6.5
1,Anthony Edwards,7.0
2,Julius Randle,7.5
3,Jerami Grant,7.5
4,Rudy Gobert,8.5
5,Anfernee Simons,9.0
6,Shaedon Sharpe,10.0
7,Toumani Camara,11.0
8,Jaden McDaniels,11.0
9,Mike Conley,14.0


In [None]:
# Scrape historical first basket
# Predict first basket scorer
#   PPG
#   VORP, OWS, ...
#   USG%
#   First basket history


# Scrape historical tip-off winners
# Predict tip-off winner
#   Height
#   Tip-off winning ratio
#   Rebounds

In [10]:
# Inspect the raw events payload as a table, with the columns as returned by the API.
pd.DataFrame(events_response.json())

Unnamed: 0,id,sport_key,sport_title,commence_time,home_team,away_team
0,84d6b1ca05b2ad858b5b9ac52ea4b0ee,basketball_nba,NBA,2024-11-08T01:10:00Z,Chicago Bulls,Minnesota Timberwolves
1,1e6137994854a6163d6f9fea8dc6e83e,basketball_nba,NBA,2024-11-08T01:10:00Z,Milwaukee Bucks,Utah Jazz
2,808cd638f31e1657b3c5cd3841a76b4a,basketball_nba,NBA,2024-11-08T01:10:00Z,San Antonio Spurs,Portland Trail Blazers
3,74058cb479045efa21ebd5c57050e23e,basketball_nba,NBA,2024-11-13T00:00:00Z,Boston Celtics,Atlanta Hawks
4,e80e0c514aa69872360f58b079796a8b,basketball_nba,NBA,2024-11-13T00:00:00Z,Detroit Pistons,Miami Heat
5,2d650fff73dcc627c1c0de3a4694e9fc,basketball_nba,NBA,2024-11-13T00:30:00Z,Philadelphia 76ers,New York Knicks
6,30e4d7fc5396e7b0b8be16b278c123fe,basketball_nba,NBA,2024-11-13T01:00:00Z,Milwaukee Bucks,Toronto Raptors
7,6050b0be3ce7b57b77ef6695b9cecaa1,basketball_nba,NBA,2024-11-13T02:00:00Z,Utah Jazz,Phoenix Suns
8,f0bfcb563a5de00b392bda6960db1796,basketball_nba,NBA,2024-11-13T03:00:00Z,Golden State Warriors,Dallas Mavericks
9,4a86e15d2c8c52972524be69d15941cc,basketball_nba,NBA,2024-11-13T03:00:00Z,Portland Trail Blazers,Minnesota Timberwolves


In [6]:
import requests
from bs4 import BeautifulSoup
from scrape import getId

In [7]:
# Box-score index page for one fixed date (month=11, day=12, year=2024).
url = 'https://www.basketball-reference.com/boxscores/?month=11&day=12&year=2024'
page = requests.get(url)
soup = BeautifulSoup(page.content, features = 'lxml')

In [8]:
# Show the response object to check the HTTP status of the request.
page

<Response [200]>

In [9]:
# One game id per link whose href points at a play-by-play box score.
game_ids = [
    getId(anchor)
    for anchor in soup.find_all('a', href = True)
    if 'boxscores/pbp' in anchor['href']
]

In [10]:
# Display the collected game ids.
game_ids

['202411120BOS',
 '202411120DET',
 '202411120GSW',
 '202411120MIL',
 '202411120ORL',
 '202411120PHI',
 '202411120POR',
 '202411120UTA']

In [12]:
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from time import sleep

from scrape import get_first_basket, getId


# Current time in the America/New_York zone, minus one day.
yst = datetime.now(ZoneInfo('America/New_York')) - timedelta(days = 1)

In [15]:
# Month number of `yst`.
yst.month

11

In [17]:
# Fetch the NBA.com draft-combine anthropometric stats page.
# NOTE(review): if the page fills its table client-side, the data will not be in this
# static HTML — confirm before parsing it.
url = 'https://www.nba.com/stats/draft/combine-anthro'
page = requests.get(url)

In [25]:
# Parse the response with lxml and serialize the parsed document back into one HTML string.
html = str(BeautifulSoup(page.content, 'lxml'))

In [39]:
# Split the HTML on the literal '<table' (presumably to see whether and where tables occur).
html.split('<table')

['<!DOCTYPE html>\n<html data-build="17287" data-version="4.52.0" lang="en"><head><meta charset="utf-8"/><link href="/site-manifest.json" rel="manifest"/><meta content="width=device-width,initial-scale=1,minimum-scale=1,maximum-scale=1,user-scalable=no" name="viewport"/><script src="/newrelic/newrelic-prod.js"></script><script id="theme-loader" src="/theme-loader.js"></script><title>Draft Combine Anthrometric | Stats | NBA.com</title><meta content="NBA Draft Combine Anthrometric data" name="description"/><link href="/favicon.ico" rel="icon"/><link href="/favicon-32x32.png" rel="icon" sizes="32x32" type="image/png"/><link href="/favicon-16x16.png" rel="icon" sizes="16x16" type="image/png"/><link href="/apple-touch-icon.png" rel="apple-touch-icon"/><link href="/apple-touch-icon.png" rel="apple-touch-icon" sizes="180x180"/><link href="/apple-touch-icon.png" rel="apple-touch-icon" sizes="120x120"/><link href="/apple-touch-icon.png"/><meta content="#da532c" name="msapplication-TileColor"/><