In [1]:
import pandas as pd
from helpers.scrape import get_roster
from time import sleep
from IPython.display import clear_output

In [2]:
game_ids = pd.read_csv('data/first_basket_2025.csv')['game_id'].to_list()
for i, game_id in enumerate(game_ids) :

    print(f'[{i+1}/{len(game_ids)}] {game_id}')
    roster = get_roster(game_id)
    roster['starter'] = roster.groupby(['Team']).cumcount() < 5

    roster.to_csv('data/rosters.nosync/rosters_2025.csv', index = None, header = None, mode = 'a')
    sleep(5)
    clear_output()

In [6]:
import pandas as pd
from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo
import requests
import os
import json



if os.getenv("GITHUB_ACTIONS") == "true" :
  api_key = os.getenv('ODDS_API_KEY')
else :
  with open('secrets/odds_api_key.txt') as f:
    api_key = f.read()

with open('utils/odds_tm_map.json', 'r') as f :
  odds_tm_map = json.load(f)

games = pd.read_csv('data/games.csv')

# Store subset of games in the next 30 minutes
now = datetime.now(ZoneInfo('America/New_York'))
games_now = games[(games['Time'] > now.strftime('%Y-%m-%d %H:%M:%S')) & (games['Time'] <= (now + timedelta(minutes = 220)).strftime('%Y-%m-%d %H:%M:%S'))]
print(games_now.shape)

(2, 7)


In [7]:
games_now

Unnamed: 0,game_id,Date,Time,Home,Away,event_id,insert_timestamp_utc
118,202412070NOP,2024-12-07,2024-12-07 19:00:00,NOP,OKC,53f2f3e546f666cc19dc82a02f0e76fe,2024-12-07 14:14:07.788716+00:00
119,202412070WAS,2024-12-07,2024-12-07 19:00:00,WAS,DEN,2d89d4690f6f62dd422e539fbb16c997,2024-12-07 14:14:07.788716+00:00


In [8]:

# Iterate through games
for _, game in games_now.iterrows() :

    eventId = game['event_id']
    print(f'Querying game {game["game_id"]}...')

    odds_response = requests.get(f'https://api.the-odds-api.com/v4/sports/basketball_nba/events/{eventId}/odds',
                             params = {'apiKey': api_key,
                                       'regions': 'us',
                                       'markets': 'player_first_basket',
                                       'oddsFormat': 'decimal'})
    
    bm_dfs = [pd.DataFrame(columns = ['name', 'price', 'bookmaker', 'update_time'])]
    for bookmaker in odds_response.json()['bookmakers'] :
        
        bm_df = pd.DataFrame(bookmaker['markets'][0]['outcomes'])
        bm_df['bookmaker'] = bookmaker['key']
        bm_df['update_time'] = bookmaker['markets'][0]['last_update']

        print(f'Found {len(bm_df)} lines from {bookmaker}')

        bm_dfs.append(
            bm_df
            .drop(columns = 'name')
            .rename(columns = {'description': 'name'})
        )

    game_df = pd.concat(bm_dfs).reset_index(drop = True)
    game_df['game_id'] = game['game_id']
    game_df['event_id'] = game['event_id']

    game_df['insert_timestamp_utc'] = datetime.now(timezone.utc)
    # game_df.to_csv('data/odds_first_basket.csv', index = None, header = None, mode = 'a')

Querying game 202412070NOP...
Found 10 lines from {'key': 'draftkings', 'title': 'DraftKings', 'markets': [{'key': 'player_first_basket', 'last_update': '2024-12-07T20:28:21Z', 'outcomes': [{'name': 'Yes', 'description': 'Shai Gilgeous-Alexander', 'price': 5.75}, {'name': 'Yes', 'description': 'Brandon Ingram', 'price': 6.5}, {'name': 'Yes', 'description': 'Jalen Williams', 'price': 6.5}, {'name': 'Yes', 'description': 'Isaiah Hartenstein', 'price': 7.0}, {'name': 'Yes', 'description': 'C.J. McCollum', 'price': 9.0}, {'name': 'Yes', 'description': 'Yves Missi', 'price': 9.5}, {'name': 'Yes', 'description': 'Dejounte Murray', 'price': 10.0}, {'name': 'Yes', 'description': 'Luguentz Dort', 'price': 10.5}, {'name': 'Yes', 'description': 'Isaiah Joe', 'price': 11.0}, {'name': 'Yes', 'description': 'Herb Jones', 'price': 15.0}]}]}
Found 10 lines from {'key': 'fanduel', 'title': 'FanDuel', 'markets': [{'key': 'player_first_basket', 'last_update': '2024-12-07T20:28:35Z', 'outcomes': [{'name':

  game_df = pd.concat(bm_dfs).reset_index(drop = True)
  game_df = pd.concat(bm_dfs).reset_index(drop = True)


In [10]:
odds_response.json()['bookmakers']

[{'key': 'fanduel',
  'title': 'FanDuel',
  'markets': [{'key': 'player_first_basket',
    'last_update': '2024-12-07T20:27:54Z',
    'outcomes': [{'name': 'Yes', 'description': 'Nikola Jokic', 'price': 5.1},
     {'name': 'Yes', 'description': 'Jordan Poole', 'price': 7.5},
     {'name': 'Yes', 'description': 'Michael Porter Jr', 'price': 8.0},
     {'name': 'Yes', 'description': 'Marvin Bagley III', 'price': 9.5},
     {'name': 'Yes', 'description': 'Christian Braun', 'price': 10.0},
     {'name': 'Yes', 'description': 'Bilal Coulibaly', 'price': 11.0},
     {'name': 'Yes', 'description': 'Carlton Carrington', 'price': 14.0}]}]},
 {'key': 'draftkings',
  'title': 'DraftKings',
  'markets': [{'key': 'player_first_basket',
    'last_update': '2024-12-07T20:28:00Z',
    'outcomes': [{'name': 'Yes', 'description': 'Nikola Jokic', 'price': 5.0},
     {'name': 'Yes', 'description': 'Marvin Bagley III', 'price': 7.0},
     {'name': 'Yes', 'description': 'Michael Porter Jr', 'price': 7.5},
 

In [14]:
import requests
from bs4 import BeautifulSoup
import pytz
import os
import json

season = 2025
today = datetime.now(ZoneInfo('America/New_York')) + timedelta(days = 1)
tmrw_utc = (today + timedelta(days = 1)).astimezone(pytz.utc)


url = f'https://www.basketball-reference.com/leagues/NBA_{season}_games-{today.strftime("%B").lower()}.html'
page = requests.get(url)
soup = BeautifulSoup(page.content, 'lxml')
table = soup.find('table')
while table.find_all('tr', class_ = 'thead') :
    table.find('tr', class_ = 'thead').decompose()
games = pd.read_html(str(table))[0].rename(columns = {'Start (ET)': 'Time'})
games['Home'] = [x['href'].split('/')[2] for x in table.find_all('a', href = True) if 'teams' in x['href']][1::2]
games['Away'] = [x['href'].split('/')[2] for x in table.find_all('a', href = True) if 'teams' in x['href']][0::2]
games['Date'] = pd.to_datetime(games['Date'])
games = games[pd.to_datetime(games['Date']) == pd.to_datetime(today.date())].reset_index(drop = True)
games['Time'] = (games['Date'].astype(str) + ' ' +  games['Time']).apply(lambda x: datetime.strptime(x.upper() + 'M', "%Y-%m-%d %I:%M%p"))
games['game_id'] = games['Date'].apply(lambda x: datetime.strftime(x, "%Y%m%d")) + '0' + games['Home']
games = games[['game_id', 'Date', 'Time', 'Home', 'Away']]

  games = pd.read_html(str(table))[0].rename(columns = {'Start (ET)': 'Time'})


In [15]:
if os.getenv("GITHUB_ACTIONS") == "true" :
  api_key = os.getenv('ODDS_API_KEY')
else :
  with open('secrets/odds_api_key.txt') as f:
    api_key = f.read()

with open('utils/odds_tm_map.json', 'r') as f :
  odds_tm_map = json.load(f)

events_response = requests.get(f'https://api.the-odds-api.com/v4/sports/basketball_nba/events',
                               params = {'apiKey': api_key,
                                         'commenceTimeTo': tmrw_utc.strftime('%Y-%m-%dT%H:%M:%SZ')})


assert events_response.status_code == 200, f'Odds API query not successful {events_response.status_code}'

# Map odds API event_id to game_id
odds = pd.DataFrame(events_response.json()).rename(columns = {'id': 'event_id'})
odds['game_id'] = today.strftime('%Y%m%d') + '0' + odds['home_team'].map(odds_tm_map)

games = games.merge(
    odds[['game_id', 'event_id']],
    on = 'game_id',
    how = 'left'
)

games['insert_timestamp_utc'] = datetime.now(timezone.utc)

In [19]:
import pandas as pd

odds = pd.read_csv('data/odds_first_basket.csv')
odds.tail(50)

Unnamed: 0,name,price,bookmaker,update_time,game_id,event_id,insert_timestamp_utc
229,Deandre Ayton,5.5,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
230,Ivica Zubac,7.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
231,Jerami Grant,8.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
232,James Harden,8.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
233,Anfernee Simons,8.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
234,Shaedon Sharpe,8.5,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
235,Norman Powell,9.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
236,Derrick Jones,13.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
237,Toumani Camara,14.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00
238,Kris Dunn,14.0,bovada,2024-12-04T03:12:12Z,202412030LAC,e707fb0dc8825d51e20056a4bcaa4fa8,2024-12-04 03:12:39.704266+00:00


In [5]:
import requests

In [None]:

eventId = 'f53359e43a9975db8b93a561183faeb8'

odds_response = requests.get(f'https://api.the-odds-api.com/v4/sports/basketball_nba/events/{eventId}/odds',
                             params = {'apiKey': api_key,
                                       'regions': 'us',
                                       'markets': 'player_first_basket',
                                       'oddsFormat': 'decimal'})

SSLError: HTTPSConnectionPool(host='api.the-odds-api.com', port=443): Max retries exceeded with url: /v4/sports/basketball_nba/events/f53359e43a9975db8b93a561183faeb8/odds?apiKey=a7bfde5bb651ac64e61f99c67631ef47&regions=us&markets=player_first_basket&oddsFormat=decimal (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1147)')))

In [None]:
# Store subset of games in the next 30 minutes
now = datetime.now(ZoneInfo('America/New_York'))
now = datetime(2024, 12, 1, 15, 2)

games_now = games[(games['Time'] > now) & (games['Time'] <= now + timedelta(minutes = 30))]

# Iterate through games
for _, game in games_now.iterrows() :

    eventId = game['event_id']

    odds_response = requests.get(f'https://api.the-odds-api.com/v4/sports/basketball_nba/events/{eventId}/odds',
                             params = {'apiKey': api_key,
                                       'regions': 'us',
                                       'markets': 'player_first_basket',
                                       'oddsFormat': 'decimal'})
    

    bm_dfs = [pd.DataFrame(columns = ['name', 'price', 'bookmaker', 'update_time'])]
    for bookmaker in odds_response.json()['bookmakers'] :
        
        bm_df = pd.DataFrame(bookmaker['markets'][0]['outcomes'])
        bm_df['bookmaker'] = bookmaker['key']
        bm_df['update_time'] = bookmaker['markets'][0]['last_update']

        bm_dfs.append(
            bm_df
            .drop(columns = 'name')
            .rename(columns = {'description': 'name'})
        )

    game_df = pd.concat(bm_dfs).reset_index(drop = True)
    game_df['game_id'] = game['game_id']
    game_df['event_id'] = game['event_id']

    game_df['insert_timestamp_utc'] = datetime.now(timezone.utc)
    game_df.to_csv('data/odds_first_basket.csv', index = None, header = None, mode = 'a')

ValueError: No objects to concatenate

In [75]:
test = pd.read_csv('data/odds_first_basket.csv')[['price']]
test['prob'] = 10 * [0.1]

In [71]:
bm_dfs = [pd.DataFrame(columns = ['name', 'price', 'bookmaker', 'update_time'])]
for bookmaker in odds_response.json()['bookmakers'] :
    
    bm_df = pd.DataFrame(bookmaker['markets'][0]['outcomes'])
    bm_df['bookmaker'] = bookmaker['key']
    bm_df['update_time'] = bookmaker['markets'][0]['last_update']

    bm_dfs.append(
        bm_df
        .drop(columns = 'name')
        .rename(columns = {'description': 'name'})
    )

game_df = pd.concat(bm_dfs).reset_index(drop = True)
game_df['game_id'] = game['game_id']
game_df['event_id'] = game['event_id']

game_df['insert_timestamp_utc'] = datetime.now(timezone.utc)
game_df.to_csv('data/odds_first_basket.csv', index = None, header = None, mode = 'a')

In [85]:
import numpy as np

In [96]:
s = 1
lmbda = 1

test = pd.read_csv('data/odds_first_basket.csv')[['price']]
test['prob'] = 1 / test['price'] * np.random.uniform(0.5, 1.5, (10,))
test['prob'] /= test['prob'].sum()
test['E'] = (test['price'] * test['prob'] - 1) * s
test['var'] = (s ** 2) * (test['prob'] * ((test['price'] - 1) ** 2) + 1 - test['prob'] - (test['prob'] * test['price'] - 1) ** 2)
test['obj'] = test['E'] + lmbda * test['var']
test

Unnamed: 0,price,prob,E,var,obj
0,5.6,0.136351,-0.236435,3.692934,3.456499
1,7.0,0.179002,0.253011,7.201038,7.454049
2,8.5,0.073685,-0.37368,4.931443,4.557763
3,9.0,0.126602,0.139419,8.956494,9.095913
4,9.0,0.109996,-0.010033,7.929672,7.919639
5,9.5,0.079522,-0.244543,6.606128,6.361585
6,10.0,0.062705,-0.372955,5.877266,5.504311
7,10.0,0.054153,-0.458466,5.122077,4.663611
8,10.5,0.058793,-0.382673,6.100837,5.718164
9,11.0,0.119192,0.311109,12.703188,13.014297


$x_i \in \{0,1\} \ \forall \ i$

$E_i = P_i o_i s_i - s_i$