In [None]:
# As with the data analysis file, this requires nba_api to be loaded:
#  pip install nba_api

import pandas as pd
import numpy as np
import json
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelogs
from scipy.stats import norm
from scipy.stats import poisson

# Never truncate columns when a wide DataFrame is displayed.
pd.set_option("display.max_columns", None)

# Sports books have a "money line" for each bet, which reflects the bet's payout.
# For example, -140 means that each $140 bet on an event will pay out $100,
#   and +140 means that each $100 bet will pay out $140.
# That payout can also be expressed as the probability that the sports book
#   thinks a certain result will happen: -140 -> 140 / (100+140) -> 58.33%.
# So the key is to bet when the payout is too high compared to the probability
#   that the bettor believes the event will happen.

# Thus, we will need a function to convert a probability to a money line

def getMLfromProb(theProb):
    """Convert a probability into an American money line.

    Parameters
    ----------
    theProb : float
        Probability of the event, in the closed interval [0, 1].

    Returns
    -------
    int or float
        100 for an even-money event, a positive line for an underdog
        (probability below 0.5) and a negative line for a favorite
        (probability above 0.5).  The degenerate probabilities 0 and 1 have
        no finite price and map to +inf and -inf respectively.

    Raises
    ------
    ValueError
        If theProb is NaN or lies outside [0, 1].
    """
    # NaN fails every comparison, so it is rejected here as well.
    if not 0 <= theProb <= 1:
        raise ValueError(f"Probability must be within [0, 1], got {theProb!r}")
    if theProb == 0.5:
        return 100
    # A simulation that never (or always) goes over yields exactly 0 (or 1);
    # report an infinite price instead of dividing by zero.
    if theProb == 0:
        return float("inf")
    if theProb == 1:
        return float("-inf")
    if theProb < 0.5:
        return ((1 - theProb) / theProb) * 100
    return (theProb / (1 - theProb)) * -100.

# And also the reverse: a function to convert a money line to a probability

def getProbFromML(theml):
    """Convert an American money line into its implied probability.

    Parameters
    ----------
    theml : int or float
        The money line, e.g. -140 or +140.  Must be non-zero.

    Returns
    -------
    float
        Implied probability: 100 / (100 + line) for a positive line and
        |line| / (100 + |line|) for a negative line, so -140 -> 58.33%
        and +140 -> 41.67%.

    Raises
    ------
    ValueError
        If theml is 0 or NaN, neither of which is a valid money line.
    """
    if theml > 0:
        return 100 / (100 + theml)
    if theml < 0:
        return abs(theml) / (100 + abs(theml))
    # Neither positive nor negative: previously this fell through and failed
    # with an UnboundLocalError on the return statement.
    raise ValueError(f"A money line must be a non-zero number, got {theml!r}")

In [None]:
# get_players returns a list of dictionaries, each representing a player.
nba_players = players.get_players()

# Size for the simulations
test_size = 1_000_000

# Seed NumPy's global random state so that Restart & Run All reproduces the
#   same simulated probabilities. scipy's rvs() draws from this global state
#   when it is not given an explicit random_state.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Seasons for analysis: the current one and the previous one
seasons = ["2022-23", "2023-24"]

In [None]:
# function to get all games from this year and last year for a player

def getPlayerGames(aPlayer, aMetricDict, aSeasonList):
    """Fetch the game logs of one or more players for the given seasons.

    Parameters
    ----------
    aPlayer : str or list of str
        Full player name(s), e.g. "LeBron James" or ["LeBron James"].
        Some props involve several players, hence the list form.
    aMetricDict : dict
        All the metrics involved in the bet, keyed by stat column, e.g.
        {'PTS': 34.5, 'REB': 9.5, 'AST': 9.5}.  Only the keys are used here.
    aSeasonList : list of str
        Season identifiers passed to the endpoint, e.g. ["2022-23", "2023-24"].

    Returns
    -------
    pandas.DataFrame
        One row per game, with columns PLAYER_NAME, MIN and one column per
        key of aMetricDict.

    Raises
    ------
    ValueError
        If no entry of nba_players matches any of the requested names.
    KeyError
        If a requested column is missing from the downloaded logs (this
        includes the case where no games were returned at all).
    """
    # A bare string must be wrapped: `name in "LeBron James"` is a substring
    # test and would also select any player whose full name is e.g. "James".
    if isinstance(aPlayer, str):
        aPlayer = [aPlayer]
    wanted_names = set(aPlayer)

    chosen_player_ids = [player['id'] for player in nba_players
                         if player['full_name'] in wanted_names]
    if not chosen_player_ids:
        raise ValueError(f"No NBA player found matching {sorted(wanted_names)!r}")

    # The endpoint is queried once per (player, season) pair. Collect the
    # pieces and concatenate once instead of growing a DataFrame in the loop
    # through the private DataFrame._append.
    season_frames = []
    for eachID in chosen_player_ids:
        for eachSeason in aSeasonList:
            the_playerseason_log = playergamelogs.PlayerGameLogs(
                player_id_nullable=eachID,
                season_nullable=eachSeason).get_normalized_json()
            tpsl_df = pd.DataFrame(json.loads(the_playerseason_log)['PlayerGameLogs'])
            if not tpsl_df.empty:
                season_frames.append(tpsl_df)
    pg_output = pd.concat(season_frames) if season_frames else pd.DataFrame()

    cols_to_keep = ["PLAYER_NAME", "MIN"] + list(aMetricDict.keys())
    return pg_output[cols_to_keep]

In [None]:
# Now, simulate random variables with each player's avg and stdev performance
#    and see from those variables what is the prob of going over.

# Get player moments (mean and stdev) for each metric

def getPlayerMeans(pgdf):
    """Return one row per PLAYER_NAME holding the mean of every other column.

    Columns whose dtype is 'object' cannot be averaged, so their first
    entry within each player's group is carried over instead.
    """
    summarize = lambda col: col.head(1) if col.dtype == 'object' else col.mean()
    games_by_player = pgdf.groupby(['PLAYER_NAME'])
    return games_by_player.agg(summarize)

def getPlayerStdev(pgdf):
    """Return one row per PLAYER_NAME holding the standard deviation of every other column.

    Columns whose dtype is 'object' have no standard deviation, so their
    first entry within each player's group is carried over instead.
    """
    summarize = lambda col: col.head(1) if col.dtype == 'object' else col.std()
    games_by_player = pgdf.groupby(['PLAYER_NAME'])
    return games_by_player.agg(summarize)


In [None]:
# return a dict of info for each player 

def makePlayerDict(aPlayer, aMetricDict):
    """Simulate one player's metrics and estimate the chance of going over each line.

    Parameters
    ----------
    aPlayer : str
        Full player name; used to look up the player's row in the per-player
        mean and standard-deviation tables.
    aMetricDict : dict
        Maps each metric (stat column) to its betting line, e.g.
        {'PTS': 34.5, 'REB': 9.5}.

    Returns
    -------
    dict
        Empty if aMetricDict is empty.  Otherwise:
        - "metrics": {metric: {"Stat", "Line", "p_over"}} for every metric;
        - "p_over": the product of the per-metric probabilities, i.e. the
          chance that every line is beaten if the metrics are treated as
          independent (the same rule resolvePropBet applies across players);
        - "Stat", "Line": the last metric and its line, kept for backward
          compatibility with the previous flat layout.
        For a single metric, "Stat"/"Line"/"p_over" are exactly what the flat
        layout contained.  Previously each metric overwrote these three keys,
        so every metric but the last was silently dropped.
    """
    playerDF = getPlayerGames(aPlayer, aMetricDict, seasons)
    # The moments do not depend on the metric being simulated: compute them once.
    allPlayerMeans = getPlayerMeans(playerDF)
    allPlayerSTD = getPlayerStdev(playerDF)

    player_dict = dict()
    per_metric = dict()
    joint_p_over = 1.0
    for eachMetric, playerLine in aMetricDict.items():
        playerMean = allPlayerMeans.loc[aPlayer, eachMetric]
        # From the data analysis file: points are Gaussian...
        if eachMetric == 'PTS':
            playerStd = allPlayerSTD.loc[aPlayer, eachMetric]
            metricSim = norm.rvs(playerMean, scale=playerStd, size=test_size)
        # ...and other metrics are Poisson
        else:
            metricSim = poisson.rvs(playerMean, size=test_size)
        # Share of simulated games that finish strictly above the line.
        p_over = len(metricSim[metricSim > playerLine]) / len(metricSim)
        per_metric[eachMetric] = {"Stat": eachMetric,
                                  "Line": playerLine,
                                  "p_over": p_over}
        joint_p_over = joint_p_over * p_over
        player_dict.update({"Stat": eachMetric, "Line": playerLine})

    if per_metric:
        player_dict["p_over"] = joint_p_over
        player_dict["metrics"] = per_metric
    return player_dict

In [None]:
# Now make a dict of the player dicts, for multiple-player props

def makePropDict(aPlayerList, aMetricDict):
    """Build {player: makePlayerDict(player, aMetricDict)} for a multi-player prop.

    A player listed more than once is evaluated only the first time.
    """
    legs_by_player = {}
    for player_name in aPlayerList:
        if player_name in legs_by_player:
            continue
        legs_by_player[player_name] = makePlayerDict(player_name, aMetricDict)
    return legs_by_player

In [None]:
# A type of prop that we are not yet prepared for is a sum:
#   Either sum across a player (Player A's PTS + REB + AST > Some line), 
#   Or sum across multiple players (A and B combine for more than X points).

def getPlayerSumProb(aPlayer, isHome=None, aMetricDict=None):
    """Estimate the probability that one player's combined stats exceed a line.

    Applies to cases where one player is being summed across multiple
    metrics (e.g. PTS + REB + AST over some line).

    Parameters
    ----------
    aPlayer : str
        Full player name.
    isHome : optional
        Unused by the simulation; kept so existing three-argument calls keep
        working.  If a dict is passed here and aMetricDict is omitted, it is
        treated as aMetricDict, so getPlayerSumProb(player, metric_dict)
        also works.
    aMetricDict : dict
        Looks like {'PTS': 0, 'REB': 0, ..., 'SUM': X}: the keys other than
        'SUM' name the metrics to add up (their values are ignored) and
        'SUM' holds the line X.  'SUM' may appear at any position.

    Returns
    -------
    float
        Share of simulated games in which the summed metrics are strictly
        above the line.  Each metric is simulated separately and the draws
        are added element-wise.

    Raises
    ------
    TypeError
        If no metric dict is supplied.
    KeyError
        If aMetricDict has no 'SUM' entry.
    """
    # Accept the two-argument form getPlayerSumProb(player, metric_dict).
    if aMetricDict is None and isinstance(isHome, dict):
        aMetricDict, isHome = isHome, None
    if aMetricDict is None:
        raise TypeError("getPlayerSumProb() missing required argument: 'aMetricDict'")

    playerLine = aMetricDict['SUM']
    # 'SUM' is not a real metric in the observed data, so drop it by key
    #   (not by position) before requesting the game logs.
    newMetricDict = {metric: line for metric, line in aMetricDict.items()
                     if metric != 'SUM'}

    # Now get the player data and its moments (loop-invariant, so computed once)
    playerDF = getPlayerGames(aPlayer, newMetricDict, seasons)
    allPlayerMeans = getPlayerMeans(playerDF)
    allPlayerSTD = getPlayerStdev(playerDF)

    # Accumulate the sum of the various simulations
    sumSim = np.zeros(test_size)
    for eachMetric in newMetricDict:
        playerMean = allPlayerMeans.loc[aPlayer, eachMetric]
        if eachMetric == 'PTS':
            # Points are simulated as Gaussian, so they also need the stdev
            playerStd = allPlayerSTD.loc[aPlayer, eachMetric]
            metricSim = norm.rvs(playerMean, scale=playerStd, size=test_size)
        else:
            # Every other metric is simulated as Poisson around its mean
            metricSim = poisson.rvs(playerMean, size=test_size)
        sumSim = sumSim + metricSim
    p_over = len(sumSim[sumSim > playerLine]) / len(sumSim)
    return p_over

In [None]:
# Bring the previous functions all together

def resolvePropBet(list_of_players, dict_of_props):
    """Print the estimated probability and fair money line of a prop going over.

    Parameters
    ----------
    list_of_players : list of str
        Full names of the players involved in the prop.
    dict_of_props : dict
        Structure of non-sum props: {'(metric1)': (line), ...}
        Structure of sum props: {'(metric1)': 0, ..., '(metricN)': 0, 'SUM': (line)}

    Returns
    -------
    None
        The result is printed, not returned.  The per-player probabilities
        are multiplied together to obtain the probability of the whole prop.

    Raises
    ------
    ValueError
        If list_of_players is empty.
    """
    if len(list_of_players) == 0:
        raise ValueError("list_of_players must contain at least one player")

    p_of_over = 1
    if 'SUM' in dict_of_props:
        for player in list_of_players:
            # getPlayerSumProb takes (aPlayer, isHome, aMetricDict); isHome is
            # not used by the simulation, so None is passed in its place.
            p_player = getPlayerSumProb(player, None, dict_of_props)
            p_of_over = p_of_over * p_player
    else:
        prop_test = makePropDict(list_of_players, dict_of_props)
        for leg in prop_test.values():
            p_of_over = p_of_over * leg['p_over']
    print("Over ->", round(p_of_over * 100, 2), "%, or a price of", round(getMLfromProb(p_of_over), 0))
