In [1]:
import pandas as pd
import numpy as np
import random
import math
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
# Load nba_elo.csv and keep only the columns used by the simulation below.
games = pd.read_csv('nba_data/FiveThirtyEight/nba_elo.csv').filter(
    items=['date', 'season', 'playoff', 'team1', 'team2', 'score1', 'score2']
)
# Rows without a score1 value are split off as upcoming games; `games` is
# then narrowed to the rows that do have a score1.
upcoming_games = games.loc[games['score1'].isna()]
games = games.loc[games['score1'].notna()]

In [3]:
def simulate_game(teamA_mean_pts, teamA_sd_pts, teamA_mean_against, teamA_sd_against, teamB_mean_pts, teamB_sd_pts, teamB_mean_against, teamB_sd_against):
    """Simulate a single game and return team A's margin over team B.

    Each team's simulated score is the average of two Gaussian draws: one
    from that team's own points-scored distribution and one from the
    opponent's points-allowed distribution.

    Returns:
        Team A's simulated score minus team B's simulated score
        (positive means team A wins, negative means team B wins).
    """
    # The four draws are taken in a fixed order (A scored, B allowed,
    # B scored, A allowed) so results under a given random seed are stable.
    a_scored_draw = random.gauss(teamA_mean_pts, teamA_sd_pts)
    b_allowed_draw = random.gauss(teamB_mean_against, teamB_sd_against)
    b_scored_draw = random.gauss(teamB_mean_pts, teamB_sd_pts)
    a_allowed_draw = random.gauss(teamA_mean_against, teamA_sd_against)

    return (a_scored_draw + b_allowed_draw) / 2 - (b_scored_draw + a_allowed_draw) / 2

In [4]:
def _team_score_history(season_games, team):
    """Return (points_scored, points_allowed) arrays for every game `team` appears in."""
    played = season_games[(season_games.team1 == team) | (season_games.team2 == team)]
    listed_first = played.team1 == team
    # When the team is in the team1 slot its points are score1, otherwise score2.
    scored = np.where(listed_first, played.score1, played.score2)
    allowed = np.where(listed_first, played.score2, played.score1)
    return scored, allowed


def simulate_matchup(teamA, teamB, n=20000, season=None, only_regular_season=True):
    """Monte Carlo simulation of teamA vs teamB based on one season's results.

    Reads the module-level `games` DataFrame, computes each team's mean and
    population standard deviation (np.std default) of points scored and
    points allowed in the selected season, then calls `simulate_game` `n`
    times. Prints each team's win percentage.

    Args:
        teamA: Team identifier as it appears in `games.team1` / `games.team2`.
        teamB: Opposing team identifier.
        n: Number of simulated games; must be positive.
        season: Value matched against `games.season`. Defaults to the current
            calendar year, resolved at call time.
            NOTE(review): confirm the calendar year matches how the dataset
            labels seasons.
        only_regular_season: When True, only rows whose `playoff` value is
            null are used.

    Returns:
        A list of `n` simulated margins (teamA score minus teamB score).

    Raises:
        ValueError: If `n` is not positive, or if either team has no games
            in the selected data (the statistics would otherwise be NaN and
            every simulated game would silently count for neither team).
    """
    if n <= 0:
        raise ValueError("n must be a positive number of simulations, got {}".format(n))
    if season is None:
        # Resolved per call instead of once at definition time, so a
        # long-lived kernel does not keep using a stale year.
        season = datetime.now().year

    games_season = games[games.season == season]
    if only_regular_season:
        games_season = games_season[games_season.playoff.isnull()]

    teamA_scores, teamA_against = _team_score_history(games_season, teamA)
    teamB_scores, teamB_against = _team_score_history(games_season, teamB)

    for team, scores in ((teamA, teamA_scores), (teamB, teamB_scores)):
        if len(scores) == 0:
            raise ValueError(
                "no games found for team {!r} in season {!r}".format(team, season)
            )

    teamA_mean_pts = np.mean(teamA_scores)
    teamA_sd_pts = np.std(teamA_scores)
    teamA_mean_against = np.mean(teamA_against)
    teamA_sd_against = np.std(teamA_against)

    teamB_mean_pts = np.mean(teamB_scores)
    teamB_sd_pts = np.std(teamB_scores)
    teamB_mean_against = np.mean(teamB_against)
    teamB_sd_against = np.std(teamB_against)

    simulations = [
        simulate_game(
            teamA_mean_pts, teamA_sd_pts, teamA_mean_against, teamA_sd_against,
            teamB_mean_pts, teamB_sd_pts, teamB_mean_against, teamB_sd_against,
        )
        for _ in range(n)
    ]
    # A margin of exactly zero counts as a win for neither team.
    teamA_wins = sum(1 for margin in simulations if margin > 0)
    teamB_wins = sum(1 for margin in simulations if margin < 0)

    print("{}: {}%\n{}: {}%".format(teamA, (teamA_wins * 100 / n), teamB, (teamB_wins * 100 / n)))
    return simulations

In [5]:
# Simulate LAC vs LAL using the 2020 season; as the cell's last expression,
# the returned list of simulated margins is displayed below the printed odds.
simulate_matchup(teamA="LAC", teamB="LAL", season=2020)

LAC: 48.67%
LAL: 51.33%


[-12.345928108334647,
 -31.10762570869585,
 11.61907538886939,
 -23.718823589783057,
 3.9679668058334983,
 1.4798129225991943,
 5.031501015136001,
 22.25868434528867,
 0.7692409703080187,
 9.25998395111256,
 16.092913089454186,
 -19.735683314602227,
 -12.517865803076646,
 -22.939938355052632,
 7.335937931334158,
 18.575649301738025,
 12.120434425640894,
 -8.303228510690218,
 -21.02409378865731,
 -1.1432600336130747,
 -10.583127557998239,
 -13.137556725388379,
 -2.1069476847823694,
 11.354360571117596,
 -17.27130315601346,
 3.8602271537174175,
 15.118964682068679,
 -13.269564064928133,
 -12.015118076496691,
 0.7792101342337361,
 13.099092401983441,
 10.177176117985127,
 1.7098193321077275,
 -14.26979321999083,
 23.902393812825352,
 -10.733840644701672,
 -8.454709904965313,
 1.0375377723590162,
 -0.8372803365140982,
 -4.148291626851773,
 23.51706272619522,
 -13.770901001452714,
 -17.417035150487294,
 23.070965989572528,
 12.255384467535805,
 7.259350755277126,
 4.4940491372265825,
 11.95

In [33]:
# Simulate every game in `upcoming_games`. Each matchup is simulated against
# the season recorded on its own row instead of simulate_matchup's
# clock-based default, so re-running the notebook in a later calendar year
# still draws on the right season's results.
for matchup in upcoming_games.itertuples(index=False):
    print("{} vs {}".format(matchup.team1, matchup.team2))
    simulate_matchup(matchup.team1, matchup.team2, season=matchup.season)
    print("-" * 30)
print("DONE")

NOP vs UTA
NOP: 43.045%
UTA: 56.955%
------------------------------
LAL vs LAC
LAL: 51.36%
LAC: 48.64%
------------------------------
BRK vs ORL
BRK: 50.825%
ORL: 49.175%
------------------------------
WAS vs PHO
WAS: 45.995%
PHO: 54.005%
------------------------------
POR vs MEM
POR: 49.15%
MEM: 50.85%
------------------------------
MIL vs BOS
MIL: 58.7%
BOS: 41.3%
------------------------------
SAS vs SAC
SAS: 50.3%
SAC: 49.7%
------------------------------
DAL vs HOU
DAL: 53.95%
HOU: 46.05%
------------------------------
DEN vs MIA
DEN: 49.435%
MIA: 50.565%
------------------------------
OKC vs UTA
OKC: 49.085%
UTA: 50.915%
------------------------------
LAC vs NOP
LAC: 61.545%
NOP: 38.455%
------------------------------
IND vs PHI
IND: 49.96%
PHI: 50.04%
------------------------------
TOR vs LAL
TOR: 49.035%
LAL: 50.965%
------------------------------
BRK vs WAS
BRK: 54.705%
WAS: 45.295%
------------------------------
BOS vs POR
BOS: 64.26%
POR: 35.74%
-----------------------------

(30,)