# NBA Score Predictions Pipeline

### Necessary Imports

In [39]:
import requests
from bs4 import BeautifulSoup

import pandas as pd
pd.set_option('display.max_columns', None)

import random as rnd
import statistics

### Lists/Dicts for Conferences, Teams, Abbrs, Names

In [40]:
# Conference membership keyed by team abbreviation (15 WEST / 15 EAST).
# New Orleans (NOP) plays in the Western Conference.
conferences = {'GSW':'WEST', 'POR':'WEST', 'SAC':'WEST', 'UTA':'WEST', 'MIA':'EAST', 'DEN':'WEST', 'MIN':'WEST',
               'PHI':'EAST', 'NOP':'WEST', 'ORL':'EAST', 'MIL':'EAST', 'CHI':'EAST','DET':'EAST', 'TOR':'EAST',
               'PHX':'WEST', 'LAL':'WEST', 'ATL':'EAST', 'WAS':'EAST', 'MEM':'WEST', 'CLE':'EAST', 'LAC':'WEST',
               'BOS':'EAST', 'NYK':'EAST', 'IND':'EAST', 'CHA':'EAST', 'SAS':'WEST', 'HOU':'WEST', 'DAL':'WEST',
               'OKC':'WEST', 'BKN':'EAST'}

# The three lists below are index-aligned: position i of each list describes
# the same team (full name, nickname without city, abbreviation). The final
# entry is a pseudo-team for league-wide rows.
team_names =     ['Utah Jazz', 'Sacramento Kings', 'Washington Wizards',
                  'Boston Celtics', 'Milwaukee Bucks', 'Oklahoma City Thunder',
                  'Chicago Bulls', 'Phoenix Suns', 'Philadelphia 76ers',
                  'New Orleans Pelicans', 'Charlotte Hornets', 'Los Angeles Lakers',
                  'Indiana Pacers', 'Toronto Raptors', 'Cleveland Cavaliers',
                  'Denver Nuggets', 'Minnesota Timberwolves', 'Brooklyn Nets',
                  'San Antonio Spurs', 'Dallas Mavericks', 'Houston Rockets',
                  'Detroit Pistons', 'Portland Trail Blazers', 'Atlanta Hawks',
                  'Golden State Warriors', 'Miami Heat', 'Los Angeles Clippers',
                  'New York Knicks', 'Memphis Grizzlies', 'Orlando Magic', 'League Average']

team_names_nocity = ['Jazz', 'Kings', 'Wizards', 'Celtics', 'Bucks', 'Thunder',
                  'Bulls', 'Suns', '76ers', 'Pelicans', 'Hornets', 'Lakers',
                  'Pacers', 'Raptors', 'Cavaliers', 'Nuggets', 'Timberwolves', 'Nets',
                  'Spurs', 'Mavericks', 'Rockets', 'Pistons', 'Trail Blazers', 'Hawks',
                  'Warriors', 'Heat', 'Clippers','Knicks', 'Grizzlies', 'Magic', 'League Average']

team_abbrs =     ['UTA', 'SAC', 'WAS', 'BOS', 'MIL', 'OKC', 'CHI', 'PHX', 'PHI',
                  'NOP', 'CHA', 'LAL', 'IND', 'TOR', 'CLE', 'DEN', 'MIN', 'BKN',
                  'SAS', 'DAL', 'HOU', 'DET', 'POR', 'ATL', 'GSW', 'MIA', 'LAC',
                  'NYK', 'MEM', 'ORL', 'NBA']

# zip() silently truncates to the shortest input, so fail loudly if a list
# is ever edited without its siblings.
assert len(team_names) == len(team_names_nocity) == len(team_abbrs), \
    "team name/abbreviation lists must stay index-aligned"

# Lookup tables derived from the aligned lists.
team_name_abbr_dict = dict(zip(team_names, team_abbrs))            # 'Utah Jazz' -> 'UTA'
team_abbr_name_dict = dict(zip(team_abbrs, team_names))            # 'UTA' -> 'Utah Jazz'
team_names_nocity_dict = dict(zip(team_names_nocity, team_abbrs))  # 'Jazz' -> 'UTA'
team_id_dict = {}  # left empty here; nothing in this section populates it

### Webscrape 2022-2023 NBA Boxscore Data

In [41]:
# Team-level game log for the 2022-23 regular season from stats.nba.com.
url = 'https://stats.nba.com/stats/leaguegamelog'
# NOTE(review): browser-like User-Agent and Referer are presumably needed for
# the endpoint to answer scripted requests -- confirm before removing.
headers= {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
          'Referer': 'https://www.nba.com/'}
payload = {
    'Counter': '1000',
    'DateFrom': '',
    'DateTo': '',
    'Direction': 'DESC',
    'LeagueID': '00',
    'PlayerOrTeam': 'T',
    'Season': '2022-23',
    'SeasonType': 'Regular Season',
    'Sorter': 'DATE'}

# Without a timeout requests waits indefinitely on a stalled connection, and
# without raise_for_status() an HTTP error page only shows up later as a
# confusing JSON decoding failure.
response = requests.get(url, headers=headers, params=payload, timeout=30)
response.raise_for_status()
jsonData = response.json()

# The first result set carries the table: column names plus one row per
# team per game.
result_set = jsonData['resultSets'][0]
rows = result_set['rowSet']
columns = result_set['headers']

df = pd.DataFrame(rows, columns=columns)
df = df.drop(columns=['VIDEO_AVAILABLE'])

# Two-point field goals = all field goals minus three-pointers.
df['FG2M'] = df.FGM - df.FG3M
df['FG2A'] = df.FGA - df.FG3A

# Points scored from each shot type.
df['FG2_PTS'] = df.FG2M * 2
df['FG3_PTS'] = df.FG3M * 3

# MATCHUP looks like 'OKC @ HOU' or 'HOU vs. OKC'; the opponent is the
# trailing three characters.
df['OPP_TEAM_ABBR'] = df['MATCHUP'].str.strip().str[-3:]
# PLUS_MINUS is this team's margin, so the opponent scored PTS - PLUS_MINUS.
df['OPP_PTS'] = df['PTS'] - df['PLUS_MINUS']

# Parse GAME_DATE once and reuse it for both calendar columns.
game_dates = pd.DatetimeIndex(df['GAME_DATE'])
df['MONTH'] = game_dates.month
df['YEAR'] = game_dates.year
def home_or_away(string):
    """Classify a MATCHUP string as 'AWAY' or 'HOME'.

    Only the character at index 4 is inspected: '@' (as in 'OKC @ HOU')
    means 'AWAY' and 'v' (as in 'HOU vs. OKC') means 'HOME'. Any other
    character yields None. Strings shorter than five characters raise
    IndexError.
    """
    venue_by_marker = {'@': 'AWAY', 'v': 'HOME'}
    return venue_by_marker.get(string[4])
    
# Venue flag ('HOME' / 'AWAY') derived from the MATCHUP text.
df['HOME_AWAY'] = df['MATCHUP'].map(home_or_away)

# Conference of each side; abbreviations missing from `conferences` map to None.
df['CONFERENCE'] = df['TEAM_ABBREVIATION'].map(conferences.get)
df['OPP_CONFERENCE'] = df['OPP_TEAM_ABBR'].map(conferences.get)

# Short label such as '02-01 @ HOU': GAME_DATE without its first five
# characters (the 'YYYY-' prefix) plus MATCHUP without its first four
# characters (the team's own abbreviation and the following space).
df['DATE_MATCHUP'] = df['GAME_DATE'].str[5:] + ' ' + df['MATCHUP'].str[4:]

# Use the full team name from the lookup table. Bracket assignment always
# writes a column; attribute-style assignment (df.TEAM_NAME = ...) would set
# a plain Python attribute instead if the column were ever missing.
df['TEAM_NAME'] = df['TEAM_ABBREVIATION'].map(team_abbr_name_dict.get)

df.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,FG2M,FG2A,FG2_PTS,FG3_PTS,OPP_TEAM_ABBR,OPP_PTS,MONTH,YEAR,HOME_AWAY,CONFERENCE,OPP_CONFERENCE,DATE_MATCHUP
0,22022,1610612760,OKC,Oklahoma City Thunder,22200773,2023-02-01,OKC @ HOU,,60,12,32,0.375,4,13,0.308,6,7,0.857,8,5,13,4,1,1,2,7,34,-6,8,19,16,12,HOU,40,2,2023,AWAY,WEST,WEST,02-01 @ HOU
1,22022,1610612745,HOU,Houston Rockets,22200773,2023-02-01,HOU vs. OKC,,60,15,28,0.536,1,7,0.143,9,9,1.0,6,12,18,6,2,2,3,5,40,6,14,21,28,3,OKC,34,2,2023,HOME,WEST,WEST,02-01 vs. OKC
2,22022,1610612758,SAC,Sacramento Kings,22200775,2023-02-01,SAC @ SAS,,60,15,29,0.517,2,11,0.182,5,5,1.0,2,12,14,9,2,2,3,3,37,0,13,18,26,6,SAS,37,2,2023,AWAY,WEST,WEST,02-01 @ SAS
3,22022,1610612759,SAS,San Antonio Spurs,22200775,2023-02-01,SAS vs. SAC,,60,17,30,0.567,2,7,0.286,1,1,1.0,1,10,11,12,1,0,4,5,37,0,15,23,30,6,SAC,37,2,2023,HOME,WEST,WEST,02-01 vs. SAC
4,22022,1610612750,MIN,Minnesota Timberwolves,22200774,2023-02-01,MIN vs. GSW,,60,16,31,0.516,5,16,0.313,3,4,0.75,3,12,15,10,3,2,2,5,40,3,11,15,22,15,GSW,37,2,2023,HOME,WEST,WEST,02-01 vs. GSW


### Function that web-scrapes and displays today's NBA matchups

In [42]:
def today_matchups():
    """Scrape ESPN's NBA scoreboard page and display the listed matchups.

    Prints the page title and the scoreboard date, then displays a DataFrame
    with one row per game and the columns 'Date', 'away_tm' and 'home_tm'.
    Within each game row, the first team cell is treated as the away team
    and the second as the home team. Returns None.

    Raises:
        requests.HTTPError: if ESPN answers with an error status.
        requests.Timeout: if the page does not respond within the timeout.
    """
    url = "https://www.espn.com/nba/scoreboard"
    # A timeout avoids hanging the kernel on a stalled connection, and
    # raise_for_status() stops an error page from being parsed as a scoreboard.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    date = soup.find('h3', class_="Card__Header__Title Card__Header__Title--no-theme").text

    games = soup.find_all("div", class_='Scoreboard__RowContainer flex flex-column flex-auto')
    team_cell_class = "ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName truncate db"
    games_data = []

    for game in games:
        # Look the team cells up once per game instead of once per team.
        team_cells = game.find_all("div", class_=team_cell_class)
        if len(team_cells) < 2:
            # Row without two team names; skip it rather than raise IndexError.
            continue
        away_tm, home_tm = team_cells[0], team_cells[1]

        games_data.append({
            'Date': date,
            "away_tm": away_tm.text,
            "home_tm": home_tm.text,
        })

    # Explicit columns keep the headers visible even when no games are listed.
    df = pd.DataFrame(games_data, columns=['Date', 'away_tm', 'home_tm'])

    print(soup.title.text)
    print(date)
    display(df)

today_matchups()

NBA Basketball Scores - NBA Scoreboard - ESPN
Wednesday, February 1, 2023


Unnamed: 0,Date,away_tm,home_tm
0,"Wednesday, February 1, 2023",Magic,76ers
1,"Wednesday, February 1, 2023",Trail Blazers,Grizzlies
2,"Wednesday, February 1, 2023",Nets,Celtics
3,"Wednesday, February 1, 2023",Thunder,Rockets
4,"Wednesday, February 1, 2023",Warriors,Timberwolves
5,"Wednesday, February 1, 2023",Kings,Spurs
6,"Wednesday, February 1, 2023",Raptors,Jazz
7,"Wednesday, February 1, 2023",Hawks,Suns
8,"Wednesday, February 1, 2023",Wizards,Pistons


### Function that simulates a matchup a specified number of times

In [43]:
def multi_game_simulations(n, team1abbr, team2abbr, games=None):
    """Simulate a matchup ``n`` times and summarise the results.

    Each simulated score is the average of two Gaussian draws: one from the
    team's own points-scored distribution (mean/std of ``PTS``) and one from
    the opponent's points-allowed distribution (mean/std of ``OPP_PTS``).
    The average is rounded to the nearest integer.

    Args:
        n: Number of games to simulate; must be at least 1.
        team1abbr: Abbreviation of the first team, matched against
            ``TEAM_ABBREVIATION``.
        team2abbr: Abbreviation of the second team.
        games: Optional DataFrame with ``TEAM_ABBREVIATION``, ``PTS`` and
            ``OPP_PTS`` columns. Defaults to the notebook-level ``df``.

    Returns:
        Tuple ``(team1WinPer, team2WinPer, team1MeanSc, team2MeanSc)``.
        Tied simulations count toward neither win share, so the two shares
        can sum to less than 1.

    Raises:
        ValueError: if ``n`` is below 1, or if a team has too few rows in
            the data to compute a mean and standard deviation.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    data = df if games is None else games

    def scoring_profile(abbr):
        # Mean/std of points scored and points allowed for one team.
        team_rows = data[data.TEAM_ABBREVIATION == abbr]
        profile = (float(team_rows.PTS.mean()), float(team_rows.PTS.std()),
                   float(team_rows.OPP_PTS.mean()), float(team_rows.OPP_PTS.std()))
        if any(pd.isna(value) for value in profile):
            raise ValueError(f"not enough games for {abbr!r} to estimate mean and std")
        return profile

    # These statistics do not change between simulations, so compute them
    # once instead of re-filtering the DataFrame for every simulated game.
    t1_pts_mu, t1_pts_sd, t1_opp_mu, t1_opp_sd = scoring_profile(team1abbr)
    t2_pts_mu, t2_pts_sd, t2_opp_mu, t2_opp_sd = scoring_profile(team2abbr)

    def gauss_game_sim():
        # Average the two draws BEFORE rounding; rounding the sum and then
        # truncating the half would bias every score downward.
        team1Score = int(round((rnd.gauss(t1_pts_mu, t1_pts_sd)
                                + rnd.gauss(t2_opp_mu, t2_opp_sd)) / 2))
        team2Score = int(round((rnd.gauss(t2_pts_mu, t2_pts_sd)
                                + rnd.gauss(t1_opp_mu, t1_opp_sd)) / 2))

        if team1Score > team2Score:
            return 1, team1Score, team2Score
        elif team1Score < team2Score:
            return -1, team1Score, team2Score
        else:
            return 0, team1Score, team2Score

    team1Wins = 0
    team2Wins = 0
    team1scores = []
    team2scores = []
    for _ in range(n):
        gm, tm1sc, tm2sc = gauss_game_sim()
        team1scores.append(tm1sc)
        team2scores.append(tm2sc)
        if gm == 1:
            team1Wins += 1
        elif gm == -1:
            team2Wins += 1
        # gm == 0 is a tie and is credited to neither team.

    team1WinPer = team1Wins / n
    team2WinPer = team2Wins / n

    team1MeanSc = statistics.mean(team1scores)
    team2MeanSc = statistics.mean(team2scores)

    return team1WinPer, team2WinPer, team1MeanSc, team2MeanSc

### Today's matchups with n-simulation win% and mean score for each matchup

In [50]:
def today_multi_preds(n):
    """Display today's ESPN matchups with simulated win shares and mean scores.

    Scrapes ESPN's NBA scoreboard, runs ``multi_game_simulations`` with ``n``
    simulations for every listed game, and displays one row per game with
    the columns 'Date', 'away_tm', 'away_sim_W%', 'away_mean_sc', 'home_tm',
    'home_sim_W%' and 'home_mean_sc'. The two win-share columns are shaded
    with a red-to-green gradient. Within each game row, the first team cell
    is treated as the away team and the second as the home team.
    Returns None.

    Args:
        n: Number of simulations to run per matchup.

    Raises:
        requests.HTTPError: if ESPN answers with an error status.
        requests.Timeout: if the page does not respond within the timeout.
    """
    url = "https://www.espn.com/nba/scoreboard"
    # A timeout avoids hanging the kernel on a stalled connection, and
    # raise_for_status() stops an error page from being parsed as a scoreboard.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    date = soup.find('h3', class_="Card__Header__Title Card__Header__Title--no-theme").text

    games = soup.find_all("div", class_='Scoreboard__RowContainer flex flex-column flex-auto')
    team_cell_class = "ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName truncate db"
    games_data = []

    for game in games:
        # Look the team cells up once per game instead of once per team.
        team_cells = game.find_all("div", class_=team_cell_class)
        if len(team_cells) < 2:
            # Row without two team names; skip it rather than raise IndexError.
            continue
        away_tm, home_tm = team_cells[0], team_cells[1]

        away_abbr = team_names_nocity_dict.get(away_tm.text)
        home_abbr = team_names_nocity_dict.get(home_tm.text)
        if away_abbr is None or home_abbr is None:
            # A name the lookup table does not know would otherwise abort
            # the whole table with a KeyError.
            print(f'Skipping {away_tm.text} @ {home_tm.text}: unknown team name')
            continue

        away_sim_Wper, home_sim_Wper, away_mn_sc, home_mn_sc = multi_game_simulations(n, away_abbr, home_abbr)

        games_data.append({
            'Date': date,
            "away_tm": away_tm.text,
            "away_sim_W%": away_sim_Wper,
            "away_mean_sc": away_mn_sc,
            "home_tm": home_tm.text,
            "home_sim_W%": home_sim_Wper,
            "home_mean_sc": home_mn_sc,
        })

    df = pd.DataFrame(games_data)

    print(soup.title.text)
    print(date)
    print(f'Through {n} simulations')

    if df.empty:
        # No rows means no win-share columns, so the styling below would
        # raise a KeyError on the missing subset.
        display(df)
    else:
        styled = df.style.background_gradient(subset=['away_sim_W%','home_sim_W%'], cmap='RdYlGn')
        display(styled)

today_multi_preds(100)

NBA Basketball Scores - NBA Scoreboard - ESPN
Wednesday, February 1, 2023
Through 100 simulations


Unnamed: 0,Date,away_tm,away_sim_W%,away_mean_sc,home_tm,home_sim_W%,home_mean_sc
0,"Wednesday, February 1, 2023",Trail Blazers,0.47,112.93,Grizzlies,0.48,113.78
1,"Wednesday, February 1, 2023",Nets,0.4,112.24,Celtics,0.59,114.68
2,"Wednesday, February 1, 2023",Thunder,0.53,113.08,Rockets,0.46,111.72
3,"Wednesday, February 1, 2023",Warriors,0.49,115.92,Timberwolves,0.5,115.63
4,"Wednesday, February 1, 2023",Kings,0.66,120.08,Spurs,0.33,112.68
5,"Wednesday, February 1, 2023",Raptors,0.48,114.45,Jazz,0.48,114.43
6,"Wednesday, February 1, 2023",Hawks,0.46,113.67,Suns,0.5,115.27
7,"Wednesday, February 1, 2023",Magic,0.37,109.74,76ers,0.61,113.11
8,"Wednesday, February 1, 2023",Wizards,0.65,117.93,Pistons,0.32,111.54
