This notebook builds an NBA machine learning model for the 2023 season. It uses Basketball Reference data from that season as its inputs.

# Imports

In [86]:
import pandas as pd
import numpy as np

import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler


# Mount Google Drive at /content/drive; the CSV files read below live under that path.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Notebook Presentation

In [87]:
# Show floats with a thousands separator and two decimal places in displayed frames.
pd.set_option("display.float_format", "{:,.2f}".format)

# Read in the Data

In [88]:
# Folder on the mounted Google Drive that holds the nine CSV files.
DATA_DIR = "/content/drive/MyDrive/Colab Notebooks/NBA Model"


def _load_table(file_name):
  """Read one CSV file from DATA_DIR into a DataFrame."""
  return pd.read_csv(f"{DATA_DIR}/{file_name}")


# Each team table is followed by its opponent counterpart; the advanced table has no counterpart.
TPGS = _load_table("Team stats per game.csv")
OPGS = _load_table("Opponent stats per game.csv")
TTS = _load_table("Team total stats.csv")
OTS = _load_table("Opponent total stats.csv")
TPOS = _load_table("Team per 100 poss stats.csv")
OPOS = _load_table("Opponent per 100 poss stats.csv")
TSHOT = _load_table("Team Shooting stats.csv")
OSHOT = _load_table("Opponent Shooting stats.csv")
ADV = _load_table("Team Advanced stats.csv")

In [89]:
# All nine raw tables in one list, in the order they were loaded.
all_dfs = [
    TPGS, OPGS,
    TTS, OTS,
    TPOS, OPOS,
    TSHOT, OSHOT,
    ADV,
]

In [90]:
# Preview the first five rows of the team shooting table.
TSHOT.head(n=5)

Unnamed: 0,Team,TSHOT MP,TSHOT FG%,TSHOT Dist.,TSHOT 2P %OF,TSHOT 0-3 %OF,TSHOT 3-10 %OF,TSHOT 10-16 %OF,TSHOT 16-3P %OF,TSHOT 3P %OF,...,TSHOT 2P AST,TSHOT 3P AST,TSHOT %FGA DUNK,TSHOT Md. DUNK,TSHOT %FGA LAY,TSHOT Md. LAY,TSHOT %3PA CORN,TSHOT 3P% CORN,TSHOT Att. HEAVE,TSHOT Md. HEAVE
0,Atlanta Hawks,19855,0.48,13.3,0.67,0.25,0.19,0.14,0.09,0.33,...,0.49,0.77,0.07,455,0.24,1039,0.27,0.36,8.0,0.0
1,Boston Celtics,19980,0.47,15.5,0.52,0.21,0.19,0.07,0.05,0.48,...,0.51,0.82,0.05,337,0.28,1183,0.22,0.4,12.0,0.0
2,Brooklyn Nets,19730,0.49,14.6,0.6,0.22,0.18,0.12,0.08,0.4,...,0.53,0.82,0.06,373,0.22,891,0.24,0.39,6.0,0.0
3,Chicago Bulls,19905,0.49,13.6,0.67,0.28,0.15,0.12,0.12,0.33,...,0.47,0.89,0.05,329,0.27,1163,0.26,0.4,4.0,0.0
4,Charlotte Hornets,19830,0.46,13.1,0.64,0.29,0.21,0.09,0.06,0.36,...,0.54,0.82,0.07,482,0.29,1149,0.26,0.35,10.0,0.0


# Normalizing the data for each DF


In [91]:
def normalize_df(df):
  """Min-max scale every column of `df` except "Team" to the range [0, 1].

  Each column is rescaled as (value - column min) / (column max - column min).
  A column whose values are all equal has a zero range; it is scaled to 0.0
  instead of producing NaN from a 0/0 division.

  Args:
    df: DataFrame with a "Team" column; every other column must be numeric.

  Returns:
    A new DataFrame with "Team" as the first column, followed by the scaled
    columns in their original order. `df` itself is not modified.

  Raises:
    KeyError: if `df` has no "Team" column.
  """
  features = df.drop(columns=["Team"])
  col_min = features.min()
  col_range = features.max() - col_min
  # Dividing by a zero range would turn the whole column into NaN; divide by 1
  # instead so a constant column becomes all zeros. NaN ranges are left as-is.
  safe_range = col_range.where(col_range != 0, 1)
  scaled = (features - col_min) / safe_range
  scaled["Team"] = df["Team"]
  # Put "Team" first, then the scaled columns in their original order.
  return scaled[["Team", *features.columns]]

In [92]:
# Normalize all nine raw tables; each N_* frame is the scaled version of the
# raw table in the same position of the second tuple.
(N_TPGS, N_OPGS, N_TTS, N_OTS, N_TPOS, N_OPOS, N_TSHOT, N_OSHOT, N_ADV) = map(
    normalize_df,
    (TPGS, OPGS, TTS, OTS, TPOS, OPOS, TSHOT, OSHOT, ADV),
)

In [93]:
# Show the column names of the normalized advanced-stats table.
N_ADV.columns

Index(['Team', 'ADV W', 'ADV L', 'ADV PW', 'ADV PL', 'ADV MOV', 'ADV SOS',
       'ADV SRS', 'ADV ORtg', 'ADV DRtg', 'ADV NRtg', 'ADV Pace', 'ADV FTr',
       'ADV 3PAr', 'ADV TS%', 'ADV OFF eFG%', 'ADV OFF TOV%', 'ADV OFF ORB%',
       'ADV OFF FT/FGA', 'ADV DFF eFG%', 'ADV DFF TOV%', 'ADV DFF DRB%',
       'ADV DFF FT/FGA', 'ADV Attend.', 'ADV Attend./G'],
      dtype='object')

# Creating a Score for each metric

In [94]:
# Stat suffixes that make up a box-score total, each paired with the sign it
# enters the sum with: TOV and PF are subtracted, everything else is added.
_BOX_SCORE_TERMS = [
    ("FG", 1), ("FG%", 1), ("3P", 1), ("3P%", 1), ("2P%", 1), ("FT%", 1),
    ("ORB", 1), ("DRB", 1), ("AST", 1), ("STL", 1), ("BLK", 1),
    ("TOV", -1), ("PF", -1), ("PTS", 1),
]


def _box_score_total(frame, prefix):
  """Signed row-wise sum of the "<prefix> <stat>" columns in _BOX_SCORE_TERMS.

  A NaN in any term makes that row's total NaN. Raises KeyError if one of
  the expected columns is missing from `frame`.
  """
  return sum(sign * frame[f"{prefix} {stat}"] for stat, sign in _BOX_SCORE_TERMS)


def _sum_other_columns(frame, excluded, score_column):
  """Row-wise sum of every column of `frame` that is not listed in `excluded`.

  `score_column` is left out too when it already exists, so running this cell
  again does not fold a previously computed score into the new one. NaN cells
  are skipped by the sum. Raises KeyError if a name in `excluded` is missing.
  """
  kept = frame.drop(columns=excluded)
  kept = kept.drop(columns=[score_column], errors="ignore")
  return kept.sum(axis=1)


# Per-game, totals and per-100-possession tables: team totals keep their sign,
# opponent totals are negated.
N_TPGS["TPGS Score"] = _box_score_total(N_TPGS, "TPGS")
N_OPGS["OPGS Score"] = -_box_score_total(N_OPGS, "OPGS")
N_TTS["TTS Score"] = _box_score_total(N_TTS, "TTS")
N_OTS["OTS Score"] = -_box_score_total(N_OTS, "OTS")
N_TPOS["TPOS Score"] = _box_score_total(N_TPOS, "TPOS")
N_OPOS["OPOS Score"] = -_box_score_total(N_OPOS, "OPOS")

# Shooting tables: sum every column except the excluded ones; "Team" is
# dropped by name rather than by position.
N_TSHOT["TSHOT Score"] = _sum_other_columns(
    N_TSHOT,
    ["Team", "TSHOT MP", "TSHOT Dist.", "TSHOT Att. HEAVE"],
    "TSHOT Score",
)
# NOTE(review): the team score excludes "TSHOT Att. HEAVE" but no matching
# "OSHOT Att. HEAVE" is excluded here. Confirm whether the opponent table has
# that column and whether it should be left out as well.
N_OSHOT["OSHOT Score"] = -_sum_other_columns(
    N_OSHOT,
    ["Team", "OSHOT MP", "OSHOT Dist."],
    "OSHOT Score",
)

# NOTE(review): every remaining ADV column is added with a positive sign,
# including "ADV DRtg", "ADV OFF TOV%", "ADV DFF eFG%" and "ADV DFF FT/FGA".
# If lower values of those are better for a team, they presumably should be
# subtracted instead. Confirm before relying on this score.
N_ADV["ADV Score"] = _sum_other_columns(
    N_ADV,
    ['Team', 'ADV W', 'ADV L', 'ADV PW', 'ADV PL', 'ADV MOV', 'ADV SOS', 'ADV Attend.', 'ADV Attend./G'],
    "ADV Score",
)

# Create DF with only Team and metric Score

In [98]:
def _team_and_score(frame, score_column):
  """Return a two-column frame: "Team" plus the given score column."""
  return frame.loc[:, ["Team", score_column]]


N_TPGS_M = _team_and_score(N_TPGS, "TPGS Score")
N_OPGS_M = _team_and_score(N_OPGS, "OPGS Score")
N_TTS_M = _team_and_score(N_TTS, "TTS Score")
N_OTS_M = _team_and_score(N_OTS, "OTS Score")
N_TPOS_M = _team_and_score(N_TPOS, "TPOS Score")
N_OPOS_M = _team_and_score(N_OPOS, "OPOS Score")
N_TSHOT_M = _team_and_score(N_TSHOT, "TSHOT Score")
N_OSHOT_M = _team_and_score(N_OSHOT, "OSHOT Score")
N_ADV_M = _team_and_score(N_ADV, "ADV Score")

# Merge the DFs

In [99]:
# Join the nine per-table score frames on "Team", one merge at a time.
# merge() defaults to an inner join, so only teams present in every frame remain.
df = N_TPGS_M
for score_frame in (N_OPGS_M, N_TTS_M, N_OTS_M, N_TPOS_M, N_OPOS_M, N_TSHOT_M, N_OSHOT_M, N_ADV_M):
  df = df.merge(score_frame, on="Team")

In [103]:
# Add up the per-table scores for each team. "Overall Score" itself is left out
# of the sum (when it already exists) so that re-running this cell does not add
# the previous total into the new one.
per_table_scores = df.drop(columns=["Team"]).drop(columns=["Overall Score"], errors="ignore")
df["Overall Score"] = per_table_scores.sum(axis=1)

In [108]:
# Rank the teams from highest to lowest overall score.
df.sort_values(by="Overall Score", ascending=False)

Unnamed: 0,Team,TPGS Score,OPGS Score,TTS Score,OTS Score,TPOS Score,OPOS Score,TSHOT Score,OSHOT Score,ADV Score,Overall Score
3,Boston Celtics,6.84,-2.48,6.85,-2.51,7.01,-1.91,9.98,-7.92,8.97,24.83
11,Denver Nuggets,6.28,-3.64,6.27,-3.65,6.82,-3.72,14.82,-11.2,9.26,21.25
13,Philadelphia 76ers,5.14,-2.6,5.17,-2.64,5.72,-2.73,10.94,-8.76,10.6,20.82
24,Cleveland Cavaliers,4.39,-1.21,4.36,-1.2,5.26,-1.59,10.58,-8.47,8.51,20.62
8,Milwaukee Bucks,5.84,-3.41,5.85,-3.47,5.67,-2.53,8.91,-6.04,7.84,18.66
19,Brooklyn Nets,4.85,-3.19,4.83,-3.18,5.26,-3.23,11.05,-7.7,7.91,16.59
1,Golden State Warriors,6.12,-4.79,6.14,-4.83,5.63,-3.58,11.02,-9.43,9.93,16.21
7,Memphis Grizzlies,5.5,-4.0,5.49,-4.01,5.13,-2.96,10.37,-7.64,7.83,15.71
16,Phoenix Suns,4.4,-2.4,4.4,-2.42,4.84,-2.32,9.9,-8.53,7.57,15.45
12,Minnesota Timberwolves,4.63,-4.0,4.62,-4.05,4.14,-3.18,11.73,-8.64,9.03,14.28
