# Random Forest for Team Matchups

This notebook uses a `RandomForestClassifier`-style random forest model from scikit-learn to attempt to predict the winner of an NBA game. It builds on a previous project described [here](https://towardsdatascience.com/predicting-the-outcome-of-nba-games-with-machine-learning-a810bb768f20).

## Load the data
The data we will train our model on contains team stat summaries for every regular-season game from the start of the 2015-16 season through 2/15/2024.

In [1]:
# Relative path to the CSV of per-game team stats that is read into `df` below.
# The path is relative, so it resolves against the notebook's working directory.
PATH_TO_TEAM_DATA = "../data/raw/nba_games_runtime.csv"

In [2]:
import pandas as pd

In [5]:
# Load the game data. index_col=0 makes the first CSV column the row index
# instead of a regular data column.
df = pd.read_csv(PATH_TO_TEAM_DATA, index_col=0)
# Bare expression on the last line: rendered as the cell's output.
df

Unnamed: 0,mp,mp.1,fg,fga,fg%,3p,3pa,3p%,ft,fta,...,tov%_max_opp,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won
0,240.0,240.0,37.0,96.0,0.385,12.0,29.0,0.414,20.0,26.0,...,57.1,33.8,258.0,121.0,ATL,94,1,2016,2015-10-27,True
1,240.0,240.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,...,33.3,23.6,132.0,104.0,DET,106,0,2016,2015-10-27,False
2,240.0,240.0,38.0,94.0,0.404,9.0,29.0,0.310,10.0,17.0,...,53.2,34.6,162.0,104.0,CHI,97,1,2016,2015-10-27,False
3,240.0,240.0,37.0,87.0,0.425,7.0,19.0,0.368,16.0,23.0,...,30.4,29.0,138.0,105.0,CLE,95,0,2016,2015-10-27,True
4,240.0,240.0,35.0,83.0,0.422,6.0,18.0,0.333,19.0,27.0,...,69.4,43.7,206.0,104.0,GSW,111,1,2016,2015-10-27,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22039,240.0,240.0,44.0,83.0,0.530,13.0,26.0,0.500,12.0,19.0,...,40.0,34.5,170.0,121.0,MIL,110,0,2024,2024-02-15,True
22040,240.0,240.0,46.0,89.0,0.517,11.0,30.0,0.367,25.0,27.0,...,25.8,31.5,200.0,137.0,POR,91,1,2024,2024-02-15,True
22041,240.0,240.0,31.0,83.0,0.373,8.0,29.0,0.276,21.0,30.0,...,33.3,33.0,181.0,101.0,MIN,128,0,2024,2024-02-15,False
22042,240.0,240.0,49.0,92.0,0.533,20.0,42.0,0.476,22.0,28.0,...,66.7,30.8,175.0,146.0,UTA,137,1,2024,2024-02-15,True


In [9]:
# List every column name so we can see which features are available.
feature_names = list(df.columns)
print(feature_names)

['mp', 'mp.1', 'fg', 'fga', 'fg%', '3p', '3pa', '3p%', 'ft', 'fta', 'ft%', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', '+/-', 'ts%', 'efg%', '3par', 'ftr', 'orb%', 'drb%', 'trb%', 'ast%', 'stl%', 'blk%', 'tov%', 'usg%', 'ortg', 'drtg', 'mp_max', 'mp_max.1', 'fg_max', 'fga_max', 'fg%_max', '3p_max', '3pa_max', '3p%_max', 'ft_max', 'fta_max', 'ft%_max', 'orb_max', 'drb_max', 'trb_max', 'ast_max', 'stl_max', 'blk_max', 'tov_max', 'pf_max', 'pts_max', '+/-_max', 'ts%_max', 'efg%_max', '3par_max', 'ftr_max', 'orb%_max', 'drb%_max', 'trb%_max', 'ast%_max', 'stl%_max', 'blk%_max', 'tov%_max', 'usg%_max', 'ortg_max', 'drtg_max', 'team', 'total', 'home', 'index_opp', 'mp_opp', 'mp_opp.1', 'fg_opp', 'fga_opp', 'fg%_opp', '3p_opp', '3pa_opp', '3p%_opp', 'ft_opp', 'fta_opp', 'ft%_opp', 'orb_opp', 'drb_opp', 'trb_opp', 'ast_opp', 'stl_opp', 'blk_opp', 'tov_opp', 'pf_opp', 'pts_opp', '+/-_opp', 'ts%_opp', 'efg%_opp', '3par_opp', 'ftr_opp', 'orb%_opp', 'drb%_opp', 'trb%_opp', 'ast%_

## Data Cleaning and Feature Engineering

In [10]:
# Remove extraneous columns.
# "mp.1" / "mp_opp.1" appear to be duplicates of "mp" / "mp_opp" (pandas adds a
# ".1" suffix to repeated CSV headers), and "index_opp" looks like a leftover
# index column -- TODO confirm against the data source.
#
# DataFrame.drop returns a new frame rather than mutating in place, and
# errors="ignore" skips columns that are already gone, so this cell can be
# re-run safely; repeated `del df[...]` statements raise KeyError on a second
# execution.
df = df.drop(columns=["mp.1", "mp_opp.1", "index_opp"], errors="ignore")
df

Unnamed: 0,mp,fg,fga,fg%,3p,3pa,3p%,ft,fta,ft%,...,tov%_max_opp,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won
0,240.0,37.0,96.0,0.385,12.0,29.0,0.414,20.0,26.0,0.769,...,57.1,33.8,258.0,121.0,ATL,94,1,2016,2015-10-27,True
1,240.0,37.0,82.0,0.451,8.0,27.0,0.296,12.0,15.0,0.800,...,33.3,23.6,132.0,104.0,DET,106,0,2016,2015-10-27,False
2,240.0,38.0,94.0,0.404,9.0,29.0,0.310,10.0,17.0,0.588,...,53.2,34.6,162.0,104.0,CHI,97,1,2016,2015-10-27,False
3,240.0,37.0,87.0,0.425,7.0,19.0,0.368,16.0,23.0,0.696,...,30.4,29.0,138.0,105.0,CLE,95,0,2016,2015-10-27,True
4,240.0,35.0,83.0,0.422,6.0,18.0,0.333,19.0,27.0,0.704,...,69.4,43.7,206.0,104.0,GSW,111,1,2016,2015-10-27,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22039,240.0,44.0,83.0,0.530,13.0,26.0,0.500,12.0,19.0,0.632,...,40.0,34.5,170.0,121.0,MIL,110,0,2024,2024-02-15,True
22040,240.0,46.0,89.0,0.517,11.0,30.0,0.367,25.0,27.0,0.926,...,25.8,31.5,200.0,137.0,POR,91,1,2024,2024-02-15,True
22041,240.0,31.0,83.0,0.373,8.0,29.0,0.276,21.0,30.0,0.700,...,33.3,33.0,181.0,101.0,MIN,128,0,2024,2024-02-15,False
22042,240.0,49.0,92.0,0.533,20.0,42.0,0.476,22.0,28.0,0.786,...,66.7,30.8,175.0,146.0,UTA,137,1,2024,2024-02-15,True
