# NBA Game Prediction Model ⛹
Authors:
* Parth Shukla
* Matthew Saxsma

Date: 04-01-2025

***
## __Libraries__

In [64]:
import pandas as pd
import os
import numpy as np
import pandas as pda

import torch
from torchvision import datasets 
from torchvision import transforms
import matplotlib.pyplot as plt


import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import warnings
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# __Path__

In [65]:
# Notbook path
notebook_directory = os.getcwd()
print(f'Current notebook directory: {notebook_directory}')

Current notebook directory: c:\Users\Matth\Desktop\repos\CollabProject


## __Dataset 1__: `NBA-BoxScores-2024-2025.csv`
* 736 NBA player-games from the 2024-2025 season.
*  Contains 440 players.

In [66]:
box_score_data_path = os.path.join(notebook_directory,'data','NBA-BoxScores-2024-2025.csv')
df_boxscores = pd.read_csv(box_score_data_path)
np.shape(df_boxscores)

(736, 30)

In [67]:
df_boxscores.sample(10) # looks like this consists of player-games of the 2024 to 2025 NBA seasons

Unnamed: 0.1,Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,...,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
498,12,22400078,1610612755,PHI,Philadelphia,1630288,Jeff Dowtin Jr.,Jeff,,DNP - Coach's Decision,...,,,,,,,,,,
676,23,22400084,1610612762,UTA,Utah,201988,Patty Mills,Patty,,,...,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,7.0,-4.0
100,18,22400071,1610612746,LAC,LA,1630539,Kai Jones,Kai,,,...,2.0,2.0,4.0,0.0,0.0,0.0,0.0,2.0,6.0,-5.0
80,27,22400072,1610612757,POR,Portland,1629680,Matisse Thybulle,Matisse,,DND - Injury/Illness,...,,,,,,,,,,
639,16,22400083,1610612749,MIL,Milwaukee,203507,Giannis Antetokounmpo,Giannis,F,,...,1.0,10.0,11.0,5.0,0.0,0.0,3.0,1.0,38.0,-12.0
202,11,22400066,1610612749,MIL,Milwaukee,1631157,Ryan Rollins,Ryan,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
419,22,22400074,1610612742,DAL,Dallas,1628467,Maxi Kleber,Maxi,,,...,0.0,4.0,4.0,1.0,0.0,1.0,0.0,1.0,3.0,18.0
615,19,22400082,1610612745,HOU,Houston,1642263,Reed Sheppard,Reed,,,...,0.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0,6.0,3.0
640,17,22400083,1610612749,MIL,Milwaukee,201572,Brook Lopez,Brook,C,,...,0.0,4.0,4.0,1.0,0.0,3.0,2.0,3.0,7.0,-9.0
587,18,22400081,1610612752,NYK,New York,1628969,Mikal Bridges,Mikal,G,,...,1.0,0.0,1.0,5.0,1.0,0.0,1.0,4.0,21.0,13.0


# __Dataset 2__: `2025teamstats.csv`
* NBA team stats for the 2024-2025 season.

In [68]:
team_stats2_data_path = os.path.join(notebook_directory,'data','2025teamstats.csv')
df_teamstats2025 = pd.read_csv(team_stats2_data_path)
np.shape(df_teamstats2025)

(31, 25)

In [69]:
df_teamstats2025.sample(10)

Unnamed: 0,Rk,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
27,28.0,Charlotte Hornets,55,241.4,38.3,89.8,0.427,13.5,39.5,0.343,...,0.781,12.6,32.9,45.5,23.8,8.1,4.6,15.3,19.4,106.0
9,10.0,Chicago Bulls,57,240.9,42.4,91.9,0.462,15.9,43.2,0.368,...,0.802,9.8,35.5,45.3,28.7,7.5,4.8,14.9,17.8,115.9
15,16.0,Houston Rockets,57,241.3,41.6,93.2,0.447,12.2,35.5,0.343,...,0.747,14.5,33.4,47.9,22.8,8.7,5.1,13.8,19.8,112.9
2,3.0,Denver Nuggets,57,241.8,45.7,90.0,0.508,12.0,31.6,0.381,...,0.776,11.3,34.6,45.9,31.2,8.2,4.7,14.3,17.5,121.3
6,7.0,Atlanta Hawks,57,241.8,42.6,92.1,0.462,13.1,37.5,0.35,...,0.769,11.9,33.1,45.0,29.3,10.0,5.2,15.8,18.9,117.0
0,1.0,Cleveland Cavaliers,57,240.0,45.1,90.6,0.498,16.1,41.2,0.391,...,0.783,10.8,34.4,45.2,29.1,8.4,4.4,13.2,18.6,122.9
22,23.0,New Orleans Pelicans,57,241.3,40.4,90.4,0.447,12.3,35.2,0.349,...,0.766,12.0,31.4,43.4,25.3,9.1,5.5,14.3,18.3,110.5
25,26.0,Philadelphia 76ers,56,241.8,39.2,86.1,0.455,12.6,36.4,0.345,...,0.79,9.9,29.2,39.2,23.0,9.3,4.1,13.4,19.6,108.9
3,4.0,Oklahoma City Thunder,56,240.0,44.0,92.4,0.476,14.1,38.8,0.363,...,0.823,11.0,33.6,44.5,26.5,10.9,5.7,12.0,20.4,118.3
29,30.0,Orlando Magic,59,240.8,37.8,85.9,0.44,10.9,35.8,0.305,...,0.762,11.4,30.7,42.1,23.0,9.0,6.2,14.6,20.5,104.3


# __Dataset 3__: `TeamStatistics.csv`
* Data from all NBA games.
* From 1946.

In [70]:
# dataset - https://www.kaggle.com/datasets/eoinamoore/historical-nba-data-and-player-box-scores/
games_big_data_path = os.path.join(notebook_directory,'data','TeamStatistics.csv')
df_games_big = pd.read_csv(games_big_data_path)
np.shape(df_games_big)

(142836, 48)

In [71]:
df_games_big.sample(7) # quick look at the data

Unnamed: 0,gameId,gameDate,teamCity,teamName,teamId,opponentTeamCity,opponentTeamName,opponentTeamId,home,win,...,leadChanges,pointsFastBreak,pointsFromTurnovers,pointsInThePaint,pointsSecondChance,timesTied,timeoutsRemaining,seasonWins,seasonLosses,coachId
138527,25700008,1957-10-26 19:00:00,Detroit,Pistons,1610612765,Philadelphia,Warriors,1610612744,1,0,...,,,,,,,,,,
74272,29600905,1997-03-12 22:30:00,Golden State,Warriors,1610612744,Los Angeles,Lakers,1610612747,0,0,...,,,,,,,,,,
30877,21300423,2013-12-26 19:00:00,Cleveland,Cavaliers,1610612739,Atlanta,Hawks,1610612737,1,0,...,,,,,,,,,,
48928,20601013,2007-03-21 22:00:00,Minnesota,Timberwolves,1610612750,Sacramento,Kings,1610612758,0,1,...,,,,,,,,,,
31968,41200312,2013-05-21 21:00:00,San Antonio,Spurs,1610612759,Memphis,Grizzlies,1610612763,1,1,...,,,,,,,,,,
9325,22100355,2021-12-06 19:00:00,Washington,Wizards,1610612764,Indiana,Pacers,1610612754,0,0,...,,,,,,,,,,
141183,25200193,1953-01-17 19:00:00,New York,Knicks,1610612752,Philadelphia,Warriors,1610612744,1,1,...,,,,,,,,,,


In [72]:
df_games_big.coachId.unique() # nothing in coach id column. We'll get rid of it.
df_games_big = df_games_big.drop(columns=['coachId'])

In [73]:
# checking what is redundant across rows of same game
df_games_big.loc[2240:2241,].T # we should check for NaNs

Unnamed: 0,2240,2241
gameId,22301095,22301095
gameDate,2024-04-02 19:00:00,2024-04-02 19:00:00
teamCity,Milwaukee,Washington
teamName,Bucks,Wizards
teamId,1610612749,1610612764
opponentTeamCity,Washington,Milwaukee
opponentTeamName,Wizards,Bucks
opponentTeamId,1610612764,1610612749
home,0,1
win,0,1


Missingness of df_teams

In [74]:
# missing percentage per column
cols_without_missing = []
for col in df_games_big.columns:
    missing_percent = df_games_big[col].isna().sum()/len(df_games_big)
    if missing_percent > 0.30: # if more than 30% of the column is missing, we want to drop it
        cols_without_missing.append(False)
    else: 
        cols_without_missing.append(True)

# removing the columns with too many missing values
df_games_big = df_games_big[df_games_big.columns[cols_without_missing]]

# __Dataset 4__: `Games.csv`
* This data also contains information on every NBA game. 
* Each row is a game, which specifies the home and away teams
* This is different than the previous dataset, which has two rows for each game.

In [75]:
games_small_data_path = os.path.join(notebook_directory,'data','Games.csv')
df_games_small = pd.read_csv(games_small_data_path)
np.shape(df_games_small)

  df_games_small = pd.read_csv(games_small_data_path)


(71418, 17)

In [76]:
df_games_small.sample(7)

Unnamed: 0,gameId,gameDate,hometeamCity,hometeamName,hometeamId,awayteamCity,awayteamName,awayteamId,homeScore,awayScore,winner,gameType,attendance,arenaId,gameLabel,gameSubLabel,seriesGameNumber
52500,28300042,1983-11-04 20:00:00,Philadelphia,76ers,1610612755,Milwaukee,Bucks,1610612749,94,104,1610612749,Regular Season,13245.0,12,,,
23778,20700297,2007-12-09 18:00:00,Portland,Trail Blazers,1610612757,Milwaukee,Bucks,1610612749,117,113,1610612757,Regular Season,18317.0,51,,,
56683,27800740,1979-03-11 20:00:00,Indiana,Pacers,1610612754,Phoenix,Suns,1610612756,112,111,1610612754,Regular Season,14173.0,33,,,
34423,29900307,1999-12-14 22:00:00,Seattle,SuperSonics,1610612760,Denver,Nuggets,1610612743,109,84,1610612760,Regular Season,11879.0,72,,,
58357,27700015,1977-10-21 20:00:00,Seattle,SuperSonics,1610612760,Portland,Trail Blazers,1610612757,99,106,1610612757,Regular Season,12156.0,72,,,
52979,28200557,1983-02-06 20:00:00,Seattle,SuperSonics,1610612760,Philadelphia,76ers,1610612755,96,97,1610612755,Regular Season,22217.0,72,,,
7831,21801023,2019-03-14 20:00:00,Toronto,Raptors,1610612761,Los Angeles,Lakers,1610612747,111,98,1610612761,Regular Season,19962.0,643,,,


# Consolidating Datasets 3 and 4
* Dataset 3: `TeamStatistics.csv` has 142,836 rows, 2 per game
* Dataset 4: `Games.csv` has 71,418 rows, 1 per game

In [77]:
# both datasets with games have a game ID and a game date
print(f"Shared columns:\n\n\t{df_games_small.columns[df_games_small.columns.isin(df_games_big.columns)]}\n\n") 

# columns of df_games_big NOT in df_games
print(f"Columns of big df not in smaller:\n\n\t{df_games_big.columns[~df_games_big.columns.isin(df_games_small.columns)]}\n\n")

 # columns of df_games NOT in df_games_big
print(f"Columns of small df not in big:\n\n\t{df_games_small.columns[~df_games_small.columns.isin(df_games_big.columns)]}\n\n")

Shared columns:

	Index(['gameId', 'gameDate'], dtype='object')


Columns of big df not in smaller:

	Index(['teamCity', 'teamName', 'teamId', 'opponentTeamCity',
       'opponentTeamName', 'opponentTeamId', 'home', 'win', 'teamScore',
       'opponentScore', 'assists', 'blocks', 'steals', 'fieldGoalsAttempted',
       'fieldGoalsMade', 'fieldGoalsPercentage', 'threePointersAttempted',
       'threePointersMade', 'threePointersPercentage', 'freeThrowsAttempted',
       'freeThrowsMade', 'freeThrowsPercentage', 'reboundsDefensive',
       'reboundsOffensive', 'reboundsTotal', 'foulsPersonal', 'turnovers',
       'plusMinusPoints', 'numMinutes'],
      dtype='object')


Columns of small df not in big:

	Index(['hometeamCity', 'hometeamName', 'hometeamId', 'awayteamCity',
       'awayteamName', 'awayteamId', 'homeScore', 'awayScore', 'winner',
       'gameType', 'attendance', 'arenaId', 'gameLabel', 'gameSubLabel',
       'seriesGameNumber'],
      dtype='object')




In [78]:
# subsetting by home teams
df_games_big_home = df_games_big[df_games_big.home == 1]
print(f"Home teams data size: {df_games_big_home.shape}")

# subsetting by away teams
df_games_big_away = df_games_big[df_games_big.home == 0]
print(f"Away teams data size: {df_games_big_away.shape}")


Home teams data size: (71418, 31)
Away teams data size: (71418, 31)


In [79]:
# merging home and away teams dataframes
df_games_wide = df_games_big_home.merge(right=df_games_big_away,how='inner',on='gameId',suffixes=('_home','_away'))
print(df_games_wide.shape)
df_games_wide.sample(6)


(71418, 61)


Unnamed: 0,gameId,gameDate_home,teamCity_home,teamName_home,teamId_home,opponentTeamCity_home,opponentTeamName_home,opponentTeamId_home,home_home,win_home,...,freeThrowsAttempted_away,freeThrowsMade_away,freeThrowsPercentage_away,reboundsDefensive_away,reboundsOffensive_away,reboundsTotal_away,foulsPersonal_away,turnovers_away,plusMinusPoints_away,numMinutes_away
14062,21400372,2014-12-17 19:30:00,Detroit,Pistons,1610612765,Dallas,Mavericks,1610612742,1,0,...,35.0,27.0,0.771,36.0,9.0,45.0,15.0,14.0,11.0,240.0
8782,21800070,2018-10-26 20:00:00,Houston,Rockets,1610612745,Los Angeles,Clippers,1610612746,1,0,...,38.0,35.0,0.921,39.0,5.0,44.0,21.0,16.0,20.0,240.0
8852,21800002,2018-10-16 22:30:00,Golden State,Warriors,1610612744,Oklahoma City,Thunder,1610612760,1,1,...,37.0,24.0,0.649,29.0,16.0,45.0,21.0,15.0,-8.0,240.0
61329,27300315,1973-12-28 20:00:00,Los Angeles,Lakers,1610612747,Phoenix,Suns,1610612756,1,1,...,29.0,21.0,0.724,,,,34.0,,-12.0,240.0
67577,26200021,1962-10-27 19:00:00,Boston,Celtics,1610612738,Syracuse,Nationals,1610612755,1,1,...,,26.0,,,,,30.0,,-20.0,0.0
10421,21601189,2017-04-08 18:00:00,Charlotte,Hornets,1610612766,Boston,Celtics,1610612738,1,0,...,17.0,14.0,0.824,38.0,5.0,43.0,19.0,9.0,7.0,240.0


In [80]:
# merging df_games and home and away teams (df_games_wide) dataframes to make one big df_games dataframe
df_games = df_games_small.merge(right=df_games_wide,how='inner',on='gameId',suffixes=('_teams','_games'))
print(df_games.shape)
df_games.sample(7)

(71418, 77)


Unnamed: 0,gameId,gameDate,hometeamCity,hometeamName,hometeamId,awayteamCity,awayteamName,awayteamId,homeScore,awayScore,...,freeThrowsAttempted_away,freeThrowsMade_away,freeThrowsPercentage_away,reboundsDefensive_away,reboundsOffensive_away,reboundsTotal_away,foulsPersonal_away,turnovers_away,plusMinusPoints_away,numMinutes_away
62064,27200286,1972-12-29 19:00:00,Baltimore,Bullets,1610612764,Buffalo,Braves,1610612746,109,118,...,23.0,20.0,0.87,,,,22.0,,9.0,240.0
5162,42000161,2021-05-22 22:30:00,Denver,Nuggets,1610612743,Portland,Trail Blazers,1610612757,109,123,...,19.0,18.0,0.947,29.0,10.0,39.0,14.0,6.0,14.0,240.0
38180,29501113,1996-04-12 20:00:00,Washington,Bullets,1610612764,Minnesota,Timberwolves,1610612750,116,106,...,23.0,16.0,0.696,28.0,18.0,46.0,25.0,16.0,-10.0,240.0
32874,20000590,2001-01-23 19:30:00,Orlando,Magic,1610612753,Toronto,Raptors,1610612761,116,111,...,48.0,38.0,0.792,31.0,16.0,47.0,28.0,17.0,-5.0,290.0
8940,41700225,2018-05-08 20:00:00,Houston,Rockets,1610612745,Utah,Jazz,1610612762,112,102,...,21.0,16.0,0.762,30.0,11.0,41.0,18.0,12.0,-10.0,240.0
49601,28500907,1986-04-08 20:00:00,Golden State,Warriors,1610612744,Dallas,Mavericks,1610612742,129,115,...,26.0,20.0,0.769,31.0,7.0,38.0,18.0,12.0,-14.0,240.0
11931,21501092,2016-03-27 15:30:00,Los Angeles,Clippers,1610612746,Denver,Nuggets,1610612743,105,90,...,13.0,9.0,0.692,24.0,20.0,44.0,19.0,15.0,-15.0,240.0


In [None]:
# print(df_games.columns.tolist())
# columns to drop
drop_cols = ['hometeamCity',
            'awayteamCity',
            'hometeamId',
            'awayteamCity',
            'awayteamId',
            'gameDate_home',
            'teamCity_home',
            'teamName_home',
            'teamId_home',
            'opponentTeamCity_home',
            'opponentTeamName_home',
            'opponentTeamId_home',
            'home_home',
            'teamScore_home',
            'opponentScore_home',
            'numMinutes_home',
            'gameDate_away',
            'teamCity_away',
            'teamName_away',
            'teamId_away',
            'opponentTeamCity_away',
            'opponentTeamName_away',
            'opponentTeamId_away',
            'home_away',
            'win_away',
            'teamScore_away',
            'opponentScore_away',
            'numMinutes_away']

df_games['gameDate'] = pd.to_datetime(df_games['gameDate'])

df_games.drop(columns=drop_cols,inplace=True) # dropping redundant columns
# dropping redundant columns
print(f"New dimensions of df_games: {df_games.shape}\n")

print(f"Remaining columns:\n\n\t{df_games.columns.tolist()}")
df_games.head()

New dimensions of df_games: (71418, 50)

Remaining columns:

	['gameId', 'gameDate', 'hometeamName', 'awayteamName', 'homeScore', 'awayScore', 'winner', 'gameType', 'attendance', 'arenaId', 'gameLabel', 'gameSubLabel', 'seriesGameNumber', 'win_home', 'assists_home', 'blocks_home', 'steals_home', 'fieldGoalsAttempted_home', 'fieldGoalsMade_home', 'fieldGoalsPercentage_home', 'threePointersAttempted_home', 'threePointersMade_home', 'threePointersPercentage_home', 'freeThrowsAttempted_home', 'freeThrowsMade_home', 'freeThrowsPercentage_home', 'reboundsDefensive_home', 'reboundsOffensive_home', 'reboundsTotal_home', 'foulsPersonal_home', 'turnovers_home', 'plusMinusPoints_home', 'assists_away', 'blocks_away', 'steals_away', 'fieldGoalsAttempted_away', 'fieldGoalsMade_away', 'fieldGoalsPercentage_away', 'threePointersAttempted_away', 'threePointersMade_away', 'threePointersPercentage_away', 'freeThrowsAttempted_away', 'freeThrowsMade_away', 'freeThrowsPercentage_away', 'reboundsDefensive_aw

***

# __Multilayer Perceptron__

In [89]:
# Display basic information about the DataFrame
df_games.info()

# Display summary statistics for numerical columns
df_games.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71418 entries, 0 to 71417
Data columns (total 50 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   gameId                        71418 non-null  int64         
 1   gameDate                      71418 non-null  datetime64[ns]
 2   hometeamName                  71418 non-null  object        
 3   awayteamName                  71418 non-null  object        
 4   homeScore                     71418 non-null  int64         
 5   awayScore                     71418 non-null  int64         
 6   winner                        71418 non-null  int64         
 7   gameType                      71418 non-null  object        
 8   attendance                    66390 non-null  float64       
 9   arenaId                       71418 non-null  int64         
 10  gameLabel                     3702 non-null   object        
 11  gameSubLabel                

Unnamed: 0,gameId,gameDate,homeScore,awayScore,winner,attendance,arenaId,seriesGameNumber,win_home,assists_home,...,threePointersPercentage_away,freeThrowsAttempted_away,freeThrowsMade_away,freeThrowsPercentage_away,reboundsDefensive_away,reboundsOffensive_away,reboundsTotal_away,foulsPersonal_away,turnovers_away,plusMinusPoints_away
count,71418.0,71418,71418.0,71418.0,71418.0,66390.0,71418.0,5277.0,71418.0,53876.0,...,50727.0,68872.0,71411.0,68869.0,50681.0,50754.0,54044.0,69426.0,51011.0,70697.0
mean,25827660.0,1996-05-26 19:48:01.445013632,105.794114,102.207161,1610613000.0,14949.441121,32986.84,2.660413,0.617407,23.99703,...,0.337805,25.969334,19.767151,0.753696,30.362799,11.612425,42.172026,23.029067,15.118131,-3.398419
min,10300000.0,1946-11-26 19:00:00,0.0,0.0,1610613000.0,9.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.143,4.0,0.0,0.0,0.0,0.0,-73.0
25%,21300470.0,1982-04-28 20:00:00,96.0,93.0,1610613000.0,11896.0,5.0,1.0,0.0,20.0,...,0.25,20.0,15.0,0.69,26.0,9.0,38.0,19.0,12.0,-12.0
50%,26400000.0,1998-04-04 09:00:00,106.0,102.0,1610613000.0,16133.0,44.0,3.0,1.0,24.0,...,0.333,25.0,19.0,0.759,30.0,11.0,42.0,23.0,15.0,-4.0
75%,28700620.0,2012-03-11 19:30:00,115.0,111.0,1610613000.0,18676.0,135.0,4.0,1.0,28.0,...,0.423,31.0,24.0,0.821,34.0,14.0,47.0,26.0,18.0,6.0
max,62400000.0,2025-03-19 21:00:00,184.0,186.0,1610613000.0,200049.0,1000144.0,7.0,1.0,53.0,...,1.0,91.0,57.0,5.25,60.0,40.0,92.0,115.0,40.0,68.0
std,6406806.0,,14.24303,13.856131,8.638285,5165.007849,178383.4,1.972684,0.486023,5.682109,...,0.14397,8.558493,6.885382,0.102578,5.590181,4.246463,6.905153,5.229507,4.284959,13.102197


In [None]:
ncol = df_games.shape[1]
y = df_games['homeScore']
X = df_games.drop(columns=['homeScore'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

# __Random Forest Regressor__

In [None]:
team1 = input("Enter Team 1:")
team2 = input("Enter Team 2:")

team1_df = df_teams[(df_teams['teamName'] == team1) & (df_teams['opponentTeamName'] == team2)]
team1_df

team2_df = df_teams[(df_teams['teamName'] == team2) & (df_teams['opponentTeamName'] == team1)]
team2_df

#model for team1
features = ['assists', 'blocks', 'steals', 'fieldGoalsMade', 'fieldGoalsPercentage', 'threePointersMade', 'freeThrowsMade', 'reboundsTotal', 'turnovers']
x = team1_df[features]
y = team1_df['teamScore']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

#model for team2
features2 = ['assists', 'blocks', 'steals', 'fieldGoalsMade', 'fieldGoalsPercentage', 'threePointersMade', 'freeThrowsMade', 'reboundsTotal', 'turnovers']
x = team2_df[features]
y = team2_df['teamScore']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model2 = RandomForestRegressor(n_estimators=100, random_state=42)
model2.fit(x_train, y_train)

In [None]:
team1_score = team1_df[['assists', 'blocks', 'steals', 'fieldGoalsMade', 'fieldGoalsPercentage', 'threePointersMade', 'freeThrowsMade', 'reboundsTotal', 'turnovers']]
team2_score = team2_df[['assists', 'blocks', 'steals', 'fieldGoalsMade', 'fieldGoalsPercentage', 'threePointersMade', 'freeThrowsMade', 'reboundsTotal', 'turnovers']]

predicted_score1 = model.predict(team1_score)[0]
predicted_score2 = model2.predict(team2_score)[0]

print(f'{team1} Score: {predicted_score1}')
print(f'{team2} Score: {predicted_score2}')

Bulls Score: 107.29
Celtics Score: 122.48
