In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [2]:
# Load the finalised NBA dataset; the relative path is resolved against the
# notebook's current working directory.
data = pd.read_csv(filepath_or_buffer='finalised_NBA_Data.csv')

In [3]:
# Show the dtype pandas inferred for each column of the loaded frame.
data.dtypes

TEAM_ABBREVIATION         object
TEAM_NAME                 object
GAME_DATE                 object
MATCHUP                   object
WL                        object
MIN                        int64
PTS                        int64
FGM                        int64
FGA                        int64
FG_PCT                   float64
FG3M                       int64
FG3A                       int64
FG3_PCT                  float64
FTM                        int64
FTA                        int64
FT_PCT                   float64
OREB                       int64
DREB                       int64
REB                        int64
AST                        int64
STL                        int64
BLK                        int64
TOV                        int64
PF                         int64
PLUS_MINUS               float64
HOME_TEAM                 object
AWAY_TEAM                 object
PPM                      float64
POSSESSIONS                int64
PTSperP                  float64
EFGPCT    

In [4]:
# Drop columns that are not needed as model inputs or that identify the
# team / game (team names, game date, matchup string).
#
# The result is re-assigned instead of using inplace=True, and
# errors='ignore' skips labels that are already gone, so re-running this
# cell on an already-trimmed frame does not raise a KeyError.
#
# NOTE(review): PLUS_MINUS is kept as a feature; in the sample rows shown by
# data.head() its sign matches WL exactly — confirm it is not leaking the
# target into the model.
data = data.drop(
    columns=['MIN', 'TEAM_ABBREVIATION', 'GAME_DATE', 'TEAM_NAME',
             'MATCHUP', 'HOME_TEAM', 'AWAY_TEAM'],
    errors='ignore',
)

In [5]:
# Encode the target variable (WL) as a binary integer: 'W' -> 1, 'L' -> 0.
#
# Only the WL column is mapped. A frame-wide data.replace({'W': 1, 'L': 0})
# would also rewrite any other cell holding the string 'W' or 'L', and it
# relies on pandas implicitly downcasting the object column to integers
# (deprecated in pandas 2.2).
# The identity entries (1 -> 1, 0 -> 0) keep the cell safe to re-run on an
# already-encoded column.
wl_codes = data['WL'].map({'W': 1, 'L': 0, 1: 1, 0: 0})

# Series.map turns every value missing from the mapping into NaN; fail loudly
# instead of carrying an unlabelled row into the training data.
if wl_codes.isna().any():
    raise ValueError("WL contains values other than 'W' / 'L'")

data['WL'] = wl_codes.astype('int64')

In [6]:
# Preview the first five rows after dropping columns and encoding WL.
data.head(n=5)

Unnamed: 0,WL,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,...,PLUS_MINUS,PPM,POSSESSIONS,PTSperP,EFGPCT,PACE,HOME,Scoring_Opportunities,Defensive_Index,TO_to_AST_Ratio
0,1,106,38,89,0.427,13,39,0.333,17,20,...,18.0,2.21,83,1.277,50.0,1.729,1,148,47,0.28
1,0,88,35,78,0.449,11,37,0.297,7,13,...,-18.0,1.83,75,1.173,51.923,1.562,0,128,36,0.722222
2,1,122,46,91,0.505,15,37,0.405,15,22,...,38.0,2.54,85,1.435,58.791,1.771,1,150,48,0.380952
3,1,106,38,82,0.463,17,46,0.37,13,14,...,7.0,2.21,80,1.325,56.707,1.667,0,142,40,0.346154
4,0,99,38,86,0.442,9,25,0.36,14,16,...,-7.0,2.06,84,1.179,49.419,1.75,1,127,42,0.533333


In [7]:
# Check the dataset for missing values.
# Despite its name, na_rows holds one count per *column* (the number of
# null entries found in that column), not a count of rows.
na_rows = data.isna().sum(axis=0)
print(na_rows)

WL                       0
PTS                      0
FGM                      0
FGA                      0
FG_PCT                   0
FG3M                     0
FG3A                     0
FG3_PCT                  0
FTM                      0
FTA                      0
FT_PCT                   0
OREB                     0
DREB                     0
REB                      0
AST                      0
STL                      0
BLK                      0
TOV                      0
PF                       0
PLUS_MINUS               0
PPM                      0
POSSESSIONS              0
PTSperP                  0
EFGPCT                   0
PACE                     0
HOME                     0
Scoring_Opportunities    0
Defensive_Index          0
TO_to_AST_Ratio          0
dtype: int64


In [8]:
# With no NULL values present, preprocessing continues with min-max
# normalisation: every column listed in cols_to_scale is rescaled to the
# 0-1 range using that column's own minimum and maximum.
#
# Columns not listed (WL, FG_PCT, FG3_PCT, FT_PCT, HOME, TO_to_AST_Ratio)
# are left exactly as they are.
# NOTE(review): TO_to_AST_Ratio is a ratio with no visible upper bound, so it
# may fall outside 0-1 — confirm that leaving it unscaled is intended.
# NOTE(review): the scaler is fitted on the full dataset; if a train/test
# split is made later, the held-out rows have already influenced the
# scaling — confirm this is acceptable.

from sklearn.preprocessing import MinMaxScaler

cols_to_scale = [
    'PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'OREB', 'DREB',
    'REB', 'AST', 'TOV', 'STL', 'BLK', 'PF', 'PLUS_MINUS', 'PPM', 'POSSESSIONS',
    'PTSperP', 'EFGPCT', 'PACE', 'Scoring_Opportunities', 'Defensive_Index',
]

# Learn each column's min/max, then rescale the same columns with the fitted
# scaler (equivalent to a single fit_transform call).
scaler = MinMaxScaler().fit(data[cols_to_scale])
data[cols_to_scale] = scaler.transform(data[cols_to_scale])

# Print the first few rows to verify the scaling.
print(data.head())



   WL       PTS       FGM       FGA  FG_PCT      FG3M      FG3A  FG3_PCT  \
0   1  0.484848  0.447368  0.622642   0.427  0.481481  0.603175    0.333   
1   0  0.303030  0.368421  0.415094   0.449  0.407407  0.571429    0.297   
2   1  0.646465  0.657895  0.660377   0.505  0.555556  0.571429    0.405   
3   1  0.484848  0.447368  0.490566   0.463  0.629630  0.714286    0.370   
4   0  0.414141  0.447368  0.566038   0.442  0.333333  0.380952    0.360   

     FTM       FTA  ...  PLUS_MINUS       PPM  POSSESSIONS   PTSperP  \
0  0.425  0.370370  ...    0.686869  0.485437     0.543860  0.411052   
1  0.175  0.240741  ...    0.323232  0.300971     0.403509  0.332324   
2  0.375  0.407407  ...    0.888889  0.645631     0.578947  0.530659   
3  0.325  0.259259  ...    0.575758  0.485437     0.491228  0.447388   
4  0.350  0.296296  ...    0.434343  0.412621     0.561404  0.336866   

     EFGPCT      PACE  HOME  Scoring_Opportunities  Defensive_Index  \
0  0.437531  0.543771     1            

In [9]:
# Persist the normalised frame next to the notebook; index=False keeps the
# row index out of the CSV so it is not written as an extra column.
data.to_csv(path_or_buf='Normalised_NBA_Data.csv', index=False)