In [1]:
#PREPROCESS AND FEATURE ENGINEERING

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler ,LabelEncoder
from sklearn.impute import SimpleImputer

# DATASET
data = pd.read_csv(r"C:\Users\nh013\Desktop\NBA DATASET\GAMES.csv")

# DROP UNNEEDED COLUMNS
columns_to_drop = ['SEASON_ID', 'TEAM_ID', 'GAME_ID', 'GAME_DATE', 'MATCHUP']
data = data.drop(columns_to_drop, axis=1)

# FEATURE 
categorical_features = ['TEAM_ABBREVIATION', 'TEAM_NAME', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
                        'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS']

#LABELENCODER
label_encoder = LabelEncoder()

# CONVERT CATEGORICAL FEATURE TO NUMERIC
for feature in categorical_features:
    data[feature] = label_encoder.fit_transform(data[feature])

# NUMERICAL FEATURE TO FLOAT
data = data.astype(float)

# FILLED MISSING VALUE WITH CONVERTED NUMERIC FORM
categorical_features_to_fill = ['TEAM_ABBREVIATION', 'TEAM_NAME', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
                        'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS']

# SIMPLEIMPUTER IN CATEGORICAL FEATURE
categorical_imputer = SimpleImputer(strategy='most_frequent')

data[categorical_features_to_fill] = categorical_imputer.fit_transform(data[categorical_features_to_fill])

# IDENTIFY MISSING VALUES
print(data.isnull().sum())

# DROP ROWS WITH MISSING VALUES
data.dropna(subset=categorical_features, inplace=True)

# FILL MISSING VALUES WITH FORWARD FILL
data.fillna(method='ffill', inplace=True)

# FILL MISSING VALUES WITH BACKWARD FILL
data.fillna(method='bfill', inplace=True)

# CONVERT NUMERICAL COLUMNS TO NUMERIC
data[categorical_features] = data[categorical_features].apply(pd.to_numeric, errors='coerce')

# FILL MISSING VALUES WITH MEAN
mean = data[categorical_features].mean()
data[categorical_features].fillna(mean, inplace=True)

# NORMALIZE AND SCALE 
scaler = MinMaxScaler()
data[categorical_features] = scaler.fit_transform(data[categorical_features])

# FEATURE ENGINEERING: REBUNDING PERFORMANCE
data['OREB/G'] = data['OREB'] / data['MIN']
data['DREB/G'] = data['DREB'] / data['MIN']

# FEATURE ENGINEERING: ASSIST TURN OVER RASIO
data['AST_TOV_RATIO'] = data['AST'] / data['TOV']

# FEATURE ENGINEERING: FREE THROW EFFICENCY
data['FT_PCT'] = data['FTM'] / data['FTA']

# FEATURE ENGINEERING: POINT DIFFERENTIAL
data['POINT_DIFF'] = data['PTS'] - data['PLUS_MINUS']

# FEATURE ENGINEERING:STEALS AND BLOCK
data['STL/G'] = data['STL'] / data['MIN']
data['BLK/G'] = data['BLK'] / data['MIN']

# FEATURE ENGINEERING: FOUL RATE
data['FOUL_RATE'] = data['PF'] / data['MIN']


#DROP ROWS WITH ANY NAN VALUES
data.dropna(inplace=True)  
data.replace([np.inf, -np.inf], np.nan, inplace=True)  
data.dropna(inplace=True) 



print("Rebounding Performance (OREB/G and DREB/G):")
print(data[['OREB/G', 'DREB/G']].head())

print("\nAssist-Turnover Ratio (AST_TOV_RATIO):")
print(data['AST_TOV_RATIO'].head())

print("\nFree Throw Efficiency (FT_PCT):")
print(data['FT_PCT'].head())

print("\nPoint Differential (POINT_DIFF):")
print(data['POINT_DIFF'].head())

print("\nSteals per Game (STL/G):")
print(data['STL/G'].head())

print("\nBlocks per Game (BLK/G):")
print(data['BLK/G'].head())

print("\nFoul Rate (FOUL_RATE):")
print(data['FOUL_RATE'].head())



# USING GROUPBY METHOD TO CALCULATE W AND L
team_stats = data.groupby(['TEAM_NAME', 'WL']).size().unstack(fill_value=0)
team_stats.rename(columns={'W': 'Wins', 'L': 'Losses'}, inplace=True)

print(team_stats)





print(data)


TEAM_ABBREVIATION    0
TEAM_NAME            0
WL                   0
MIN                  0
PTS                  0
FGM                  0
FGA                  0
FG_PCT               0
FG3M                 0
FG3A                 0
FG3_PCT              0
FTM                  0
FTA                  0
FT_PCT               0
OREB                 0
DREB                 0
REB                  0
AST                  0
STL                  0
BLK                  0
TOV                  0
PF                   0
PLUS_MINUS           0
dtype: int64
Rebounding Performance (OREB/G and DREB/G):
     OREB/G    DREB/G
2  1.992157  1.834881
3  0.747059  1.572755
5  0.564444  0.891228
6  0.705556  1.114035
7  0.793750  0.612719

Assist-Turnover Ratio (AST_TOV_RATIO):
2    4.183673
3    1.004082
5    1.115646
6    0.585714
7    0.590636
Name: AST_TOV_RATIO, dtype: float64

Free Throw Efficiency (FT_PCT):
2    0.833333
3    0.416667
5    0.833333
6    1.250000
7    0.625000
Name: FT_PCT, dtype: float64

Poi