# **Introduction**
This notebook proposes a machine learning model that predicts the outcome of a playoff shot from a number of factors in the dataset (player, location, shot type, etc.).<br>
The approach shows how machine learning, and a random forest classifier in particular, can be used to predict very dynamic events.<br>
The shot prediction model is based on feature engineering; it also helps to display the performance of the players and to derive an intuitive ranking of that performance.

# Importing packages and dataset

In [None]:
#Importing required packages.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

import os
#print(os.listdir("playoff_shots.csv")) 

In [None]:
# Read the playoff shot file into a DataFrame and preview its first rows.
nba_df = pd.read_csv(filepath_or_buffer="playoff_shots.csv")
nba_df.head(n=5)

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM
0,Shot Chart Detail,41700131,149,2544,LeBron James,1610612739,Cleveland Cavaliers,1,1,24,Missed Shot,Jump Shot,3PT Field Goal,Above the Break 3,Left Side Center(LC),24+ ft.,26,-168,206,1,0,20180415,CLE,IND
1,Shot Chart Detail,41700131,153,2544,LeBron James,1610612739,Cleveland Cavaliers,1,1,5,Missed Shot,Driving Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,3,1,1,0,20180415,CLE,IND
2,Shot Chart Detail,41700131,155,2544,LeBron James,1610612739,Cleveland Cavaliers,1,0,58,Missed Shot,Putback Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,0,-6,1,0,20180415,CLE,IND
3,Shot Chart Detail,41700131,187,2544,LeBron James,1610612739,Cleveland Cavaliers,2,11,8,Made Shot,Driving Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,1,-8,13,1,1,20180415,CLE,IND
4,Shot Chart Detail,41700131,195,2544,LeBron James,1610612739,Cleveland Cavaliers,2,10,22,Made Shot,Running Finger Roll Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,1,2,10,1,1,20180415,CLE,IND


# Clean data

In [None]:
# Drop non-numerical data fields that are statistically irrelevant or covered
# in another column.
#
# Columns deliberately kept (don't remove):
#   'TEAM_NAME', 'ACTION_TYPE',
#   'SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG', 'SHOT_ZONE_AREA', 'SHOT_DISTANCE',
#   'PERIOD', 'MINUTES_REMAINING', 'SECONDS_REMAINING'
#
# drop() is used without inplace=True, and errors="ignore" makes the cell safe
# to re-run: once the columns are gone, a second execution is a no-op instead
# of raising KeyError.
columns_to_drop = ['GRID_TYPE', 'GAME_ID', 'LOC_X', 'LOC_Y', 'GAME_DATE',
                   'PLAYER_NAME', 'HTM', 'VTM']
nba_df = nba_df.drop(columns=columns_to_drop, errors="ignore")
nba_df.head()

Unnamed: 0,GAME_EVENT_ID,PLAYER_ID,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG
0,149,2544,1610612739,Cleveland Cavaliers,1,1,24,Missed Shot,Jump Shot,3PT Field Goal,Above the Break 3,Left Side Center(LC),24+ ft.,26,1,0
1,153,2544,1610612739,Cleveland Cavaliers,1,1,5,Missed Shot,Driving Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,1,0
2,155,2544,1610612739,Cleveland Cavaliers,1,0,58,Missed Shot,Putback Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,1,0
3,187,2544,1610612739,Cleveland Cavaliers,2,11,8,Made Shot,Driving Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,1,1,1
4,195,2544,1610612739,Cleveland Cavaliers,2,10,22,Made Shot,Running Finger Roll Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,1,1,1


In [None]:
# Count the missing values in every column.
missing_per_column = nba_df.isna().sum()
print(missing_per_column)

GAME_EVENT_ID          0
PLAYER_ID              0
TEAM_ID                0
TEAM_NAME              0
PERIOD                 0
MINUTES_REMAINING      0
SECONDS_REMAINING      0
EVENT_TYPE             0
ACTION_TYPE            0
SHOT_TYPE              0
SHOT_ZONE_BASIC        0
SHOT_ZONE_AREA         0
SHOT_ZONE_RANGE        0
SHOT_DISTANCE          0
SHOT_ATTEMPTED_FLAG    0
SHOT_MADE_FLAG         0
dtype: int64


In [None]:
# Encode SHOT_TYPE as the point value of the attempt (3 or 2).
shot_type_mapping = {
    '3PT Field Goal': 3,
    '2PT Field Goal': 2,
}
shot_type_codes = nba_df['SHOT_TYPE'].map(shot_type_mapping)
nba_df['SHOT_TYPE'] = shot_type_codes
nba_df['SHOT_TYPE'].head()

0    3
1    2
2    2
3    2
4    2
Name: SHOT_TYPE, dtype: int64

In [None]:
# Quantify Action Type: replace each action label with an integer code.
action_type_mapping = {'Alley Oop Dunk Shot': 0, 'Alley Oop Layup shot': 1, 'Cutting Dunk Shot': 2, 'Cutting Finger Roll Layup Shot': 3, 
                     'Cutting Layup Shot': 4, 'Driving Bank Hook Shot': 5, 'Driving Dunk Shot': 6, 'Driving Finger Roll Layup Shot': 7, 
                     'Driving Floating Bank Jump Shot': 8, 'Driving Floating Jump Shot': 9, 'Driving Hook Shot': 10, 
                     'Driving Layup Shot': 11, 'Driving Reverse Dunk Shot': 12, 'Driving Reverse Layup Shot': 13, 'Dunk Shot': 14, 
                     'Fadeaway Jump Shot': 15, 'Finger Roll Layup Shot': 16, 'Floating Jump shot': 17, 'Hook Bank Shot': 18, 
                     'Hook Shot': 19, 'Jump Bank Shot': 20, 'Jump Shot': 21, 'Layup Shot': 22, 'Pullup Jump shot': 23, 
                     'Putback Dunk Shot': 24, 'Putback Layup Shot': 25, 'Reverse Dunk Shot': 26, 'Reverse Layup Shot': 27, 
                     'Running Alley Oop Dunk Shot': 28, 'Running Alley Oop Layup Shot': 29, 'Running Dunk Shot': 30, 
                     'Running Finger Roll Layup Shot': 31, 'Running Jump Shot': 32, 'Running Layup Shot': 33, 
                     'Running Pull-Up Jump Shot': 34, 'Running Reverse Dunk Shot': 35, 'Running Reverse Layup Shot': 36, 
                     'Step Back Bank Jump Shot': 37, 'Step Back Jump shot': 38, 'Tip Dunk Shot': 39, 'Tip Layup Shot': 40, 
                     'Turnaround Bank Hook Shot': 41, 'Turnaround Fadeaway Bank Jump Shot': 42, 'Turnaround Fadeaway shot': 43, 
                     'Turnaround Hook Shot': 44, 'Turnaround Jump Shot': 45} 
action_type_codes = nba_df['ACTION_TYPE'].map(action_type_mapping)

# Series.map turns every label that is missing from the dict into NaN without
# any warning. Fail loudly instead, so a new action label (or an accidental
# second run of this cell on already-encoded data) cannot silently corrupt the
# feature.
unmapped_actions = nba_df.loc[action_type_codes.isna(), 'ACTION_TYPE'].unique()
if len(unmapped_actions) > 0:
    raise ValueError(
        f"ACTION_TYPE values missing from action_type_mapping: {sorted(map(str, unmapped_actions))}"
    )

nba_df['ACTION_TYPE'] = action_type_codes
nba_df['ACTION_TYPE'].head(5)

0    21
1    11
2    25
3    11
4    31
Name: ACTION_TYPE, dtype: int64

In [None]:
# Quantify Team Name: replace each team name with an integer code.
team_name_mapping = {'Boston Celtics': 0, 'Cleveland Cavaliers': 1, 'Golden State Warriors': 2, 'Houston Rockets': 3, 
                     'Indiana Pacers': 4, 'Miami Heat': 5, 'Milwaukee Bucks': 6, 'Minnesota Timberwolves': 7, 
                     'New Orleans Pelicans': 8, 'Oklahoma City Thunder': 9, 'Philadelphia 76ers': 10, 'Portland Trail Blazers': 11, 
                     'San Antonio Spurs': 12, 'Toronto Raptors': 13, 'Utah Jazz': 14, 'Washington Wizards': 15} 
team_name_codes = nba_df['TEAM_NAME'].map(team_name_mapping)

# Only the 16 teams listed above are known to the mapping. Series.map would
# silently produce NaN for any other team (or for already-encoded values on a
# second run of this cell), so stop with an explicit error instead.
unmapped_teams = nba_df.loc[team_name_codes.isna(), 'TEAM_NAME'].unique()
if len(unmapped_teams) > 0:
    raise ValueError(
        f"TEAM_NAME values missing from team_name_mapping: {sorted(map(str, unmapped_teams))}"
    )

nba_df['TEAM_NAME'] = team_name_codes
nba_df['TEAM_NAME'].head(5)

0    1
1    1
2    1
3    1
4    1
Name: TEAM_NAME, dtype: int64

In [None]:
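#Quantify Shot Zone Area -- disabled draft: the active mapping is in the later "Quantify Shot Zone Area" cell, which assigns different codes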
#shot_zone_area_mapping = {'Back Court(BC)': 0, 'Left Side(L)': 1, 'Left Side Center(LC)': 2, 'Center(C)': 3, 
#'Right Side Center(RC)': 4, 'Right Side(R)': 5}
#nba_df['SHOT_ZONE_AREA'] = nba_df['SHOT_ZONE_AREA'].map(shot_zone_area_mapping)
#nba_df['SHOT_ZONE_AREA'].head(5)

In [None]:
# Encode SHOT_ZONE_BASIC: every zone label is replaced by its position
# (0-6) in the list below.
shot_zone_basic_mapping = {
    zone: code
    for code, zone in enumerate([
        'Backcourt', 'Left Corner 3', 'Right Corner 3', 'Above the Break 3',
        'Mid-Range', 'In The Paint (Non-RA)', 'Restricted Area',
    ])
}
shot_zone_basic_codes = nba_df['SHOT_ZONE_BASIC'].map(shot_zone_basic_mapping)
nba_df['SHOT_ZONE_BASIC'] = shot_zone_basic_codes
nba_df['SHOT_ZONE_BASIC'].head()

0    3
1    6
2    6
3    6
4    6
Name: SHOT_ZONE_BASIC, dtype: int64

In [None]:
# Encode SHOT_ZONE_RANGE as a number: the bounded buckets use their upper
# bound (8, 16, 24); '24+ ft.' and 'Back Court Shot' use the fixed values
# 30 and 50.
shot_zone_range_mapping = dict(zip(
    ('Less Than 8 ft.', '8-16 ft.', '16-24 ft.', '24+ ft.', 'Back Court Shot'),
    (8, 16, 24, 30, 50),
))
shot_zone_range_codes = nba_df['SHOT_ZONE_RANGE'].map(shot_zone_range_mapping)
nba_df['SHOT_ZONE_RANGE'] = shot_zone_range_codes
nba_df['SHOT_ZONE_RANGE'].head()

0    30
1     8
2     8
3     8
4     8
Name: SHOT_ZONE_RANGE, dtype: int64

In [None]:
# Encode SHOT_ZONE_AREA: every area label is replaced by its position
# (0-5) in the list below.
shot_zone_area_mapping = {
    area: code
    for code, area in enumerate([
        'Back Court(BC)', 'Center(C)', 'Left Side Center(LC)', 'Left Side(L)',
        'Right Side Center(RC)', 'Right Side(R)',
    ])
}
shot_zone_area_codes = nba_df['SHOT_ZONE_AREA'].map(shot_zone_area_mapping)
nba_df['SHOT_ZONE_AREA'] = shot_zone_area_codes
nba_df['SHOT_ZONE_AREA'].head()

0    2
1    1
2    1
3    1
4    1
Name: SHOT_ZONE_AREA, dtype: int64

In [None]:
# Encode EVENT_TYPE as a binary outcome: 0 for a missed shot, 1 for a made shot.
event_type_basic_mapping = {
    'Missed Shot': 0,
    'Made Shot': 1,
}
event_type_codes = nba_df['EVENT_TYPE'].map(event_type_basic_mapping)
nba_df['EVENT_TYPE'] = event_type_codes
nba_df['EVENT_TYPE'].head()

0    0
1    0
2    0
3    1
4    1
Name: EVENT_TYPE, dtype: int64

In [None]:
# Save the label-encoded frame to disk; index=False leaves the row index out
# of the file.
nba_df.to_csv(path_or_buf='feature_eng_dataset.csv', index=False)

In [None]:
# Create dummy variables for shot type.
# SHOT_TYPE holds the integers 2 and 3 at this point, so get_dummies without a
# prefix would create columns labelled with the bare integers 2 and 3. Integer
# labels mixed with the string column names break estimators that validate
# feature names, and they are easy to confuse with other encoded columns.
# The prefix yields 'SHOT_TYPE_2' / 'SHOT_TYPE_3' instead.
shot_dummy = pd.get_dummies(nba_df['SHOT_TYPE'], prefix='SHOT_TYPE')
nba_df = pd.concat([nba_df, shot_dummy], axis=1).drop(columns=['SHOT_TYPE'])
nba_df.head()

Unnamed: 0,GAME_EVENT_ID,PLAYER_ID,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,2,3
0,149,2544,1610612739,1,1,1,24,0,21,3,2,30,26,1,0,0,1
1,153,2544,1610612739,1,1,1,5,0,11,6,1,8,0,1,0,1,0
2,155,2544,1610612739,1,1,0,58,0,25,6,1,8,0,1,0,1,0
3,187,2544,1610612739,1,2,11,8,1,11,6,1,8,1,1,1,1,0
4,195,2544,1610612739,1,2,10,22,1,31,6,1,8,1,1,1,1,0


In [None]:
# Create dummy variables for shot zone basic.
# SHOT_ZONE_BASIC holds the integer codes 0-6 at this point, so get_dummies
# without a prefix would create columns labelled 0..6. Those integer labels
# mix with the string column names and can collide with other integer-named
# columns already in the frame. The prefix yields unambiguous string names
# 'SHOT_ZONE_BASIC_0' ... 'SHOT_ZONE_BASIC_6'.
shot_dummy = pd.get_dummies(nba_df['SHOT_ZONE_BASIC'], prefix='SHOT_ZONE_BASIC')
nba_df = pd.concat([nba_df, shot_dummy], axis=1).drop(columns=['SHOT_ZONE_BASIC'])
nba_df.head()

Unnamed: 0,GAME_EVENT_ID,PLAYER_ID,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,2,3,0,1,2.1,3.1,4,5,6
0,149,2544,1610612739,1,1,1,24,0,21,2,30,26,1,0,0,1,0,0,0,1,0,0,0
1,153,2544,1610612739,1,1,1,5,0,11,1,8,0,1,0,1,0,0,0,0,0,0,0,1
2,155,2544,1610612739,1,1,0,58,0,25,1,8,0,1,0,1,0,0,0,0,0,0,0,1
3,187,2544,1610612739,1,2,11,8,1,11,1,8,1,1,1,1,0,0,0,0,0,0,0,1
4,195,2544,1610612739,1,2,10,22,1,31,1,8,1,1,1,1,0,0,0,0,0,0,0,1


# Feature engineering
Choose the columns to use and drop the remaining columns accordingly.<br>
Add a column player_score = (2 x shot_type='2') + (3 x shot_type='3')<br>
where shot type is either 2 or 3.<br><br>

shot% = count(event_type = 1) / count(event_type), computed per player (event_type = 1 is a made shot, event_type = 0 is a missed shot)

In [None]:
#nba_df.drop(['GRID_TYPE'], inplace=True, axis=1)
#shot_type = 2 or 3

# Preview of the dataset after feature engineering

In [None]:
# Read review2.csv into its own DataFrame and preview the first rows.
nba_r2 = pd.read_csv(filepath_or_buffer="review2.csv")
nba_r2.head(n=5)

Unnamed: 0,GAME_EVENT_ID,PLAYER_ID,TEAM_ID,EVENT_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_RANGE,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG
0,149,2544,1610612739,Missed Shot,3PT Field Goal,Above the Break 3,24+ ft.,1,0
1,153,2544,1610612739,Missed Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,1,0
2,155,2544,1610612739,Missed Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,1,0
3,187,2544,1610612739,Made Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,1,1
4,195,2544,1610612739,Made Shot,2PT Field Goal,Restricted Area,Less Than 8 ft.,1,1


In [None]:
# Reload the saved label-encoded dataset, indexed by player.
# The index column is selected by name rather than by position (previously
# index_col=1), so a change in the saved column order cannot silently turn a
# different column into the index.
nba_fe = pd.read_csv('feature_eng_dataset.csv', index_col='PLAYER_ID')
nba_fe.head()

Unnamed: 0_level_0,GAME_EVENT_ID,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG
PLAYER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2544,149,1610612739,1,1,1,24,0,21,3,3,2,30,26,1,0
2544,153,1610612739,1,1,1,5,0,11,2,6,1,8,0,1,0
2544,155,1610612739,1,1,0,58,0,25,2,6,1,8,0,1,0
2544,187,1610612739,1,2,11,8,1,11,2,6,1,8,1,1,1
2544,195,1610612739,1,2,10,22,1,31,2,6,1,8,1,1,1


# Model

In [None]:
# Split data to predict if the shot was made or missed.
#
# EVENT_TYPE (0 = missed, 1 = made) is the target. SHOT_MADE_FLAG carries the
# same information (it matches EVENT_TYPE in every previewed row), so keeping
# it in X leaks the label into the features and lets the model score a
# meaningless 100%. It is therefore removed together with the target.
# NOTE(review): SHOT_ATTEMPTED_FLAG is 1 in every previewed row; if it is
# constant over the whole file it adds no signal and could be dropped as well
# -- confirm.
X = nba_df.drop(columns=['EVENT_TYPE', 'SHOT_MADE_FLAG'])
y = nba_df['EVENT_TYPE']

# Train and test splitting of data: 70% train / 30% test, with a fixed seed so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Inspect the training features. A bare last expression uses the notebook's
# rich table rendering (with the shape shown underneath) instead of the plain
# text that print() produces.
X_train

      GAME_EVENT_ID  PLAYER_ID     TEAM_ID  TEAM_NAME  PERIOD  ...  2  3  4  5  6
4983              7     200755  1610612755         10       1  ...  1  0  0  0  0
818             440     201142  1610612744          2       3  ...  0  0  0  1  0
1665            167    1628369  1610612738          0       2  ...  0  0  1  0  0
611             635     201142  1610612744          2       4  ...  0  1  0  0  0
3872             82     201569  1610612745          3       1  ...  0  1  0  0  0
...             ...        ...         ...        ...     ...  ... .. .. .. .. ..
4426            330     201942  1610612761         13       3  ...  0  0  0  0  1
466             554       2544  1610612739          1       4  ...  0  0  0  0  1
3092            473     201567  1610612739          1       3  ...  0  1  0  0  0
3772            341    1628378  1610612762         14       3  ...  0  1  0  0  0
860             484     201142  1610612744          2       3  ...  0  1  0  0  0

[3583 rows x 22

In [None]:
# Inspect the training labels. A bare last expression shows the Series as the
# cell's output value instead of routing it through print().
y_train

4983    0
818     0
1665    1
611     1
3872    0
       ..
4426    1
466     1
3092    1
3772    0
860     0
Name: EVENT_TYPE, Length: 3583, dtype: int64


In [None]:
# Predict through Random Forest Classifier.
# A random forest is stochastic (bootstrap samples, random feature subsets);
# fixing random_state makes the fitted model and the reported metrics
# reproducible across runs. 42 matches the seed used for the train/test split.
rfc = RandomForestClassifier(n_estimators=350, random_state=42)
rfc.fit(X_train, y_train)
pred_rfc = rfc.predict(X_test)

# Evaluation metrics

In [None]:
# Print precision, recall and F1 per class for the test-set predictions.
rfc_report = classification_report(y_true=y_test, y_pred=pred_rfc)
print(rfc_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       820
           1       1.00      1.00      1.00       716

    accuracy                           1.00      1536
   macro avg       1.00      1.00      1.00      1536
weighted avg       1.00      1.00      1.00      1536

