In [6]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import time
from nba_py.team import TeamClutchSplits
from nba_py.team import TeamYearOverYearSplits, TeamList
%matplotlib inline

In [2]:
# Fetch the team listing and keep only its first 30 rows.
# NOTE(review): presumably these are the 30 current NBA franchises; confirm
# that the listing's ordering guarantees this before relying on it.
team_list = TeamList().info().head(30)

In [3]:
# Build a lookup of season label -> list of team ids that have a playoff
# row for that season in the year-over-year splits.
season_team = {}
for team in team_list['TEAM_ID']:
    df = TeamYearOverYearSplits(team, season_type='Playoffs').by_year()
    for _, row in df.iterrows():
        season_label = row['GROUP_VALUE']
        # setdefault creates the bucket on first sight of a season and
        # otherwise hands back the existing list.
        playoff_teams = season_team.setdefault(season_label, [])
        if team not in playoff_teams:
            playoff_teams.append(team)
    # Pause between requests (one call per team).
    time.sleep(2)

In [9]:
def playoff_team(team_id, season):
    """Return 1 if ``team_id`` is listed under ``season`` in ``season_team``, else 0.

    Looks up the module-level ``season_team`` mapping; a season that is not
    a key of that mapping raises ``KeyError``.
    """
    made_playoffs = team_id in season_team[season]
    return 1 if made_playoffs else 0

In [None]:
# Collect the Base-measure clutch split for every (season, team) pair and
# tag each frame with whether that team is listed as a playoff team for
# that season.
# Frames are gathered in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies the whole accumulated frame on every
# iteration.
regular_frames = []
for season in season_team:
    for team in team_list['TEAM_ID']:
        team_data = TeamClutchSplits(team, season=season, measure_type='Base').last5min_plusminus_5point()
        # The label is constant for the whole frame, so a scalar assignment
        # broadcasts it to every row without a row-wise apply.
        team_data['PLAYOFFS'] = playoff_team(team, season)
        regular_frames.append(team_data)
        # Pause between requests.
        time.sleep(2)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_team_data = pd.concat(regular_frames) if regular_frames else pd.DataFrame()

In [13]:
# Persist the Base-measure clutch frame to disk. to_csv writes the frame's
# index as the first CSV column by default.
all_team_data.to_csv('clutch_stats_regular.csv')

In [14]:
# Collect the Advanced-measure clutch split for every (season, team) pair
# and tag each frame with whether that team is listed as a playoff team for
# that season. Note that this rebinds ``all_team_data``.
# Frames are gathered in a list and concatenated once at the end; calling
# pd.concat inside the loop re-copies the whole accumulated frame on every
# iteration.
advanced_frames = []
for season in season_team:
    for team in team_list['TEAM_ID']:
        team_data = TeamClutchSplits(team, season=season, measure_type='Advanced').last5min_plusminus_5point()
        # The label is constant for the whole frame, so a scalar assignment
        # broadcasts it to every row without a row-wise apply.
        team_data['PLAYOFFS'] = playoff_team(team, season)
        advanced_frames.append(team_data)
        # Pause between requests.
        time.sleep(2)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_team_data = pd.concat(advanced_frames) if advanced_frames else pd.DataFrame()

In [17]:
# Persist the Advanced-measure clutch frame to disk. to_csv writes the
# frame's index as the first CSV column by default.
all_team_data.to_csv('clutch_stats_adv.csv')

In [18]:
# Reload the two CSV snapshots saved by the earlier cells.
# Because they were written with the index included and are read back
# without index_col, the saved index comes back as an extra unnamed column;
# the feature selections below pick columns by name, so it is not used.
regular_stats = pd.read_csv('clutch_stats_regular.csv')
advs_stats = pd.read_csv('clutch_stats_adv.csv')

In [19]:
# Box-score (Base measure) clutch columns used as model inputs.
regular_features = regular_stats.loc[:, [
    'FGM', 'FGA', 'FG_PCT',
    'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT',
    'OREB', 'DREB', 'REB',
    'AST', 'TOV', 'STL', 'BLK', 'BLKA',
    'PF', 'PFD', 'PTS', 'PLUS_MINUS',
]]

# Advanced-measure clutch columns used as model inputs.
advs_features = advs_stats.loc[:, [
    'NET_RATING',
    'AST_PCT', 'AST_TO', 'AST_RATIO',
    'OREB_PCT', 'DREB_PCT', 'REB_PCT',
    'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT',
    'PACE', 'PIE',
]]

In [20]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the Base-measure rows for evaluation; the target is the
# PLAYOFFS column and the split is seeded (random_state=69) so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(regular_features, regular_stats['PLAYOFFS'], test_size=0.30, random_state=69)

In [21]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fit a default logistic regression on the current training split (fit
# returns the estimator itself), then report per-class precision/recall on
# the held-out rows.
regular_model = LogisticRegression().fit(X_train, y_train)
predictions = regular_model.predict(X_test)
print(classification_report(y_test, predictions))

             precision    recall  f1-score   support

          0       0.46      0.24      0.32        99
          1       0.49      0.72      0.58        99

avg / total       0.47      0.48      0.45       198



In [22]:
# Re-split using the Advanced-measure features. This overwrites the
# X_train / X_test / y_train / y_test names produced by the Base-measure
# split, and uses a different seed (300 rather than 69).
X_train, X_test, y_train, y_test = train_test_split(advs_features, advs_stats['PLAYOFFS'], test_size=0.30, random_state=300)

In [23]:
# Same default logistic regression, fitted on whatever X_train / y_train
# currently hold, followed by the hold-out classification report.
adv_model = LogisticRegression().fit(X_train, y_train)
predictions = adv_model.predict(X_test)
print(classification_report(y_test, predictions))

             precision    recall  f1-score   support

          0       0.44      0.38      0.41        90
          1       0.54      0.60      0.57       108

avg / total       0.49      0.50      0.49       198



In [None]:
# Columns kept for each measure type. The names mirror the Base and
# Advanced feature lists used for the models earlier in this notebook.
_CLUTCH_COLUMNS = {
    'Base': [
        'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
        'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
        'PF', 'PFD', 'PTS', 'PLUS_MINUS',
    ],
    'Advanced': [
        'NET_RATING', 'AST_PCT', 'AST_TO',
        'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'EFG_PCT',
        'TS_PCT', 'PACE', 'PIE',
    ],
}


def add_clutch_stats(team_id, season, measure):
    """Fetch a team's clutch split and return its stat columns prefixed with ``c_``.

    Parameters
    ----------
    team_id
        Team identifier forwarded to ``TeamClutchSplits``.
    season
        Season value forwarded to ``TeamClutchSplits``.
    measure
        Measure type forwarded as ``measure_type``. ``'Base'`` selects the
        box-score columns and ``'Advanced'`` selects the advanced-metric
        columns; any other value falls back to the Base column list, which
        is what this function always selected before.

    Returns
    -------
    DataFrame
        The selected columns of the last-5-minutes / plus-minus-5-points
        split, each renamed to ``"c_" + original_name``.

    Raises
    ------
    KeyError
        If the fetched frame lacks one of the selected columns.
    """
    clutch_splits = TeamClutchSplits(team_id, season=season, measure_type=measure).last5min_plusminus_5point()
    # Choose the column set that matches the requested measure; previously
    # the Base list was applied regardless of ``measure``.
    wanted_columns = _CLUTCH_COLUMNS.get(measure, _CLUTCH_COLUMNS['Base'])
    # add_prefix returns a new frame, so the fetched frame is not mutated.
    return clutch_splits[wanted_columns].add_prefix("c_")