In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import time
%matplotlib inline

In [2]:
import nba_py as nba

In [3]:
from nba_py.team import *

In [4]:
rockets = nba.team.TeamYearOverYearSplits(1610612745).by_year()
rockets.head(1)

Unnamed: 0,GROUP_SET,GROUP_VALUE,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,...,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,CFID,CFPARAMS
0,By Year,2017-18,74,60,14,0.811,48.2,39.1,84.5,0.462,...,4,3,11,6,7,8,2,1,210,2017-18


In [4]:
season_team = {}
for team in team_list['TEAM_ID']:
    df = TeamYearOverYearSplits(team,season_type='Playoffs').by_year()
    for index, row in df.iterrows():
        season_data =  season_team.get(row['GROUP_VALUE'])
        if season_data:
            if team not in season_team[row['GROUP_VALUE']]:
                season_team[row['GROUP_VALUE']].append(team)
        else:
            season_team[row['GROUP_VALUE']] = [team]
    time.sleep(2)

In [5]:
def playoff_team(team_id, season):
    if team_id in season_team[season]:
        return 1
    return 0

In [6]:
all_team_data = pd.DataFrame()
for team in team_list['TEAM_ID']:
    team_data = TeamYearOverYearSplits(team,measure_type = 'Advanced').by_year()
    team_data['PLAYOFFS'] = team_data.apply(lambda row: playoff_team(team,row['GROUP_VALUE']),axis=1)
    all_team_data = pd.concat([all_team_data,team_data])
    time.sleep(2)

KeyError: ('2017-18', 'occurred at index 0')

In [14]:
all_team_data = pd.DataFrame()
for team in team_list['TEAM_ID']:
    team_data = TeamYearOverYearSplits(team).by_year()
    team_data['PLAYOFFS'] = team_data.apply(lambda row: playoff_team(team,row['GROUP_VALUE']),axis=1)
    all_team_data = pd.concat([all_team_data,team_data])
    time.sleep(2)

In [5]:
regular_stats = pd.read_csv('all_team_playoffs.csv')
advs_stats = pd.read_csv('all_team_playoffs_adv.csv')

In [6]:
regular_features = regular_stats[['FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
       'PF', 'PFD', 'PTS', 'PLUS_MINUS']]

In [7]:
advs_features = advs_stats[['NET_RATING', 'AST_PCT', 'AST_TO',
       'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'EFG_PCT',
       'TS_PCT', 'PACE', 'PIE']]

In [17]:
reg_adv = pd.concat([regular_features,advs_features],axis = 1)

In [18]:
reg_adv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 33 columns):
FGM           652 non-null float64
FGA           652 non-null float64
FG_PCT        652 non-null float64
FG3M          652 non-null float64
FG3A          652 non-null float64
FG3_PCT       652 non-null float64
FTM           652 non-null float64
FTA           652 non-null float64
FT_PCT        652 non-null float64
OREB          652 non-null float64
DREB          652 non-null float64
REB           652 non-null float64
AST           652 non-null float64
TOV           652 non-null float64
STL           652 non-null float64
BLK           652 non-null float64
BLKA          652 non-null float64
PF            652 non-null float64
PFD           652 non-null float64
PTS           652 non-null float64
PLUS_MINUS    652 non-null float64
NET_RATING    652 non-null float64
AST_PCT       652 non-null float64
AST_TO        652 non-null float64
AST_RATIO     652 non-null float64
OREB_PCT      652 no

In [69]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(advs_features, advs_stats['PLAYOFFS'], test_size=0.30, random_state=69)

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
scaler = StandardScaler()

In [19]:
scaler.fit(reg_adv)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [20]:
scaled_features = scaler.fit_transform(reg_adv)

In [21]:
df_advs_feat = pd.DataFrame(scaled_features,columns = reg_adv.columns)
df_advs_feat.head()

Unnamed: 0,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,...,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,EFG_PCT,TS_PCT,PACE,PIE
0,0.72266,1.142183,-0.213395,2.198562,2.225214,0.58999,-0.958248,-1.406183,1.411584,-1.467248,...,0.159141,0.787408,-1.676275,0.97651,-0.997667,0.166296,1.006109,0.875533,1.846938,-0.816897
1,0.67186,0.897386,-0.029031,1.159376,1.433958,-0.588111,-0.134139,0.199275,-0.86826,-0.784417,...,0.051506,0.534963,-1.092083,1.152907,0.195143,0.243303,0.571482,0.358077,1.64654,0.167763
2,0.925862,0.897386,0.401152,1.631733,1.84757,-0.163994,-1.421809,-1.673759,0.975732,-2.150078,...,1.181674,1.881339,-2.475697,0.623718,-1.660339,-0.295743,1.093034,0.875533,1.511898,0.786693
3,0.67186,0.071196,0.892789,1.678969,1.451942,1.249726,-0.958248,-1.215057,0.808096,-1.876946,...,1.71985,2.38623,-1.768516,0.200367,-1.196468,-0.526762,1.571124,1.392988,0.525568,1.124291
4,0.265456,0.040596,0.401152,1.395555,1.380009,0.448618,-0.700714,-1.023931,0.908678,-1.876946,...,0.751134,1.79719,-1.891504,0.553159,-1.39527,0.166296,1.049572,0.969616,0.719703,0.055231


In [22]:
X = df_advs_feat

In [23]:
y = advs_stats['PLAYOFFS']

In [24]:
X = X.as_matrix()
y = y.as_matrix()

In [34]:
import tensorflow as tf


In [25]:
from sklearn.cross_validation import train_test_split



In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [29]:
import tensorflow.contrib.learn as learn

In [35]:
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_train)



In [36]:
classifier = learn.DNNClassifier(hidden_units=[10, 20, 10], n_classes = 2,feature_columns= feature_columns)#,feature_columns=feature_columns)
classifier.fit(X_train, y_train, steps=200, batch_size=32)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 0, '_keep_checkpoint_max': 5, '_model_dir': '/tmp/tmprrfllij_', '_save_checkpoints_secs': 600, '_evaluation_master': '', '_task_type': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_save_summary_steps': 100, '_is_chief': True, '_num_ps_replicas': 0, '_session_config': None, '_task_id': 0, '_master': '', '_tf_random_seed': None, '_environment': 'local', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f9f89f530f0>, '_save_checkpoints_steps': None}
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled

DNNClassifier(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._BinaryLogisticHead object at 0x7f9f89f8a470>, 'feature_columns': (_RealValuedColumn(column_name='', dimension=33, default_value=None, dtype=tf.float64, normalizer=None),), 'gradient_clip_norm': None, 'optimizer': None, 'activation_fn': <function relu at 0x7f9f917ee158>, 'input_layer_min_slice_size': None, 'embedding_lr_multipliers': None, 'hidden_units': [10, 20, 10], 'dropout': None})

In [40]:
pred = classifier.predict(X_test,as_iterable=False)

Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
Instructions for updating:
Please switch to predict_classes, or set `outputs` argument.
Instructions for updating:
The default behavior of predict() is changing. The default value for
as_iterable will change to True, and then the flag will be removed
altogether. The behavior of this flag is described below.
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available

In [38]:
from sklearn.metrics import classification_report,confusion_matrix

In [41]:
print(classification_report(y_test,pred))

             precision    recall  f1-score   support

          0       0.87      0.90      0.89        94
          1       0.91      0.87      0.89       102

avg / total       0.89      0.89      0.89       196



In [63]:
from sklearn.ensemble import RandomForestClassifier

In [64]:
rfc = RandomForestClassifier(n_estimators = 600)

In [70]:
rfc.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=600, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [71]:
rfc_pred = rfc.predict(X_test)

In [72]:
#rfc with advanced stats
print(classification_report(y_test,rfc_pred))

             precision    recall  f1-score   support

          0       0.87      0.89      0.88        87
          1       0.91      0.89      0.90       109

avg / total       0.89      0.89      0.89       196



In [68]:
#rfc with regular stats
print(classification_report(y_test,rfc_pred))

             precision    recall  f1-score   support

          0       0.85      0.89      0.87        87
          1       0.90      0.87      0.89       109

avg / total       0.88      0.88      0.88       196



In [53]:
from sklearn.tree import DecisionTreeClassifier

In [59]:
dtree = DecisionTreeClassifier()

In [60]:
dtree.fit(X_train,y_train)


DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')

In [61]:
dtree_pred = dtree.predict(X_test)

In [57]:
#dtree with advanced stats
print(classification_report(y_test,dtree_pred))

             precision    recall  f1-score   support

          0       0.82      0.89      0.85        87
          1       0.90      0.84      0.87       109

avg / total       0.87      0.86      0.86       196



In [62]:
#dtree with reuglar stats
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.84      0.90      0.87        87
          1       0.91      0.86      0.89       109

avg / total       0.88      0.88      0.88       196



In [40]:
from sklearn.svm import SVC

In [47]:
svc_model = SVC()

In [48]:
svc_model.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [49]:
predictions = svc_model.predict(X_test)

In [51]:
#classification report for support vector machine

In [50]:
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.84      0.90      0.87        87
          1       0.91      0.86      0.89       109

avg / total       0.88      0.88      0.88       196



In [11]:
from sklearn.linear_model import LogisticRegression
regular_model = LogisticRegression()
regular_model.fit(X_train, y_train)
predictions = regular_model.predict(X_test)
from sklearn.metrics import classification_report
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.86      0.90      0.88        87
          1       0.91      0.88      0.90       109

avg / total       0.89      0.89      0.89       196



In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(advs_features, advs_stats['PLAYOFFS'], test_size=0.30, random_state=300)

In [13]:
adv_model = LogisticRegression()
adv_model.fit(X_train, y_train)
predictions = adv_model.predict(X_test)
from sklearn.metrics import classification_report
print(classification_report(y_test,predictions))

             precision    recall  f1-score   support

          0       0.87      0.95      0.91        96
          1       0.95      0.86      0.90       100

avg / total       0.91      0.90      0.90       196



In [14]:
test_team =  TeamYearOverYearSplits(1610612740).by_year()[['FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
       'PF', 'PFD', 'PTS', 'PLUS_MINUS']]
logmodel.predict(test_team.head(1))

NameError: name 'logmodel' is not defined

In [15]:
current_predictions_norm = {}
current_predictions_adv = {}
for index, row in team_list.iterrows():
    current = TeamYearOverYearSplits(row['TEAM_ID']).by_year()[['FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
       'PF', 'PFD', 'PTS', 'PLUS_MINUS']]
    current_adv = TeamYearOverYearSplits(row['TEAM_ID'],measure_type = 'Advanced').by_year()[['NET_RATING', 'AST_PCT', 'AST_TO',
       'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'EFG_PCT',
       'TS_PCT', 'PACE', 'PIE']]
    current_predictions_norm[row['ABBREVIATION']] = regular_model.predict(current.head(1))
    current_predictions_adv[row['ABBREVIATION']] = adv_model.predict(current_adv.head(1))

In [16]:
for norm, adv in zip(current_predictions_norm.keys(),current_predictions_adv.keys()):
    print("Normal Prediciton: "+norm+" "+str(current_predictions_norm[norm]))
    print("Advanced Prediciton: "+adv+" "+str(current_predictions_adv[adv]))
    print("------------------------------")

Normal Prediciton: ATL [0]
Advanced Prediciton: ATL [0]
------------------------------
Normal Prediciton: BOS [1]
Advanced Prediciton: BOS [1]
------------------------------
Normal Prediciton: CLE [0]
Advanced Prediciton: CLE [1]
------------------------------
Normal Prediciton: NOP [0]
Advanced Prediciton: NOP [1]
------------------------------
Normal Prediciton: CHI [0]
Advanced Prediciton: CHI [0]
------------------------------
Normal Prediciton: DAL [0]
Advanced Prediciton: DAL [0]
------------------------------
Normal Prediciton: DEN [0]
Advanced Prediciton: DEN [1]
------------------------------
Normal Prediciton: GSW [1]
Advanced Prediciton: GSW [1]
------------------------------
Normal Prediciton: HOU [1]
Advanced Prediciton: HOU [1]
------------------------------
Normal Prediciton: LAC [0]
Advanced Prediciton: LAC [1]
------------------------------
Normal Prediciton: LAL [0]
Advanced Prediciton: LAL [0]
------------------------------
Normal Prediciton: MIA [0]
Advanced Predici