In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
warnings.filterwarnings('ignore')

In [34]:
pd.set_option('display.max_columns', None)
np.random.seed(100)

In [3]:
scores = pd.read_csv('2012-18_officialBoxScore.csv')
player_scores = pd.read_csv('2012-18_playerBoxScore.csv')
standings = pd.read_csv('2012-18_standings.csv')
team_scores = pd.read_csv('2012-18_teamBoxScore.csv')
college = pd.read_csv('college.csv')

### Predicting whether a team will make the playoffs next season based on past 3 seasons.
#### 1. Data Cleaning

In [8]:
scores_cpy = scores.copy()
# remove all the opponent stats because they are repeated later
scores_cpy = scores_cpy.iloc[:,:61]

#remove unnecessary stats
scores_cpy.drop(columns=['gmTime', 'seasTyp', 'offLNm', 'offFNm', 'teamConf', 'teamDiv', 'teamLoc', 'teamMin', 'teamDayOff'], inplace=True)

# remove duplicate rows
scores_cpy.drop_duplicates(inplace=True)

# adjust the date to be seasons
scores_cpy['gmDate'] = pd.to_datetime(scores_cpy['gmDate'])
season_range = {'201213': (scores_cpy['gmDate'] >= '2012-10-30') & (scores_cpy['gmDate'] <='2013-04-17'), \
          '201314': (scores_cpy['gmDate'] >= '2013-10-29') & (scores_cpy['gmDate'] <='2014-04-16'), \
          '201415': (scores_cpy['gmDate'] >= '2014-10-28') & (scores_cpy['gmDate'] <='2015-04-15'), \
          '201516': (scores_cpy['gmDate'] >= '2015-10-27') & (scores_cpy['gmDate'] <='2016-04-13'), \
          '201617': (scores_cpy['gmDate'] >= '2016-10-25') & (scores_cpy['gmDate'] <='2017-04-12'), \
          '201718': (scores_cpy['gmDate'] >= '2017-10-17') & (scores_cpy['gmDate'] <='2018-04-11')}

seasons_data = []
for key in season_range.keys():
    curr = scores_cpy.loc[season_range[key]]
    curr['season'] = [key]*len(curr)
    curr.drop(columns=['gmDate'], inplace=True)
    seasons_data.append(curr)
all_seasons = pd.concat(seasons_data)

# one-hot-encoding for Win/Loss to be 1/0. Note that we drop one of these columns because they are linearly dependent
win = pd.get_dummies(all_seasons['teamRslt'], prefix='Result', drop_first = True)['Result_Win']
all_seasons['Win'] = win
all_seasons.drop(columns='teamRslt', inplace=True)

# # for each season, find the team's average stats 
stats_by_season = all_seasons.groupby(['season', 'teamAbbr']).mean()

stats_by_season

Unnamed: 0_level_0,Unnamed: 1_level_0,teamPTS,teamAST,teamTO,teamSTL,teamBLK,teamPF,teamFGA,teamFGM,teamFG%,team2PA,team2PM,team2P%,team3PA,team3PM,team3P%,teamFTA,teamFTM,teamFT%,teamORB,teamDRB,teamTRB,teamPTS1,teamPTS2,teamPTS3,teamPTS4,teamPTS5,teamPTS6,teamPTS7,teamPTS8,teamTREB%,teamASST%,teamTS%,teamEFG%,teamOREB%,teamDREB%,teamTO%,teamSTL%,teamBLK%,teamBLKR,teamPPS,teamFIC,teamFIC40,teamOrtg,teamDrtg,teamEDiff,teamPlay%,teamAR,teamAST/TO,teamSTL/TO,Win
season,teamAbbr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
201213,ATL,97.951220,24.475610,14.865854,8.097561,4.500000,17.963415,81.024390,37.609756,0.465584,57.841463,29.000000,0.504110,23.182927,8.609756,0.373288,19.743902,14.121951,0.708479,9.243902,31.621951,40.865854,23.841463,24.768293,24.975610,23.402439,0.548780,0.414634,0.000000,0.0,48.451795,64.917021,0.546949,0.519068,22.058495,74.082923,14.275735,8.632868,4.807234,7.865539,1.213796,75.964939,62.702237,104.790709,104.424877,0.365832,0.434633,18.867440,1.779402,58.132051,0.536585
201213,BKN,96.878049,20.341463,14.707317,7.304878,4.768293,18.292683,79.804878,35.878049,0.449987,58.341463,28.219512,0.484071,21.463415,7.658537,0.356373,23.878049,17.463415,0.732144,12.768293,30.000000,42.768293,26.158537,24.109756,22.731707,23.000000,0.658537,0.219512,0.000000,0.0,52.145094,56.829796,0.537116,0.498278,30.775823,73.941318,14.013863,8.131777,5.334624,8.313509,1.218228,71.899390,59.427166,108.316682,106.349655,1.967027,0.439562,16.164246,1.503246,53.735435,0.597561
201213,BOS,96.518519,22.753086,14.580247,8.234568,4.506173,21.197531,79.740741,37.049383,0.466365,62.580247,30.901235,0.496393,17.160494,6.148148,0.360021,20.962963,16.271605,0.776423,8.074074,31.271605,39.345679,24.481481,24.395062,23.333333,22.493827,1.296296,0.382716,0.135802,0.0,47.419702,61.464602,0.543958,0.505091,19.849091,73.645579,14.087037,8.759989,4.804304,7.320688,1.216717,70.694444,57.759542,103.199075,103.510258,-0.311183,0.430501,17.931032,1.628542,59.160886,0.506173
201213,CHA,93.426829,19.353659,14.073171,7.195122,5.841463,19.048780,81.085366,34.426829,0.425313,64.024390,28.707317,0.448746,17.060976,5.719512,0.333800,25.121951,18.853659,0.753800,11.182927,29.134146,40.317073,23.195122,23.841463,22.804878,23.000000,0.475610,0.109756,0.000000,0.0,47.837867,55.989220,0.507712,0.460761,25.550113,71.441363,13.248724,7.782578,6.336188,9.213826,1.156089,65.018293,53.846759,101.542535,111.624296,-10.081761,0.410439,15.310763,1.466465,52.215484,0.256098
201213,CHI,93.182927,23.000000,14.280488,7.170732,5.085366,19.719512,81.682927,35.682927,0.438539,66.256098,30.243902,0.458105,15.426829,5.439024,0.350826,21.195122,16.378049,0.774287,12.512195,30.658537,43.170732,23.341463,23.621951,22.695122,22.756098,0.768293,0.000000,0.000000,0.0,51.261163,64.236896,0.513213,0.471784,29.168224,73.886277,13.565863,7.961722,5.660011,7.697627,1.147188,70.594512,58.373360,103.508767,103.155329,0.353438,0.428205,17.872811,1.779382,54.711895,0.548780
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201718,SA,102.731707,22.792683,13.182927,7.670732,5.597561,17.170732,85.341463,39.048780,0.457772,61.231707,30.560976,0.499205,24.109756,8.487805,0.352216,20.914634,16.146341,0.778084,10.329268,33.853659,44.182927,24.670732,25.365854,26.243902,26.280488,0.170732,0.000000,0.000000,0.0,50.954685,58.511374,0.543857,0.507907,23.491973,78.728380,12.224205,8.030850,5.873335,9.283672,1.207244,80.894817,67.230933,107.895820,104.781230,3.114589,0.443034,17.375933,1.917496,63.603366,0.573171
201718,SAC,98.829268,21.573171,13.743902,7.890244,4.121951,19.987805,86.109756,38.768293,0.450896,62.121951,29.768293,0.480474,23.987805,9.000000,0.376822,16.719512,12.292683,0.740568,9.475610,31.414634,40.890244,24.280488,25.792683,23.463415,24.780488,0.512195,0.000000,0.000000,0.0,48.352155,55.702001,0.529893,0.503449,21.451338,77.802732,12.845710,8.234426,4.326435,6.655888,1.150477,70.861280,58.779601,103.797620,111.183345,-7.385726,0.428989,16.701616,1.746960,60.881112,0.329268
201718,TOR,111.634146,24.329268,13.390244,7.646341,6.109756,21.743902,87.414634,41.256098,0.472657,54.414634,29.451220,0.544145,33.000000,11.804878,0.357084,21.829268,17.341463,0.787968,9.780488,34.207317,43.987805,28.219512,28.609756,27.865854,26.231707,0.731707,0.000000,0.000000,0.0,50.978382,58.846857,0.576418,0.540548,22.928709,77.887948,12.135126,7.795849,6.244302,11.352740,1.282055,87.146341,72.137574,113.849795,105.904841,7.944954,0.453299,17.967917,1.988518,60.913710,0.719512
201718,UTA,104.146341,22.426829,14.743902,8.646341,5.134146,19.609756,82.878049,38.280488,0.462518,53.304878,27.463415,0.518249,29.573171,10.817073,0.366495,21.536585,16.768293,0.783278,9.012195,34.231707,43.243902,25.256098,25.573171,26.158537,26.756098,0.402439,0.000000,0.000000,0.0,50.916524,58.396012,0.564794,0.527852,21.509278,79.787250,13.796079,8.993646,5.342322,9.823523,1.260179,80.256098,66.641785,108.424968,103.943649,4.481320,0.431965,17.203739,1.652233,64.520802,0.585366


In [9]:
# clean the standings table to work out play-off booleans
# first make dates in date format and then separate by seasons
standings['stDate'] = pd.to_datetime(standings['stDate'])
season_range = {'201213': (standings['stDate'] >= '2012-10-30') & (standings['stDate'] <='2013-04-17'), \
          '201314': (standings['stDate'] >= '2013-10-29') & (standings['stDate'] <='2014-04-16'), \
          '201415': (standings['stDate'] >= '2014-10-28') & (standings['stDate'] <='2015-04-15'), \
          '201516': (standings['stDate'] >= '2015-10-27') & (standings['stDate'] <='2016-04-13'), \
          '201617': (standings['stDate'] >= '2016-10-25') & (standings['stDate'] <='2017-04-12'), \
          '201718': (standings['stDate'] >= '2017-10-17') & (standings['stDate'] <='2018-04-11')}

standings_data = []
for key in season_range.keys():
    curr = standings.loc[season_range[key]]
    curr['season'] = [key]*len(curr)
    standings_data.append(curr)
all_standings = pd.concat(standings_data)

# find standings at end of the season
end_dates = ['2013-04-17', '2014-04-16', '2015-04-15', '2016-04-13', '2017-04-12', '2018-04-11']
all_standings = all_standings[all_standings['stDate'].isin(end_dates)]
all_standings.drop(columns='stDate', inplace=True)
rankings = all_standings[['teamAbbr', 'rank', 'season']]
rankings = rankings.groupby(['season', 'teamAbbr']).mean()
rankings

Unnamed: 0_level_0,Unnamed: 1_level_0,rank
season,teamAbbr,Unnamed: 2_level_1
201213,ATL,6
201213,BKN,4
201213,BOS,7
201213,CHA,14
201213,CHI,5
...,...,...
201718,SA,7
201718,SAC,12
201718,TOR,1
201718,UTA,4


In [10]:
# join the rankings and stats_by_season so we know the standings of teams at end of each season
all_data = rankings.join(stats_by_season, how='inner')

# add a boolean value of whether they make playoffs
all_data['playoffs'] = (all_data['rank'] <= 8).astype(int)
all_data.drop(columns='rank', inplace=True)
all_data

Unnamed: 0_level_0,Unnamed: 1_level_0,teamPTS,teamAST,teamTO,teamSTL,teamBLK,teamPF,teamFGA,teamFGM,teamFG%,team2PA,team2PM,team2P%,team3PA,team3PM,team3P%,teamFTA,teamFTM,teamFT%,teamORB,teamDRB,teamTRB,teamPTS1,teamPTS2,teamPTS3,teamPTS4,teamPTS5,teamPTS6,teamPTS7,teamPTS8,teamTREB%,teamASST%,teamTS%,teamEFG%,teamOREB%,teamDREB%,teamTO%,teamSTL%,teamBLK%,teamBLKR,teamPPS,teamFIC,teamFIC40,teamOrtg,teamDrtg,teamEDiff,teamPlay%,teamAR,teamAST/TO,teamSTL/TO,Win,playoffs
season,teamAbbr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
201213,ATL,97.951220,24.475610,14.865854,8.097561,4.500000,17.963415,81.024390,37.609756,0.465584,57.841463,29.000000,0.504110,23.182927,8.609756,0.373288,19.743902,14.121951,0.708479,9.243902,31.621951,40.865854,23.841463,24.768293,24.975610,23.402439,0.548780,0.414634,0.000000,0.0,48.451795,64.917021,0.546949,0.519068,22.058495,74.082923,14.275735,8.632868,4.807234,7.865539,1.213796,75.964939,62.702237,104.790709,104.424877,0.365832,0.434633,18.867440,1.779402,58.132051,0.536585,1
201213,BKN,96.878049,20.341463,14.707317,7.304878,4.768293,18.292683,79.804878,35.878049,0.449987,58.341463,28.219512,0.484071,21.463415,7.658537,0.356373,23.878049,17.463415,0.732144,12.768293,30.000000,42.768293,26.158537,24.109756,22.731707,23.000000,0.658537,0.219512,0.000000,0.0,52.145094,56.829796,0.537116,0.498278,30.775823,73.941318,14.013863,8.131777,5.334624,8.313509,1.218228,71.899390,59.427166,108.316682,106.349655,1.967027,0.439562,16.164246,1.503246,53.735435,0.597561,1
201213,BOS,96.518519,22.753086,14.580247,8.234568,4.506173,21.197531,79.740741,37.049383,0.466365,62.580247,30.901235,0.496393,17.160494,6.148148,0.360021,20.962963,16.271605,0.776423,8.074074,31.271605,39.345679,24.481481,24.395062,23.333333,22.493827,1.296296,0.382716,0.135802,0.0,47.419702,61.464602,0.543958,0.505091,19.849091,73.645579,14.087037,8.759989,4.804304,7.320688,1.216717,70.694444,57.759542,103.199075,103.510258,-0.311183,0.430501,17.931032,1.628542,59.160886,0.506173,1
201213,CHA,93.426829,19.353659,14.073171,7.195122,5.841463,19.048780,81.085366,34.426829,0.425313,64.024390,28.707317,0.448746,17.060976,5.719512,0.333800,25.121951,18.853659,0.753800,11.182927,29.134146,40.317073,23.195122,23.841463,22.804878,23.000000,0.475610,0.109756,0.000000,0.0,47.837867,55.989220,0.507712,0.460761,25.550113,71.441363,13.248724,7.782578,6.336188,9.213826,1.156089,65.018293,53.846759,101.542535,111.624296,-10.081761,0.410439,15.310763,1.466465,52.215484,0.256098,0
201213,CHI,93.182927,23.000000,14.280488,7.170732,5.085366,19.719512,81.682927,35.682927,0.438539,66.256098,30.243902,0.458105,15.426829,5.439024,0.350826,21.195122,16.378049,0.774287,12.512195,30.658537,43.170732,23.341463,23.621951,22.695122,22.756098,0.768293,0.000000,0.000000,0.0,51.261163,64.236896,0.513213,0.471784,29.168224,73.886277,13.565863,7.961722,5.660011,7.697627,1.147188,70.594512,58.373360,103.508767,103.155329,0.353438,0.428205,17.872811,1.779382,54.711895,0.548780,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201718,SA,102.731707,22.792683,13.182927,7.670732,5.597561,17.170732,85.341463,39.048780,0.457772,61.231707,30.560976,0.499205,24.109756,8.487805,0.352216,20.914634,16.146341,0.778084,10.329268,33.853659,44.182927,24.670732,25.365854,26.243902,26.280488,0.170732,0.000000,0.000000,0.0,50.954685,58.511374,0.543857,0.507907,23.491973,78.728380,12.224205,8.030850,5.873335,9.283672,1.207244,80.894817,67.230933,107.895820,104.781230,3.114589,0.443034,17.375933,1.917496,63.603366,0.573171,1
201718,SAC,98.829268,21.573171,13.743902,7.890244,4.121951,19.987805,86.109756,38.768293,0.450896,62.121951,29.768293,0.480474,23.987805,9.000000,0.376822,16.719512,12.292683,0.740568,9.475610,31.414634,40.890244,24.280488,25.792683,23.463415,24.780488,0.512195,0.000000,0.000000,0.0,48.352155,55.702001,0.529893,0.503449,21.451338,77.802732,12.845710,8.234426,4.326435,6.655888,1.150477,70.861280,58.779601,103.797620,111.183345,-7.385726,0.428989,16.701616,1.746960,60.881112,0.329268,0
201718,TOR,111.634146,24.329268,13.390244,7.646341,6.109756,21.743902,87.414634,41.256098,0.472657,54.414634,29.451220,0.544145,33.000000,11.804878,0.357084,21.829268,17.341463,0.787968,9.780488,34.207317,43.987805,28.219512,28.609756,27.865854,26.231707,0.731707,0.000000,0.000000,0.0,50.978382,58.846857,0.576418,0.540548,22.928709,77.887948,12.135126,7.795849,6.244302,11.352740,1.282055,87.146341,72.137574,113.849795,105.904841,7.944954,0.453299,17.967917,1.988518,60.913710,0.719512,1
201718,UTA,104.146341,22.426829,14.743902,8.646341,5.134146,19.609756,82.878049,38.280488,0.462518,53.304878,27.463415,0.518249,29.573171,10.817073,0.366495,21.536585,16.768293,0.783278,9.012195,34.231707,43.243902,25.256098,25.573171,26.158537,26.756098,0.402439,0.000000,0.000000,0.0,50.916524,58.396012,0.564794,0.527852,21.509278,79.787250,13.796079,8.993646,5.342322,9.823523,1.260179,80.256098,66.641785,108.424968,103.943649,4.481320,0.431965,17.203739,1.652233,64.520802,0.585366,1


In [11]:
# create a new table which has the mean of past 3 seasons and whether the team makes playoffs next season. 
# Note the input into 'rolling' is the # seasons for our moving average window
mov_avg_data = all_data.groupby('teamAbbr').rolling(3).mean()

# remove the NaN rows which are because first 2 seasons for MA don't exist 
mov_avg_data = mov_avg_data.dropna()
mov_avg_data.index = mov_avg_data.index.droplevel(2)
mov_avg_data

Unnamed: 0_level_0,Unnamed: 1_level_0,teamPTS,teamAST,teamTO,teamSTL,teamBLK,teamPF,teamFGA,teamFGM,teamFG%,team2PA,team2PM,team2P%,team3PA,team3PM,team3P%,teamFTA,teamFTM,teamFT%,teamORB,teamDRB,teamTRB,teamPTS1,teamPTS2,teamPTS3,teamPTS4,teamPTS5,teamPTS6,teamPTS7,teamPTS8,teamTREB%,teamASST%,teamTS%,teamEFG%,teamOREB%,teamDREB%,teamTO%,teamSTL%,teamBLK%,teamBLKR,teamPPS,teamFIC,teamFIC40,teamOrtg,teamDrtg,teamEDiff,teamPlay%,teamAR,teamAST/TO,teamSTL/TO,Win,playoffs
teamAbbr,season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
ATL,201415,100.500000,25.036585,14.788618,8.487805,4.365854,18.321138,81.426829,37.666667,0.463769,56.349593,28.349593,0.505089,25.077236,9.317073,0.371079,20.878049,15.849593,0.755977,8.886179,31.585366,40.471545,24.491870,25.235772,25.333333,24.686992,0.500000,0.252033,0.000000,0.00000,48.188620,66.410053,0.555757,0.521281,21.356288,74.077303,14.049302,8.971111,4.634526,7.812651,1.239328,78.116870,64.687902,106.606363,104.685198,1.921165,0.431760,19.106584,1.819415,60.992121,0.577236,1.000000
ATL,201516,102.130081,25.414634,14.821138,8.833333,4.849593,18.715447,82.560976,38.008130,0.461434,55.752033,28.247967,0.508332,26.808943,9.760163,0.363832,20.955285,16.353659,0.779136,8.560976,32.313008,40.873984,24.825203,25.247967,25.808943,25.613821,0.504065,0.130081,0.000000,0.00000,47.868110,66.774324,0.557661,0.520825,20.238078,74.333095,13.901768,9.196565,5.065612,8.791237,1.242398,80.065549,66.363096,106.721125,103.683096,3.038029,0.428327,19.173161,1.838486,63.002015,0.593496,1.000000
ATL,201617,102.849593,24.995935,14.995935,8.792683,5.150407,18.365854,83.500000,38.260163,0.459267,56.605691,28.658537,0.508111,26.894309,9.601626,0.357689,22.000000,16.727642,0.760153,9.097561,33.252033,42.349593,25.166667,25.260163,26.032520,25.707317,0.512195,0.093496,0.028455,0.04878,48.696192,65.175983,0.553348,0.517164,21.160445,74.931963,13.856358,9.080198,5.323939,9.252174,1.237152,80.771341,66.895622,106.392763,103.494349,2.898414,0.428418,18.703422,1.796088,62.217076,0.613821,1.000000
ATL,201718,103.117886,24.329268,15.434959,8.361789,5.024390,18.971545,84.784553,38.296748,0.452790,56.296748,28.292683,0.505010,28.487805,10.004065,0.352122,21.670732,16.520325,0.763680,9.203252,33.581301,42.784553,25.186992,24.865854,26.142276,26.288618,0.500000,0.056911,0.028455,0.04878,48.815200,63.409703,0.548108,0.512171,21.018090,75.862508,14.075510,8.504422,5.127191,9.075237,1.221609,78.586890,65.080883,105.074085,105.971109,-0.897024,0.421070,18.086470,1.688955,56.928043,0.467480,0.666667
BKN,201415,97.808943,20.723577,14.353659,7.642276,4.235772,19.739837,80.239837,36.349593,0.453937,58.634146,28.715447,0.491897,21.605691,7.634146,0.348384,23.483740,17.475610,0.744882,10.626016,30.463415,41.089431,25.516260,24.174797,23.695122,23.508130,0.670732,0.170732,0.073171,0.00000,49.357599,56.954081,0.541335,0.502064,25.228578,73.498312,13.683384,8.304664,4.623982,7.398015,1.224635,70.674289,58.302666,106.460412,107.211906,-0.751494,0.433351,16.404249,1.577451,57.527848,0.532520,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UTA,201718,100.845528,20.508130,14.443089,7.678862,5.085366,19.544715,80.890244,37.109756,0.459954,54.434959,27.467480,0.507830,26.455285,9.642276,0.364798,22.463415,16.983740,0.758240,9.719512,33.532520,43.252033,24.666667,24.239837,25.166667,26.085366,0.642276,0.044715,0.000000,0.00000,51.567526,55.113961,0.556898,0.519803,23.337972,78.900956,13.736399,8.192801,5.458224,9.560817,1.252448,75.679878,62.644727,108.027826,104.452616,3.575209,0.433813,16.221009,1.540457,57.211409,0.565041,0.666667
WAS,201415,97.471545,22.979675,14.939024,7.589431,4.597561,20.581301,82.939024,37.504065,0.453757,64.317073,30.638211,0.478028,18.621951,6.865854,0.368063,21.215447,15.597561,0.736546,10.715447,32.626016,43.341463,25.447154,24.268293,23.617886,22.914634,0.939024,0.219512,0.065041,0.00000,50.441454,61.152800,0.529491,0.495253,24.780254,76.106415,13.925728,8.017634,4.874351,7.222767,1.180857,72.433435,59.566645,103.363520,103.585934,-0.222413,0.430875,17.582518,1.652164,54.319748,0.483740,0.666667
WAS,201516,101.089431,23.914634,14.731707,8.040650,4.378049,20.691057,84.321138,38.837398,0.462157,63.719512,31.304878,0.493264,20.601626,7.532520,0.365316,21.634146,15.882114,0.734313,10.130081,32.772358,42.902439,26.459350,25.284553,24.467480,23.776423,0.849593,0.186992,0.065041,0.00000,50.144825,61.460615,0.540036,0.507044,23.464023,77.064841,13.543301,8.331737,4.558116,6.914148,1.205014,75.701220,62.359477,105.115180,104.537658,0.577522,0.437559,17.996868,1.752106,58.422892,0.532520,0.666667
WAS,201617,103.930894,24.105691,14.548780,8.150407,4.243902,20.975610,85.191057,39.695122,0.467349,63.260163,31.719512,0.503422,21.930894,7.975610,0.364363,22.028455,16.565041,0.753103,9.947154,33.162602,43.109756,26.963415,26.268293,25.093496,24.776423,0.666667,0.158537,0.000000,0.00000,50.070125,60.702212,0.549044,0.514310,23.023773,77.033969,13.279896,8.357264,4.370491,6.772703,1.225540,78.059451,64.482032,106.862740,106.075753,0.786987,0.442778,18.001309,1.788562,59.736596,0.552846,0.666667


In [18]:
# add a column of whether the team made playoffs in the next season
# 2018-19 season playoffs data which is manually inputted from wikipedia
season_1819 = pd.DataFrame()
season_1819['teamAbbr'] = rankings.index.get_level_values('teamAbbr').unique().tolist()
season_1819['playoffs'] = [0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0]
season_1819['season'] = ['201819'] * len(season_1819)
seasons_playoff = all_data['playoffs'].reset_index()
seasons_playoff.append(season_1819)

# remove the 2012-13 season data
seasons_playoff = seasons_playoff[seasons_playoff['season'] != '201213']
seasons_playoff.columns = ['next_season', 'teamAbbr', 'next_playoffs']

In [30]:
# using a dictionary of dates, have a column of the next season
dates_dict = {'201213': '201314', \
             '201314': '201415', \
             '201415': '201516', \
             '201516': '201617', \
             '201617': '201718', \
             '201718': '201819'}
mov_avg_data_new = mov_avg_data.reset_index().copy()
mov_avg_data_new.drop(columns='index', inplace=True)
mov_avg_data_new['next_season'] = mov_avg_data_new['season'].map(dates_dict)
mov_avg_data_new= mov_avg_data_new.merge(seasons_playoff, how='inner', on=['teamAbbr', 'next_season'])
mov_avg_data_new

Unnamed: 0,teamAbbr,season,teamPTS,teamAST,teamTO,teamSTL,teamBLK,teamPF,teamFGA,teamFGM,teamFG%,team2PA,team2PM,team2P%,team3PA,team3PM,team3P%,teamFTA,teamFTM,teamFT%,teamORB,teamDRB,teamTRB,teamPTS1,teamPTS2,teamPTS3,teamPTS4,teamPTS5,teamPTS6,teamPTS7,teamPTS8,teamTREB%,teamASST%,teamTS%,teamEFG%,teamOREB%,teamDREB%,teamTO%,teamSTL%,teamBLK%,teamBLKR,teamPPS,teamFIC,teamFIC40,teamOrtg,teamDrtg,teamEDiff,teamPlay%,teamAR,teamAST/TO,teamSTL/TO,Win,playoffs,next_season,next_playoffs
0,ATL,201415,100.500000,25.036585,14.788618,8.487805,4.365854,18.321138,81.426829,37.666667,0.463769,56.349593,28.349593,0.505089,25.077236,9.317073,0.371079,20.878049,15.849593,0.755977,8.886179,31.585366,40.471545,24.491870,25.235772,25.333333,24.686992,0.500000,0.252033,0.000000,0.00000,48.188620,66.410053,0.555757,0.521281,21.356288,74.077303,14.049302,8.971111,4.634526,7.812651,1.239328,78.116870,64.687902,106.606363,104.685198,1.921165,0.431760,19.106584,1.819415,60.992121,0.577236,1.000000,201516,1
1,ATL,201516,102.130081,25.414634,14.821138,8.833333,4.849593,18.715447,82.560976,38.008130,0.461434,55.752033,28.247967,0.508332,26.808943,9.760163,0.363832,20.955285,16.353659,0.779136,8.560976,32.313008,40.873984,24.825203,25.247967,25.808943,25.613821,0.504065,0.130081,0.000000,0.00000,47.868110,66.774324,0.557661,0.520825,20.238078,74.333095,13.901768,9.196565,5.065612,8.791237,1.242398,80.065549,66.363096,106.721125,103.683096,3.038029,0.428327,19.173161,1.838486,63.002015,0.593496,1.000000,201617,1
2,ATL,201617,102.849593,24.995935,14.995935,8.792683,5.150407,18.365854,83.500000,38.260163,0.459267,56.605691,28.658537,0.508111,26.894309,9.601626,0.357689,22.000000,16.727642,0.760153,9.097561,33.252033,42.349593,25.166667,25.260163,26.032520,25.707317,0.512195,0.093496,0.028455,0.04878,48.696192,65.175983,0.553348,0.517164,21.160445,74.931963,13.856358,9.080198,5.323939,9.252174,1.237152,80.771341,66.895622,106.392763,103.494349,2.898414,0.428418,18.703422,1.796088,62.217076,0.613821,1.000000,201718,0
3,BKN,201415,97.808943,20.723577,14.353659,7.642276,4.235772,19.739837,80.239837,36.349593,0.453937,58.634146,28.715447,0.491897,21.605691,7.634146,0.348384,23.483740,17.475610,0.744882,10.626016,30.463415,41.089431,25.516260,24.174797,23.695122,23.508130,0.670732,0.170732,0.073171,0.00000,49.357599,56.954081,0.541335,0.502064,25.228578,73.498312,13.683384,8.304664,4.623982,7.398015,1.224635,70.674289,58.302666,106.460412,107.211906,-0.751494,0.433351,16.404249,1.577451,57.527848,0.532520,1.000000,201516,0
4,BKN,201516,98.398374,21.378049,14.378049,7.756098,3.991870,19.642276,81.772358,37.138211,0.455265,61.186992,29.898374,0.491169,20.585366,7.239837,0.345616,22.430894,16.882114,0.751526,9.882114,31.089431,40.971545,24.727642,24.630081,24.308943,24.024390,0.536585,0.097561,0.073171,0.00000,48.447137,57.528462,0.538409,0.500176,22.901692,74.233650,13.571777,8.245929,4.252278,6.677433,1.209679,70.783537,58.471814,104.737520,108.744433,-4.006913,0.430626,16.698661,1.627219,58.415744,0.418699,0.666667,201617,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,UTA,201516,95.943089,19.719512,14.955285,7.418699,5.182927,20.069106,80.231707,35.804878,0.447391,58.638211,28.296748,0.484504,21.593496,7.508130,0.348175,22.833333,16.825203,0.738556,11.264228,31.540650,42.804878,23.195122,22.991870,24.784553,24.333333,0.524390,0.113821,0.000000,0.00000,51.337535,55.250324,0.532833,0.494565,26.557861,76.313117,14.215226,8.068525,5.667518,9.065009,1.202093,69.457825,57.496533,104.907766,106.760924,-1.853158,0.427062,15.722502,1.416904,52.355134,0.418699,0.000000,201617,1
86,UTA,201617,97.841463,19.666667,14.634146,7.333333,5.361789,19.443089,79.654472,36.138211,0.454952,55.817073,27.621951,0.497463,23.837398,8.516260,0.357326,23.126016,17.048780,0.738105,10.727642,32.760163,43.487805,23.934959,23.199187,24.898374,25.223577,0.540650,0.044715,0.000000,0.00000,52.200520,54.522209,0.546316,0.508869,25.774358,77.776685,14.013526,7.974394,5.874563,9.804637,1.235097,72.732215,60.241610,106.957484,104.790238,2.167246,0.432887,15.791681,1.447084,52.973161,0.524390,0.333333,201718,1
87,WAS,201415,97.471545,22.979675,14.939024,7.589431,4.597561,20.581301,82.939024,37.504065,0.453757,64.317073,30.638211,0.478028,18.621951,6.865854,0.368063,21.215447,15.597561,0.736546,10.715447,32.626016,43.341463,25.447154,24.268293,23.617886,22.914634,0.939024,0.219512,0.065041,0.00000,50.441454,61.152800,0.529491,0.495253,24.780254,76.106415,13.925728,8.017634,4.874351,7.222767,1.180857,72.433435,59.566645,103.363520,103.585934,-0.222413,0.430875,17.582518,1.652164,54.319748,0.483740,0.666667,201516,0
88,WAS,201516,101.089431,23.914634,14.731707,8.040650,4.378049,20.691057,84.321138,38.837398,0.462157,63.719512,31.304878,0.493264,20.601626,7.532520,0.365316,21.634146,15.882114,0.734313,10.130081,32.772358,42.902439,26.459350,25.284553,24.467480,23.776423,0.849593,0.186992,0.065041,0.00000,50.144825,61.460615,0.540036,0.507044,23.464023,77.064841,13.543301,8.331737,4.558116,6.914148,1.205014,75.701220,62.359477,105.115180,104.537658,0.577522,0.437559,17.996868,1.752106,58.422892,0.532520,0.666667,201617,1


In [36]:
# Now we have all our training data and labels
training_data = mov_avg_data_new.drop(columns = ['teamAbbr', 'season', 'next_season'])

# shuffle all the training data 
training_data = training_data.sample(frac=1).reset_index(drop=True)

train_x = training_data.drop(columns = 'next_playoffs')
train_y = training_data['next_playoffs']

#### 2. Do cross-validation on logistic regression

In [44]:
model = LogisticRegression()
score = np.mean(cross_val_score(model, train_x, train_y, cv=5))
print('The cross validation accuracy is: ', score)

The cross validation accuracy is:  0.7444444444444445


TODOS:
- visualisations
- seeing if better than baseline (predicting next yr from this year if they're in playoffs)