# 2018-19 NBA Season

In [2]:
%matplotlib inline

import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

# suppressing warnings can help keep notebook looking clean

import warnings
warnings.simplefilter(action='ignore')

In [3]:
# load in first team's data (Atlanta Hawks)
# sample it for quick examination

hawks = gpd.read_file("../data/1819/1819 - hawks.csv")
hawks.sample(10)

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Home Score,Away Score,geometry
22,7,30/11/2018 17:00,Chesapeake Energy Arena,Oklahoma City Thunder,Atlanta Hawks,124,109,
73,22,23/03/2019 16:30,State Farm Arena,Atlanta Hawks,Philadelphia 76ers,129,127,
3,1,24/10/2018 16:00,State Farm Arena,Atlanta Hawks,Dallas Mavericks,111,104,
75,23,29/03/2019 16:30,State Farm Arena,Atlanta Hawks,Portland Trail Blazers,98,118,
1,1,19/10/2018 17:00,FedEx Forum,Memphis Grizzlies,Atlanta Hawks,131,117,
69,21,16/03/2019 09:30,TD Garden,Boston Celtics,Atlanta Hawks,129,120,
8,3,03/11/2018 16:30,State Farm Arena,Atlanta Hawks,Miami Heat,123,118,
76,23,31/03/2019 09:30,State Farm Arena,Atlanta Hawks,Milwaukee Bucks,136,135,
30,10,21/12/2018 16:30,Madison Square Garden,New York Knicks,Atlanta Hawks,107,114,
70,21,17/03/2019 15:00,Amway Center,Orlando Magic,Atlanta Hawks,101,91,


In [5]:
# start by separating home and away statistics
# make sure there are 41 entries each

hawks_home = hawks[hawks['Home Team'] == 'Atlanta Hawks']
print(hawks_home.count())
hawks_away = hawks[hawks['Away Team'] == 'Atlanta Hawks']
print(hawks_away.count())

Round Number    41
Date            41
Location        41
Home Team       41
Away Team       41
Home Score      41
Away Score      41
geometry         0
dtype: int64
Round Number    41
Date            41
Location        41
Home Team       41
Away Team       41
Home Score      41
Away Score      41
geometry         0
dtype: int64


In [11]:
# calculate the standard statisics for the overall baseline

hawks_avg_h = (hawks_home['Home Score'].astype(int).sum()) / 41
print(hawks_avg_h)
hawks_opp_avg_h = (hawks_home['Away Score'].astype(int).sum()) / 41
print(hawks_opp_avg_h)
hawks_avg_a = (hawks_away['Away Score'].astype(int).sum()) / 41
print(hawks_avg_a)
hawks_opp_avg_a = (hawks_away['Home Score'].astype(int).sum()) / 41
print(hawks_opp_avg_a)

115.78048780487805
119.8048780487805
110.90243902439025
118.92682926829268


In [32]:
# set variables for teams in each time zone
# double check if it adds up to 30 teams
# these variables will be identical and can be reused later for all teams

pacific = ['Golden State Warriors', 'LA Clippers', 'Los Angeles Lakers', 'Portland Trail Blazers',
           'Sacramento Kings']
mountain = ['Denver Nuggets', 'Phoenix Suns', 'Utah Jazz']
central = ['Chicago Bulls', 'Dallas Mavericks', 'Milwaukee Bucks', 'Minnesota Timberwolves', 'New Orleans Pelicans',
          'San Antonio Spurs', 'Houston Rockets', 'Memphis Grizzlies', 'Oklahoma City Thunder']
eastern = ['Atlanta Hawks', 'Boston Celtics', 'Brooklyn Nets', 'Charlotte Hornets', 'Toronto Raptors',
           'Washington Wizards', 'Cleveland Cavaliers', 'Detroit Pistons', 'Indiana Pacers', 'Miami Heat',
          'Orlando Magic', 'Philadelphia 76ers', 'New York Knicks']

print(len(pacific) + len(mountain) + len(central) + len(eastern))

30


In [39]:
# organize the eight subcategories:
# four time zone opponents, each with both home and away
# make sure it adds up to 82 total games

hawks_home_p = hawks_home[hawks_home['Away Team'].isin(pacific)]
hawks_away_p = hawks_away[hawks_away['Home Team'].isin(pacific)]
hawks_home_m = hawks_home[hawks_home['Away Team'].isin(mountain)]
hawks_away_m = hawks_away[hawks_away['Home Team'].isin(mountain)]
hawks_home_c = hawks_home[hawks_home['Away Team'].isin(central)]
hawks_away_c = hawks_away[hawks_away['Home Team'].isin(central)]
hawks_home_e = hawks_home[hawks_home['Away Team'].isin(eastern)]
hawks_away_e = hawks_away[hawks_away['Home Team'].isin(eastern)]

print(hawks_home_p.count())
print(hawks_away_p.count())
print(hawks_home_m.count())
print(hawks_away_m.count())
print(hawks_home_c.count())
print(hawks_away_c.count())
print(hawks_home_e.count())
print(hawks_away_e.count())

Round Number    5
Date            5
Location        5
Home Team       5
Away Team       5
Home Score      5
Away Score      5
geometry        0
dtype: int64
Round Number    5
Date            5
Location        5
Home Team       5
Away Team       5
Home Score      5
Away Score      5
geometry        0
dtype: int64
Round Number    3
Date            3
Location        3
Home Team       3
Away Team       3
Home Score      3
Away Score      3
geometry        0
dtype: int64
Round Number    3
Date            3
Location        3
Home Team       3
Away Team       3
Home Score      3
Away Score      3
geometry        0
dtype: int64
Round Number    11
Date            11
Location        11
Home Team       11
Away Team       11
Home Score      11
Away Score      11
geometry         0
dtype: int64
Round Number    11
Date            11
Location        11
Home Team       11
Away Team       11
Home Score      11
Away Score      11
geometry         0
dtype: int64
Round Number    22
Date            22
Loca

In [47]:
# calculate pacific statistics

hawks_avg_h_p = (hawks_home_p['Home Score'].astype(int).sum()) / 5
print(hawks_avg_h_p)
hawks_opp_avg_h_p = (hawks_home_p['Away Score'].astype(int).sum()) / 5
print(hawks_opp_avg_h_p)
hawks_avg_a_p = (hawks_away_p['Away Score'].astype(int).sum()) / 5
print(hawks_avg_a_p)
hawks_opp_avg_a_p = (hawks_away_p['Home Score'].astype(int).sum()) / 5
print(hawks_opp_avg_a_p)

112.0
126.4
111.2
118.0


In [45]:
# calculate mountain statistics

hawks_avg_h_m = (hawks_home_m['Home Score'].astype(int).sum()) / 3
print(hawks_avg_h_m)
hawks_opp_avg_h_m = (hawks_home_m['Away Score'].astype(int).sum()) / 3
print(hawks_opp_avg_h_m)
hawks_avg_a_m = (hawks_away_m['Away Score'].astype(int).sum()) / 3
print(hawks_avg_a_m)
hawks_opp_avg_a_m = (hawks_away_m['Home Score'].astype(int).sum()) / 3
print(hawks_opp_avg_a_m)

114.33333333333333
108.0
107.66666666666667
126.0


In [48]:
# calculate central statistics

hawks_avg_h_c = (hawks_home_c['Home Score'].astype(int).sum()) / 11
print(hawks_avg_h_c)
hawks_opp_avg_h_c = (hawks_home_c['Away Score'].astype(int).sum()) / 11
print(hawks_opp_avg_h_c)
hawks_avg_a_c = (hawks_away_c['Away Score'].astype(int).sum()) / 11
print(hawks_avg_a_c)
hawks_opp_avg_a_c = (hawks_away_c['Home Score'].astype(int).sum()) / 11
print(hawks_opp_avg_a_c)

122.63636363636364
122.27272727272727
115.54545454545455
120.27272727272727


In [49]:
# calculate eastern statisics

hawks_avg_h_e = (hawks_home_e['Home Score'].astype(int).sum()) / 22
print(hawks_avg_h_e)
hawks_opp_avg_h_e = (hawks_home_e['Away Score'].astype(int).sum()) / 22
print(hawks_opp_avg_h_e)
hawks_avg_a_e = (hawks_away_e['Away Score'].astype(int).sum()) / 22
print(hawks_avg_a_e)
hawks_opp_avg_a_e = (hawks_away_e['Home Score'].astype(int).sum()) / 22
print(hawks_opp_avg_a_e)

113.4090909090909
118.68181818181819
108.95454545454545
117.5


In [50]:
# compare all home numbers for hawks

print(hawks_avg_h)
print(hawks_avg_h_p)
print(hawks_avg_h_m)
print(hawks_avg_h_c)
print(hawks_avg_h_e)

115.78048780487805
112.0
114.33333333333333
122.63636363636364
113.4090909090909


In [51]:
# compare all away numbers for hawks

print(hawks_avg_a)
print(hawks_avg_a_p)
print(hawks_avg_a_m)
print(hawks_avg_a_c)
print(hawks_avg_a_e)

110.90243902439025
111.2
107.66666666666667
115.54545454545455
108.95454545454545


In [52]:
# compare all opponent numbers for hawks at home

print(hawks_opp_avg_h)
print(hawks_opp_avg_h_p)
print(hawks_opp_avg_h_m)
print(hawks_opp_avg_h_c)
print(hawks_opp_avg_h_e)

119.8048780487805
126.4
108.0
122.27272727272727
118.68181818181819


In [53]:
# compare all opponent numbers for hawks away

print(hawks_opp_avg_a)
print(hawks_opp_avg_a_p)
print(hawks_opp_avg_a_m)
print(hawks_opp_avg_a_c)
print(hawks_opp_avg_a_e)

118.92682926829268
118.0
126.0
120.27272727272727
117.5


In [55]:
# comparing all these numbers do not seem to be providing a solid conclusion
# however, points differential can also be an interesting, and arguably better, factor to consider
# we can see here that the team does perform a lot better at home than away

hawks_diff_h = hawks_avg_h - hawks_opp_avg_h
print(hawks_diff_h)
hawks_diff_a = hawks_avg_a - hawks_opp_avg_a
print(hawks_diff_a)

-4.024390243902445
-8.024390243902431


In [59]:
# now explore the differential across every time zone

hawks_diff_h_p = hawks_avg_h_p - hawks_opp_avg_h_p
print(hawks_diff_h_p)
hawks_diff_a_p = hawks_avg_a_p - hawks_opp_avg_a_p
print(hawks_diff_a_p)
hawks_diff_h_m = hawks_avg_h_m - hawks_opp_avg_h_m
print(hawks_diff_h_m)
hawks_diff_a_m = hawks_avg_a_m - hawks_opp_avg_a_m
print(hawks_diff_a_m)
hawks_diff_h_c = hawks_avg_h_c - hawks_opp_avg_h_c
print(hawks_diff_h_c)
hawks_diff_a_c = hawks_avg_a_c - hawks_opp_avg_a_c
print(hawks_diff_a_c)
hawks_diff_h_e = hawks_avg_h_e - hawks_opp_avg_h_e
print(hawks_diff_h_e)
hawks_diff_a_e = hawks_avg_a_e - hawks_opp_avg_a_e
print(hawks_diff_a_e)

-14.400000000000006
-6.799999999999997
6.333333333333329
-18.33333333333333
0.36363636363637397
-4.7272727272727195
-5.2727272727272805
-8.545454545454547


In [77]:
# create a new dataframe to slowly append these data for each team
# this will end up being the final export of the notebook

hawks_data = [['ATLANTA HAWKS',
             hawks_avg_h, hawks_avg_h_p, hawks_avg_h_m, hawks_avg_h_c, hawks_avg_h_e,
             hawks_avg_a, hawks_avg_a_p, hawks_avg_a_m, hawks_avg_a_c, hawks_avg_a_e,
             hawks_opp_avg_h, hawks_opp_avg_h_p, hawks_opp_avg_h_m, hawks_opp_avg_h_c, hawks_opp_avg_h_e,
             hawks_opp_avg_a, hawks_opp_avg_a_p, hawks_opp_avg_a_m, hawks_opp_avg_a_c, hawks_opp_avg_a_e,
             hawks_diff_h, hawks_diff_h_p, hawks_diff_h_m, hawks_diff_h_c, hawks_diff_h_e,
             hawks_diff_a, hawks_diff_a_p, hawks_diff_a_m, hawks_diff_a_c, hawks_diff_a_e]]
df_1819 = pd.DataFrame(hawks_data, columns = ['TEAM',
                                              'AVG H', 'AVG H P', 'AVG H M', 'AVG H C', 'AVG H E',
                                              'AVG A', 'AVG A P', 'AVG A M', 'AVG A C', 'AVG A E',
                                              'OPP AVG H', 'OPP AVG H P', 'OPP AVG H M', 'OPP AVG H C', 'OPP AVG H E',
                                              'OPP AVG A', 'OPP AVG A P', 'OPP AVG A M', 'OPP AVG A C', 'OPP AVG A E',
                                              'DIFF H', 'DIFF H P', 'DIFF H M', 'DIFF H C', 'DIFF H E',
                                              'DIFF A', 'DIFF A P', 'DIFF A M', 'DIFF A C', 'DIFF A E'])

In [78]:
# double check the output
# ready to append the rest of the league!

df_1819

Unnamed: 0,TEAM,AVG H,AVG H P,AVG H M,AVG H C,AVG H E,AVG A,AVG A P,AVG A M,AVG A C,...,DIFF H,DIFF H P,DIFF H M,DIFF H C,DIFF H E,DIFF A,DIFF A P,DIFF A M,DIFF A C,DIFF A E
0,ATLANTA HAWKS,115.780488,112.0,114.333333,122.636364,113.409091,110.902439,111.2,107.666667,115.545455,...,-4.02439,-14.4,6.333333,0.363636,-5.272727,-8.02439,-6.8,-18.333333,-4.727273,-8.545455
