# 2018-19 NBA Season

In [3]:
%matplotlib inline

import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

# Silence warnings so they do not clutter the rendered notebook.
# NOTE: no category is given, so this filter hides every warning category
# (deprecation notices, pandas SettingWithCopyWarning, ...), not just noise.

import warnings
warnings.simplefilter(action='ignore')

In [4]:
# Read the first team's file (Atlanta Hawks, 2018-19 season)
# and display ten randomly sampled rows as a quick look at the data

hawks_csv = "../data/18_19/1819 - hawks.csv"
hawks = gpd.read_file(hawks_csv)
hawks.sample(10)

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Home Score,Away Score,geometry
63,19,03/03/2019 12:30,United Center,Chicago Bulls,Atlanta Hawks,118,123,
2,1,21/10/2018 15:00,Quicken Loans Arena,Cleveland Cavaliers,Atlanta Hawks,111,133,
66,20,09/03/2019 16:00,State Farm Arena,Atlanta Hawks,Brooklyn Nets,112,114,
29,9,18/12/2018 16:30,State Farm Arena,Atlanta Hawks,Washington Wizards,118,110,
79,24,05/04/2019 16:00,Amway Center,Orlando Magic,Atlanta Hawks,149,113,
21,6,28/11/2018 16:00,Spectrum Center,Charlotte Hornets,Atlanta Hawks,108,94,
68,20,13/03/2019 16:30,State Farm Arena,Atlanta Hawks,Memphis Grizzlies,132,111,
47,15,26/01/2019 19:00,Moda Center,Portland Trail Blazers,Atlanta Hawks,120,111,
53,17,07/02/2019 16:30,State Farm Arena,Atlanta Hawks,Toronto Raptors,101,119,
51,16,02/02/2019 18:00,Talking Stick Resort Arena,Phoenix Suns,Atlanta Hawks,112,118,


In [5]:
# Split the schedule by venue: rows where the Hawks are the home team
# and rows where they are the away team.
# Each printed count is expected to be 41.

played_at_home = hawks['Home Team'] == 'Atlanta Hawks'
played_away = hawks['Away Team'] == 'Atlanta Hawks'

hawks_home = hawks[played_at_home]
hawks_away = hawks[played_away]

print(len(hawks_home), len(hawks_away), sep='\n')

41
41


In [6]:
# calculate the standard statistics for the overall baseline:
# average points scored by the Hawks and by their opponents, at home and away
#
# The divisor is the actual number of games in each split (rather than a
# hard-coded 41) so the averages stay correct if a file holds a different
# number of games; this matches how the per-time-zone cells divide by len(...).
# Scores are cast with astype(int) before summing, as in the rest of the notebook.

hawks_avg_h = (hawks_home['Home Score'].astype(int).sum()) / len(hawks_home)
print(hawks_avg_h)
hawks_opp_avg_h = (hawks_home['Away Score'].astype(int).sum()) / len(hawks_home)
print(hawks_opp_avg_h)
hawks_avg_a = (hawks_away['Away Score'].astype(int).sum()) / len(hawks_away)
print(hawks_avg_a)
hawks_opp_avg_a = (hawks_away['Home Score'].astype(int).sum()) / len(hawks_away)
print(hawks_opp_avg_a)

115.78048780487805
119.8048780487805
110.90243902439025
118.92682926829268


In [7]:
# Team names grouped by the time zone they are assigned to.
# The lists are team-independent, so every later calculation reuses them.
# The printed total should be 30 (one entry per NBA team).

pacific = [
    'Golden State Warriors',
    'LA Clippers',
    'Los Angeles Lakers',
    'Portland Trail Blazers',
    'Sacramento Kings',
]

mountain = [
    'Denver Nuggets',
    'Phoenix Suns',
    'Utah Jazz',
]

central = [
    'Chicago Bulls',
    'Dallas Mavericks',
    'Milwaukee Bucks',
    'Minnesota Timberwolves',
    'New Orleans Pelicans',
    'San Antonio Spurs',
    'Houston Rockets',
    'Memphis Grizzlies',
    'Oklahoma City Thunder',
]

eastern = [
    'Atlanta Hawks',
    'Boston Celtics',
    'Brooklyn Nets',
    'Charlotte Hornets',
    'Toronto Raptors',
    'Washington Wizards',
    'Cleveland Cavaliers',
    'Detroit Pistons',
    'Indiana Pacers',
    'Miami Heat',
    'Orlando Magic',
    'Philadelphia 76ers',
    'New York Knicks',
]

print(sum(len(zone) for zone in (pacific, mountain, central, eastern)))

30


In [8]:
# Build the eight subsets: home and away games against opponents from
# each of the four time zones (pacific, mountain, central, eastern).
# The eight printed counts should add up to 82 games.

hawks_home_p, hawks_home_m, hawks_home_c, hawks_home_e = (
    hawks_home[hawks_home['Away Team'].isin(zone)]
    for zone in (pacific, mountain, central, eastern)
)
hawks_away_p, hawks_away_m, hawks_away_c, hawks_away_e = (
    hawks_away[hawks_away['Home Team'].isin(zone)]
    for zone in (pacific, mountain, central, eastern)
)

print(
    len(hawks_home_p), len(hawks_away_p),
    len(hawks_home_m), len(hawks_away_m),
    len(hawks_home_c), len(hawks_away_c),
    len(hawks_home_e), len(hawks_away_e),
    sep='\n',
)

5
5
3
3
11
11
22
22


In [9]:
# Pacific opponents: average points for the Hawks and for their opponents.
# At home the Hawks' points are in 'Home Score'; away they are in 'Away Score'.

hawks_avg_h_p, hawks_opp_avg_h_p = (
    hawks_home_p[score_col].astype(int).sum() / len(hawks_home_p)
    for score_col in ('Home Score', 'Away Score')
)
hawks_avg_a_p, hawks_opp_avg_a_p = (
    hawks_away_p[score_col].astype(int).sum() / len(hawks_away_p)
    for score_col in ('Away Score', 'Home Score')
)

print(hawks_avg_h_p, hawks_opp_avg_h_p, hawks_avg_a_p, hawks_opp_avg_a_p, sep='\n')

112.0
126.4
111.2
118.0


In [10]:
# Mountain opponents: average points for the Hawks and for their opponents.
# At home the Hawks' points are in 'Home Score'; away they are in 'Away Score'.

hawks_avg_h_m, hawks_opp_avg_h_m = (
    hawks_home_m[score_col].astype(int).sum() / len(hawks_home_m)
    for score_col in ('Home Score', 'Away Score')
)
hawks_avg_a_m, hawks_opp_avg_a_m = (
    hawks_away_m[score_col].astype(int).sum() / len(hawks_away_m)
    for score_col in ('Away Score', 'Home Score')
)

print(hawks_avg_h_m, hawks_opp_avg_h_m, hawks_avg_a_m, hawks_opp_avg_a_m, sep='\n')

114.33333333333333
108.0
107.66666666666667
126.0


In [11]:
# Central opponents: average points for the Hawks and for their opponents.
# At home the Hawks' points are in 'Home Score'; away they are in 'Away Score'.

hawks_avg_h_c, hawks_opp_avg_h_c = (
    hawks_home_c[score_col].astype(int).sum() / len(hawks_home_c)
    for score_col in ('Home Score', 'Away Score')
)
hawks_avg_a_c, hawks_opp_avg_a_c = (
    hawks_away_c[score_col].astype(int).sum() / len(hawks_away_c)
    for score_col in ('Away Score', 'Home Score')
)

print(hawks_avg_h_c, hawks_opp_avg_h_c, hawks_avg_a_c, hawks_opp_avg_a_c, sep='\n')

122.63636363636364
122.27272727272727
115.54545454545455
120.27272727272727


In [12]:
# Eastern opponents: average points for the Hawks and for their opponents.
# At home the Hawks' points are in 'Home Score'; away they are in 'Away Score'.

hawks_avg_h_e, hawks_opp_avg_h_e = (
    hawks_home_e[score_col].astype(int).sum() / len(hawks_home_e)
    for score_col in ('Home Score', 'Away Score')
)
hawks_avg_a_e, hawks_opp_avg_a_e = (
    hawks_away_e[score_col].astype(int).sum() / len(hawks_away_e)
    for score_col in ('Away Score', 'Home Score')
)

print(hawks_avg_h_e, hawks_opp_avg_h_e, hawks_avg_a_e, hawks_opp_avg_a_e, sep='\n')

113.4090909090909
118.68181818181819
108.95454545454545
117.5


In [13]:
# Hawks' average points at home: overall, then vs pacific, mountain,
# central and eastern opponents (one value per line)

print(
    hawks_avg_h,
    hawks_avg_h_p,
    hawks_avg_h_m,
    hawks_avg_h_c,
    hawks_avg_h_e,
    sep='\n',
)

115.78048780487805
112.0
114.33333333333333
122.63636363636364
113.4090909090909


In [14]:
# Hawks' average points away: overall, then vs pacific, mountain,
# central and eastern opponents (one value per line)

print(
    hawks_avg_a,
    hawks_avg_a_p,
    hawks_avg_a_m,
    hawks_avg_a_c,
    hawks_avg_a_e,
    sep='\n',
)

110.90243902439025
111.2
107.66666666666667
115.54545454545455
108.95454545454545


In [15]:
# Opponents' average points in Hawks home games: overall, then pacific,
# mountain, central and eastern opponents (one value per line)

print(
    hawks_opp_avg_h,
    hawks_opp_avg_h_p,
    hawks_opp_avg_h_m,
    hawks_opp_avg_h_c,
    hawks_opp_avg_h_e,
    sep='\n',
)

119.8048780487805
126.4
108.0
122.27272727272727
118.68181818181819


In [16]:
# Opponents' average points in Hawks away games: overall, then pacific,
# mountain, central and eastern opponents (one value per line)

print(
    hawks_opp_avg_a,
    hawks_opp_avg_a_p,
    hawks_opp_avg_a_m,
    hawks_opp_avg_a_c,
    hawks_opp_avg_a_e,
    sep='\n',
)

118.92682926829268
118.0
126.0
120.27272727272727
117.5


In [17]:
# The raw averages above do not point to a clear conclusion.
# Point differential (own average minus opponents' average) is arguably a
# better measure; here it shows the Hawks doing noticeably better at home
# than away.

hawks_diff_h = hawks_avg_h - hawks_opp_avg_h
hawks_diff_a = hawks_avg_a - hawks_opp_avg_a

print(hawks_diff_h, hawks_diff_a, sep='\n')

-4.024390243902445
-8.024390243902431


In [18]:
# Point differential (own average minus opponents' average) per time zone

# home games
hawks_diff_h_p = hawks_avg_h_p - hawks_opp_avg_h_p
hawks_diff_h_m = hawks_avg_h_m - hawks_opp_avg_h_m
hawks_diff_h_c = hawks_avg_h_c - hawks_opp_avg_h_c
hawks_diff_h_e = hawks_avg_h_e - hawks_opp_avg_h_e

# away games
hawks_diff_a_p = hawks_avg_a_p - hawks_opp_avg_a_p
hawks_diff_a_m = hawks_avg_a_m - hawks_opp_avg_a_m
hawks_diff_a_c = hawks_avg_a_c - hawks_opp_avg_a_c
hawks_diff_a_e = hawks_avg_a_e - hawks_opp_avg_a_e

# printed as home/away pairs in the order pacific, mountain, central, eastern
print(
    hawks_diff_h_p, hawks_diff_a_p,
    hawks_diff_h_m, hawks_diff_a_m,
    hawks_diff_h_c, hawks_diff_a_c,
    hawks_diff_h_e, hawks_diff_a_e,
    sep='\n',
)

-14.400000000000006
-6.799999999999997
6.333333333333329
-18.33333333333333
0.36363636363637397
-4.7272727272727195
-5.2727272727272805
-8.545454545454547


In [19]:
# Start the league-wide list with the Hawks' row; rows for other teams are
# appended to it later. Layout: team name, then six groups of five values
# (overall, pacific, mountain, central, eastern).

hawks_row = ['Atlanta Hawks']
hawks_row += [hawks_avg_h, hawks_avg_h_p, hawks_avg_h_m, hawks_avg_h_c, hawks_avg_h_e]
hawks_row += [hawks_avg_a, hawks_avg_a_p, hawks_avg_a_m, hawks_avg_a_c, hawks_avg_a_e]
hawks_row += [hawks_opp_avg_h, hawks_opp_avg_h_p, hawks_opp_avg_h_m, hawks_opp_avg_h_c, hawks_opp_avg_h_e]
hawks_row += [hawks_opp_avg_a, hawks_opp_avg_a_p, hawks_opp_avg_a_m, hawks_opp_avg_a_c, hawks_opp_avg_a_e]
hawks_row += [hawks_diff_h, hawks_diff_h_p, hawks_diff_h_m, hawks_diff_h_c, hawks_diff_h_e]
hawks_row += [hawks_diff_a, hawks_diff_a_p, hawks_diff_a_m, hawks_diff_a_c, hawks_diff_a_e]

data_1819 = [hawks_row]

In [20]:
# Display data_1819 to double check the Hawks row
# before appending the rest of the league

data_1819

[['Atlanta Hawks',
  115.78048780487805,
  112.0,
  114.33333333333333,
  122.63636363636364,
  113.4090909090909,
  110.90243902439025,
  111.2,
  107.66666666666667,
  115.54545454545455,
  108.95454545454545,
  119.8048780487805,
  126.4,
  108.0,
  122.27272727272727,
  118.68181818181819,
  118.92682926829268,
  118.0,
  126.0,
  120.27272727272727,
  117.5,
  -4.024390243902445,
  -14.400000000000006,
  6.333333333333329,
  0.36363636363637397,
  -5.2727272727272805,
  -8.024390243902431,
  -6.799999999999997,
  -18.33333333333333,
  -4.7272727272727195,
  -8.545454545454547]]

In [21]:
# (full team name, csv file-name stub) pairs for every team except the
# Hawks, used to iterate over the rest of the league.
# The length shown below should be 29.

teams = [
    # eastern
    ('Boston Celtics', 'celtics'),
    ('Brooklyn Nets', 'nets'),
    ('Charlotte Hornets', 'hornets'),
    ('Cleveland Cavaliers', 'cavaliers'),
    ('Detroit Pistons', 'pistons'),
    ('Indiana Pacers', 'pacers'),
    ('Miami Heat', 'heat'),
    ('New York Knicks', 'knicks'),
    ('Orlando Magic', 'magic'),
    ('Philadelphia 76ers', '76ers'),
    ('Toronto Raptors', 'raptors'),
    ('Washington Wizards', 'wizards'),
    # central
    ('Chicago Bulls', 'bulls'),
    ('Dallas Mavericks', 'mavericks'),
    ('Houston Rockets', 'rockets'),
    ('Memphis Grizzlies', 'grizzlies'),
    ('Milwaukee Bucks', 'bucks'),
    ('Minnesota Timberwolves', 'timberwolves'),
    ('New Orleans Pelicans', 'pelicans'),
    ('Oklahoma City Thunder', 'thunder'),
    ('San Antonio Spurs', 'spurs'),
    # mountain
    ('Denver Nuggets', 'nuggets'),
    ('Phoenix Suns', 'suns'),
    ('Utah Jazz', 'jazz'),
    # pacific
    ('Golden State Warriors', 'warriors'),
    ('LA Clippers', 'clippers'),
    ('Los Angeles Lakers', 'lakers'),
    ('Portland Trail Blazers', 'blazers'),
    ('Sacramento Kings', 'kings'),
]

len(teams)

29

In [22]:
# Repeat the Hawks calculations for every team in `teams` and append one row
# per team to data_1819, in the same column order as the Hawks row:
# name, own avg (home, away), opponent avg (home, away), differential (home, away),
# each group holding overall / pacific / mountain / central / eastern values.

def _avg_points(games, column):
    """Return the mean of `column` (cast to int) over the rows of `games`.

    The divisor is the actual number of rows, so the overall baseline no
    longer relies on a hard-coded 41-game assumption.
    """
    return games[column].astype(int).sum() / len(games)


# same pacific, mountain, central, eastern lists as before; the order here
# fixes the P, M, C, E order inside every group of the row
zone_lists = (pacific, mountain, central, eastern)

# names already present in data_1819, so re-running this cell does not
# append duplicate rows
teams_done = {row[0] for row in data_1819}

for team_name, file_stub in teams:

    if team_name in teams_done:
        continue

    # read this team's csv

    filepath = '../data/18_19/1819 - ' + file_stub + '.csv'
    file = gpd.read_file(filepath)

    home = file[file['Home Team'] == team_name]
    away = file[file['Away Team'] == team_name]

    # overall split first, then one split per opponent time zone

    home_splits = [home] + [home[home['Away Team'].isin(zone)] for zone in zone_lists]
    away_splits = [away] + [away[away['Home Team'].isin(zone)] for zone in zone_lists]

    # at home the team's own points are in 'Home Score'; away they are in 'Away Score'

    own_home = [_avg_points(games, 'Home Score') for games in home_splits]
    opp_home = [_avg_points(games, 'Away Score') for games in home_splits]
    own_away = [_avg_points(games, 'Away Score') for games in away_splits]
    opp_away = [_avg_points(games, 'Home Score') for games in away_splits]

    # differential = own average minus opponents' average

    diff_home = [own - opp for own, opp in zip(own_home, opp_home)]
    diff_away = [own - opp for own, opp in zip(own_away, opp_away)]

    # compile all information into team_data variable

    team_data = ([team_name]
                 + own_home + own_away
                 + opp_home + opp_away
                 + diff_home + diff_away)

    # append team_data into data_1819

    data_1819.append(team_data)
    teams_done.add(team_name)

In [23]:
# Display data_1819 to confirm the for loop appended the other teams' rows

data_1819

[['Atlanta Hawks',
  115.78048780487805,
  112.0,
  114.33333333333333,
  122.63636363636364,
  113.4090909090909,
  110.90243902439025,
  111.2,
  107.66666666666667,
  115.54545454545455,
  108.95454545454545,
  119.8048780487805,
  126.4,
  108.0,
  122.27272727272727,
  118.68181818181819,
  118.92682926829268,
  118.0,
  126.0,
  120.27272727272727,
  117.5,
  -4.024390243902445,
  -14.400000000000006,
  6.333333333333329,
  0.36363636363637397,
  -5.2727272727272805,
  -8.024390243902431,
  -6.799999999999997,
  -18.33333333333333,
  -4.7272727272727195,
  -8.545454545454547],
 ['Boston Celtics',
  114.09756097560975,
  113.8,
  98.0,
  113.3,
  116.6086956521739,
  110.6829268292683,
  113.6,
  112.66666666666667,
  112.9,
  108.82608695652173,
  107.3170731707317,
  116.8,
  107.66666666666667,
  108.5,
  104.69565217391305,
  108.58536585365853,
  110.2,
  115.66666666666667,
  107.5,
  107.78260869565217,
  6.7804878048780495,
  -3.0,
  -9.666666666666671,
  4.799999999999997

In [24]:
# Turn the list of per-team rows into a DataFrame.
# Column suffixes: H/A = home/away, P/M/C/E = pacific/mountain/central/eastern.

columns_1819 = (
    ['TEAM']
    + ['AVG H', 'AVG H P', 'AVG H M', 'AVG H C', 'AVG H E']
    + ['AVG A', 'AVG A P', 'AVG A M', 'AVG A C', 'AVG A E']
    + ['OPP AVG H', 'OPP AVG H P', 'OPP AVG H M', 'OPP AVG H C', 'OPP AVG H E']
    + ['OPP AVG A', 'OPP AVG A P', 'OPP AVG A M', 'OPP AVG A C', 'OPP AVG A E']
    + ['DIFF H', 'DIFF H P', 'DIFF H M', 'DIFF H C', 'DIFF H E']
    + ['DIFF A', 'DIFF A P', 'DIFF A M', 'DIFF A C', 'DIFF A E']
)

df_1819 = pd.DataFrame(data_1819, columns=columns_1819)

In [23]:
# Display df_1819 to confirm the dataframe was built as expected

df_1819

Unnamed: 0,TEAM,AVG H,AVG H P,AVG H M,AVG H C,AVG H E,AVG A,AVG A P,AVG A M,AVG A C,...,DIFF H,DIFF H P,DIFF H M,DIFF H C,DIFF H E,DIFF A,DIFF A P,DIFF A M,DIFF A C,DIFF A E
0,Atlanta Hawks,115.780488,112.0,114.333333,122.636364,113.409091,110.902439,111.2,107.666667,115.545455,...,-4.02439,-14.4,6.333333,0.363636,-5.272727,-8.02439,-6.8,-18.333333,-4.727273,-8.545455
1,Boston Celtics,114.097561,113.8,98.0,113.3,116.608696,110.682927,113.6,112.666667,112.9,...,6.780488,-3.0,-9.666667,4.8,11.913043,2.097561,3.4,-3.0,5.4,1.043478
2,Brooklyn Nets,113.829268,114.0,112.333333,113.363636,114.227273,110.658537,118.8,104.666667,115.090909,...,2.536585,1.2,1.0,-0.818182,4.727273,-2.682927,-3.2,2.666667,-0.636364,-4.318182
3,Charlotte Hornets,113.146341,109.4,119.666667,112.090909,113.636364,108.341463,101.4,110.333333,107.1,...,3.682927,-6.4,6.0,0.454545,7.272727,-5.878049,-23.6,-5.333333,-3.6,-3.086957
4,Cleveland Cavaliers,102.731707,107.6,97.666667,98.636364,104.363636,103.780488,107.8,104.666667,105.0,...,-9.780488,-12.4,-10.666667,-10.454545,-8.727273,-11.878049,-10.8,-15.666667,-11.272727,-11.909091
5,Detroit Pistons,108.658537,104.6,117.333333,107.727273,108.863636,105.439024,107.4,101.333333,106.909091,...,2.560976,4.0,10.666667,1.363636,1.727273,-3.04878,-6.0,3.666667,-4.636364,-2.5
6,Indiana Pacers,108.682927,110.4,125.333333,107.545455,106.590909,107.341463,100.4,110.0,104.272727,...,7.634146,6.8,32.333333,6.454545,5.045455,1.463415,-9.2,12.0,-4.636364,5.5
7,Miami Heat,106.02439,111.0,107.333333,100.6,107.130435,105.390244,111.6,95.333333,105.454545,...,0.243902,-5.6,-1.666667,-2.5,2.956522,-0.707317,4.4,-8.666667,-1.545455,-0.363636
8,New York Knicks,105.902439,109.0,106.333333,109.090909,103.545455,103.243902,106.0,100.333333,102.4,...,-7.853659,-10.0,-19.0,-5.181818,-7.181818,-10.560976,-11.6,-16.666667,-9.1,-10.173913
9,Orlando Magic,109.341463,108.2,111.333333,111.2,108.521739,105.292683,104.2,93.0,101.545455,...,2.658537,-5.4,-0.333333,-0.4,6.130435,-1.243902,-6.2,-8.0,-1.090909,0.727273


In [24]:
# Upper-case the team names so they can be joined with the stadiums data later

team_names_upper = df_1819['TEAM'].str.upper()
df_1819['TEAM'] = team_names_upper
df_1819

Unnamed: 0,TEAM,AVG H,AVG H P,AVG H M,AVG H C,AVG H E,AVG A,AVG A P,AVG A M,AVG A C,...,DIFF H,DIFF H P,DIFF H M,DIFF H C,DIFF H E,DIFF A,DIFF A P,DIFF A M,DIFF A C,DIFF A E
0,ATLANTA HAWKS,115.780488,112.0,114.333333,122.636364,113.409091,110.902439,111.2,107.666667,115.545455,...,-4.02439,-14.4,6.333333,0.363636,-5.272727,-8.02439,-6.8,-18.333333,-4.727273,-8.545455
1,BOSTON CELTICS,114.097561,113.8,98.0,113.3,116.608696,110.682927,113.6,112.666667,112.9,...,6.780488,-3.0,-9.666667,4.8,11.913043,2.097561,3.4,-3.0,5.4,1.043478
2,BROOKLYN NETS,113.829268,114.0,112.333333,113.363636,114.227273,110.658537,118.8,104.666667,115.090909,...,2.536585,1.2,1.0,-0.818182,4.727273,-2.682927,-3.2,2.666667,-0.636364,-4.318182
3,CHARLOTTE HORNETS,113.146341,109.4,119.666667,112.090909,113.636364,108.341463,101.4,110.333333,107.1,...,3.682927,-6.4,6.0,0.454545,7.272727,-5.878049,-23.6,-5.333333,-3.6,-3.086957
4,CLEVELAND CAVALIERS,102.731707,107.6,97.666667,98.636364,104.363636,103.780488,107.8,104.666667,105.0,...,-9.780488,-12.4,-10.666667,-10.454545,-8.727273,-11.878049,-10.8,-15.666667,-11.272727,-11.909091
5,DETROIT PISTONS,108.658537,104.6,117.333333,107.727273,108.863636,105.439024,107.4,101.333333,106.909091,...,2.560976,4.0,10.666667,1.363636,1.727273,-3.04878,-6.0,3.666667,-4.636364,-2.5
6,INDIANA PACERS,108.682927,110.4,125.333333,107.545455,106.590909,107.341463,100.4,110.0,104.272727,...,7.634146,6.8,32.333333,6.454545,5.045455,1.463415,-9.2,12.0,-4.636364,5.5
7,MIAMI HEAT,106.02439,111.0,107.333333,100.6,107.130435,105.390244,111.6,95.333333,105.454545,...,0.243902,-5.6,-1.666667,-2.5,2.956522,-0.707317,4.4,-8.666667,-1.545455,-0.363636
8,NEW YORK KNICKS,105.902439,109.0,106.333333,109.090909,103.545455,103.243902,106.0,100.333333,102.4,...,-7.853659,-10.0,-19.0,-5.181818,-7.181818,-10.560976,-11.6,-16.666667,-9.1,-10.173913
9,ORLANDO MAGIC,109.341463,108.2,111.333333,111.2,108.521739,105.292683,104.2,93.0,101.545455,...,2.658537,-5.4,-0.333333,-0.4,6.130435,-1.243902,-6.2,-8.0,-1.090909,0.727273


In [30]:
# general overview of team differentials
# 29 of 30 teams (Chicago Bulls being the exception) perform better at home

# .copy() makes df_diff an independent frame, so adding the flag column below
# is not an assignment on a slice of df_1819 (which triggers pandas'
# SettingWithCopyWarning, currently hidden by the notebook-wide warnings filter)
df_diff = df_1819[['TEAM', 'DIFF H', 'DIFF A']].copy()

# True where the home differential is strictly greater than the away one
comparison = df_diff['DIFF H'] > df_diff['DIFF A']
df_diff['H > A?'] = comparison
df_diff

Unnamed: 0,TEAM,DIFF H,DIFF A,H > A?
0,ATLANTA HAWKS,-4.02439,-8.02439,True
1,BOSTON CELTICS,6.780488,2.097561,True
2,BROOKLYN NETS,2.536585,-2.682927,True
3,CHARLOTTE HORNETS,3.682927,-5.878049,True
4,CLEVELAND CAVALIERS,-9.780488,-11.878049,True
5,DETROIT PISTONS,2.560976,-3.04878,True
6,INDIANA PACERS,7.634146,1.463415,True
7,MIAMI HEAT,0.243902,-0.707317,True
8,NEW YORK KNICKS,-7.853659,-10.560976,True
9,ORLANDO MAGIC,2.658537,-1.243902,True


In [51]:
# Eastern-zone teams: average of every differential column across the zone.
# Each column sum is divided by the number of names in the eastern list.

e = [name.upper() for name in eastern]
df_e = df_1819.loc[
    df_1819['TEAM'].isin(e),
    ['TEAM',
     'DIFF H', 'DIFF H P', 'DIFF H M', 'DIFF H C', 'DIFF H E',
     'DIFF A', 'DIFF A P', 'DIFF A M', 'DIFF A C', 'DIFF A E'],
]

# columns[1:] skips 'TEAM', leaving the ten differential columns in order
(overallh, overallhp, overallhm, overallhc, overallhe,
 overalla, overallap, overallam, overallac, overallae) = (
    df_e[diff_col].sum() / len(e) for diff_col in df_e.columns[1:]
)

print(overallh, overallhp, overallhm, overallhc, overallhe,
      overalla, overallap, overallam, overallac, overallae,
      sep='\n')
print(df_e)

1.6772983114446545
-2.138461538461536
1.1538461538461517
0.3867132867132881
3.207661903314076
-3.544090056285174
-5.723076923076925
-3.8205128205128207
-3.0622377622377623
-3.241714806932197
                   TEAM    DIFF H  DIFF H P   DIFF H M   DIFF H C   DIFF H E  \
0         ATLANTA HAWKS -4.024390     -14.4   6.333333   0.363636  -5.272727   
1        BOSTON CELTICS  6.780488      -3.0  -9.666667   4.800000  11.913043   
2         BROOKLYN NETS  2.536585       1.2   1.000000  -0.818182   4.727273   
3     CHARLOTTE HORNETS  3.682927      -6.4   6.000000   0.454545   7.272727   
4   CLEVELAND CAVALIERS -9.780488     -12.4 -10.666667 -10.454545  -8.727273   
5       DETROIT PISTONS  2.560976       4.0  10.666667   1.363636   1.727273   
6        INDIANA PACERS  7.634146       6.8  32.333333   6.454545   5.045455   
7            MIAMI HEAT  0.243902      -5.6  -1.666667  -2.500000   2.956522   
8       NEW YORK KNICKS -7.853659     -10.0 -19.000000  -5.181818  -7.181818   
9        

In [52]:
# Central-zone teams: average of every differential column across the zone.
# Each column sum is divided by the number of names in the central list.

c = [name.upper() for name in central]
df_c = df_1819.loc[
    df_1819['TEAM'].isin(c),
    ['TEAM',
     'DIFF H', 'DIFF H P', 'DIFF H M', 'DIFF H C', 'DIFF H E',
     'DIFF A', 'DIFF A P', 'DIFF A M', 'DIFF A C', 'DIFF A E'],
]

# columns[1:] skips 'TEAM', leaving the ten differential columns in order
(overallh, overallhp, overallhm, overallhc, overallhe,
 overalla, overallap, overallam, overallac, overallae) = (
    df_c[diff_col].sum() / len(c) for diff_col in df_c.columns[1:]
)

print(overallh, overallhp, overallhm, overallhc, overallhe,
      overalla, overallap, overallam, overallac, overallae,
      sep='\n')
print(df_c)

2.875338753387535
2.2959876543209896
3.7074074074074113
2.2313118979785633
3.254273504273504
-2.0731707317073136
-4.365432098765431
-3.4185185185185167
-2.2204585537918877
-0.5195868945868953
                      TEAM     DIFF H   DIFF H P   DIFF H M  DIFF H C  \
13           CHICAGO BULLS  -9.804878 -17.000000  -8.000000 -6.000000   
14        DALLAS MAVERICKS   2.341463  -0.111111  10.400000 -2.857143   
15         HOUSTON ROCKETS   7.121951   4.222222  12.500000  2.923077   
16       MEMPHIS GRIZZLIES   0.439024   0.900000   4.400000 -0.923077   
17         MILWAUKEE BUCKS  12.073171  18.400000   5.333333  6.444444   
18  MINNESOTA TIMBERWOLVES   2.878049   5.600000  -2.400000  3.384615   
19    NEW ORLEANS PELICANS  -0.878049  -1.125000  -1.500000  2.642857   
20   OKLAHOMA CITY THUNDER   4.853659   1.333333   2.833333  8.538462   
21       SAN ANTONIO SPURS   6.853659   8.444444   9.800000  5.928571   

     DIFF H E    DIFF A   DIFF A P   DIFF A M   DIFF A C  DIFF A E  
13  -9.9

In [53]:
# Mountain-zone teams: average of every differential column across the zone.
# Each column sum is divided by the number of names in the mountain list.

m = [name.upper() for name in mountain]
df_m = df_1819.loc[
    df_1819['TEAM'].isin(m),
    ['TEAM',
     'DIFF H', 'DIFF H P', 'DIFF H M', 'DIFF H C', 'DIFF H E',
     'DIFF A', 'DIFF A P', 'DIFF A M', 'DIFF A C', 'DIFF A E'],
]

# columns[1:] skips 'TEAM', leaving the ten differential columns in order
(overallh, overallhp, overallhm, overallhc, overallhe,
 overalla, overallap, overallam, overallac, overallae) = (
    df_m[diff_col].sum() / len(m) for diff_col in df_m.columns[1:]
)

print(overallh, overallhp, overallhm, overallhc, overallhe,
      overalla, overallap, overallam, overallac, overallae,
      sep='\n')
print(df_m)

4.024390243902441
2.481481481481476
9.75
3.599999999999999
3.8205128205128176
-4.113821138211378
-5.303703703703704
-9.75
-4.38888888888889
-1.1538461538461509
              TEAM     DIFF H  DIFF H P  DIFF H M  DIFF H C   DIFF H E  \
22  DENVER NUGGETS  10.512195  8.777778     18.25       8.0  12.230769   
23    PHOENIX SUNS  -6.292683 -9.222222     -8.25      -3.2  -7.230769   
24       UTAH JAZZ   7.853659  7.888889     19.25       6.0   6.461538   

       DIFF A   DIFF A P  DIFF A M   DIFF A C  DIFF A E  
22  -2.609756  -4.000000     -5.25  -1.733333 -1.846154  
23 -12.390244 -14.800000    -31.50 -11.500000 -5.615385  
24   2.658537   2.888889      7.50   0.066667  4.000000  


In [54]:
# Pacific-zone teams: average of every differential column across the zone.
# Each column sum is divided by the number of names in the pacific list.

p = [name.upper() for name in pacific]
df_p = df_1819.loc[
    df_1819['TEAM'].isin(p),
    ['TEAM',
     'DIFF H', 'DIFF H P', 'DIFF H M', 'DIFF H C', 'DIFF H E',
     'DIFF A', 'DIFF A P', 'DIFF A M', 'DIFF A C', 'DIFF A E'],
]

# columns[1:] skips 'TEAM', leaving the ten differential columns in order
(overallh, overallhp, overallhm, overallhc, overallhe,
 overalla, overallap, overallam, overallac, overallae) = (
    df_p[diff_col].sum() / len(p) for diff_col in df_p.columns[1:]
)

print(overallh, overallhp, overallhm, overallhc, overallhe,
      overalla, overallap, overallam, overallac, overallae,
      sep='\n')
print(df_p)

3.9121951219512168
-0.4964285714285722
5.5666666666666655
3.9838095238095237
5.723076923076926
-0.44878048780487917
0.44642857142857223
-2.9666666666666712
-2.4685714285714284
2.138461538461539
                      TEAM    DIFF H  DIFF H P   DIFF H M   DIFF H C  \
25   GOLDEN STATE WARRIORS  6.634146  5.875000  10.800000   4.666667   
26             LA CLIPPERS  3.292683 -2.875000   6.833333   2.857143   
27      LOS ANGELES LAKERS -0.170732 -4.750000   6.666667  -0.071429   
28  PORTLAND TRAIL BLAZERS  8.365854  4.142857   5.333333  10.666667   
29        SACRAMENTO KINGS  1.439024 -4.875000  -1.800000   1.800000   

    DIFF H E    DIFF A  DIFF A P   DIFF A M  DIFF A C  DIFF A E  
25  7.769231  6.292683  5.875000   9.666667  2.857143  8.692308  
26  5.923077 -1.585366  0.250000  -8.000000 -3.866667  2.384615  
27 -0.615385 -3.268293  3.625000 -11.200000 -1.533333 -6.461538  
28  9.384615  0.000000 -1.375000  -2.800000 -4.666667  7.307692  
29  6.153846 -3.682927 -6.142857  -2.500000