## Central Tendency in Python

### Setup

In [29]:
import pandas as pd
import requests
import numpy as np

download_url = "https://raw.githubusercontent.com/fivethirtyeight/data/master/nba-elo/nbaallelo.csv"
target_csv_path = "nba_all_elo.csv"

# Fetch the CSV and save a local copy. The explicit timeout keeps the cell from
# hanging indefinitely if the server never answers (requests applies no timeout
# unless one is passed).
response = requests.get(download_url, timeout=30)
response.raise_for_status()    # Check that the request was successful
with open(target_csv_path, "wb") as f:
    f.write(response.content)

# Load the file that was just written, through the same path variable, so the
# download target and the load source cannot drift apart.
nba = pd.read_csv(target_csv_path)

In [2]:
# Check what kind of object read_csv returned
type(nba)

pandas.core.frame.DataFrame

In [3]:
# Preview the first five rows of the data
nba.head(n=5)

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,pts,elo_i,elo_n,win_equiv,opp_id,opp_fran,opp_pts,opp_elo_i,opp_elo_n,game_location,game_result,forecast,notes
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,66,1300.0,1293.2767,40.29483,NYK,Knicks,68,1300.0,1306.7233,H,L,0.640065,
1,1,194611010TRH,NBA,1,1947,11/1/1946,1,0,NYK,Knicks,68,1300.0,1306.7233,41.70517,TRH,Huskies,66,1300.0,1293.2767,A,W,0.359935,
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,63,1300.0,1309.6521,42.012257,NYK,Knicks,47,1306.7233,1297.0712,H,W,0.631101,
3,2,194611020CHS,NBA,1,1947,11/2/1946,2,0,NYK,Knicks,47,1306.7233,1297.0712,40.692783,CHS,Stags,63,1300.0,1309.6521,A,L,0.368899,
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,33,1300.0,1279.6189,38.864048,WSC,Capitols,50,1300.0,1320.3811,H,L,0.640065,


In [4]:
# Preview the last five rows of the data
nba.tail(n=5)

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,pts,elo_i,elo_n,win_equiv,opp_id,opp_fran,opp_pts,opp_elo_i,opp_elo_n,game_location,game_result,forecast,notes
126309,63155,201506110CLE,NBA,0,2015,6/11/2015,100,1,CLE,Cavaliers,82,1723.4149,1704.3949,60.309792,GSW,Warriors,103,1790.9591,1809.9791,H,L,0.546572,
126310,63156,201506140GSW,NBA,0,2015,6/14/2015,102,1,GSW,Warriors,104,1809.9791,1813.6349,68.013329,CLE,Cavaliers,91,1704.3949,1700.7391,H,W,0.765565,
126311,63156,201506140GSW,NBA,1,2015,6/14/2015,101,1,CLE,Cavaliers,91,1704.3949,1700.7391,60.010067,GSW,Warriors,104,1809.9791,1813.6349,A,L,0.234435,
126312,63157,201506170CLE,NBA,0,2015,6/16/2015,102,1,CLE,Cavaliers,97,1700.7391,1692.0859,59.290245,GSW,Warriors,105,1813.6349,1822.2881,H,L,0.48145,
126313,63157,201506170CLE,NBA,1,2015,6/16/2015,103,1,GSW,Warriors,105,1813.6349,1822.2881,68.519516,CLE,Cavaliers,97,1700.7391,1692.0859,A,W,0.51855,


In [5]:
# Number of rows in the data
nba.shape[0]

126314

In [6]:
# Dimensions of the data as (rows, columns)
nba.shape

(126314, 23)

In [7]:
# Print every column label, one per line
for column_name in nba.columns.tolist():
    print(column_name)

gameorder
game_id
lg_id
_iscopy
year_id
date_game
seasongame
is_playoffs
team_id
fran_id
pts
elo_i
elo_n
win_equiv
opp_id
opp_fran
opp_pts
opp_elo_i
opp_elo_n
game_location
game_result
forecast
notes


## Computing Measures of Central Tendency

In [10]:
# Mean of points scored across all rows
nba['pts'].mean()

102.72998242475101

In [8]:
# Median of points scored across all rows
nba['pts'].median()

103.0

In [9]:
# Most frequent points value(s); mode() returns a Series because ties are possible
nba['pts'].mode()

0    102
dtype: int64

## Your Turn - Answer the Questions Below

In [22]:
# What is the average for points scored in wins and losses?

# Split the rows by recorded game result: 'W' rows and 'L' rows.
won_mask = nba['game_result'] == 'W'
lost_mask = nba['game_result'] == 'L'
winners = nba.loc[won_mask]
losers = nba.loc[lost_mask]

print('The average points for winners: ', winners['pts'].mean())
print('The average points for losers: ', losers['pts'].mean())

The average points for winners:  108.11075573570625
The average points for losers:  97.34920911379578


In [26]:
# What is the average points scored for the Spurs? (Hint SAS is the team id)

# Keep only the rows recorded under the SAS team id.
spurs = nba.loc[nba['team_id'] == 'SAS']
spurs_mean_pts = spurs['pts'].mean()

print('The average points for the Spurs: ', spurs_mean_pts)

The average points for the Spurs:  103.93769559032717


In [36]:
# What is the median difference between Team Score and Opponent Score?

# Compute the margin game by game first, then take the median of those margins.
# Subtracting the two column medians (median(pts) - median(opp_pts)) is a
# different statistic and does not, in general, equal the median difference.
# The scope stays on the Spurs rows, matching the printed label.
spurs_margin = spurs['pts'] - spurs['opp_pts']

print('The median difference between the Spurs and their opponents : ', spurs_margin.median())


The median difference between the Spurs and their opponents :  3.0


In [45]:
# How has the average and median points scored for winning and losing teams changed by decade?
# 40's, 50's, 60's, 70's, 80's, 90's, 2000's, 2010's

def games_in_decade(games, first_year):
    """Return the rows of `games` whose `year_id` is in [first_year, first_year + 9].

    Both bounds are inclusive, so first_year=1940 selects year_id 1940 through 1949.
    """
    year = games['year_id']
    return games[(year >= first_year) & (year <= first_year + 9)]


# Printed decade label -> first year_id of that decade, in chronological order.
DECADE_STARTS = {
    "40's": 1940,
    "50's": 1950,
    "60's": 1960,
    "70's": 1970,
    "80's": 1980,
    "90's": 1990,
    "2000's": 2000,
    "2010's": 2010,
}

# Data Frames, keyed by decade label (dicts keep insertion order).
winners_by_decade = {
    label: games_in_decade(winners, start) for label, start in DECADE_STARTS.items()
}
losers_by_decade = {
    label: games_in_decade(losers, start) for label, start in DECADE_STARTS.items()
}

# Keep the per-decade names defined for any other cell that refers to them.
w40, w50, w60, w70, w80, w90, w00, w10 = winners_by_decade.values()
l40, l50, l60, l70, l80, l90, l00, l10 = losers_by_decade.values()

# Report order: medians (winners, then losers), then means (winners, then losers),
# with a blank-line separator between consecutive sections.
report_sections = [
    ("Median", "winners", winners_by_decade),
    ("Median", "losers", losers_by_decade),
    ("Mean", "winners", winners_by_decade),
    ("Mean", "losers", losers_by_decade),
]

for position, (stat_name, group_name, frames) in enumerate(report_sections):
    if position > 0:
        print('\n')
    for label, frame in frames.items():
        points = frame['pts']
        value = points.median() if stat_name == "Median" else points.mean()
        print(f"{stat_name} for {group_name} in the {label}:", value)

Median for winners in the 40's: 79.0
Median for winners in the 50's: 95.0
Median for winners in the 60's: 119.0
Median for winners in the 70's: 114.0
Median for winners in the 80's: 114.0
Median for winners in the 90's: 106.0
Median for winners in the 2000's: 102.0
Median for winners in the 2010's: 104.0


Median for losers in the 40's: 69.0
Median for losers in the 50's: 84.0
Median for losers in the 60's: 109.0
Median for losers in the 70's: 104.0
Median for losers in the 80's: 103.0
Median for losers in the 90's: 95.0
Median for losers in the 2000's: 91.0
Median for losers in the 2010's: 93.0


Mean for winners in the 40's: 79.03068783068782
Mean for winners in the 50's: 95.40723226703756
Mean for winners in the 60's: 119.69831965611567
Mean for winners in the 70's: 114.43351467047178
Mean for winners in the 80's: 114.47016848364717
Mean for winners in the 90's: 106.59442060085837
Mean for winners in the 2000's: 102.14606741573034
Mean for winners in the 2010's: 104.63224708807748

