# Vanilla elo

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [53]:
from datetime import datetime, timedelta
from collections import defaultdict

In [4]:
## load data
raw_data = pd.read_csv("/Users/tejaswin.p/Downloads/data_nbaallelo.csv", )
print "Total rows:", len(raw_data)
print "Total columns:", len(raw_data.columns)

Total rows: 126314
Total columns: 23


In [5]:
raw_data.columns

Index([u'gameorder', u'game_id', u'lg_id', u'_iscopy', u'year_id',
       u'date_game', u'seasongame', u'is_playoffs', u'team_id', u'fran_id',
       u'pts', u'elo_i', u'elo_n', u'win_equiv', u'opp_id', u'opp_fran',
       u'opp_pts', u'opp_elo_i', u'opp_elo_n', u'game_location',
       u'game_result', u'forecast', u'notes'],
      dtype='object')

In [10]:
df_data = raw_data[[u'gameorder', u'game_id', u'lg_id', u'_iscopy', u'year_id',
       u'date_game', u'seasongame', u'is_playoffs', u'team_id', u'fran_id',
       u'pts', u'opp_id', u'opp_fran',
       u'opp_pts', u'game_location',
       u'game_result']][raw_data._iscopy==0].copy()

In [11]:
print "Data:", df_data.shape

Data: (63157, 16)


In [24]:
df_data.date_game.head(10)

0     11/1/1946
2     11/2/1946
4     11/2/1946
7     11/2/1946
9     11/2/1946
11    11/3/1946
12    11/4/1946
14    11/5/1946
17    11/5/1946
19    11/7/1946
Name: date_game, dtype: object

In [25]:
_d = "11/1/1946"
def string_to_datetime(dstring):
    m, d, y = dstring.strip().split('/')
    return datetime(int(y), int(m), int(d))

In [26]:
df_data["game_datetime"] = df_data.date_game.map(string_to_datetime)

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,pts,opp_id,opp_fran,opp_pts,game_location,game_result,game_datetime
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,66,NYK,Knicks,68,H,L,1946-11-01
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,63,NYK,Knicks,47,H,W,1946-11-02
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,33,WSC,Capitols,50,H,L,1946-11-02
7,4,194611020PRO,NBA,0,1947,11/2/1946,1,0,PRO,Steamrollers,59,BOS,Celtics,53,H,W,1946-11-02
9,5,194611020STB,NBA,0,1947,11/2/1946,1,0,STB,Bombers,56,PIT,Ironmen,51,H,W,1946-11-02


### Tracking the franchises through time.

In [33]:
df_data[["lg_id", "team_id", "fran_id", "year_id"]].drop_duplicates().sort_values(["fran_id", "year_id"]).head(10)

Unnamed: 0,lg_id,team_id,fran_id,year_id
702,NBA,BLB,Baltimore,1948
1137,NBA,BLB,Baltimore,1949
1909,NBA,BLB,Baltimore,1950
3079,NBA,BLB,Baltimore,1951
3841,NBA,BLB,Baltimore,1952
4562,NBA,BLB,Baltimore,1953
5296,NBA,BLB,Baltimore,1954
9,NBA,STB,Bombers,1947
734,NBA,STB,Bombers,1948
1151,NBA,STB,Bombers,1949


In [84]:
teams = set(df_data.fran_id).union(set(df_data.opp_fran))
print "Total different franchises:", len(teams)

Total different franchises: 53


### Computing elo.

In [85]:
df_data.head()

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,...,opp_id,opp_fran,opp_pts,game_location,game_result,game_datetime,prev_fran_elo,prev_opp_elo,new_fran_elo,new_opp_elo
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,...,NYK,Knicks,68,H,L,1946-11-01,1500.0,1500.0,1490.0,1510.0
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,...,NYK,Knicks,47,H,W,1946-11-02,1500.0,1510.0,1510.287744,1499.712256
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,...,WSC,Capitols,50,H,L,1946-11-02,1500.0,1500.0,1490.0,1510.0
7,4,194611020PRO,NBA,0,1947,11/2/1946,1,0,PRO,Steamrollers,...,BOS,Celtics,53,H,W,1946-11-02,1500.0,1500.0,1510.0,1490.0
9,5,194611020STB,NBA,0,1947,11/2/1946,1,0,STB,Bombers,...,PIT,Ironmen,51,H,W,1946-11-02,1500.0,1500.0,1510.0,1490.0


In [86]:
# first, set everything to 0
df_data["prev_fran_elo"] = 0.0
df_data["prev_opp_elo"] = 0.0
df_data["new_fran_elo"] = 0.0
df_data["new_opp_elo"] = 0.0

In [87]:
# next, for all the FIRST games of the season, set the previous ELO rating to 1500
vanilla_elo = defaultdict(list)
for fran in teams:
    vanilla_elo[fran].append(1500)

In [88]:
def update_elo(home, away, outcome, k=20):
    exp_home = 1.0 / (1.0 + 10**((away - home)/400.0) )
    exp_away = 1.0 - exp_home
    
    r_home = home + k*(outcome - exp_home)
    r_away = away + k*(1 - outcome - exp_away)
    
    return r_home, r_away

In [89]:
from time import time

In [90]:
_StartTime = time()
_counter = 0
for ix,row in df_data.sort_values("game_datetime").iterrows():
    _counter += 1
    if _counter%5000==0:
        print "Completed %d games."%_counter
    
    outcome = 1 if row["game_result"]=='W' else 0
    
    fran, opp = row["fran_id"], row["opp_fran"]
    prev_fran_elo, prev_opp_elo = vanilla_elo[fran][-1], vanilla_elo[opp][-1]
    
    new_fran_elo, new_opp_elo = update_elo(prev_fran_elo, prev_opp_elo, outcome)
    vanilla_elo[fran].append(new_fran_elo)
    vanilla_elo[opp].append(new_opp_elo)
    
    df_data.loc[ix, "prev_fran_elo"] = prev_fran_elo
    df_data.loc[ix, "prev_opp_elo"] = prev_opp_elo
    
    df_data.loc[ix, "new_fran_elo"] = new_fran_elo
    df_data.loc[ix, "new_opp_elo"] = new_opp_elo

print "Completed %d games."%_counter
print "\nDone.", "Time :", (time()-_StartTime)/60.0

Completed 5000 games.
Completed 10000 games.
Completed 15000 games.
Completed 20000 games.
Completed 25000 games.
Completed 30000 games.
Completed 35000 games.
Completed 40000 games.
Completed 45000 games.
Completed 50000 games.
Completed 55000 games.
Completed 60000 games.
Completed 63157 games.

Done. Time : 2.09809026718


In [91]:
df_data.sort_values("game_datetime").head(25)

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,...,opp_id,opp_fran,opp_pts,game_location,game_result,game_datetime,prev_fran_elo,prev_opp_elo,new_fran_elo,new_opp_elo
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,...,NYK,Knicks,68,H,L,1946-11-01,1500.0,1500.0,1490.0,1510.0
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,...,NYK,Knicks,47,H,W,1946-11-02,1500.0,1510.0,1510.287744,1499.712256
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,...,WSC,Capitols,50,H,L,1946-11-02,1500.0,1500.0,1490.0,1510.0
7,4,194611020PRO,NBA,0,1947,11/2/1946,1,0,PRO,Steamrollers,...,BOS,Celtics,53,H,W,1946-11-02,1500.0,1500.0,1510.0,1490.0
9,5,194611020STB,NBA,0,1947,11/2/1946,1,0,STB,Bombers,...,PIT,Ironmen,51,H,W,1946-11-02,1500.0,1500.0,1510.0,1490.0
11,6,194611030CLR,NBA,0,1947,11/3/1946,1,0,CLR,Rebels,...,TRH,Huskies,60,H,W,1946-11-03,1500.0,1490.0,1509.712256,1480.287744
12,7,194611040PIT,NBA,0,1947,11/4/1946,2,0,PIT,Ironmen,...,WSC,Capitols,71,H,L,1946-11-04,1490.0,1510.0,1480.575011,1519.424989
14,8,194611050BOS,NBA,0,1947,11/5/1946,2,0,BOS,Celtics,...,CHS,Stags,57,H,L,1946-11-05,1490.0,1510.287744,1480.583265,1519.704478
17,9,194611050DTF,NBA,0,1947,11/5/1946,2,0,DTF,Falcons,...,STB,Bombers,53,H,L,1946-11-05,1490.0,1510.0,1480.575011,1519.424989
19,10,194611070GSW,NBA,0,1947,11/7/1946,1,0,PHW,Warriors,...,PIT,Ironmen,75,H,W,1946-11-07,1500.0,1480.575011,1509.441486,1471.133526


In [93]:
sorted(
    [(k,v[-1]) for k,v in vanilla_elo.iteritems()], reverse=True, key=lambda x: x[1]
)

[('Warriors', 1838.4004693273725),
 ('Spurs', 1734.0129955616374),
 ('Cavaliers', 1729.9401228056909),
 ('Clippers', 1725.4892593191162),
 ('Rockets', 1721.763919053845),
 ('Grizzlies', 1689.6744384883434),
 ('Hawks', 1640.9349415637537),
 ('Thunder', 1628.7855075441337),
 ('Mavericks', 1622.1097941848834),
 ('Colonels', 1609.8813665977987),
 ('Bulls', 1606.8632887299254),
 ('Trailblazers', 1595.157507671492),
 ('Wizards', 1591.2140961176096),
 ('Pelicans', 1585.9411137391965),
 ('Stags', 1567.7626395624277),
 ('Pacers', 1563.766264297583),
 ('Jazz', 1562.1789222290483),
 ('Packers', 1557.9122669334454),
 ('Nets', 1552.2851267765707),
 ('Celtics', 1547.9920659663067),
 ('Raptors', 1539.126409613078),
 ('Olympians', 1532.5492414340606),
 ('Suns', 1529.6868165153774),
 ('Heat', 1518.3032170200295),
 ('Bucks', 1503.4002806396702),
 ('Rebels', 1499.7795259081445),
 ('Spirits', 1485.5844349336662),
 ('Stars', 1475.929636977781),
 ('Pistons', 1470.542235394355),
 ('Bombers', 1468.30818147226