In [1]:
import numpy as np
import pandas as pd
import os, glob, dill
from collections import defaultdict, Counter
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
class game():
    """Regulation-time scoring summary of one game, with support for re-scoring.

    Built from a play-by-play DataFrame that must provide the columns
    ``period``, ``team``, ``player``, ``shot_distance``, ``result``,
    ``points`` and ``event_type``.  Only rows with ``period <= 4`` are used,
    so anything recorded in later periods is ignored.

    Attributes:
        teams: team labels found in the ``team`` column (NaN dropped).
        regscoring: team -> sum of ``points`` over made shots that have a
            non-negative ``shot_distance``.
        shots: team -> DataFrame (``player``, ``shot_distance``, ``points``)
            of those made shots; ``newScores`` adds a ``new`` column.
        fts: team -> DataFrame (``team``, ``player``) of made free throws.
        reScores: team -> total under the scoring function last passed to
            ``newScores``.
        regScores: team -> total under the recorded ``points`` values.
        pScores: list of ``(player, points)`` pairs filled by ``playerScoring``.
    """

    def __init__(self, df):
        df = df[df['period'] <= 4]
        self.teams = list(df.team.dropna().unique())

        made = df['result'] == 'made'
        # Rows without a shot distance (e.g. free throws) become -1 and drop out.
        made_shots = df[(df['shot_distance'].fillna(-1) >= 0) & made]
        made_fts = df[(df['event_type'] == 'free throw') & made][['team', 'player']].dropna()

        self.regscoring = dict(made_shots.groupby('team')['points'].sum())
        # Each group is copied so that adding the 'new' column later writes to
        # an independent frame instead of a slice of the source data.
        self.shots = dict(
            (team, grp[['player', 'shot_distance', 'points']].copy())
            for team, grp in made_shots.groupby('team'))
        # Scalar group key (not a one-element list) keeps the dict keys plain
        # team labels, matching the keys of self.shots.
        self.fts = dict((team, grp) for team, grp in made_fts.groupby('team'))

        # A team with no made shots or no made free throws would otherwise be
        # missing from these dicts and raise KeyError when scored.
        for team in self.teams:
            self.regscoring.setdefault(team, 0)
            self.shots.setdefault(
                team, pd.DataFrame(columns=['player', 'shot_distance', 'points']))
            self.fts.setdefault(team, pd.DataFrame(columns=['team', 'player']))

        self.reScores = {}
        self.regScores = {}
        self.pScores = []

    def newScores(self, func, fts = True):
        """Re-score every made shot with ``func(shot_distance)``.

        Stores the per-shot value in the ``new`` column of ``self.shots[team]``
        and the team totals in ``self.reScores`` (re-scored) and
        ``self.regScores`` (recorded points).  When ``fts`` is true, one point
        per made free throw is added to both totals.
        """
        for team in self.teams:
            self.shots[team]['new'] = self.shots[team]['shot_distance'].apply(func)
            self.reScores[team] = sum(self.shots[team]['new'])
            self.regScores[team] = self.regscoring[team]
            if fts:
                ft = len(self.fts[team])
                self.reScores[team] += ft
                self.regScores[team] += ft

    def emitData(self):
        """Return one result tuple per team for a two-team game.

        Each tuple is ``(team, margin, own, opponent, reg_own, reg_opponent)``
        where ``margin`` is ``(own - opponent) / max(own, opponent)`` on the
        re-scored totals and the ``reg_*`` values are the recorded totals.
        ``newScores`` must have been called first.  Exactly two teams are
        expected; fewer raises IndexError.
        """
        # list() makes the items indexable on Python 3 as well as Python 2.
        scr = list(self.reScores.items())
        (team_a, pts_a), (team_b, pts_b) = scr[0], scr[1]
        top = max([pts_a, pts_b])
        # Guard the 0-0 case, which would otherwise divide by zero.
        margin = 1. * (pts_a - pts_b) / top if top else 0.0
        return [(team_a, margin, pts_a, pts_b,
                 self.regScores[team_a], self.regScores[team_b]),
                (team_b, -margin, pts_b, pts_a,
                 self.regScores[team_b], self.regScores[team_a])]

    def playerScoring(self, fts = True):
        """Rebuild ``self.pScores`` as ``(player, re-scored points)`` pairs.

        Relies on the ``new`` column written by ``newScores``.  When ``fts`` is
        true, each made free throw adds one point to the player's total.
        """
        self.pScores = []
        for team in self.teams:
            pts = Counter(dict(self.shots[team].groupby('player')['new'].sum()))
            if fts and len(self.fts[team]):
                pts += Counter(dict(self.fts[team].groupby('player').count()['team']))
            self.pScores.extend(pts.items())

    def playGame(self, func, fts = True):
        """Re-score the game with ``func`` and return ``emitData()``.

        Player-level scoring (``playerScoring``) is not run here; call it
        explicitly if ``pScores`` is needed.
        """
        self.newScores(func, fts)
        return self.emitData()
                

In [None]:
# Scratch: for each ATL player in the first loaded game, collect the distances
# and recorded point values of their made shots.
# w: player -> DataFrame of that player's made shots.
w = dict(list(games[0].shots['ATL'][['player','shot_distance','points']].groupby('player')))
# z: player -> number of made free throws.
z = dict(games[0].fts['ATL'].groupby('player').count()['team'])
ww = {}
# Set union works on both Python 2 and 3 (key views cannot be added with +).
for i in set(w) | set(z):
    if i in w:
        ww[i] = {'shot_distance' : list(w[i]['shot_distance']), 'points' : list(w[i]['points']) }
    else:
        # Player made free throws only: no field-goal rows to list.
        ww[i] = {'shot_distance' : [], 'points' : [] }

In [183]:
# Per-player sum of the re-scored ('new') shot values for ATL in games[0].
# The 'new' column only exists after newScores()/playGame() has been run on
# that game, and `games` itself is built in a cell further down the notebook.
Counter(dict(games[0].shots['ATL'].groupby('player')['new'].sum()))

Counter({'Al Horford': 15.01770665097248,
         'Dennis Schroder': 18.479269768736305,
         'Jeff Teague': 16.915143192790932,
         'Kyle Korver': 8.741861107603075,
         'Lamar Patterson': 2.9522754455936653,
         'Mike Scott': 4.133821410565254,
         'Paul Millsap': 17.999045470948293,
         'Thabo Sefolosha': 2.066910705282627,
         'Tiago Splitter': 4.0})

In [3]:
# Build one `game` object per regular-season play-by-play file.
games = []

# sorted() makes the load order (and therefore games[0]) reproducible;
# glob.glob alone returns files in filesystem order.
for csv in sorted(glob.glob('2015-16/*.csv')):
    g1 = pd.read_csv(csv)
    # A header-only file has no data_set value to inspect; skip it.
    if g1.empty or g1['data_set'].unique()[0] != '2015-2016 Regular Season':
        continue
    try:
        games.append(game(g1))
    except Exception as err:
        # Keep loading the remaining files, but report which file failed and
        # why. Catching Exception (not a bare except) lets KeyboardInterrupt
        # through.
        print('{}: {!r}'.format(csv, err))

In [None]:
def reScoreSeason(games, func, fts=True):
    """Re-score every game with ``func`` and build per-team season standings.

    Args:
        games: iterable of objects exposing ``playGame(func, fts)``, which must
            return tuples ``(team, margin, own, opp, reg_own, reg_opp)``.
        func: maps a shot distance to the value of a made shot.
        fts: forwarded to ``playGame``; whether free throws are counted.

    Returns:
        A list with one tuple per team::

            (team, wins, losses, ties,
             mean own, mean (own - reg_own),
             mean opp, mean (opp - reg_opp),
             west)

        A game counts as a win/loss/tie when ``margin`` is positive/negative/
        zero.  Means are rounded to one decimal.  ``west`` is 1 for teams in
        the hard-coded western list, else 0.  Rows are sorted by ``west``,
        then most wins, most ties, fewest losses, team name, and finally the
        four means.

    Player-level aggregation is not performed here.
    """
    west = frozenset(['GSW','LAC','SAS','HOU','POR','DAL','OKC','PHX','MEM',
                      'UTA','SAC','DEN','NOP','LAL','MIN'])

    # team -> list of (margin, own, opp, reg_own, reg_opp), one per game played.
    scores = defaultdict(list)
    for i in games:
        for j in i.playGame(func, fts):
            scores[j[0]].append(j[1:])

    newScores = []
    for key, rows in scores.items():
        newScores.append((key,
                          sum(1 for r in rows if r[0] > 0),
                          sum(1 for r in rows if r[0] < 0),
                          sum(1 for r in rows if r[0] == 0),
                          round(np.mean([r[1] for r in rows]), 1),
                          round(np.mean([r[1] - r[3] for r in rows]), 1),
                          round(np.mean([r[2] for r in rows]), 1),
                          round(np.mean([r[2] - r[4] for r in rows]), 1),
                          1 if key in west else 0))

    # Index-based key: tuple-unpacking lambda parameters are Python 2 only.
    # Row layout: 0 name, 1 wins, 2 losses, 3 ties, 4-7 means, 8 west flag.
    newScores.sort(key=lambda r: (r[8], -r[1], -r[3], r[2], r[0]) + r[4:8])
    return newScores
    
    

In [92]:
# Season standings under a logarithmic shot value: a made shot is worth
# 2 + ln(1 + 1.73 * x / 25), i.e. 2 at x = 0 and about 3 at x = 25
# (1 + 1.73 is close to e). Free throws are included.
reScoreSeason(
    games,
    lambda x: 2 + np.log(1 + 1.73 * x / 25),
    fts=True,
)

[('CLE', 53, 29, 0, 112.0, 8.5, 107.0, 9.6, 0),
 ('IND', 52, 30, 0, 112.2, 10.9, 108.9, 9.6, 0),
 ('TOR', 49, 33, 0, 111.1, 9.0, 107.2, 9.5, 0),
 ('ATL', 48, 34, 0, 110.6, 8.4, 107.9, 9.6, 0),
 ('MIA', 48, 34, 0, 110.1, 10.9, 108.1, 10.5, 0),
 ('BOS', 45, 37, 0, 114.5, 9.4, 111.7, 9.8, 0),
 ('CHI', 43, 39, 0, 111.8, 11.1, 112.7, 10.7, 0),
 ('WAS', 42, 40, 0, 114.5, 10.7, 114.5, 10.4, 0),
 ('ORL', 41, 41, 0, 112.5, 11.4, 111.8, 9.1, 0),
 ('CHA', 40, 42, 0, 111.3, 8.9, 110.0, 10.0, 0),
 ('DET', 40, 42, 0, 109.4, 8.5, 110.9, 10.5, 0),
 ('MIL', 36, 46, 0, 107.6, 9.4, 110.6, 7.9, 0),
 ('NYK', 36, 46, 0, 109.4, 11.8, 110.4, 10.0, 0),
 ('BKN', 29, 53, 0, 109.8, 11.4, 115.0, 9.3, 0),
 ('PHI', 10, 72, 0, 104.1, 7.1, 117.6, 10.6, 0),
 ('SAS', 69, 13, 0, 116.0, 12.5, 103.2, 10.4, 1),
 ('GSW', 67, 15, 0, 122.7, 8.9, 113.9, 10.6, 1),
 ('OKC', 54, 28, 0, 119.7, 10.3, 111.3, 9.1, 1),
 ('LAC', 51, 31, 0, 114.5, 10.8, 109.4, 9.8, 1),
 ('MEM', 45, 37, 0, 108.7, 10.3, 109.1, 8.4, 1),
 ('POR', 42, 40, 0, 

In [None]:
# Alternative curve: a made shot is worth 2 + ln(3 + 2.73 * x / 20), which is
# already about 3.1 (2 + ln 3) at x = 0. Free throws are included.
reScoreSeason(
    games,
    lambda x: 2. + np.log(3 + 2.73 * x / 20),
    fts=True,
)