In [1]:
import numpy as np
import pandas as pd
import os, glob, dill
from collections import defaultdict, Counter
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
class game():
    """Made shots and made free throws of one game, re-scorable by distance.

    Built from a play-by-play DataFrame that must provide the columns
    ``period``, ``team``, ``player``, ``event_type``, ``result``,
    ``shot_distance`` and ``points``.  Only rows with ``period <= 4`` are
    kept.

    Attributes
    ----------
    teams : list
        Distinct non-null values of ``team`` in the kept rows.
    regscoring : dict
        team -> sum of ``points`` over made shots (free throws excluded,
        because they carry no ``shot_distance``).
    shots : dict
        team -> DataFrame of made shots (``team``, ``player``,
        ``shot_distance``, ``points``); ``newScores`` adds a ``new`` column.
    fts : dict
        team -> DataFrame (``team``, ``player``) with one row per made
        free throw.
    reScores, regScores : dict
        team -> re-scored / regular total, filled in by ``newScores``.
    pScores : list
        (player, points) pairs, filled in by ``playerScoring``.
    """

    def __init__(self, df):
        df = df[df['period'] <= 4]
        self.teams = list(df.team.dropna().unique())

        made = df['result'] == 'made'
        # Rows without a shot distance (e.g. free throws) are mapped to -1
        # and therefore excluded from the field-goal selection.
        has_distance = df['shot_distance'].fillna(-1) >= 0
        made_shots = df[has_distance & made]
        made_fts = df[(df['event_type'] == 'free throw') & made][['team', 'player']].dropna()

        self.regscoring = dict(made_shots.groupby('team')['points'].sum())

        # Every team gets an entry (possibly an empty frame), so a team with
        # no made shots or no made free throws no longer raises KeyError
        # later on.  The frames are copies, so adding the 'new' column in
        # newScores() does not write into a slice of the source frame.
        shot_columns = ['team', 'player', 'shot_distance', 'points']
        self.shots = {}
        self.fts = {}
        for team in self.teams:
            self.shots[team] = made_shots.loc[made_shots['team'] == team, shot_columns].copy()
            self.fts[team] = made_fts[made_fts['team'] == team].copy()

        self.reScores = {}
        self.regScores = {}
        self.pScores = []

    def newScores(self, func, fts = True):
        """Recompute both team totals with ``func(shot_distance)`` per made shot.

        ``func`` is called once per made shot with that shot's distance and
        its result is stored in the ``new`` column of ``self.shots[team]``.
        When ``fts`` is true, each made free throw adds one point to both
        the re-scored and the regular total.
        """
        for team in self.teams:
            team_shots = self.shots[team]
            team_shots['new'] = team_shots['shot_distance'].apply(lambda dist: func(dist))
            self.reScores[team] = sum(team_shots['new'])
            self.regScores[team] = self.regscoring.get(team, 0)
            if fts:
                made_fts = len(self.fts[team])
                self.reScores[team] += made_fts
                self.regScores[team] += made_fts

    def emitData(self):
        """Return one result tuple per team for a two-team game.

        Each tuple is ``(team, margin, own_new, opp_new, own_regular,
        opp_regular)`` where ``margin`` is the re-scored point difference
        divided by the larger of the two re-scored totals (0.0 when that
        total is zero).  Requires ``newScores`` to have been called; raises
        IndexError if fewer than two teams were scored.
        """
        # list() is required on Python 3, where dict.items() is a
        # non-indexable view.
        totals = list(self.reScores.items())
        (team_a, score_a), (team_b, score_b) = totals[0], totals[1]
        top = max([score_a, score_b])
        margin_a = 1. * (score_a - score_b) / top if top else 0.0
        margin_b = 1. * (score_b - score_a) / top if top else 0.0
        return [(team_a, margin_a, score_a, score_b,
                 self.regScores[team_a], self.regScores[team_b]),
                (team_b, margin_b, score_b, score_a,
                 self.regScores[team_b], self.regScores[team_a])]

    def playerScoring(self, fts = True):
        """Rebuild ``self.pScores`` as (player, re-scored points) pairs.

        Relies on the ``new`` column written by ``newScores``, so that
        method must run first.  Not called by ``playGame``.
        """
        self.pScores = []
        for team in self.teams:
            pts = Counter(dict(self.shots[team].groupby('player')['new'].sum()))
            if fts and len(self.fts[team]):
                pts += Counter(dict(self.fts[team].groupby('player').count()['team']))
            self.pScores.extend(pts.items())

    def playGame(self, func, fts = True):
        """Re-score the game with ``func`` and return ``emitData()``."""
        self.newScores(func, fts)
        return self.emitData()
                

In [55]:
def collectPlayers(game):
    """Collect per-player made shots and made free throws for one game.

    Parameters
    ----------
    game : object exposing ``teams`` (iterable of team keys), ``shots``
        (team -> DataFrame with ``player``, ``shot_distance``, ``points``)
        and ``fts`` (team -> DataFrame with ``team``, ``player``; one row
        per made free throw).

    Returns
    -------
    dict
        player -> ``{'shot_distance': [...], 'points': [...], 'fts': n,
        'games': 1}``.  Players with only free throws get empty shot
        lists; players with only shots get ``fts`` 0.
    """
    ww = {}
    for team in game.teams:
        shots_by_player = {}
        if team in game.shots:
            team_shots = game.shots[team][['player', 'shot_distance', 'points']]
            shots_by_player = dict(list(team_shots.groupby('player')))

        ft_counts = {}
        # A team may have no free-throw entry, or an empty one.
        if team in game.fts and len(game.fts[team]):
            ft_counts = dict(game.fts[team].groupby('player').count()['team'])

        # Set union instead of `keys() + keys()`, which only works when
        # keys() returns a list (Python 2).
        for name in set(shots_by_player) | set(ft_counts):
            if name in shots_by_player:
                distances = list(shots_by_player[name]['shot_distance'])
                points = list(shots_by_player[name]['points'])
            else:
                distances, points = [], []
            ww[name] = {'shot_distance': distances, 'points': points,
                        'fts': ft_counts.get(name, 0), 'games': 1}

    return ww

# player -> list with one {'fts', 'shot_distance', 'points'} dict per game played.
players = defaultdict(list)
# The loop variable is deliberately not called `game`: that would rebind the
# `game` class in the notebook namespace, and re-running the cell that builds
# `games` with `game(g1)` would then fail for every file.
for season_game in games:
    pscores = collectPlayers(season_game)
    for name, stats in pscores.items():
        players[name].append({'fts': stats['fts'],
                              'shot_distance': stats['shot_distance'],
                              'points': stats['points']})

In [59]:
# Keep only players with at least 70 recorded games.
# .items() behaves the same on Python 2 and 3; .iteritems() exists only on 2.
scoringQualified = {k: v for k, v in players.items() if len(v) >= 70}

In [60]:
# Number of players that passed the 70-game filter.
len(scoringQualified)

143

In [74]:
def playerScoring(pts, func, fts = True):
    """Average re-scored points per game for every player.

    Parameters
    ----------
    pts : dict
        player -> list of per-game dicts with keys ``'shot_distance'``
        (list of distances of made shots) and ``'fts'`` (made free throws).
    func : callable
        Maps one shot distance to the points awarded for that shot.
    fts : bool
        When true, each made free throw adds one point to the game total.

    Returns
    -------
    list of (player, mean points per game rounded to 1 decimal), sorted by
    descending average and then by player name.
    """
    averages = []
    for player, games in pts.items():
        per_game = []
        for one_game in games:
            total = sum(func(dist) for dist in one_game['shot_distance'])
            # Free throws are the only optional part.  The former expression
            # `shots + fts if fts else 0` zeroed the whole game total when
            # fts was False because the conditional bound to the entire sum.
            if fts:
                total += one_game['fts']
            per_game.append(total)
        averages.append((player, round(np.mean(per_game), 1)))
    return sorted(averages, key=lambda item: (-item[1], item[0]))
            

In [78]:
# Time playerScoring over the qualified players with a made shot worth
# ln(3 + 2.73 * distance / 20); fts keeps its default of True.
%timeit playerScoring(scoringQualified, lambda x:  np.log(3+2.73*x/20))

10 loops, best of 3: 107 ms per loop


In [3]:
# Build one `game` object per regular-season play-by-play file.
games = []

# sorted() makes the order of `games` reproducible; glob order is arbitrary.
for csv in sorted(glob.glob('2015-16/*.csv')):
    g1 = pd.read_csv(csv)
    # Skip empty files and anything that is not a regular-season game.
    if g1.empty or g1['data_set'].unique()[0] != '2015-2016 Regular Season':
        continue
    try:
        games.append(game(g1))
    except Exception as err:
        # Best effort: keep loading the remaining files, but report why this
        # one was rejected instead of hiding the error.
        print('{}: {!r}'.format(csv, err))

In [4]:
def reScoreSeason(games, func, fts=True):
    """Replay every game under a new scoring rule and build a standings table.

    Parameters
    ----------
    games : iterable
        Objects whose ``playGame(func, fts)`` returns, per team, a tuple
        ``(team, margin, own_new, opp_new, own_regular, opp_regular)``.
    func : callable
        Forwarded to ``playGame``; maps a shot distance to points.
    fts : bool
        Forwarded to ``playGame``.

    Returns
    -------
    list of tuples, one per team:
    ``(team, wins, losses, ties, mean own_new, mean (own_new - own_regular),
    mean opp_new, mean (opp_new - opp_regular), west_flag)`` with the means
    rounded to one decimal.  Rows with ``west_flag`` 0 come first; within a
    flag value rows are ordered by wins (descending), ties (descending),
    losses (ascending), then team name.
    """
    west = frozenset(['GSW', 'LAC', 'SAS', 'HOU', 'POR', 'DAL', 'OKC', 'PHX',
                      'MEM', 'UTA', 'SAC', 'DEN', 'NOP', 'LAL', 'MIN'])

    # team -> list of (margin, own_new, opp_new, own_regular, opp_regular)
    scores = defaultdict(list)
    for played in games:
        for row in played.playGame(func, fts):
            scores[row[0]].append(row[1:])

    newScores = []
    for team, rows in scores.items():
        margins = [r[0] for r in rows]
        newScores.append((
            team,
            sum(1 for m in margins if m > 0),
            sum(1 for m in margins if m < 0),
            sum(1 for m in margins if m == 0),
            round(np.mean([r[1] for r in rows]), 1),
            round(np.mean([r[1] - r[3] for r in rows]), 1),
            round(np.mean([r[2] for r in rows]), 1),
            round(np.mean([r[2] - r[4] for r in rows]), 1),
            1 if team in west else 0,
        ))

    def standings_key(row):
        # Unpack inside the function: tuple parameters in a lambda signature
        # are not valid syntax on Python 3.
        name, wins, losses, ties, own, own_diff, opp, opp_diff, is_west = row
        return (is_west, -wins, -ties, losses, name, own, own_diff, opp, opp_diff)

    newScores.sort(key=standings_key)
    return newScores
    
    

In [92]:
# Standings when a made shot is worth 2 + ln(1 + 1.73 * distance / 25)
# (exactly 2 at distance 0, about 3 at distance 25); free throws are included.
reScoreSeason(games, lambda x: 2 + np.log(1+1.73*x/25) , True)

[('CLE', 53, 29, 0, 112.0, 8.5, 107.0, 9.6, 0),
 ('IND', 52, 30, 0, 112.2, 10.9, 108.9, 9.6, 0),
 ('TOR', 49, 33, 0, 111.1, 9.0, 107.2, 9.5, 0),
 ('ATL', 48, 34, 0, 110.6, 8.4, 107.9, 9.6, 0),
 ('MIA', 48, 34, 0, 110.1, 10.9, 108.1, 10.5, 0),
 ('BOS', 45, 37, 0, 114.5, 9.4, 111.7, 9.8, 0),
 ('CHI', 43, 39, 0, 111.8, 11.1, 112.7, 10.7, 0),
 ('WAS', 42, 40, 0, 114.5, 10.7, 114.5, 10.4, 0),
 ('ORL', 41, 41, 0, 112.5, 11.4, 111.8, 9.1, 0),
 ('CHA', 40, 42, 0, 111.3, 8.9, 110.0, 10.0, 0),
 ('DET', 40, 42, 0, 109.4, 8.5, 110.9, 10.5, 0),
 ('MIL', 36, 46, 0, 107.6, 9.4, 110.6, 7.9, 0),
 ('NYK', 36, 46, 0, 109.4, 11.8, 110.4, 10.0, 0),
 ('BKN', 29, 53, 0, 109.8, 11.4, 115.0, 9.3, 0),
 ('PHI', 10, 72, 0, 104.1, 7.1, 117.6, 10.6, 0),
 ('SAS', 69, 13, 0, 116.0, 12.5, 103.2, 10.4, 1),
 ('GSW', 67, 15, 0, 122.7, 8.9, 113.9, 10.6, 1),
 ('OKC', 54, 28, 0, 119.7, 10.3, 111.3, 9.1, 1),
 ('LAC', 51, 31, 0, 114.5, 10.8, 109.4, 9.8, 1),
 ('MEM', 45, 37, 0, 108.7, 10.3, 109.1, 8.4, 1),
 ('POR', 42, 40, 0, 

In [8]:
# Standings when a made shot is worth 2 + ln(3 + 2.73 * distance / 20)
# (about 3.1 at distance 0); free throws are included.
reScoreSeason(games, lambda x: 2. + np.log(3+2.73*x/20) , True)

[('CLE', 53, 29, 0, 149.2, 45.7, 142.5, 45.1, 0),
 ('IND', 52, 30, 0, 149.0, 47.7, 144.9, 45.6, 0),
 ('TOR', 50, 32, 0, 146.6, 44.5, 142.4, 44.7, 0),
 ('BOS', 49, 33, 0, 152.6, 47.5, 148.1, 46.2, 0),
 ('ATL', 47, 35, 0, 148.0, 45.8, 143.7, 45.4, 0),
 ('ORL', 46, 36, 0, 150.6, 49.5, 148.9, 46.3, 0),
 ('MIA', 45, 37, 0, 147.6, 48.3, 144.0, 46.4, 0),
 ('WAS', 43, 39, 0, 152.7, 48.9, 151.8, 47.6, 0),
 ('MIL', 41, 41, 0, 145.5, 47.3, 147.3, 44.7, 0),
 ('CHI', 38, 44, 0, 148.8, 48.1, 150.7, 48.7, 0),
 ('DET', 38, 44, 0, 146.1, 45.2, 148.5, 48.1, 0),
 ('CHA', 36, 46, 0, 146.6, 44.2, 146.3, 46.3, 0),
 ('NYK', 33, 49, 0, 144.6, 47.0, 147.2, 46.8, 0),
 ('BKN', 29, 53, 0, 146.9, 48.5, 154.7, 49.1, 0),
 ('PHI', 14, 68, 0, 139.4, 42.4, 156.1, 49.1, 0),
 ('SAS', 70, 12, 0, 154.8, 51.4, 137.9, 45.1, 1),
 ('GSW', 64, 18, 0, 163.3, 49.6, 151.4, 48.1, 1),
 ('OKC', 52, 30, 0, 159.7, 50.2, 148.8, 46.6, 1),
 ('LAC', 51, 31, 0, 151.0, 47.4, 144.9, 45.4, 1),
 ('MEM', 46, 36, 0, 144.5, 46.1, 143.7, 43.0, 1),
