# Free Throws!

In [6]:
import numpy as np
import pandas as pd
import os, glob
from collections import defaultdict

In [3]:
# Load one game's play-by-play CSV (the TOR @ ATL log whose file name is dated 2013-11-01).
df = pd.read_csv('2013-14/[2013-11-01]-0021300023-TOR@ATL.csv')

The following function turns a single game log into per-team statistics, re-scoring the game as if every foul were settled by one free throw.

In [4]:
def scoring(dfIn):
    '''
    Re-score one game under a rule where every foul is settled by a single free throw.

    Under that rule only the first shot of a free-throw trip is taken, and a make is
    worth as many points as the trip had shots (1, 2 or 3). Only periods 1-4 are
    considered, so a game that is level at the end of regulation is a tie.

    Parameters
    ----------
    dfIn : pandas.DataFrame
        Play-by-play log of one game. The columns read here are 'team', 'period',
        'event_type', 'result', 'points', 'type', 'date' and 'num'.
        NOTE(review): 'num' is assumed to be the shot's position within its trip
        (1 for the first shot) -- confirm against the data dictionary.

    Returns
    -------
    (team_rows, total_fts, extra_fts)
        team_rows is a list of two tuples, one per team in alphabetical order:
            (team name,
             result under the new rule: 1 win, 0 loss, 0.5 tie,
             score under the new rule,
             free throws attempted,
             free throws that would disappear under the new rule,
             opponent score under the new rule,
             score under current rules,
             opponent score under current rules,
             free throws attempted (same count as above, kept for compatibility),
             free throws made,
             first-of-trip free throws attempted (num == 1),
             first-of-trip free throws made)
        total_fts is the number of free throws in regulation by both teams and
        extra_fts is how many of those would disappear under the new rule.

    Raises
    ------
    ValueError
        If the log does not contain exactly two distinct team names.
    '''
    # groupby() returns its groups in sorted order; fix that order once and align
    # every per-team statistic to it, so a team missing from one statistic
    # (e.g. no free throws at all) yields 0 instead of shifting positions.
    teams = sorted(dfIn['team'].dropna().unique())
    if len(teams) != 2:
        raise ValueError('expected exactly two teams in the game log, found {}'.format(teams))

    df = dfIn[dfIn['period'] <= 4]
    is_ft = df['event_type'] == 'free throw'
    is_made = df['result'] == 'made'
    is_first = df['num'] == 1

    def points_by_team(rows):
        # Sum of 'points' per team, 0 for a team with no matching rows.
        return rows.groupby('team')['points'].sum().reindex(teams, fill_value=0)

    def count_by_team(rows):
        # Number of rows per team (rows lacking a team or a date are ignored).
        counted = rows[['team', 'date']].dropna().groupby('team')['date'].count()
        return counted.reindex(teams, fill_value=0)

    def result_for(own, other):
        if own > other:
            return 1
        if own < other:
            return 0
        return 0.5

    # A made first shot is worth the whole trip.
    first_shot_worth = (
        (1, ['Free Throw 1 of 1', 'Free Throw Technical']),
        (2, ['Free Throw 1 of 2', 'Free Throw Clear Path 1 of 2', 'Free Throw Flagrant 1 of 2']),
        (3, ['Free Throw 1 of 3', 'Free Throw Flagrant 1 of 3']),
    )
    # Every shot after the first one of a trip no longer exists under the new rule.
    extra_kinds = [
        'Free Throw 2 of 2', 'Free Throw 2 of 3', 'Free Throw 3 of 3',
        'Free Throw Clear Path 2 of 2',
        'Free Throw Flagrant 2 of 2', 'Free Throw Flagrant 2 of 3', 'Free Throw Flagrant 3 of 3',
    ]

    all_fts = df[is_ft]
    made_fts = df[is_ft & is_made]
    # The mask is built from all_fts itself so that it lines up row for row.
    extra_fts = all_fts[all_fts['type'].isin(extra_kinds)]

    score = points_by_team(df[~is_ft & is_made])
    for worth, kinds in first_shot_worth:
        score = score + worth * points_by_team(made_fts[made_fts['type'].isin(kinds)])

    regulation = points_by_team(df)
    attempts = count_by_team(all_fts)
    made = count_by_team(made_fts)
    extra = count_by_team(extra_fts)
    first = count_by_team(df[is_ft & is_first])
    first_made = count_by_team(df[is_ft & is_first & is_made])

    team_rows = []
    for team, opponent in ((teams[0], teams[1]), (teams[1], teams[0])):
        team_rows.append((
            team,
            result_for(score.loc[team], score.loc[opponent]),
            score.loc[team],
            attempts.loc[team],
            extra.loc[team],
            score.loc[opponent],
            regulation.loc[team],
            regulation.loc[opponent],
            attempts.loc[team],
            made.loc[team],
            first.loc[team],
            first_made.loc[team],
        ))

    return team_rows, all_fts['date'].count(), extra_fts['date'].count()

For a given year I process all the CSV files and group the data for each team together.

In [876]:
# Per-game totals (one entry per processed game) and per-team game rows.
extrafts = []
totalfts = []
scores = defaultdict(list)

# sorted() makes the processing order independent of the file system.
for game_path in sorted(glob.glob('2013-14/*.csv')):
    game_log = pd.read_csv(game_path)
    # Only regular-season games are included.
    if game_log['data_set'].unique()[0] != '2013-2014 Regular Season':
        continue
    try:
        team_rows, game_total_fts, game_extra_fts = scoring(game_log)
    except Exception as err:
        # Best effort: skip a game that cannot be scored, but report which file
        # failed and why instead of hiding the error.
        print('{}: {!r}'.format(game_path, err))
        continue
    extrafts.append(game_extra_fts)
    totalfts.append(game_total_fts)
    for team_row in team_rows:
        # Key by team name; keep the remaining fields of the row as that game's record.
        scores[team_row[0]].append(team_row[1:])

We reduce each team's games to summary statistics and then format them as a table that can be posted on Reddit.

In [877]:
west = ['GSW','LAC','SAS','HOU','POR','DAL','OKC','PHX','MEM','UTA','SAC','DEN','NOP','LAL','MIN']
year = 2014


def _mean1(values):
    # Average of a sequence, rounded to one decimal place.
    return round(np.mean(values), 1)


# One summary row per team: record, scoring averages, free-throw averages,
# free-throw percentages and a Western-conference flag.
newScores = []
for team, games in scores.items():
    # Transpose the per-game records into one tuple per field.
    (result, pts, fts, fts_extra, opp_pts, real_pts, real_opp_pts,
     fts_all, fts_all_made, fts_first, fts_first_made) = zip(*games)

    first_pct = 100. * sum(fts_first_made) / sum(fts_first)
    overall_pct = 100. * sum(fts_all_made) / sum(fts_all)

    summary = [
        team,
        result.count(1),
        result.count(0),
        result.count(0.5),
        _mean1(pts),
        _mean1([new - real for new, real in zip(pts, real_pts)]),
        _mean1(opp_pts),
        _mean1([new - real for new, real in zip(opp_pts, real_opp_pts)]),
        _mean1([shots - extra for shots, extra in zip(fts, fts_extra)]),
        _mean1([-extra for extra in fts_extra]),
        round(first_pct, 2),
        round(first_pct - overall_pct, 2),
    ]
    if team in west:
        summary.append(1)
    else:
        summary.append(0)
    newScores.append(summary)

In [878]:
def _standings_key(row):
    # Ordering for one 13-field summary row: West first, then most wins, most ties,
    # team name, and the remaining fields as tie-breakers.
    # Written as a named function because tuple-unpacking lambda parameters are a
    # syntax error on Python 3; the key it returns is the same.
    (team, wins, losses, ties, ppg, d_ppg, opp_ppg, d_opp_ppg,
     ftpg, extra_ftpg, ft_pct, ft_pct_delta, is_west) = row
    return (-is_west, -wins, -ties, team, losses, ppg, d_ppg, opp_ppg,
            d_opp_ppg, ftpg, ft_pct, ft_pct_delta, extra_ftpg)


# NOTE(review): the value stored under 'Real FT%' looks like a difference between two
# percentages rather than a percentage -- confirm the label. Column names are unchanged.
s = pd.DataFrame(
    sorted(newScores, key=_standings_key),
    columns=['Team','Wins','Losses','Ties','PPG','Delta PPG','OP PPG','Delta OP PPG','FTPG','extra FTPF','FT%','Real FT%', 'West?'],
).sort_values(['West?','Wins','Ties'], ascending=False)

In [879]:
# Emit one pipe-separated line per team, dropping the trailing 'West?' flag.
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0, and the
# list comprehension / print(...) forms behave the same on Python 2 and 3.
for row in s.values[:, :-1]:
    cells = [str(value) for value in row]
    # The first four cells stand alone; the remaining eight are printed as "value (delta)" pairs.
    print('|'.join(cells[:4]) + "|{} ({})|{} ({})|{} ({})|{} ({})".format(*cells[4:]))

SAS|61|19|2|104.9 (0.0)|96.7 (-0.6)|11.2 (-8.7)|78.24 (-0.23)
OKC|58|22|2|105.1 (-0.5)|98.7 (-0.4)|13.5 (-11.4)|78.84 (-1.68)
POR|56|24|2|105.7 (0.1)|101.2 (-0.5)|12.5 (-10.5)|81.71 (0.27)
LAC|55|24|3|106.8 (-0.7)|100.1 (-0.5)|15.9 (-13.0)|71.29 (-1.74)
GSW|49|30|3|102.9 (-0.6)|98.4 (-0.3)|11.5 (-9.5)|72.98 (-2.44)
HOU|49|30|3|105.9 (-0.9)|102.1 (-0.3)|16.7 (-14.1)|68.59 (-2.52)
MEM|48|31|3|95.0 (-0.5)|93.0 (-0.8)|11.3 (-8.8)|71.58 (-2.49)
DAL|48|32|2|103.3 (-0.6)|101.3 (-0.3)|11.6 (-9.3)|76.44 (-3.21)
PHX|47|32|3|104.5 (-0.5)|102.0 (-0.4)|13.3 (-10.9)|73.67 (-2.19)
MIN|40|39|3|105.6 (-0.3)|102.7 (-0.5)|15.3 (-12.4)|77.14 (-0.71)
DEN|33|44|5|103.3 (-0.7)|105.6 (-0.4)|14.4 (-11.8)|69.67 (-2.91)
NOP|31|50|1|98.4 (-0.4)|101.4 (-0.3)|13.0 (-10.2)|75.85 (-1.19)
SAC|27|53|2|99.2 (-0.4)|102.3 (-0.4)|15.1 (-11.9)|74.6 (-1.48)
LAL|26|53|3|102.3 (-0.5)|108.7 (-0.4)|12.4 (-10.0)|73.52 (-2.21)
UTA|25|57|0|94.1 (-0.4)|101.4 (-0.2)|12.3 (-9.5)|73.21 (-1.24)
MIA|51|25|6|101.2 (-0.2)|95.9 (-0.5)|12.9 