In [1]:
import csv
from random import randint, seed
from tqdm import tqdm
from __future__ import division

In [2]:
# Seeding is left disabled, so every run of the resampling below draws a
# different random stream. Uncomment to fix the RNG state for repeatable runs:
# seed(41)

In [3]:
# Read one record per CSV row and turn each season column from text into an
# int; any other column (e.g. 'Team') is kept exactly as read.
ppf = []
with open('data/plays-per-fumble.csv') as handle:
    for record in csv.DictReader(handle):
        record.update({season: int(record[season])
                       for season in ('2010', '2011', '2012', '2013', '2014')})
        ppf.append(record)

Data from Coursera, via AdvancedFootballAnalytics:

In [4]:
# Display the parsed records (one dict per CSV row) to check the load.
ppf

[{'2010': 89, '2011': 80, '2012': 89, '2013': 49, '2014': 81, 'Team': 'NE'},
 {'2010': 44, '2011': 42, '2012': 66, '2013': 73, '2014': 64, 'Team': 'BLT'},
 {'2010': 31, '2011': 77, '2012': 45, '2013': 96, '2014': 39, 'Team': 'CAR'},
 {'2010': 42, '2011': 52, '2012': 41, '2013': 83, '2014': 67, 'Team': 'SD'},
 {'2010': 45, '2011': 66, '2012': 61, '2013': 54, '2014': 47, 'Team': 'GB'},
 {'2010': 40, '2011': 63, '2012': 81, '2013': 38, '2014': 52, 'Team': 'NYG'},
 {'2010': 50, '2011': 60, '2012': 53, '2013': 51, '2014': 51, 'Team': 'CIN'},
 {'2010': 43, '2011': 60, '2012': 31, '2013': 64, '2014': 63, 'Team': 'PITT'},
 {'2010': 65, '2011': 60, '2012': 39, '2013': 53, '2014': 44, 'Team': 'KC'},
 {'2010': 44, '2011': 37, '2012': 50, '2013': 57, '2014': 66, 'Team': 'JACK'},
 {'2010': 32, '2011': 47, '2012': 45, '2013': 63, '2014': 63, 'Team': 'CLEV'},
 {'2010': 57, '2011': 46, '2012': 62, '2013': 41, '2014': 42, 'Team': 'SEA'},
 {'2010': 49, '2011': 38, '2012': 50, '2013': 48, '2014': 53, 'Te

In [5]:
# Store each team's average over the five seasons under the 'mean' key.
# The notebook's `from __future__ import division` keeps `/` a true division.
for team in ppf:
    season_total = sum(team[season] for season in ('2010', '2011', '2012', '2013', '2014'))
    team['mean'] = season_total / 5

In [6]:
# Season column names, '2010' through '2014', used to index a team's record.
years = list(map(str, range(2010, 2015)))

In [7]:
def resample_once():
    """Return the mean of one random sample of team-season values.

    Draws ``len(years)`` (team, season) index pairs with ``randint``, with
    replacement, looks each pair up in ``ppf`` and averages the values.
    """
    draws = len(years)
    total = 0
    for _ in range(draws):
        # Team index is drawn before the season index for every pair.
        team_idx = randint(0, len(ppf) - 1)
        year_idx = randint(0, len(years) - 1)
        total += ppf[team_idx][years[year_idx]]
    return total / draws

In [8]:
def resample_n_times(x):
    """Call ``resample_once`` ``x`` times and return the results as a list.

    The iteration is wrapped in ``tqdm`` so progress is shown while the
    samples are drawn.
    """
    return [resample_once() for _ in tqdm(range(0, x))]

In [9]:
# Build the reference distribution: 100,000 resampled means.
result = resample_n_times(100000)

100%|██████████| 100000/100000 [00:02<00:00, 43487.70it/s]


In [10]:
# For every team, record (name, mean, share of resampled means that are
# greater than or equal to the team's mean), then show highest mean first.
n_draws = len(result)
team_pvals = []
for team in ppf:
    # Count the resampled means that fall strictly below this team's mean.
    beaten = sum(1 for m in result if team['mean'] > m)
    team_pvals.append((team['Team'], team['mean'], 1 - beaten / n_draws))
sorted(team_pvals, key=lambda entry: entry[1], reverse=True)

[('NE', 77.6, 0.00012999999999996348),
 ('BLT', 57.8, 0.12419000000000002),
 ('CAR', 57.6, 0.12960000000000005),
 ('SD', 57.0, 0.14766),
 ('NYG', 54.8, 0.23036),
 ('GB', 54.6, 0.2389),
 ('CIN', 53.0, 0.31610000000000005),
 ('PITT', 52.2, 0.35928000000000004),
 ('KC', 52.2, 0.35928000000000004),
 ('JACK', 50.8, 0.44269000000000003),
 ('CLEV', 50.0, 0.4918),
 ('SEA', 49.6, 0.51686),
 ('CHIC', 47.6, 0.64209),
 ('SF', 47.2, 0.66652),
 ('NYJ', 46.4, 0.71475),
 ('DEN', 46.0, 0.73761),
 ('TB', 45.4, 0.7703599999999999),
 ('TENN', 44.6, 0.81218),
 ('OAK', 44.2, 0.83148),
 ('MIA', 44.2, 0.83148),
 ('BUFF', 43.6, 0.85867),
 ('PHIL', 41.8, 0.92301),
 ('WASH', 37.2, 0.99362)]