In [1]:
import sys
import os
import io
import re
import time
import itertools
import requests
import datetime
import tempfile
import numpy as np
import pandas as pd
import cupy as cp
from tqdm import tqdm

In [2]:
# keyword arguments shared by every tqdm progress bar in this notebook:
# a fixed 90-column bar that is written to stderr
tqdm_kwargs = dict(ncols=90, file=sys.stderr)

In [3]:
# base URL of the statistics service; getcsv() appends the requested route to it
SOURCE_URL = 'https://bbstat.herokuapp.com'

In [4]:
# absolute path of the directory that receives the merged CSV files;
# 'compiled' is resolved relative to the current working directory
COMPILED_PATH = os.path.abspath('compiled')

In [5]:
def getcsv(route, timeout=60):
    """
    Request csv data from endpoint and return as StringIO instance

    Parameters
    ----------
    route : str
        Path appended to ``SOURCE_URL`` (for example ``'scores/2001'``).
    timeout : float or tuple, optional
        Seconds handed to ``requests.get`` as its ``timeout`` argument.
        Without a timeout a stalled server would block the notebook
        forever; pass ``None`` to restore that unbounded behaviour.

    Returns
    -------
    io.StringIO or None
        The response body wrapped in a ``StringIO`` when the server answers
        with HTTP 200. For any other status code the failure is reported on
        stderr and ``None`` is returned.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated unchanged from ``requests`` (connection errors, and
        timeouts once ``timeout`` elapses).
    """
    # create request, asking the service for a csv representation
    res = requests.get(
        f'{SOURCE_URL}/{route}',
        headers={'Accept':'text/csv'},
        timeout=timeout,
    )
    # if request fails, report error
    if res.status_code != 200:
        print(f"HTTP [{res.status_code}] {route}", file=sys.stderr)
        return None
    # return StringIO instance
    return io.StringIO(res.text)

In [6]:
def process_pitching(year, *periods):
    """
    Build rolling starting-pitcher features for every game of a season.

    Lineups and per-game pitching lines are fetched for ``year - 1`` and
    ``year``. For each lineup row of ``year`` the stats of that starter's
    earlier starts are summed. "Earlier" means a smaller date (the first 8
    characters of ``gid`` parsed as an integer), or the same date with a
    smaller ``gameNumber``. Only the last ``period`` such rows, in the
    gid/home sort order of the merged frame, are used.

    Parameters
    ----------
    year : int
        Season to produce rows for; the previous season is loaded as history.
    *periods : int
        Look-back window sizes (maximum number of past starts to sum).

    Yields
    ------
    (int, pandas.DataFrame)
        The period and a frame with columns ``gid``, ``team``,
        ``gameNumber``, ``ppid`` (the starter's id), ``pn`` (number of past
        starts found) and one ``p_<stat>`` column per summed stat.

    Notes
    -----
    The per-row keys are kept as host (NumPy) arrays and only the arrays
    used in the vectorised comparisons live on the GPU. Iterating a CuPy
    array element by element would cost a device round-trip per element and
    would put 0-d device arrays into the output rows.
    """
    # stats we are going to output
    statkeys = ['W', 'L', 'SV', 'R', 'ER', 'IP', 'BF', 'S', 'D', 'T', 'HR', 'BB', 'HBP', 'IBB', 'K', 'BK', 'WP', 'PO', 'GDP']
    lineup_cols = ['gid', 'team', 'home', 'gameNumber', 'pitcher']
    # fetch game starting pitcher data for the last two seasons
    starters = pd.concat([
        pd.read_csv(getcsv(f'lineups/{year-1}'), usecols=lineup_cols),
        pd.read_csv(getcsv(f'lineups/{year}'), usecols=lineup_cols),
    ]).rename(
        columns={'pitcher': 'pid'}
    ).sort_values(['gid', 'home']).drop(columns=['home'])
    # fetch pitching performance by game for the last two seasons
    pitching = pd.concat([pd.read_csv(getcsv(f'pitching/player/games/{y}')) for y in (year-1,year)])
    # merge starting pitcher data with pitching stats data;
    # starters without a pitching line get zeros for every stat
    pitching = starters.merge(
        pitching,
        how='left',
        on=['gid','team','gameNumber','pid'],
        validate='1:1',
    ).fillna(0)
    # host-side (numpy) copies of the per-row keys
    gids = pitching['gid'].values.astype(str)
    teams = pitching['team'].values.astype(str)
    pids = pitching['pid'].values.astype(str)
    gameNumbers_np = pitching['gameNumber'].values.astype(int)
    # pitcher ids are hashed so they can be compared as integers on the gpu
    pidshash_np = np.array([hash(x) for x in pids], dtype=np.int64)
    # derive date column from game id
    dates_np = np.array([int(gid[:8]) for gid in gids], dtype=np.int64)
    # device-side (cupy) arrays used in the vectorised comparisons
    gameNumbers = cp.array(gameNumbers_np)
    pidshash = cp.array(pidshash_np)
    dates = cp.array(dates_np)
    stats = cp.array(pitching[statkeys].values.astype(int))
    # mask for games only from current year, computed on the host so no
    # per-element device-to-host transfer is needed
    season = dates_np >= (year * 10000)
    n_games = int(season.sum())
    # iterate through each period
    for period in periods:
        # initialize output row array
        output = []
        # the zip is consumed by the loop, so it is rebuilt for every period
        season_rows = zip(
            dates_np[season],
            gids[season],
            teams[season],
            gameNumbers_np[season],
            pids[season],
            pidshash_np[season],
        )
        # iterate through season games
        for date,gid,team,gameNumber,pid,pidhash in tqdm(season_rows, total = n_games, desc = f'{year} ({period}) pitching', leave=True, **tqdm_kwargs):
            # fetch the starting pitchers past starts
            pastStarts = stats[
                ((dates < date) | ((dates == date) & (gameNumbers < gameNumber))) & (pidshash == pidhash)
            ][-period:]
            # the number of past starts found for the starting pitcher
            n = len(pastStarts)
            # bring the summed stats back to the host in a single transfer
            totals = cp.asnumpy(pastStarts.sum(axis=0)).tolist() if n else [0]*len(statkeys)
            # append output row
            output.append([
                gid,
                team,
                int(gameNumber),
                pid,
                n,
                *totals,
            ])
        # yield period and dataframe
        yield (period, pd.DataFrame(
            output,
            columns=['gid', 'team', 'gameNumber', 'ppid', 'pn'] + [f'p_{k}' for k in statkeys],
        ))


In [7]:
def process_batting(year, *periods):
    """
    Build rolling starting-lineup batting features for every game of a season.

    Lineups and per-game batting lines are fetched for ``year - 1`` and
    ``year``. For each game of ``year`` (gids starting with ``str(year)``),
    the stats of each player in that team's starting lineup are collected
    from earlier games. "Earlier" means a smaller date (the first 8
    characters of ``gid`` parsed as an integer), or the same date with a
    smaller ``gameNumber``. At most the last ``period`` rows per player are
    kept, and everything is summed over the whole lineup.

    Parameters
    ----------
    year : int
        Season to produce rows for; the previous season is loaded as history.
    *periods : int
        Look-back window sizes (maximum number of past games per player).

    Yields
    ------
    (int, pandas.DataFrame)
        The period and a frame with columns ``gid``, ``team``,
        ``gameNumber`` and one ``b_<stat>`` column per summed stat.
    """
    # stats we are going to output
    statkeys = ['O', 'E', 'S', 'D', 'T', 'HR', 'BB', 'IBB', 'HBP', 'K', 'I', 'SH', 'SF', 'GDP', 'R', 'RBI', 'SB', 'CS', 'PO']
    # lineup columns that are not needed here
    unused_cols = ['pitcher']+[f'pos{i}' for i in range(1,10)]
    # fetch game starting lineup data for the last two seasons
    starters = pd.concat([
        pd.read_csv(getcsv(f'lineups/{year-1}')).drop(unused_cols, axis=1),
        pd.read_csv(getcsv(f'lineups/{year}')).drop(unused_cols, axis=1),
    ])

    # retrieve games for iteration later
    games = starters.sort_values(['gid','home'])[['gid','team','gameNumber']]
    games = games[games.gid.str.startswith(str(year))].values
    # add dates to games ndarray; reshape(-1, 1) keeps the date column
    # two-dimensional even when the season has no games
    games = np.concatenate([
        games,
        np.array([int(x[:8]) for x in games[:,0]], dtype=int).reshape(-1, 1),
    ], axis=1)
    # transform starting lineup dataframe from one row per lineup
    # (pid1..pid9 columns) to one row per starting player
    starters = pd.concat([starters[['gid', 'home', 'team', 'gameNumber', f'pid{i}']].rename(
        columns={f'pid{i}':'pid'}
    ) for i in range(1, 10)]).sort_values(['gid', 'home']).drop(columns=['home'])

    # fetch batting performance by game for the last two seasons
    batting = pd.concat([
        pd.read_csv(getcsv(f'batting/player/games/{year-1}')),
        pd.read_csv(getcsv(f'batting/player/games/{year}')),
    ])
    # merge starting lineup data with batting stats data;
    # starters without a batting line get zeros for every stat
    batting = starters.merge(
        batting,
        how='left',
        on=['gid','team','gameNumber','pid'],
        validate='1:1',
    ).fillna(0)
    # cupy array of hashed game ids
    gids = cp.array([hash(x) for x in batting['gid'].values])
    # cupy array of hashed team ids
    teams = cp.array([hash(x) for x in batting['team'].values])
    # cupy array of gameNumbers
    gameNumbers = cp.array(batting['gameNumber'].values.astype(int))
    # cupy array of hashed player ids
    pids = cp.array([hash(x) for x in batting['pid'].values])
    # derive date column from game id
    dates = cp.array([int(gid[:8]) for gid in batting['gid'].values])
    # target stats in cupy array form
    stats = cp.array(batting[statkeys].values.astype(int))
    # index used for when there are more games than periods
    inx = cp.arange(0, stats.shape[0])
    # iterate through each period
    for period in periods:
        # initialize output row array
        output = []
        # iterate through season games
        for (gid, team, gameNumber, date) in tqdm(games, total = games.shape[0], desc = f'{year} ({period}) batting ', leave=True, **tqdm_kwargs):
            # start from an explicit all-False mask (the previous
            # `~(gids != 0)` was only all-False if no hashed gid equalled 0)
            mask = cp.zeros(gids.shape, dtype=bool)
            # rows that happened strictly before this game
            datemask = (dates < date) | ((dates == date) & (gameNumbers < gameNumber))
            # hashed ids of this team's starters in this game, brought to
            # the host in one transfer instead of one per player
            lineup = cp.asnumpy(pids[(gids == hash(gid)) & (teams == hash(team))])
            for pid in lineup:
                pidMask = datemask & (pids == pid)
                # keep only the player's last `period` earlier rows
                if pidMask.sum() > period:
                    pidMask = pidMask & (inx >= inx[pidMask][-period])
                mask = mask | pidMask
            # fetch the starting lineup players past starts
            pastStarts = stats[mask]
            # bring the summed stats back to the host in a single transfer
            totals = cp.asnumpy(pastStarts.sum(axis=0)).tolist() if len(pastStarts) else [0]*len(statkeys)
            # append output row
            output.append([
                gid,
                team,
                gameNumber,
                *totals,
            ])
        # yield period and dataframe
        yield (period, pd.DataFrame(
            output,
            columns=['gid', 'team', 'gameNumber'] + [f'b_{k}' for k in statkeys],
        ))



In [8]:
def process_defense(year, *periods):
    """
    Build rolling team-defense features for every game of a season.

    For each row of the season's team defense data, the stats of the same
    team's rows with a smaller ``gameNumber`` are collected, the last
    ``period`` of them (in the row order of the fetched data) are kept,
    and they are summed.

    Yields ``(period, DataFrame)`` tuples, one per requested period. The
    frame has columns ``gid``, ``team``, ``gameNumber``, ``dn`` (number of
    past games found) and one ``d<stat>`` column per summed stat.
    """
    # stats we are going to output
    statkeys = ['UR','TUR','P','A','E','PB']
    # row used when a team has no earlier games
    no_history = [0]*len(statkeys)
    # fetch team defense data by game
    defense = pd.read_csv(getcsv(f'defense/team/games/{year}'))
    # key columns (object ndarray) and stat columns (int ndarray)
    index = defense[['gid','team','gameNumber']].values
    stats = defense[statkeys].values.astype(int)
    team_col = index[:,1]
    number_col = index[:,2]
    # one output frame per look-back period
    for period in periods:
        rows = []
        progress = tqdm(index, total = index.shape[0], desc = f'{year} ({period}) defense ', **tqdm_kwargs)
        for gid,team,gameNumber in progress:
            # same team, earlier game number, last `period` rows only
            earlier = (number_col < gameNumber) & (team_col == team)
            window = stats[earlier][-period:]
            # the number of past games found for this team
            n = window.shape[0]
            totals = window.sum(axis=0) if n else no_history
            rows.append([gid, team, gameNumber, n, *totals])
        # hand back the period together with its dataframe
        frame = pd.DataFrame(
            rows,
            columns=['gid', 'team', 'gameNumber', 'dn'] + [f'd{k}' for k in statkeys],
        )
        yield (period, frame)



In [9]:
def process_scores(year, *periods):
    """
    Build rolling score/record features for every game of a season.

    For each row of the season's score data, two windows are summed: the
    same team's rows with a smaller ``gameNumber``, and the subset of those
    rows played against the current opponent. Each window keeps the last
    ``period`` rows in the row order of the fetched data.

    Yields ``(period, DataFrame)`` tuples, one per requested period, with
    columns ``gid``, ``team``, ``gameNumber``, ``opp``, ``home``,
    ``spread``, ``n``, ``wins``, ``scored``, ``allowed``, ``lob``,
    ``n_vs``, ``wins_vs``, ``scored_vs`` and ``allowed_vs``.
    """
    out_columns = ['gid','team','gameNumber','opp','home','spread','n','wins', 'scored','allowed','lob','n_vs','wins_vs', 'scored_vs', 'allowed_vs']
    # fetch score data for the given season
    scores = pd.read_csv(getcsv(f'scores/{year}'))
    # number of rows in the season
    N = len(scores)
    # key columns as an object ndarray
    index = scores[['gid','team','gameNumber','opp','home']].values
    own_runs = scores['score'].values
    opp_runs = scores['opp_score'].values
    # stat matrix columns: 0 spread, 1 win flag, 2 score, 3 opp_score, 4 lob
    stats = np.concatenate([
        (own_runs - opp_runs)[:, None],
        (own_runs > opp_runs).astype(int)[:, None],
        scores[['score','opp_score','lob']].values.astype(int),
    ],axis=1)
    # one output frame per look-back period
    for period in periods:
        rows = []
        keyed_spreads = zip(index, stats[:,0])
        for (gid,team,gameNumber,opp,home),spread in tqdm(keyed_spreads, total = N, desc = f'{year} ({period}) scores  ', **tqdm_kwargs):
            # earlier games of this team
            earlier = (index[:,2] < gameNumber) & (index[:,1] == team)
            # earlier games of this team against the current opponent
            earlier_vs = earlier & (index[:,3] == opp)
            # keep only the last `period` rows of each selection
            recent = stats[earlier][-period:]
            recent_vs = stats[earlier_vs][-period:]
            rows.append([
                gid,
                team,
                gameNumber,
                opp,
                home,
                spread,
                recent.shape[0], # the number of past games found for this team
                *recent[:,1:5].sum(axis=0), # wins, runs scored, runs allowed, left on base
                recent_vs.shape[0], # the number of past games played against opponent
                *recent_vs[:,1:4].sum(axis=0), # wins, runs scored, runs allowed vs opponent
            ])
        # hand back the period together with its dataframe
        yield (period, pd.DataFrame(rows, columns=out_columns))

In [10]:
def _merge(tempdir, year, period):
    merge_args = { 'how': 'inner', 'left_index': True, 'right_index': True }
    index = ['gid', 'team', 'gameNumber']
    return pd.read_csv(
        os.path.join(tempdir, f'{year}_{period}_scores.csv'), index_col=index,
    ).merge(
        pd.read_csv(os.path.join(tempdir, f'{year}_{period}_defense.csv'), index_col=index),
        ** merge_args,
    ).merge(
        pd.read_csv(os.path.join(tempdir, f'{year}_{period}_pitching.csv'), index_col=index),
        ** merge_args,
    ).merge(
        pd.read_csv(os.path.join(tempdir, f'{year}_{period}_batting.csv'), index_col=index),
        ** merge_args,
    )

In [11]:
def _compile(year, *periods):
    """
    Compute every feature set for one season and write the merged CSV files.

    Each ``process_*`` generator is run for all ``periods`` and its frames
    are written to a temporary directory as
    ``{year}_{period}_{stage}.csv``. The four files of each period are then
    joined with ``_merge`` and written to
    ``COMPILED_PATH/{year}_{period}.csv``. The temporary directory is
    removed when the function returns.

    Parameters
    ----------
    year : int
        Season to compile.
    *periods : int
        Look-back window sizes; one output file is written per period.
    """
    # stage name used in the intermediate file name, and the generator
    # that produces (period, dataframe) tuples for it
    stages = (
        ('scores', process_scores),
        ('defense', process_defense),
        ('pitching', process_pitching),
        ('batting', process_batting),
    )
    # the output directory may not exist yet on a fresh checkout; without
    # this the final to_csv would fail after all the work has been done
    os.makedirs(COMPILED_PATH, exist_ok=True)
    with tempfile.TemporaryDirectory() as tempdir:
        # write the intermediate files, one stage at a time
        for stage, process in stages:
            for p, df in process(year, *periods):
                df.to_csv(os.path.join(tempdir, f'{year}_{p}_{stage}.csv'), index=False)
        # merge
        for p in periods:
            outfile = os.path.join(COMPILED_PATH, f'{year}_{p}.csv')
            _merge(tempdir, year, p).to_csv(outfile)
            print(f'wrote {outfile}')

In [15]:
# look-back window sizes to compile: 20, 30, ..., 80
periods = list(range(20, 81, 10))

In [14]:
# compile the seasons 2001 through 2009 (the end of the range is exclusive),
# writing one merged file per season and look-back period
for year in range(2001, 2010):
    _compile(year, *periods)

2001 (20) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3349.41it/s]
2001 (30) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3315.82it/s]
2001 (40) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3336.71it/s]
2001 (50) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3485.71it/s]
2001 (60) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3320.63it/s]
2001 (70) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3317.10it/s]
2001 (80) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3479.13it/s]
2001 (20) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5315.13it/s]
2001 (30) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5690.74it/s]
2001 (40) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5685.72it/s]
2001 (50) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5646.00it/s]

wrote /home/lc3637/bbstats/compiled/2001_20.csv
wrote /home/lc3637/bbstats/compiled/2001_30.csv
wrote /home/lc3637/bbstats/compiled/2001_40.csv
wrote /home/lc3637/bbstats/compiled/2001_50.csv
wrote /home/lc3637/bbstats/compiled/2001_60.csv
wrote /home/lc3637/bbstats/compiled/2001_70.csv
wrote /home/lc3637/bbstats/compiled/2001_80.csv


2002 (20) scores  : 100%|███████████████████████████| 4852/4852 [00:01<00:00, 3208.86it/s]
2002 (30) scores  : 100%|███████████████████████████| 4852/4852 [00:01<00:00, 3491.26it/s]
2002 (40) scores  : 100%|███████████████████████████| 4852/4852 [00:01<00:00, 3488.50it/s]
2002 (50) scores  : 100%|███████████████████████████| 4852/4852 [00:01<00:00, 3337.96it/s]
2002 (60) scores  : 100%|███████████████████████████| 4852/4852 [00:01<00:00, 3489.72it/s]
2002 (70) scores  : 100%|███████████████████████████| 4852/4852 [00:01<00:00, 3478.35it/s]
2002 (80) scores  : 100%|███████████████████████████| 4852/4852 [00:01<00:00, 3315.12it/s]
2002 (20) defense : 100%|███████████████████████████| 4852/4852 [00:00<00:00, 5411.07it/s]
2002 (30) defense : 100%|███████████████████████████| 4852/4852 [00:00<00:00, 5683.57it/s]
2002 (40) defense : 100%|███████████████████████████| 4852/4852 [00:00<00:00, 5334.20it/s]
2002 (50) defense : 100%|███████████████████████████| 4852/4852 [00:00<00:00, 5684.40it/s]

wrote /home/lc3637/bbstats/compiled/2002_20.csv
wrote /home/lc3637/bbstats/compiled/2002_30.csv
wrote /home/lc3637/bbstats/compiled/2002_40.csv
wrote /home/lc3637/bbstats/compiled/2002_50.csv
wrote /home/lc3637/bbstats/compiled/2002_60.csv
wrote /home/lc3637/bbstats/compiled/2002_70.csv
wrote /home/lc3637/bbstats/compiled/2002_80.csv


2003 (20) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3371.61it/s]
2003 (30) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3471.84it/s]
2003 (40) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3200.02it/s]
2003 (50) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3333.48it/s]
2003 (60) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3481.41it/s]
2003 (70) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3324.71it/s]
2003 (80) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3308.16it/s]
2003 (20) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 4961.21it/s]
2003 (30) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5679.72it/s]
2003 (40) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5215.49it/s]
2003 (50) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5676.70it/s]

wrote /home/lc3637/bbstats/compiled/2003_20.csv
wrote /home/lc3637/bbstats/compiled/2003_30.csv
wrote /home/lc3637/bbstats/compiled/2003_40.csv
wrote /home/lc3637/bbstats/compiled/2003_50.csv
wrote /home/lc3637/bbstats/compiled/2003_60.csv
wrote /home/lc3637/bbstats/compiled/2003_70.csv
wrote /home/lc3637/bbstats/compiled/2003_80.csv


2004 (20) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3234.68it/s]
2004 (30) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3313.86it/s]
2004 (40) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3324.48it/s]
2004 (50) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3485.80it/s]
2004 (60) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3326.44it/s]
2004 (70) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3336.16it/s]
2004 (80) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3470.25it/s]
2004 (20) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5256.14it/s]
2004 (30) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5296.49it/s]
2004 (40) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5633.92it/s]
2004 (50) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5655.90it/s]

wrote /home/lc3637/bbstats/compiled/2004_20.csv
wrote /home/lc3637/bbstats/compiled/2004_30.csv
wrote /home/lc3637/bbstats/compiled/2004_40.csv
wrote /home/lc3637/bbstats/compiled/2004_50.csv
wrote /home/lc3637/bbstats/compiled/2004_60.csv
wrote /home/lc3637/bbstats/compiled/2004_70.csv
wrote /home/lc3637/bbstats/compiled/2004_80.csv


2005 (20) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3366.14it/s]
2005 (30) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3329.37it/s]
2005 (40) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3330.52it/s]
2005 (50) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3485.43it/s]
2005 (60) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3488.47it/s]
2005 (70) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3487.72it/s]
2005 (80) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3487.70it/s]
2005 (20) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5349.85it/s]
2005 (30) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5682.14it/s]
2005 (40) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5274.71it/s]
2005 (50) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5652.05it/s]

wrote /home/lc3637/bbstats/compiled/2005_20.csv
wrote /home/lc3637/bbstats/compiled/2005_30.csv
wrote /home/lc3637/bbstats/compiled/2005_40.csv
wrote /home/lc3637/bbstats/compiled/2005_50.csv
wrote /home/lc3637/bbstats/compiled/2005_60.csv
wrote /home/lc3637/bbstats/compiled/2005_70.csv
wrote /home/lc3637/bbstats/compiled/2005_80.csv


2006 (20) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3241.50it/s]
2006 (30) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3335.43it/s]
2006 (40) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3487.90it/s]
2006 (50) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3322.44it/s]
2006 (60) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3491.54it/s]
2006 (70) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3489.25it/s]
2006 (80) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3483.89it/s]
2006 (20) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5355.77it/s]
2006 (30) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5636.22it/s]
2006 (40) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5658.82it/s]
2006 (50) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5673.48it/s]

wrote /home/lc3637/bbstats/compiled/2006_20.csv
wrote /home/lc3637/bbstats/compiled/2006_30.csv
wrote /home/lc3637/bbstats/compiled/2006_40.csv
wrote /home/lc3637/bbstats/compiled/2006_50.csv
wrote /home/lc3637/bbstats/compiled/2006_60.csv
wrote /home/lc3637/bbstats/compiled/2006_70.csv
wrote /home/lc3637/bbstats/compiled/2006_80.csv


2007 (20) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3238.81it/s]
2007 (30) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3487.83it/s]
2007 (40) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3428.45it/s]
2007 (50) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3395.53it/s]
2007 (60) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3348.02it/s]
2007 (70) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3329.50it/s]
2007 (80) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3488.40it/s]
2007 (20) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5021.45it/s]
2007 (30) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5671.06it/s]
2007 (40) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5028.76it/s]
2007 (50) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5276.31it/s]

wrote /home/lc3637/bbstats/compiled/2007_20.csv
wrote /home/lc3637/bbstats/compiled/2007_30.csv
wrote /home/lc3637/bbstats/compiled/2007_40.csv
wrote /home/lc3637/bbstats/compiled/2007_50.csv
wrote /home/lc3637/bbstats/compiled/2007_60.csv
wrote /home/lc3637/bbstats/compiled/2007_70.csv
wrote /home/lc3637/bbstats/compiled/2007_80.csv


2008 (20) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3366.16it/s]
2008 (30) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3333.41it/s]
2008 (40) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3227.37it/s]
2008 (50) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3265.82it/s]
2008 (60) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3469.50it/s]
2008 (70) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3345.70it/s]
2008 (80) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3486.20it/s]
2008 (20) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5287.41it/s]
2008 (30) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5277.96it/s]
2008 (40) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5681.86it/s]
2008 (50) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5671.59it/s]

wrote /home/lc3637/bbstats/compiled/2008_20.csv
wrote /home/lc3637/bbstats/compiled/2008_30.csv
wrote /home/lc3637/bbstats/compiled/2008_40.csv
wrote /home/lc3637/bbstats/compiled/2008_50.csv
wrote /home/lc3637/bbstats/compiled/2008_60.csv
wrote /home/lc3637/bbstats/compiled/2008_70.csv
wrote /home/lc3637/bbstats/compiled/2008_80.csv


2009 (20) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3241.50it/s]
2009 (30) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3487.41it/s]
2009 (40) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3478.84it/s]
2009 (50) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3256.68it/s]
2009 (60) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3483.11it/s]
2009 (70) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3485.03it/s]
2009 (80) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3481.17it/s]
2009 (20) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5282.22it/s]
2009 (30) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5680.59it/s]
2009 (40) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5667.94it/s]
2009 (50) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5664.50it/s]

wrote /home/lc3637/bbstats/compiled/2009_20.csv
wrote /home/lc3637/bbstats/compiled/2009_30.csv
wrote /home/lc3637/bbstats/compiled/2009_40.csv
wrote /home/lc3637/bbstats/compiled/2009_50.csv
wrote /home/lc3637/bbstats/compiled/2009_60.csv
wrote /home/lc3637/bbstats/compiled/2009_70.csv
wrote /home/lc3637/bbstats/compiled/2009_80.csv


In [16]:
# compile the seasons 2010 through 2018 (the end of the range is exclusive),
# writing one merged file per season and look-back period
for year in range(2010, 2019):
    _compile(year, *periods)

2010 (20) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3368.89it/s]
2010 (30) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3326.53it/s]
2010 (40) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3488.58it/s]
2010 (50) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3486.25it/s]
2010 (60) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3486.90it/s]
2010 (70) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3488.39it/s]
2010 (80) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3325.27it/s]
2010 (20) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 4895.72it/s]
2010 (30) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5677.21it/s]
2010 (40) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5654.46it/s]
2010 (50) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5651.54it/s]

wrote /home/lc3637/bbstats/compiled/2010_20.csv
wrote /home/lc3637/bbstats/compiled/2010_30.csv
wrote /home/lc3637/bbstats/compiled/2010_40.csv
wrote /home/lc3637/bbstats/compiled/2010_50.csv
wrote /home/lc3637/bbstats/compiled/2010_60.csv
wrote /home/lc3637/bbstats/compiled/2010_70.csv
wrote /home/lc3637/bbstats/compiled/2010_80.csv


2011 (20) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3246.05it/s]
2011 (30) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3188.17it/s]
2011 (40) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3326.42it/s]
2011 (50) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3194.32it/s]
2011 (60) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3467.92it/s]
2011 (70) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3338.85it/s]
2011 (80) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3329.96it/s]
2011 (20) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 4985.61it/s]
2011 (30) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5262.43it/s]
2011 (40) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5676.12it/s]
2011 (50) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5671.73it/s]

wrote /home/lc3637/bbstats/compiled/2011_20.csv
wrote /home/lc3637/bbstats/compiled/2011_30.csv
wrote /home/lc3637/bbstats/compiled/2011_40.csv
wrote /home/lc3637/bbstats/compiled/2011_50.csv
wrote /home/lc3637/bbstats/compiled/2011_60.csv
wrote /home/lc3637/bbstats/compiled/2011_70.csv
wrote /home/lc3637/bbstats/compiled/2011_80.csv


2012 (20) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3226.55it/s]
2012 (30) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3483.75it/s]
2012 (40) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3483.74it/s]
2012 (50) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3338.26it/s]
2012 (60) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3331.41it/s]
2012 (70) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3483.58it/s]
2012 (80) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3344.73it/s]
2012 (20) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5359.92it/s]
2012 (30) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5649.94it/s]
2012 (40) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5670.61it/s]
2012 (50) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5668.00it/s]

wrote /home/lc3637/bbstats/compiled/2012_20.csv
wrote /home/lc3637/bbstats/compiled/2012_30.csv
wrote /home/lc3637/bbstats/compiled/2012_40.csv
wrote /home/lc3637/bbstats/compiled/2012_50.csv
wrote /home/lc3637/bbstats/compiled/2012_60.csv
wrote /home/lc3637/bbstats/compiled/2012_70.csv
wrote /home/lc3637/bbstats/compiled/2012_80.csv


2013 (20) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3218.97it/s]
2013 (30) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3322.52it/s]
2013 (40) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3479.90it/s]
2013 (50) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3478.38it/s]
2013 (60) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3478.50it/s]
2013 (70) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3484.31it/s]
2013 (80) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3333.48it/s]
2013 (20) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5262.01it/s]
2013 (30) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5667.09it/s]
2013 (40) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5660.90it/s]
2013 (50) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5658.30it/s]

wrote /home/lc3637/bbstats/compiled/2013_20.csv
wrote /home/lc3637/bbstats/compiled/2013_30.csv
wrote /home/lc3637/bbstats/compiled/2013_40.csv
wrote /home/lc3637/bbstats/compiled/2013_50.csv
wrote /home/lc3637/bbstats/compiled/2013_60.csv
wrote /home/lc3637/bbstats/compiled/2013_70.csv
wrote /home/lc3637/bbstats/compiled/2013_80.csv


2014 (20) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3206.26it/s]
2014 (30) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3332.31it/s]
2014 (40) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3479.82it/s]
2014 (50) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3484.07it/s]
2014 (60) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3480.19it/s]
2014 (70) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3316.53it/s]
2014 (80) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3188.52it/s]
2014 (20) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5368.69it/s]
2014 (30) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5262.90it/s]
2014 (40) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5663.56it/s]
2014 (50) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5653.12it/s]

wrote /home/lc3637/bbstats/compiled/2014_20.csv
wrote /home/lc3637/bbstats/compiled/2014_30.csv
wrote /home/lc3637/bbstats/compiled/2014_40.csv
wrote /home/lc3637/bbstats/compiled/2014_50.csv
wrote /home/lc3637/bbstats/compiled/2014_60.csv
wrote /home/lc3637/bbstats/compiled/2014_70.csv
wrote /home/lc3637/bbstats/compiled/2014_80.csv


2015 (20) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3238.77it/s]
2015 (30) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3481.10it/s]
2015 (40) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3476.51it/s]
2015 (50) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3333.31it/s]
2015 (60) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3481.58it/s]
2015 (70) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3477.40it/s]
2015 (80) scores  : 100%|███████████████████████████| 4858/4858 [00:01<00:00, 3330.95it/s]
2015 (20) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5401.65it/s]
2015 (30) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5670.34it/s]
2015 (40) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5668.55it/s]
2015 (50) defense : 100%|███████████████████████████| 4858/4858 [00:00<00:00, 5664.77it/s]

wrote /home/lc3637/bbstats/compiled/2015_20.csv
wrote /home/lc3637/bbstats/compiled/2015_30.csv
wrote /home/lc3637/bbstats/compiled/2015_40.csv
wrote /home/lc3637/bbstats/compiled/2015_50.csv
wrote /home/lc3637/bbstats/compiled/2015_60.csv
wrote /home/lc3637/bbstats/compiled/2015_70.csv
wrote /home/lc3637/bbstats/compiled/2015_80.csv


2016 (20) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3338.26it/s]
2016 (30) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3319.24it/s]
2016 (40) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3482.65it/s]
2016 (50) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3485.59it/s]
2016 (60) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3314.19it/s]
2016 (70) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3315.69it/s]
2016 (80) scores  : 100%|███████████████████████████| 4856/4856 [00:01<00:00, 3481.93it/s]
2016 (20) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 4966.08it/s]
2016 (30) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5671.85it/s]
2016 (40) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5257.14it/s]
2016 (50) defense : 100%|███████████████████████████| 4856/4856 [00:00<00:00, 5652.87it/s]

wrote /home/lc3637/bbstats/compiled/2016_20.csv
wrote /home/lc3637/bbstats/compiled/2016_30.csv
wrote /home/lc3637/bbstats/compiled/2016_40.csv
wrote /home/lc3637/bbstats/compiled/2016_50.csv
wrote /home/lc3637/bbstats/compiled/2016_60.csv
wrote /home/lc3637/bbstats/compiled/2016_70.csv
wrote /home/lc3637/bbstats/compiled/2016_80.csv


2017 (20) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3205.78it/s]
2017 (30) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3299.02it/s]
2017 (40) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3474.53it/s]
2017 (50) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3324.63it/s]
2017 (60) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3478.15it/s]
2017 (70) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3473.72it/s]
2017 (80) scores  : 100%|███████████████████████████| 4860/4860 [00:01<00:00, 3458.77it/s]
2017 (20) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5326.75it/s]
2017 (30) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5657.94it/s]
2017 (40) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5660.10it/s]
2017 (50) defense : 100%|███████████████████████████| 4860/4860 [00:00<00:00, 5660.30it/s]

wrote /home/lc3637/bbstats/compiled/2017_20.csv
wrote /home/lc3637/bbstats/compiled/2017_30.csv
wrote /home/lc3637/bbstats/compiled/2017_40.csv
wrote /home/lc3637/bbstats/compiled/2017_50.csv
wrote /home/lc3637/bbstats/compiled/2017_60.csv
wrote /home/lc3637/bbstats/compiled/2017_70.csv
wrote /home/lc3637/bbstats/compiled/2017_80.csv


2018 (20) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3351.99it/s]
2018 (30) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3471.22it/s]
2018 (40) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3448.45it/s]
2018 (50) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3468.64it/s]
2018 (60) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3313.84it/s]
2018 (70) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3330.76it/s]
2018 (80) scores  : 100%|███████████████████████████| 4862/4862 [00:01<00:00, 3477.77it/s]
2018 (20) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5119.03it/s]
2018 (30) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5665.69it/s]
2018 (40) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5657.30it/s]
2018 (50) defense : 100%|███████████████████████████| 4862/4862 [00:00<00:00, 5649.14it/s]

wrote /home/lc3637/bbstats/compiled/2018_20.csv
wrote /home/lc3637/bbstats/compiled/2018_30.csv
wrote /home/lc3637/bbstats/compiled/2018_40.csv
wrote /home/lc3637/bbstats/compiled/2018_50.csv
wrote /home/lc3637/bbstats/compiled/2018_60.csv
wrote /home/lc3637/bbstats/compiled/2018_70.csv
wrote /home/lc3637/bbstats/compiled/2018_80.csv
