# Scrape NFL Boxscores using espn_scraper

In [66]:
# import libraries
import pandas as pd
import numpy as np
import json
import pickle
import espn_scraper as espn

#### Set User Defined Fields

current_week is self explanatory.

num_weeks defines how many previous weeks to include in the weekly matchup rankings analysis.  For example, if it's currently week 9 and we want to use the previous 4 weeks to create our rankings then set num_weeks to 4 and the agg_week function will sum all teams' stats during weeks 5, 6, 7, and 8.

In [123]:
# set current_week
current_week = 6

# set num_weeks
num_weeks = 4

#### Scrape NFL Scoreboard URLs for the 2020 season

In [3]:
# scrape all scoreboard URLs from the NFL's 2020 season
scoreboard_urls = espn.get_all_scoreboard_urls("nfl", 2021)

# pickle list of scoreboard urls so we don't have to rescrape ESPN.  There are limits to how many times we can scrape
with open("scoreboard_urls_list.txt", "wb") as fp:
    pickle.dump(scoreboard_urls, fp)

https://www.espn.com/nfl/scoreboard/_/year/2021/seasontype/2/week/1?xhr=1


In [4]:
# open scoreboard_urls_list.txt
with open("scoreboard_urls_list.txt", "rb") as fp:   # Unpickling
    scoreboard_urls = pickle.load(fp)

In [5]:
# grab each NFL game id using our scoreboard URL list
game_ids = []
for scoreboard_url in scoreboard_urls:
    
    # using cached_data so we don't have to rescrape ESPN
    # cached_data saves a JSON object in a local directory
    data = espn.get_url(scoreboard_url, cached_path = "cached_data")
    for event in data['content']['sbData']['events']:
        if event['id'] not in game_ids:
            game_ids.append(event['id'])

In [15]:
len(list(game_ids[:1]))

1

In [16]:
for i in game_ids[:1]:
    print(i)

401326605


In [98]:
# grab game URL
url = espn.get_game_url("boxscore", "nfl", 401326408) # 401326605 - preseason); 401326383 - regular season
    
# convert to JSON object
#json_data = espn.get_url(url, cached_path = "cached_json")
json_data = espn.get_url(url)

https://www.espn.com/nfl/boxscore?gameId=401326408&xhr=1


In [99]:
json_data['gamepackageJSON']['header']['week']

5

In [102]:
test_matchup = []

if json_data['gamepackageJSON']['header']['week'] == current_week:
    temp_matchup_data = []
    temp_matchup_data.append(json_data['gamepackageJSON']['header']['week'])
    temp_matchup_data.append('@' + json_data['__gamepackage__']['awayTeam']['team']['abbreviation'])
    temp_matchup_data.append(json_data['__gamepackage__']['homeTeam']['team']['abbreviation'])
    test_matchup.append(temp_matchup_data)

In [103]:
test_matchup

[[5, '@BUF', 'KC']]

In [55]:
json_data['gamepackageJSON']['header']['season']['type']

2

In [79]:
#json_data['gamepackageJSON']['header']['week']
json_data['gamepackageJSON']['boxscore']['players'][0]['statistics'][8]['descriptions']

['Field Goals Made/Attempts',
 'Field Goal Percentage',
 'Longest Field Goal Made',
 'Extra Points Made/Attempts',
 'Kicking Points']

In [52]:
json_data['gamepackageJSON']['boxscore']['players'][0]['statistics'][0]['name']

'passing'

In [86]:
if json_data['gamepackageJSON']['boxscore']['players'][0]['statistics'][8]['totals']:
    print('boo')
else:
    print('yay!')

yay!


#### Scrape Team & Boxscore Stats

In [139]:
# initialize list for boxscore stats and respective column names
stat_data = []
column_names = []

# set columns_ran to False since we only need to grab the column names once.  will flip to True once we have column names
columns_ran = False

# initialize list for current week's matchups
matchup_data = []

# loop through each game and create JSON object to parse for data
for i in game_ids:
    
    # grab game URL
    url = espn.get_game_url("boxscore", "nfl", i)
    
    
    # convert to JSON object
    #json_data = espn.get_url(url, cached_path = "cached_json")
    json_data = espn.get_url(url)
    
    if json_data['gamepackageJSON']['header']['season']['type'] < 2:
        continue
    
    # if game is a current week game then grab matchups data
    if json_data['gamepackageJSON']['header']['week'] == current_week:
        temp_matchup_data = []
        temp_matchup_data.append(json_data['gamepackageJSON']['header']['week'])
        temp_matchup_data.append(json_data['__gamepackage__']['awayTeam']['team']['abbreviation'])
        temp_matchup_data.append(False)
        temp_matchup_data.append(json_data['__gamepackage__']['homeTeam']['team']['abbreviation'])
        matchup_data.append(temp_matchup_data) 
        
    # if there are no game stats or it's a current week game then skip to next game
    if json_data['gamepackageJSON']['header']['week'] >= current_week or \
        len(json_data['gamepackageJSON']['boxscore']['teams'][0]['statistics']) == 0:
        continue
        
    # grab game stats for each team    
    else:   
        # if columns_ran equals false then create list of column names
        if columns_ran == False:
            # create column names using team stats data
            for stat in range(0, len(json_data['gamepackageJSON']['boxscore']['teams'][0]['statistics'])):
                column_names.append(json_data['gamepackageJSON']['boxscore']['teams'][0]['statistics'][stat]['label'])
    
            # create column names using box score data.  some are duplicates of the columns created above
            for stat in range(0, len(json_data['gamepackageJSON']['boxscore']['players'][0]['statistics'])):
    
                # grab all stat names except for punting stats
                if json_data['gamepackageJSON']['boxscore']['players'][0]['statistics'][stat]['name'] not in 'punting':
                    column_names += json_data['gamepackageJSON']['boxscore']['players'][0]['statistics'][stat]['descriptions']
        
            # flip columns_ran to True once we have all column names
            columns_ran = True
        
        # loop through each team
        for team in range(0, len(json_data['gamepackageJSON']['boxscore']['teams'])):
            
            # initialize temporary list for team & boxscore stats
            temp_stat_data = []
            
            # grab week, team name, and team abbreviation
            temp_stat_data.append(json_data['gamepackageJSON']['header']['week'])
            temp_stat_data.append(json_data['gamepackageJSON']['boxscore']['teams'][team]['team']['displayName'])
            temp_stat_data.append(json_data['gamepackageJSON']['boxscore']['teams'][team]['team']['abbreviation'])
            
            # determine home game (1) vs away game(0)
            if json_data['gamepackageJSON']['boxscore']['teams'][team]['team']['displayName'] == \
               json_data['__gamepackage__']['homeTeam']['team']['displayName']:
                
                # add 1 to temp_stat_data to denote home game
                temp_stat_data.append(1)
                
                # add away team opponent
                temp_stat_data.append(json_data['__gamepackage__']['awayTeam']['team']['abbreviation'])
            else:
                
                # add 0 to temp_stat_data to denote away game
                temp_stat_data.append(0)
                
                # add home team opponent
                temp_stat_data.append(json_data['__gamepackage__']['homeTeam']['team']['abbreviation'])
            
            # loop through team stats data and add them to temp_stat_data
            for stat in range(0, len(json_data['gamepackageJSON']['boxscore']['teams'][team]['statistics'])):
                temp_stat_data.append(json_data['gamepackageJSON']['boxscore']['teams'][team]['statistics'][stat]['displayValue'])
            
            # loop through box score data and add them to temp_stat_data
            for stat in range(0, len(json_data['gamepackageJSON']['boxscore']['players'][team]['statistics'])):
                
                # grab all stat values except for punting stat values
                if json_data['gamepackageJSON']['boxscore']['players'][team]['statistics'][stat]['name'] not in 'punting':
                    
                    # only grab stat values that exist, otherwise, add 0 for any missing stat values
                    #if len(json_data['gamepackageJSON']['boxscore']['players'][team]['statistics'][stat]['totals']) != 0:
                    if json_data['gamepackageJSON']['boxscore']['players'][team]['statistics'][stat]['totals']:
                        temp_stat_data += \
                            json_data['gamepackageJSON']['boxscore']['players'][team]['statistics'][stat]['totals']
                    else:
                        temp_stat_data += [0] * \
                            len(json_data['gamepackageJSON']['boxscore']['players'][team]['statistics'][stat]['descriptions'])
            
            # append each team's stats to stat_data
            stat_data.append(temp_stat_data)

https://www.espn.com/nfl/boxscore?gameId=401326605&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326352&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326606&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326621&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401327515&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401332403&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326617&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401333579&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401330875&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326921&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401333568&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326351&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326604&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401329166&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401333584&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401329163&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401333578&xhr=1
https://www.espn.com/nfl/boxsco

https://www.espn.com/nfl/boxscore?gameId=401326425&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326429&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326426&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326427&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326428&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326424&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326431&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326430&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326432&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326433&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326434&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326435&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326436&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326437&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326438&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326439&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326440&xhr=1
https://www.espn.com/nfl/boxsco

https://www.espn.com/nfl/boxscore?gameId=401326568&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326569&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326570&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326571&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326572&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326577&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326573&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326574&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326575&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326576&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326578&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326579&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326580&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326582&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326581&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326583&xhr=1
https://www.espn.com/nfl/boxscore?gameId=401326584&xhr=1
https://www.espn.com/nfl/boxsco

#### Clean Data & Create Dataframe 

In [125]:
# add columns to column list
column_names[:0] = ['week', 'team_name', 'team_abv', 'home', 'oppn']

# create column index list to select columns we need for analysis.  there are duplicates so we don't need them all
keep_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 23, 24, 29, 30, 31, 33, 34, 35, 38, 39, 40, 41, 42, 
             43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 
             70, 71, 72, 73, 74, 75, 76
            ]

# create list of categorical columns
chr_list = ['team_name', 'team_abv', 'oppn']

# create list of boolean columns
bool_list = ['home']

# create list of integer columns
int_list = ['week', '1st_dwn_tot', '1st_dwn_pass', '1st_dwn_rush', '1st_dwn_pen', '3rd_dwn_con', '3rd_dwn_att', 
            '4th_dwn_con', '4th_dwn_att', 'redzone_con', 'redzone_att', 'plays_tot', 'yrds_tot','drives_tot','penalties',
            'pen_yrds', 'sacks_taken', 'sck_yrds_lost', 'tol_fumble', 'fumble_lost','pass_comp', 'pass_att', 
            'tot_yrds_pass', 'pass_tds', 'int_thrown', 'rush_att', 'tot_rush_yrds', 'rush_tds', 'long_rush', 'tot_rec', 
            'rec_targets', 'tot_rec_yrds','long_rec', 'tot_tackle', 'solo_tackle', 'tot_sck', 'pass_def', 'qb_hits', 
            'def_tds', 'def_ints', 'def_int_yrds', 'def_int_tds', 'fumble_rec', 'tot_kick_ret', 'kick_ret_yrds',
            'long_kick_ret', 'kick_ret_tds', 'tot_punt_ret', 'punt_ret_yrds', 'long_punt_ret', 'punt_ret_tds', 'fg_made', 
            'fg_att','long_fg', 'xp_made', 'xp_att', 'kick_pts', 'time_of_poss_min','time_of_poss_sec']

# create list of float columns
float_list = ['yrds_per_play','yrds_per_pass','yrds_per_rush', 'yrds_per_rec', 'tack_for_loss', 'yrds_per_kick',
              'yrds_per_punt','fg_perc']

In [126]:
# create dataframe using stat_data and column_names
tm_game_data = pd.DataFrame(data = stat_data)#, columns = column_names)


# select columns in keep_list by index.  can't select by name due to duplicate column names
tm_game_data = tm_game_data.iloc[:, keep_list]

# rename columns
tm_game_data.columns = ['week', 'team_name',  'team_abv', 'home', 'oppn', '1st_dwn_tot', '1st_dwn_pass', '1st_dwn_rush', 
                        '1st_dwn_pen', '3rd_dwn_eff', '4th_dwn_eff', 'plays_tot', 'yrds_tot', 'yrds_per_play', 
                        'drives_tot', 'yrds_per_pass', 'redzone_eff', 'pen_yrds', 'time_of_poss', 'pass_comp_att',
                        'tot_yrds_pass', 'pass_tds', 'int_thrown', 'sck_yrds_lost', 'rush_att', 'tot_rush_yrds', 
                        'yrds_per_rush', 'rush_tds', 'long_rush', 'tot_rec', 'tot_rec_yrds', 'yrds_per_rec', 'long_rec', 
                        'rec_targets', 'tol_fumble', 'fumble_lost', 'fumble_rec', 'tot_tackle', 'solo_tackle', 'tot_sck', 
                        'tack_for_loss', 'pass_def', 'qb_hits', 'def_tds', 'def_ints', 'def_int_yrds', 'def_int_tds', 
                        'tot_kick_ret', 'kick_ret_yrds', 'yrds_per_kick', 'long_kick_ret', 'kick_ret_tds', 'tot_punt_ret', 
                        'punt_ret_yrds', 'yrds_per_punt', 'long_punt_ret', 'punt_ret_tds', 'fg_att_made', 'fg_perc', 
                        'long_fg', 'xp_att_made', 'kick_pts']

# split columns so we can calculate rates if necessary
tm_game_data[['3rd_dwn_con','3rd_dwn_att']] = tm_game_data['3rd_dwn_eff'].str.split('-',expand=True)
tm_game_data[['4th_dwn_con','4th_dwn_att']] = tm_game_data['4th_dwn_eff'].str.split('-',expand=True)
tm_game_data[['redzone_con','redzone_att']] = tm_game_data['redzone_eff'].str.split('-',expand=True)
tm_game_data[['penalties','pen_yrds']] = tm_game_data['pen_yrds'].str.split('-',expand=True)
tm_game_data[['pass_comp','pass_att']] = tm_game_data['pass_comp_att'].str.split('/',expand=True)
tm_game_data[['sacks_taken','sck_yrds_lost']] = tm_game_data['sck_yrds_lost'].str.split('-',expand=True)
tm_game_data[['fg_made','fg_att']] = tm_game_data['fg_att_made'].str.split('/',expand=True)
tm_game_data[['xp_made','xp_att']] = tm_game_data['xp_att_made'].str.split('/',expand=True)
tm_game_data[['time_of_poss_min','time_of_poss_sec']] = tm_game_data['time_of_poss'].str.split(':',expand=True)

null_columns = ['3rd_dwn_con','3rd_dwn_att', '4th_dwn_con','4th_dwn_att', 'redzone_con','redzone_att',
               'penalties','pen_yrds','pass_comp','pass_att','sacks_taken','sck_yrds_lost', 'fg_made','fg_att',
               'xp_made','xp_att', 'time_of_poss_min','time_of_poss_sec']

tm_game_data[null_columns] = tm_game_data[null_columns].fillna(0)

# drop split columns
tm_game_data.drop(['3rd_dwn_eff', '4th_dwn_eff', 'redzone_eff', 'pass_comp_att', 'fg_att_made', 'xp_att_made', 
                   'time_of_poss'], axis=1, inplace = True)

# reorder columns
tm_game_data = tm_game_data[['week', 'team_name', 'team_abv', 'home', 'oppn', '1st_dwn_tot', '1st_dwn_pass', '1st_dwn_rush',
                             '1st_dwn_pen', '3rd_dwn_con', '3rd_dwn_att', '4th_dwn_con', '4th_dwn_att', 'redzone_con', 
                             'redzone_att', 'plays_tot', 'yrds_tot', 'yrds_per_play', 'drives_tot', 'time_of_poss_min', 
                             'time_of_poss_sec', 'penalties', 'pen_yrds', 'sacks_taken', 'sck_yrds_lost', 'tol_fumble', 
                             'fumble_lost', 'pass_comp', 'pass_att', 'tot_yrds_pass', 'yrds_per_pass', 'pass_tds', 
                             'int_thrown', 'rush_att', 'tot_rush_yrds', 'yrds_per_rush', 'rush_tds', 'long_rush', 'tot_rec',
                             'rec_targets', 'tot_rec_yrds', 'yrds_per_rec', 'long_rec', 'tot_tackle', 'solo_tackle', 
                             'tot_sck', 'tack_for_loss', 'pass_def', 'qb_hits', 'def_tds', 'def_ints', 'def_int_yrds', 
                             'def_int_tds', 'fumble_rec', 'tot_kick_ret', 'kick_ret_yrds', 'yrds_per_kick', 'long_kick_ret',
                             'kick_ret_tds', 'tot_punt_ret', 'punt_ret_yrds', 'yrds_per_punt', 'long_punt_ret', 
                             'punt_ret_tds', 'fg_made', 'fg_att', 'fg_perc', 'long_fg', 'xp_made', 'xp_att', 'kick_pts']]

display(tm_game_data.head())

# update data types using lists created above
tm_game_data[chr_list] = tm_game_data[chr_list].astype('category')
tm_game_data[bool_list] = tm_game_data[bool_list].astype('bool')
tm_game_data[int_list] = tm_game_data[int_list].astype('int64')
tm_game_data[float_list] = tm_game_data[float_list].astype('float')

# merge tm_game_data dataframe with itself to create stats allowed per team
tm_game_data = tm_game_data.merge(tm_game_data, how = 'left', left_on = ['week', 'oppn'], right_on = ['week', 'team_abv'])

# drop duplicate columns
tm_game_data.drop(['team_name_y', 'team_abv_y', 'home_y', 'oppn_y'], axis=1, inplace = True)

# rename columns
tm_game_data.columns = ['week', 'team_name', 'team_abv', 'home', 'oppn', '1st_dwn_tot', '1st_dwn_pass', '1st_dwn_rush', 
                        '1st_dwn_pen', '3rd_dwn_con', '3rd_dwn_att', '4th_dwn_con', '4th_dwn_att', 'redzone_con', 
                        'redzone_att', 'plays_tot', 'yrds_tot', 'yrds_per_play', 'drives_tot', 'time_of_poss_min', 
                        'time_of_poss_sec', 'penalties', 'pen_yrds', 'sacks_taken', 'sck_yrds_lost', 'tol_fumble', 
                        'fumble_lost', 'pass_comp', 'pass_att', 'tot_yrds_pass', 'yrds_per_pass', 'pass_tds', 'int_thrown',
                        'rush_att', 'tot_rush_yrds', 'yrds_per_rush', 'rush_tds', 'long_rush', 'tot_rec', 'rec_targets', 
                        'tot_rec_yrds', 'yrds_per_rec', 'long_rec', 'tot_tackle', 'solo_tackle', 'tot_sck', 'tack_for_loss',
                        'pass_def', 'qb_hits', 'def_tds', 'def_ints', 'def_int_yrds', 'def_int_tds', 'fumble_rec', 
                        'tot_kick_ret', 'kick_ret_yrds', 'yrds_per_kick', 'long_kick_ret', 'kick_ret_tds', 'tot_punt_ret',
                        'punt_ret_yrds', 'yrds_per_punt', 'long_punt_ret', 'punt_ret_tds', 'fg_made', 'fg_att', 'fg_perc', 
                        'long_fg', 'xp_made', 'xp_att', 'kick_pts', '1st_dwn_tot_alw', '1st_dwn_pass_alw', 
                        '1st_dwn_rush_alw', '1st_dwn_pen_alw', '3rd_dwn_con_alw', '3rd_dwn_att_alw', '4th_dwn_con_alw', 
                        '4th_dwn_att_alw', 'redzone_con_alw', 'redzone_att_alw', 'plays_tot_alw', 'yrds_tot_alw', 
                        'yrds_per_play_alw', 'drives_tot_alw', 'time_of_poss_min_alw', 'time_of_poss_sec_alw', 
                        'penalties_alw', 'pen_yrds_alw', 'sacks_taken_alw', 'sck_yrds_lost_alw', 'tol_fumble_alw', 
                        'fumble_lost_alw', 'pass_comp_alw', 'pass_att_alw', 'tot_yrds_pass_alw', 'yrds_per_pass_alw', 
                        'pass_tds_alw', 'int_thrown_alw', 'rush_att_alw', 'tot_rush_yrds_alw', 'yrds_per_rush_alw', 
                        'rush_tds_alw', 'long_rush_alw', 'tot_rec_alw', 'rec_targets_alw', 'tot_rec_yrds_alw', 
                        'yrds_per_rec_alw', 'long_rec_alw', 'tot_tackle_alw', 'solo_tackle_alw', 'tot_sck_alw', 
                        'tack_for_loss_alw', 'pass_def_alw', 'qb_hits_alw', 'def_tds_alw', 'def_ints_alw', 
                        'def_int_yrds_alw', 'def_int_tds_alw', 'fumble_rec_alw', 'tot_kick_ret_alw', 'kick_ret_yrds_alw', 
                        'yrds_per_kick_alw', 'long_kick_ret_alw', 'kick_ret_tds_alw', 'tot_punt_ret_alw', 
                        'punt_ret_yrds_alw', 'yrds_per_punt_alw', 'long_punt_ret_alw', 'punt_ret_tds_alw', 'fg_made_alw', 
                        'fg_att_alw', 'fg_perc_alw', 'long_fg_alw', 'xp_made_alw', 'xp_att_alw', 'kick_pts_alw'
                 ]

tm_game_data.head()

Unnamed: 0,week,team_name,team_abv,home,oppn,1st_dwn_tot,1st_dwn_pass,1st_dwn_rush,1st_dwn_pen,3rd_dwn_con,...,yrds_per_punt,long_punt_ret,punt_ret_tds,fg_made,fg_att,fg_perc,long_fg,xp_made,xp_att,kick_pts
0,1,Dallas Cowboys,DAL,0,TB,30,21,4,5,9,...,7.0,12,0,3,5,60.0,48,2,3,11
1,1,Tampa Bay Buccaneers,TB,1,DAL,24,22,1,1,5,...,10.5,14,0,1,1,100.0,36,4,4,7
2,1,Philadelphia Eagles,PHI,0,ATL,24,13,10,1,6,...,4.8,11,0,1,1,100.0,43,3,3,6
3,1,Atlanta Falcons,ATL,1,PHI,19,8,7,4,3,...,5.5,7,0,2,2,100.0,27,0,0,6
4,1,Pittsburgh Steelers,PIT,0,BUF,16,9,4,3,4,...,5.0,5,0,3,3,100.0,45,2,2,11


Unnamed: 0,week,team_name,team_abv,home,oppn,1st_dwn_tot,1st_dwn_pass,1st_dwn_rush,1st_dwn_pen,3rd_dwn_con,...,yrds_per_punt_alw,long_punt_ret_alw,punt_ret_tds_alw,fg_made_alw,fg_att_alw,fg_perc_alw,long_fg_alw,xp_made_alw,xp_att_alw,kick_pts_alw
0,1,Dallas Cowboys,DAL,False,TB,30,21,4,5,9,...,10.5,14,0,1,1,100.0,36,4,4,7
1,1,Tampa Bay Buccaneers,TB,True,DAL,24,22,1,1,5,...,7.0,12,0,3,5,60.0,48,2,3,11
2,1,Philadelphia Eagles,PHI,False,ATL,24,13,10,1,6,...,5.5,7,0,2,2,100.0,27,0,0,6
3,1,Atlanta Falcons,ATL,True,PHI,19,8,7,4,3,...,4.8,11,0,1,1,100.0,43,3,3,6
4,1,Pittsburgh Steelers,PIT,False,BUF,16,9,4,3,4,...,1.0,1,0,3,3,100.0,42,1,1,10


In [127]:
tm_game_data.tail()

Unnamed: 0,week,team_name,team_abv,home,oppn,1st_dwn_tot,1st_dwn_pass,1st_dwn_rush,1st_dwn_pen,3rd_dwn_con,...,yrds_per_punt_alw,long_punt_ret_alw,punt_ret_tds_alw,fg_made_alw,fg_att_alw,fg_perc_alw,long_fg_alw,xp_made_alw,xp_att_alw,kick_pts_alw
155,5,Arizona Cardinals,ARI,True,SF,20,11,8,1,3,...,3.0,5,0,1,1,100.0,47,1,1,4
156,5,Buffalo Bills,BUF,False,KC,20,12,7,1,5,...,0.0,0,0,2,2,100.0,54,2,2,8
157,5,Kansas City Chiefs,KC,True,BUF,29,16,8,5,5,...,7.0,7,0,1,1,100.0,30,5,5,8
158,5,Indianapolis Colts,IND,False,BAL,29,16,10,3,5,...,0.0,0,0,1,1,100.0,23,0,0,3
159,5,Baltimore Ravens,BAL,True,IND,30,22,7,1,3,...,10.0,13,0,2,4,50.0,43,1,2,7


In [83]:
#tm_game_data.columns[tm_game_data.isnull().any()]

tm_game_data.loc[tm_game_data['xp_att'].isnull()]

Unnamed: 0,week,team_name,team_abv,home,oppn,1st_dwn_tot,1st_dwn_pass,1st_dwn_rush,1st_dwn_pen,3rd_dwn_con,...,yrds_per_punt,long_punt_ret,punt_ret_tds,fg_made,fg_att,fg_perc,long_fg,xp_made,xp_att,kick_pts
16,1,New York Jets,NYJ,False,CAR,16,10,4,2,4,...,15.0,15,0,,,0,0,,,0
41,2,Miami Dolphins,MIA,True,BUF,13,9,2,2,8,...,0.5,1,0,,,0,0,,,0
84,3,New York Jets,NYJ,False,DEN,11,10,1,0,4,...,0.0,0,0,,,0,0,,,0
100,4,Houston Texans,HOU,False,BUF,6,4,2,0,1,...,6.0,6,0,,,0,0,,,0


In [44]:
tm_game_data.describe().to_csv('tm_game_data_audit_2021.csv', index = False)

In [128]:
tm_game_data.to_csv('tm_game_data_2021.csv', index = False)

In [3]:
tm_game_data = pd.read_csv("tm_game_data_2021.csv")
tm_game_data.head()

Unnamed: 0,week,team_name,team_abv,home,oppn,1st_dwn_tot,1st_dwn_pass,1st_dwn_rush,1st_dwn_pen,3rd_dwn_con,...,yrds_per_punt_alw,long_punt_ret_alw,punt_ret_tds_alw,fg_made_alw,fg_att_alw,fg_perc_alw,long_fg_alw,xp_made_alw,xp_att_alw,kick_pts_alw
0,1,Houston Texans,HOU,False,KC,21,13,8,0,4,...,0.0,0,0,2,2,100.0,29,4,4,10
1,1,Kansas City Chiefs,KC,True,HOU,28,16,9,3,7,...,19.0,19,0,0,1,0.0,0,2,2,2
2,1,Seattle Seahawks,SEA,False,ATL,22,15,6,1,3,...,8.0,8,0,2,2,100.0,49,1,2,7
3,1,Atlanta Falcons,ATL,True,SEA,28,23,4,1,7,...,15.0,15,0,1,1,100.0,42,5,5,8
4,1,New York Jets,NYJ,False,BUF,15,8,4,3,4,...,13.6,23,0,2,4,50.0,22,3,3,9


In [10]:
tm_game_data.columns[tm_game_data.isna().any()].tolist()

[]

In [None]:
tm_game_data[tm_game_data['def_tds'] >= 3]

In [None]:
tm_game_data[["fumble_lost", "fumble_rec_alw"]].head()

In [None]:
tm_game_data['week'].max()

In [None]:
new_list = ['pass_td_per_gm', '']
tm_game_data.columns.extend()

In [None]:
tm_game_data[tm_game_data['team_abv'] == 'KC']

In [None]:
tm_game_data[tm_game_data['week'] == 1]

In [None]:
[i for i in range(4, tm_game_data['week'].max() + 1)]

#### Aggregate Team & Boxscore Stats

In [134]:
# create agg_week function to sum all stats within the user defined time frame
def agg_week(weekly_boxscore, num_weeks):
    '''
    Definition:
        This function sums all stats in a weekly boxscore dataframe which is subset by the user defined field, num_weeks.
    Parameters:
        weekly_boxscore = dataframe of all weekly boxscore stats
        num_weeks = user defined field to determine the number of previous weeks by which to subset the weekly_boxscore df.
                    see  User Defined Fields at the top of this notebook for more info.
    '''
    
    # create num_weeks based on max week number in data frame
    num_weeks_limit = weekly_boxscore["week"].max() - num_weeks
    
    
    # subset weekly_boxscore dataframe by max week number and num_weeks
    weekly_boxscore = weekly_boxscore[(weekly_boxscore["week"] <= weekly_boxscore["week"].max()) & \
                                      (weekly_boxscore["week"] > num_weeks_limit)]
    
    # create dataframe to show number of games each team has played
    gp_df = weekly_boxscore['team_abv'].value_counts().sort_index().reset_index()    
    
    # sum team stats
    weekly_boxscore = weekly_boxscore.groupby(['team_abv']).sum().sort_values(['team_abv']).reset_index()
    
    # add games played column
    weekly_boxscore['gp'] = gp_df['team_abv']
    return weekly_boxscore

In [144]:
# create prior_weeks dataframe using the tm_game_data dataframe
prior_weeks = agg_week(tm_game_data, num_weeks)

# aggregate qb stats
prior_weeks['pass_td_per_gm'] = prior_weeks['pass_tds'] / prior_weeks['gp']
prior_weeks['pass_td_alw_per_gm'] = prior_weeks['pass_tds_alw'] / prior_weeks['gp']
prior_weeks['pass_yrd_per_gm'] = prior_weeks['tot_yrds_pass'] / prior_weeks['gp']
prior_weeks['pass_yrd_alw_per_gm'] = prior_weeks['tot_yrds_pass_alw'] / prior_weeks['gp']
prior_weeks['pass_yrd_per_pass'] = prior_weeks['tot_yrds_pass'] / prior_weeks['pass_att']
prior_weeks['pass_yrd_alw_per_pass_alw'] = prior_weeks['tot_yrds_pass_alw'] / prior_weeks['pass_att_alw']
prior_weeks['pass_1st_down_per_gm'] = prior_weeks['1st_dwn_pass'] / prior_weeks['gp']
prior_weeks['pass_1st_down_alw_per_gm'] = prior_weeks['1st_dwn_pass_alw'] / prior_weeks['gp']

# aggregate rb stats
prior_weeks['rush_td_per_gm'] = prior_weeks['rush_tds'] / prior_weeks['gp']
prior_weeks['rush_td_alw_per_gm'] = prior_weeks['rush_tds_alw'] / prior_weeks['gp']
prior_weeks['rush_yrd_per_gm'] = prior_weeks['tot_rush_yrds'] / prior_weeks['gp']
prior_weeks['rush_yrd_alw_per_gm'] = prior_weeks['tot_rush_yrds_alw'] / prior_weeks['gp']
prior_weeks['rush_yrd_per_rush'] = prior_weeks['tot_rush_yrds'] / prior_weeks['rush_att']
prior_weeks['rush_yrd_alw_per_rush_alw'] = prior_weeks['tot_rush_yrds_alw'] / prior_weeks['rush_att_alw']
prior_weeks['rush_1st_down_per_gm'] = prior_weeks['1st_dwn_rush'] / prior_weeks['gp']
prior_weeks['rush_1st_down_alw_per_gm'] = prior_weeks['1st_dwn_rush_alw'] / prior_weeks['gp']

# aggregate wr/te stats
prior_weeks['rec_yrd_per_gm'] = prior_weeks['tot_rec_yrds'] / prior_weeks['gp']
prior_weeks['rec_yrd_alw_per_gm'] = prior_weeks['tot_rec_yrds_alw'] / prior_weeks['gp']
prior_weeks['rec_yrd_per_tar'] = prior_weeks['tot_rec_yrds'] / prior_weeks['rec_targets']
prior_weeks['rec_yrd_alw_per_tar_alw'] = prior_weeks['tot_rec_yrds_alw'] / prior_weeks['rec_targets_alw']
prior_weeks['rec_tar_per_gm'] = prior_weeks['rec_targets'] / prior_weeks['gp']
prior_weeks['rec_tar_alw_per_gm'] = prior_weeks['rec_targets_alw'] / prior_weeks['gp']
prior_weeks['rec_per_gm'] = prior_weeks['tot_rec'] / prior_weeks['gp']
prior_weeks['rec_alw_per_gm'] = prior_weeks['tot_rec_alw'] / prior_weeks['gp']

# aggregate def stats
prior_weeks['def_st_td_per_gm'] = (prior_weeks['def_tds'] + prior_weeks['kick_ret_tds'] + prior_weeks['punt_ret_tds']) / \
                                   prior_weeks['gp']
prior_weeks['def_st_td_alw_per_gm'] = (prior_weeks['def_tds_alw'] + prior_weeks['kick_ret_tds_alw'] + \
                                       prior_weeks['punt_ret_tds_alw']) / prior_weeks['gp']
prior_weeks['fumble_per_gm'] = prior_weeks['fumble_rec'] / prior_weeks['gp']
prior_weeks['fumble_lost_per_gm'] = prior_weeks['fumble_lost'] / prior_weeks['gp']
prior_weeks['int_per_gm'] = prior_weeks['def_ints'] / prior_weeks['gp']
prior_weeks['int_alw_per_gm'] = prior_weeks['def_ints_alw'] / prior_weeks['gp']
prior_weeks['sacks_per_gm'] = prior_weeks['tot_sck'] / prior_weeks['gp']
prior_weeks['sacks_taken_per_gm'] = prior_weeks['sacks_taken'] / prior_weeks['gp']

# aggregate kick points stats
prior_weeks['kck_pts_per_gm'] = prior_weeks['kick_pts'] / prior_weeks['gp']
prior_weeks['kck_pts_alw_per_gm'] = prior_weeks['kick_pts_alw'] / prior_weeks['gp']

# aggregate kick & punt returns stats
prior_weeks['return_yrds_per_gm'] = (prior_weeks['kick_ret_yrds'] + prior_weeks['punt_ret_yrds']) / prior_weeks['gp']
prior_weeks['return_yrds_alw_per_gm'] = (prior_weeks['kick_ret_yrds_alw'] + prior_weeks['punt_ret_yrds_alw']) / \
                                        prior_weeks['gp']


prior_weeks.head()

Unnamed: 0,team_abv,week,home,1st_dwn_tot,1st_dwn_pass,1st_dwn_rush,1st_dwn_pen,3rd_dwn_con,3rd_dwn_att,4th_dwn_con,...,fumble_per_gm,fumble_lost_per_gm,int_per_gm,int_alw_per_gm,sacks_per_gm,sacks_taken_per_gm,kck_pts_per_gm,kck_pts_alw_per_gm,return_yrds_per_gm,return_yrds_alw_per_gm
0,ARI,14,2.0,91,52,36,3,15,41,3,...,1.75,0.25,1.0,0.75,1.5,2.0,8.75,5.5,25.5,20.25
1,ATL,14,2.0,89,61,16,12,30,58,2,...,0.75,0.75,0.25,0.75,2.0,1.25,6.25,5.5,79.75,87.75
2,BAL,14,2.0,96,57,36,3,17,46,2,...,1.0,0.25,0.5,0.75,2.25,2.5,8.25,4.5,58.25,60.25
3,BUF,14,2.0,96,52,37,7,26,51,2,...,2.25,0.25,2.25,0.5,3.0,0.5,10.5,2.75,28.5,57.5
4,CAR,14,2.0,97,55,29,13,22,54,3,...,1.0,0.0,0.75,1.5,2.5,3.25,7.0,4.25,37.5,77.0


In [138]:
prior_weeks[['team_abv', 'pass_td_per_gm']].head()

Unnamed: 0,team_abv,pass_td_per_gm
0,ARI,1.5
1,ATL,2.5
2,BAL,1.75
3,BUF,2.75
4,CAR,1.25


In [48]:
prior_weeks.describe().to_csv('prior_weeks_audit.csv', index = False)
prior_weeks.columns[prior_weeks.isna().any()].tolist()

[]

In [None]:
prior_weeks[prior_weeks['xp_att'] <= 1]

In [None]:
prior_weeks[["1st_dwn_pass", "1st_dwn_tot"]].tail()

In [None]:
n = 36
#new_list = prior_weeks.columns[-n:]
#new_list

new_list = ['test', 'test1']
new_list

In [None]:
#newnew_list = prior_weeks.columns.values.tolist()
#print(newnew_list)
#newnewnew_list = newnew_list + new_list
#print(newnewnew_list)

prior_weeks.columns.values.tolist() + new_list

In [141]:
# create dataframe of current week matchups using matchup_data
this_week = pd.DataFrame(columns = ['week', 'team_abv', 'home', 'oppn'], data = matchup_data)
this_week_temp = this_week.copy()
this_week_temp = this_week_temp[['week', 'oppn', 'home', 'team_abv']]
this_week_temp['home'] = True
this_week_temp.columns = ['week', 'team_abv', 'home', 'oppn']
this_week = this_week.append(this_week_temp, ignore_index=True)
this_week

Unnamed: 0,week,team_abv,home,oppn
0,6,TB,False,PHI
1,6,MIA,False,JAX
2,6,GB,False,CHI
3,6,CIN,False,DET
4,6,HOU,False,IND
5,6,LAR,False,NYG
6,6,KC,False,WSH
7,6,MIN,False,CAR
8,6,LAC,False,BAL
9,6,ARI,False,CLE


In [142]:
this_week.to_csv('this_week_2021.csv', index = False)

In [2]:
this_week = pd.read_csv('this_week.csv')
this_week.head()

Unnamed: 0,week,team_abv,home,oppn
0,16,MIN,False,NO
1,16,TB,False,DET
2,16,MIA,False,LV
3,16,CLE,False,NYJ
4,16,SF,False,ARI


In [132]:
# create function to grab team or boxscore stat from prior_weeks dataframe
def get_values_list(prior_weeks_df, team, column):  
    '''
    Definition:
        This function returns a numpy array of stat values which are ordered by the current week's matchups
    Parameters:
        prior_weeks = dataframe of previous weeks' summed stats
        team = column name of either the team's abbreviation or their opponent's team abbreviation depending on which
               team's stat we want to return
        column = column name of the stat we want to return
    '''
    
    data_list = [prior_weeks_df.loc[prior_weeks_df['team_abv'] == i, column].iloc[0] for i in team.tolist()]
    
#     # initialize list for team & boxscore stats
#     data_list = []
    
#     # loop through all teams in this week's matchups
#     for i in team.tolist():
        
#         # grab team's index location
#         team_idx = prior_weeks_df.index[prior_weeks_df['team_abv'] == i].tolist()[0]
        
#         # grab team's boxscore stat
#         data_value = prior_weeks_df.at[team_idx, column]
        
#         #data_value = prior_weeks_df.loc[prior_weeks_df['team_abv'] == i, column].iloc[0]
        
#         # append to list
#         data_list.append(data_value)
    
    # return numpy array so we can perform calculations
    return np.array(data_list)

In [None]:
type(prior_weeks.index[prior_weeks['team_abv'] == 'LAR'][0])

In [137]:
#prior_weeks['1st_dwn_pass'].loc[prior_weeks['team_abv'] == 'KC']
prior_weeks.loc[prior_weeks['team_abv'] == 'KC', '1st_dwn_pass'].iloc[0]

67

In [143]:
get_values_list(prior_weeks, this_week['team_abv'], 'pass_td_per_gm')[0]

2.75

#### Aggregate stat to compare how well a team plays vs how well other teams have played against their current week's opponent

In [145]:
# qb
# multiply how many TDs thrown per game by team and how many passing TDs allowed per game by opponent
this_week['pass_td'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'pass_td_per_gm') * 
                                 get_values_list(prior_weeks, this_week['oppn'], 'pass_td_alw_per_gm'))

# multiply how many yards per pass by team and how many yards per pass allowed by opponent
this_week['pass_yrd_per_pass'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'pass_yrd_per_pass') * 
                                           get_values_list(prior_weeks, this_week['oppn'], 'pass_yrd_alw_per_pass_alw'))

# multiply how many passing 1st downs per game by team and how many passing 1st downs per game allowed by opponent
this_week['pass_1st_dwn'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'pass_1st_down_per_gm') * 
                                      get_values_list(prior_weeks, this_week['oppn'], 'pass_1st_down_alw_per_gm'))

# multiply passing yards per game by team and passing yards per game allowed by opponent
this_week['pass_yrd'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'pass_yrd_per_gm') * 
                                  get_values_list(prior_weeks, this_week['oppn'], 'pass_yrd_alw_per_gm'))

# rb
# multiply rushing TDs per game by team and rushing TDs allowed per game by opponent
this_week['rush_td'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'rush_td_per_gm') * 
                                 get_values_list(prior_weeks, this_week['oppn'], 'rush_td_alw_per_gm'))

# multiply how many yards per rush by team and how many yards per rush allowed by opponent
this_week['rush_yrd_per_rush'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'rush_yrd_per_rush') * 
                                           get_values_list(prior_weeks, this_week['oppn'], 'rush_yrd_alw_per_rush_alw'))

# multiply how many rushing 1st downs per game by team and how many rushing 1st downs per game allowed by opponent
this_week['rush_1st_down'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'rush_1st_down_per_gm') * 
                                       get_values_list(prior_weeks, this_week['oppn'], 'rush_1st_down_alw_per_gm'))

# multiply rushing yards per game by team and rushing yards per game allowed by opponent
this_week['rush_yrd'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'rush_yrd_per_gm') * 
                                  get_values_list(prior_weeks, this_week['oppn'], 'rush_yrd_alw_per_gm'))

# wr
# multiply passing TDs per game by team and passing TDs allowed per game by opponent
this_week['rec_td'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'pass_td_per_gm') * 
                                get_values_list(prior_weeks, this_week['oppn'], 'pass_td_alw_per_gm'))

# multiply receiving yards per game by team and receiving yards per game allowed by opponent
this_week['rec_yrd_per_gm'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'rec_yrd_per_gm') * 
                                        get_values_list(prior_weeks, this_week['oppn'], 'rec_yrd_alw_per_gm'))

# multiply receiving yards per target by team and receiving yards per target allowed by opponent
this_week['rec_yrd_per_tar'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'rec_yrd_per_tar') * 
                                         get_values_list(prior_weeks, this_week['oppn'], 'rec_yrd_alw_per_tar_alw'))

# multiply receptions per game by team and receptions per game allowed by opponent
this_week['rec_per_gm'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'rec_per_gm') * 
                                    get_values_list(prior_weeks, this_week['oppn'], 'rec_alw_per_gm'))

# multiply how many receiving 1st downs per game by team and how many receiving 1st downs per game allowed by opponent
this_week['rec_1st_down'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'pass_1st_down_per_gm') * 
                                      get_values_list(prior_weeks, this_week['oppn'], 'pass_1st_down_alw_per_gm'))

# def
# multiply def and st TDs per game by team and def and st TDs allowed per game by opponent
this_week['def_st_td'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'def_st_td_per_gm') * 
                                   get_values_list(prior_weeks, this_week['oppn'], 'def_st_td_alw_per_gm'))

# multiply def sacks per game by team and sacks taken per game by opponent
this_week['def_sack'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'sacks_per_gm') * 
                                  get_values_list(prior_weeks, this_week['oppn'], 'sacks_taken_per_gm'))

# multiply def interceptions per game by team and def interceptions allowed per game by opponent
this_week['def_int'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'int_per_gm') * 
                                 get_values_list(prior_weeks, this_week['oppn'], 'int_alw_per_gm'))

# multiply def fumble recoveries per game by team and fumbles lost per game by opponent
this_week['def_fbml'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'fumble_per_gm') * 
                                  get_values_list(prior_weeks, this_week['oppn'], 'fumble_lost_per_gm'))

# multiply passing/rushing/def/st TDs allowed per game by team and passing/rushing/def/st TDs per game by opponent
this_week['def_st_td_alw'] = pd.Series((get_values_list(prior_weeks, this_week['team_abv'], 'pass_td_alw_per_gm') +
                                        get_values_list(prior_weeks, this_week['team_abv'], 'rush_td_alw_per_gm') + 
                                        get_values_list(prior_weeks, this_week['team_abv'], 'def_st_td_alw_per_gm')) * 
                                        (get_values_list(prior_weeks, this_week['oppn'], 'pass_td_per_gm') + 
                                        get_values_list(prior_weeks, this_week['oppn'], 'rush_td_per_gm') + 
                                        get_values_list(prior_weeks, this_week['oppn'], 'def_st_td_per_gm')))

# multiply passing/rushing/return yards allowed per game by team and passing/rushing/return yards per game by opponent
this_week['def_st_yrd_alw'] = pd.Series((get_values_list(prior_weeks, this_week['team_abv'], 'pass_yrd_alw_per_gm') +
                                         get_values_list(prior_weeks, this_week['team_abv'], 'rush_yrd_alw_per_gm') + 
                                         get_values_list(prior_weeks, this_week['team_abv'], 'return_yrds_alw_per_gm')) * 
                                         (get_values_list(prior_weeks, this_week['oppn'], 'pass_yrd_per_gm') + 
                                         get_values_list(prior_weeks, this_week['oppn'], 'rush_yrd_per_gm') + 
                                         get_values_list(prior_weeks, this_week['oppn'], 'return_yrds_per_gm')))

# st
# multiply kick points per game by team and kick points allowed per game by opponent
this_week['kck_pts'] = pd.Series(get_values_list(prior_weeks, this_week['team_abv'], 'kck_pts_per_gm') * 
                                 get_values_list(prior_weeks, this_week['oppn'], 'kck_pts_alw_per_gm'))

# misc
# calculate redzone differential
# (conversions divided attempts) minus the inverse (1 minus allowed conversions divided by allowed attempts)
this_week['rz_diff'] = pd.Series((get_values_list(prior_weeks, this_week['team_abv'], 'redzone_con') /
                                 get_values_list(prior_weeks, this_week['team_abv'], 'redzone_att')) -
                                 (1 - get_values_list(prior_weeks, this_week['oppn'], 'redzone_con_alw') /
                                 get_values_list(prior_weeks, this_week['oppn'], 'redzone_att_alw')))

# calculate turnover differential
# (interceptions plus fumbles) minus opponent's (interceptions thrown plus fumbles lost)
this_week['to_diff'] = pd.Series((get_values_list(prior_weeks, this_week['team_abv'], 'def_ints') +
                                 get_values_list(prior_weeks, this_week['team_abv'], 'fumble_rec')) -
                                 (get_values_list(prior_weeks, this_week['oppn'], 'int_thrown') +
                                 get_values_list(prior_weeks, this_week['oppn'], 'fumble_lost')))

this_week.head()

Unnamed: 0,week,team_abv,home,oppn,pass_td,pass_yrd_per_pass,pass_1st_dwn,pass_yrd,rush_td,rush_yrd_per_rush,...,rec_1st_down,def_st_td,def_sack,def_int,def_fbml,def_st_td_alw,def_st_yrd_alw,kck_pts,rz_diff,to_diff
0,6,TB,False,PHI,6.875,52.360647,228.0,71649.0,1.0,16.717507,...,228.0,0.125,5.0625,0.9375,0.1875,6.1875,148489.25,55.25,0.488722,4
1,6,MIA,False,JAX,1.25,44.685691,143.4375,55198.0,0.875,14.691855,...,143.4375,0.125,3.5,0.625,1.125,12.75,182595.375,34.875,0.166667,0
2,6,GB,False,CHI,3.125,48.187894,163.125,54482.4375,0.375,16.918754,...,163.125,0.0,9.375,0.625,0.0,4.8125,121585.0625,58.5,-0.055556,6
3,6,CIN,False,DET,3.375,70.461748,120.9375,56642.4375,0.5,16.659259,...,120.9375,0.0,6.875,0.5,0.0,3.5,134402.5,43.9375,0.675325,-2
4,6,HOU,False,IND,4.125,55.404385,123.25,48167.1875,0.0625,10.32098,...,123.25,0.0,3.9375,0.375,0.75,7.3125,184767.75,13.75,0.313725,5


In [53]:
this_week.describe().to_csv('this_week_audit.csv', index = False)
this_week.columns[this_week.isna().any()].tolist()

[]

In [None]:
this_week.columns.values.tolist()

In [147]:
this_week_rank = pd.read_csv('this_week_2021.csv')

In [148]:
# create dataframe of current weeks matchups
# this_week_rank = pd.DataFrame(columns = ['week', 'team_abv', 'home', 'oppn'], data = matchup_data)
# this_week_temp = this_week_rank.copy()
# this_week_temp = this_week_temp[['week', 'oppn', 'home', 'team_abv']]
# this_week_temp.columns = ['week', 'team_abv', 'home', 'oppn']
# this_week_temp['home'] = True
# this_week_rank = this_week_rank.append(this_week_temp, ignore_index=True)

# create list of columns to rank in decending order
ascending_false = ['pass_td', 'pass_yrd_per_pass', 'pass_1st_dwn', 'pass_yrd', 'rush_td', 'rush_yrd_per_rush', 
                   'rush_1st_down', 'rush_yrd', 'rec_td', 'rec_yrd_per_tar', 'rec_1st_down', 'rec_yrd_per_gm', 
                   'rec_per_gm', 'def_st_td', 'def_sack', 'def_int', 'def_fbml', 'kck_pts', 'rz_diff', 'to_diff']

# create list of columns to rank in ascending order
ascending_true = ['def_st_td_alw', 'def_st_yrd_alw']

# rank all columns in ascending_false
for i in ascending_false:
    this_week_rank[i] = this_week[i].rank(method='average', ascending = False)

# rank all columns in ascending_false
for i in ascending_true:
    this_week_rank[i] = this_week[i].rank(method='average', ascending = True)
    
this_week_rank

Unnamed: 0,week,team_abv,home,oppn,pass_td,pass_yrd_per_pass,pass_1st_dwn,pass_yrd,rush_td,rush_yrd_per_rush,...,rec_per_gm,def_st_td,def_sack,def_int,def_fbml,kck_pts,rz_diff,to_diff,def_st_td_alw,def_st_yrd_alw
0,6,TB,False,PHI,2.0,15.0,2.0,9.0,10.0,20.0,...,4.0,2.5,14.0,5.5,18.5,9.0,4.0,9.5,14.0,8.0
1,6,MIA,False,JAX,24.5,22.0,14.0,18.0,14.0,25.0,...,9.0,2.5,24.0,11.0,2.0,22.0,20.0,22.5,24.0,16.0
2,6,GB,False,CHI,10.5,20.0,11.0,19.0,22.5,19.0,...,16.0,16.5,4.0,11.0,26.0,6.0,25.0,4.5,5.5,2.0
3,6,CIN,False,DET,9.0,1.0,23.0,17.0,20.0,21.0,...,26.0,16.5,7.0,15.0,26.0,17.0,1.0,25.5,1.0,3.0
4,6,HOU,False,IND,6.0,11.0,22.0,23.0,27.0,28.0,...,23.0,16.5,21.0,18.5,5.0,27.0,12.0,7.0,16.0,18.0
5,6,LAR,False,NYG,3.5,3.0,6.0,3.0,15.5,16.0,...,7.0,16.5,18.5,8.0,22.0,3.0,10.0,16.5,9.5,21.0
6,6,KC,False,WSH,1.0,12.0,1.0,4.0,22.5,4.0,...,5.5,16.5,26.0,11.0,22.0,5.0,7.0,25.5,26.0,24.0
7,6,MIN,False,CAR,14.5,24.0,27.0,27.0,28.0,6.0,...,25.0,16.5,2.0,3.0,26.0,20.0,6.0,22.5,9.5,13.0
8,6,LAC,False,BAL,3.5,10.0,5.0,5.0,10.0,8.0,...,8.0,16.5,11.0,8.0,18.5,26.0,17.0,12.5,19.0,28.0
9,6,ARI,False,CLE,13.0,14.0,24.0,16.0,3.0,24.0,...,20.0,16.5,18.5,23.0,9.0,23.0,3.0,2.0,17.5,23.0


In [55]:
this_week_rank.describe().to_csv('this_week_rank_audit.csv', index = False)
this_week_rank.columns[this_week_rank.isna().any()].tolist()

[]

In [149]:
this_week_rank_avg = pd.read_csv('this_week_2021.csv')

In [150]:
# create dataframe of current weeks matchups
# this_week_rank_avg = pd.DataFrame(columns = ['week', 'team_abv', 'home', 'oppn'], data = matchup_data)
# #this_week_rank_avg['oppn'] = '@' + this_week_rank_avg['oppn'].astype(str)
# this_week_temp = this_week_rank_avg.copy()
# this_week_temp = this_week_temp[['week', 'oppn', 'home', 'team_abv']]
# this_week_temp.columns = ['week', 'team_abv', 'home', 'oppn']
# this_week_temp['home'] = True
# this_week_rank_avg = this_week_rank_avg.append(this_week_temp, ignore_index=True)

# add "@" to oppn column since all opponents are the home teams due to how the schedule is scraped from ESPN
#this_week_rank_avg['oppn'] = '@' + this_week_rank_avg['oppn'].astype(str)

this_week_rank_avg.loc[this_week_rank_avg['home'] == False, 'oppn'] = '@' + this_week_rank_avg['oppn'].astype(str)
this_week_rank_avg.loc[this_week_rank_avg['home'] == True, 'team_abv'] = '@' + this_week_rank_avg['team_abv'].astype(str)


# group by QB, RB, WR/TE, DEF, and ST using row means
this_week_rank_avg['QB'] = this_week_rank.iloc[:, [4, 5, 6, 7, 22, 23]].mean(axis=1)
this_week_rank_avg['RB'] = this_week_rank.iloc[:, [8, 9, 10, 11, 22, 23]].mean(axis=1)
this_week_rank_avg['WRTE'] = this_week_rank.iloc[:, [12, 13, 14, 15, 16, 22, 23]].mean(axis=1)
this_week_rank_avg['DEF'] = this_week_rank.iloc[:, [17, 18, 19, 20, 22, 23, 24, 25]].mean(axis=1)
this_week_rank_avg['KICK'] = this_week_rank.iloc[:, [21, 22, 23]].mean(axis=1)

this_week_rank_avg.round({'QB': 1, 'RB': 1, 'WRTE': 1, 'DEF': 1, 'KICK': 1})

Unnamed: 0,week,team_abv,home,oppn,QB,RB,WRTE,DEF,KICK
0,6,TB,False,@PHI,6.9,11.2,6.9,9.5,7.5
1,6,MIA,False,@JAX,20.2,21.6,18.6,15.2,21.5
2,6,GB,False,@CHI,15.0,17.7,13.9,11.8,11.8
3,6,CIN,False,@DET,12.8,17.9,14.8,11.9,14.5
4,6,HOU,False,@IND,13.5,21.2,14.9,14.2,15.3
5,6,LAR,False,@NYG,7.0,13.0,7.4,15.2,9.8
6,6,KC,False,@WSH,8.4,12.2,8.7,19.8,12.5
7,6,MIN,False,@CAR,20.2,15.6,21.0,12.3,16.2
8,6,LAC,False,@BAL,8.8,13.3,8.4,16.3,18.5
9,6,ARI,False,@CLE,12.0,11.1,12.7,14.1,9.3


In [57]:
this_week_rank_avg.describe().to_csv('this_week_rank_avg.csv', index = False)

In [151]:
this_week_rank_avg[['week', 'team_abv', 'oppn', 'QB']].sort_values('QB')

Unnamed: 0,week,team_abv,oppn,QB
0,6,TB,@PHI,6.916667
5,6,LAR,@NYG,7.0
6,6,KC,@WSH,8.416667
8,6,LAC,@BAL,8.833333
22,6,@BAL,LAC,9.75
20,6,@WSH,KC,10.0
13,6,BUF,@TEN,10.0
26,6,@PIT,SEA,10.583333
14,6,@PHI,TB,11.166667
19,6,@NYG,LAR,11.25


In [16]:
this_week_rank_avg.sort_values('QB')

Unnamed: 0,week,team_abv,home,oppn,QB,RB,WRTE,DEF,KICK
30,16,@GB,True,TEN,5.666667,9.0,6.142857,17.5625,11.0
3,16,CLE,False,@NYJ,6.666667,15.083333,6.571429,15.3125,14.0
17,16,@DET,True,TB,7.416667,20.333333,6.642857,24.6875,17.666667
1,16,TB,False,@DET,7.916667,17.666667,8.357143,17.6875,15.833333
29,16,@DAL,True,PHI,8.583333,21.583333,8.357143,15.125,7.833333
14,16,TEN,False,@GB,8.666667,4.916667,10.714286,20.0,8.666667
9,16,CHI,False,@JAX,9.333333,5.75,11.428571,16.1875,11.166667
6,16,ATL,False,@KC,9.416667,23.333333,10.785714,13.4375,9.0
22,16,@KC,True,ATL,10.666667,19.916667,9.571429,17.125,16.333333
25,16,@JAX,True,CHI,11.583333,16.0,11.357143,17.25,19.5


In [60]:
this_week_rank_avg.sort_values('RB')

Unnamed: 0,week,team_abv,home,oppn,QB,RB,WRTE,DEF,KICK
14,16,TEN,False,@GB,8.666667,4.916667,10.714286,20.0,8.666667
9,16,CHI,False,@JAX,9.333333,5.75,11.428571,16.1875,11.166667
15,16,BUF,False,@NE,19.666667,7.583333,19.285714,10.0,15.0
30,16,@GB,True,TEN,5.666667,9.0,6.142857,17.5625,11.0
13,16,PHI,False,@DAL,20.583333,9.0,20.785714,17.25,19.666667
26,16,@BAL,True,NYG,20.666667,9.083333,21.857143,14.5,15.333333
16,16,@NO,True,MIN,16.75,9.666667,19.071429,11.375,17.0
2,16,MIA,False,@LV,15.5,10.416667,16.714286,10.875,13.5
24,16,@WSH,True,CAR,16.416667,11.5,14.071429,9.4375,7.333333
19,16,@NYJ,True,CLE,18.833333,12.666667,19.857143,21.375,17.166667


In [61]:
this_week_rank_avg.sort_values('WRTE')

Unnamed: 0,week,team_abv,home,oppn,QB,RB,WRTE,DEF,KICK
30,16,@GB,True,TEN,5.666667,9.0,6.142857,17.5625,11.0
3,16,CLE,False,@NYJ,6.666667,15.083333,6.571429,15.3125,14.0
17,16,@DET,True,TB,7.416667,20.333333,6.642857,24.6875,17.666667
1,16,TB,False,@DET,7.916667,17.666667,8.357143,17.6875,15.833333
29,16,@DAL,True,PHI,8.583333,21.583333,8.357143,15.125,7.833333
22,16,@KC,True,ATL,10.666667,19.916667,9.571429,17.125,16.333333
14,16,TEN,False,@GB,8.666667,4.916667,10.714286,20.0,8.666667
6,16,ATL,False,@KC,9.416667,23.333333,10.785714,13.4375,9.0
25,16,@JAX,True,CHI,11.583333,16.0,11.357143,17.25,19.5
9,16,CHI,False,@JAX,9.333333,5.75,11.428571,16.1875,11.166667


In [62]:
this_week_rank_avg.sort_values('DEF')

Unnamed: 0,week,team_abv,home,oppn,QB,RB,WRTE,DEF,KICK
24,16,@WSH,True,CAR,16.416667,11.5,14.071429,9.4375,7.333333
15,16,BUF,False,@NE,19.666667,7.583333,19.285714,10.0,15.0
2,16,MIA,False,@LV,15.5,10.416667,16.714286,10.875,13.5
16,16,@NO,True,MIN,16.75,9.666667,19.071429,11.375,17.0
12,16,LAR,False,@SEA,23.5,16.333333,22.142857,13.125,19.333333
6,16,ATL,False,@KC,9.416667,23.333333,10.785714,13.4375,9.0
28,16,@SEA,True,LAR,24.916667,21.583333,24.214286,13.8125,16.166667
4,16,SF,False,@ARI,15.916667,17.166667,14.642857,14.375,15.166667
26,16,@BAL,True,NYG,20.666667,9.083333,21.857143,14.5,15.333333
27,16,@HOU,True,CIN,19.75,29.583333,18.928571,14.5625,21.166667


In [63]:
this_week_rank_avg.sort_values('KICK')

Unnamed: 0,week,team_abv,home,oppn,QB,RB,WRTE,DEF,KICK
24,16,@WSH,True,CAR,16.416667,11.5,14.071429,9.4375,7.333333
29,16,@DAL,True,PHI,8.583333,21.583333,8.357143,15.125,7.833333
14,16,TEN,False,@GB,8.666667,4.916667,10.714286,20.0,8.666667
6,16,ATL,False,@KC,9.416667,23.333333,10.785714,13.4375,9.0
30,16,@GB,True,TEN,5.666667,9.0,6.142857,17.5625,11.0
9,16,CHI,False,@JAX,9.333333,5.75,11.428571,16.1875,11.166667
2,16,MIA,False,@LV,15.5,10.416667,16.714286,10.875,13.5
3,16,CLE,False,@NYJ,6.666667,15.083333,6.571429,15.3125,14.0
15,16,BUF,False,@NE,19.666667,7.583333,19.285714,10.0,15.0
4,16,SF,False,@ARI,15.916667,17.166667,14.642857,14.375,15.166667


In [None]:
import pkg_resources
import types
def get_imports():
    for name, val in globals().items():
        if isinstance(val, types.ModuleType):
            # Split ensures you get root package, 
            # not just imported function
            name = val.__name__.split(".")[0]

        elif isinstance(val, type):
            name = val.__module__.split(".")[0]

        # Some packages are weird and have different
        # imported names vs. system/pip names. Unfortunately,
        # there is no systematic way to get pip names from
        # a package's imported name. You'll have to add
        # exceptions to this list manually!
        poorly_named_packages = {
            "PIL": "Pillow",
            "sklearn": "scikit-learn"
        }
        if name in poorly_named_packages.keys():
            name = poorly_named_packages[name]

        yield name
imports = list(set(get_imports()))

# The only way I found to get the version of the root package
# from only the name of the package is to cross-check the names 
# of installed packages vs. imported packages
requirements = []
for m in pkg_resources.working_set:
    if m.project_name in imports and m.project_name!="pip":
        requirements.append((m.project_name, m.version))

for r in requirements:
    print("{}=={}".format(*r))

In [42]:
matchups_df = pd.read_csv("matchups_df.csv")
matchups_df.head()

Unnamed: 0.1,Unnamed: 0,week,owner_team_name,owner,score,win,opp_owner_team_name,opp_owner,opp_score
0,0,1,Sticky Icky,T-$,166.6,1,Happy Rock Homewreckers,Blainer,139.2
1,1,1,Happy Rock Homewreckers,Blainer,139.2,0,Sticky Icky,T-$,166.6
2,2,1,Bench Don't Kill My Vibe,Padge,190.0,1,Bud Lathrop Drive,Farmer,149.9
3,3,1,Bud Lathrop Drive,Farmer,149.9,0,Bench Don't Kill My Vibe,Padge,190.0
4,4,1,Springfield Atoms,Duvi,147.1,0,Pixel Whippers,Sembower,164.8
