In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd

from scraping_functions import get_table_from_game_comments, get_weeks_in_year, get_games_in_week
from parsing_functions import (get_punt_plays_from_df, get_team_punters, 
                               assign_team_to_punts, get_losing_team)
from scoring_functions import (get_yrds_to_endzone, get_field_position_score,
                               get_yrds_to_go_score, get_gamescore_multilpier, 
                               get_gameclock_multiplier, get_surrender_index)

In [None]:
# Build the baseline distribution of surrender-index scores: scrape every
# game of the baseline seasons, score each punt, and stack the per-game
# punt frames into `baseline_punt_df`.
#
# Per-game frames are collected in a list and concatenated once at the end.
# `DataFrame.append` was removed in pandas 2.0, and growing a frame inside a
# loop copies all accumulated rows on every iteration.
baseline_punt_frames = []
error_games_base = []  # URLs of games whose pipeline raised ValueError
baseline_years = [2010, 2011, 2012, 2013, 2014]
for year in baseline_years:
    week_urls = get_weeks_in_year(year)
    for week_url in week_urls:
        # The week number is taken from between the first '_' and the
        # following '.' of the URL (presumably '..._<week>.htm' -- confirm
        # against get_weeks_in_year).
        week = int(week_url.split('_')[1].split('.')[0])
        print(f'{year}, {week}')

        # Seasons before 2021 have 17 regular-season weeks; from 2021 on
        # there are 18. Anything after the last regular-season week is a
        # playoff week. (The original comparison had the two cut-offs
        # swapped, so week 18 of pre-2021 seasons was not flagged.)
        last_regular_season_week = 17 if year < 2021 else 18
        is_playoffs = week > last_regular_season_week

        game_urls = get_games_in_week(week_url)

        for game_url in game_urls:
            try:
                pbp_df = get_table_from_game_comments(game_url, 'pbp')
                # The result is not used in this cell. The call is kept so
                # that a ValueError raised here still excludes the game,
                # exactly as before.
                losing_team = get_losing_team(pbp_df)
                punt_df = get_punt_plays_from_df(pbp_df)
                punters_df = get_table_from_game_comments(game_url, 'kicking', header_row=1)
                punt_df = assign_team_to_punts(punt_df, punters_df)
                punt_df = get_yrds_to_endzone(punt_df)
                punt_df = get_field_position_score(punt_df)
                punt_df = get_yrds_to_go_score(punt_df)
                punt_df = get_gamescore_multilpier(punt_df)
                punt_df = get_gameclock_multiplier(punt_df, year, is_playoffs)
                punt_df = get_surrender_index(punt_df)
            except ValueError:
                # Record the game that failed and move on to the next one.
                error_games_base.append(game_url)
                continue
            baseline_punt_frames.append(punt_df)

# pd.concat raises on an empty list, so fall back to an empty frame
# (the value the original accumulator started from).
baseline_punt_df = (
    pd.concat(baseline_punt_frames) if baseline_punt_frames else pd.DataFrame()
)


2010, 13
2010, 6
2010, 10
2010, 11
2010, 5
2010, 2
2010, 17
2010, 21
2010, 18
2010, 12
2010, 8
2010, 3
2010, 14
2010, 9
2010, 7
2010, 16
2010, 1
2010, 19
2010, 15
2010, 20
2010, 4
2011, 18
2011, 16
2011, 20
2011, 9
2011, 3
2011, 4
2011, 2
2011, 7
2011, 14
2011, 11
2011, 12
2011, 6
2011, 15
2011, 5
2011, 13
2011, 10
2011, 17
2011, 8
2011, 21
2011, 19
2011, 1
2012, 2
2012, 19
2012, 18
2012, 11
2012, 3
2012, 21
2012, 4
2012, 8
2012, 15
2012, 6
2012, 17
2012, 1
2012, 20
2012, 10
2012, 5
2012, 16
2012, 7
2012, 12
2012, 14
2012, 9
2012, 13
2013, 19
2013, 20
2013, 5
2013, 1
2013, 17
2013, 13
2013, 21
2013, 14
2013, 3
2013, 9
2013, 7


In [None]:
# Report how many baseline games were skipped because parsing raised.
n_failed_base = len(error_games_base)
print(f'{n_failed_base} could not be parsed')

In [None]:
# Threshold = 90th percentile of the baseline surrender-index distribution.
baseline_surrender_index = baseline_punt_df['surrender_index']
cowardly_punt_threshold = baseline_surrender_index.quantile(q=0.9)

In [None]:
# Tally, over the test seasons, how many team-games contain at least one
# punt whose surrender index exceeds `cowardly_punt_threshold`, and how many
# of those belong to the team that lost the game.
games = 0                             # team-games seen (2 per parsed game)
losing_games = 0                      # games for which a losing team was identified
losing_games_with_cowardly_punts = 0  # games where the loser had a cowardly punt
games_with_cowardly_punts = 0         # team-games with at least one cowardly punt
error_games_test = []                 # URLs of games whose pipeline raised ValueError

test_years = [2015, 2016, 2017, 2018, 2019]
for year in test_years:
    week_urls = get_weeks_in_year(year)
    for week_url in week_urls:
        # The week number is taken from between the first '_' and the
        # following '.' of the URL (presumably '..._<week>.htm' -- confirm
        # against get_weeks_in_year).
        week = int(week_url.split('_')[1].split('.')[0])
        print(f'{year}, {week}')

        # Seasons before 2021 have 17 regular-season weeks; from 2021 on
        # there are 18. Anything after the last regular-season week is a
        # playoff week. (The original comparison had the two cut-offs
        # swapped, so week 18 of pre-2021 seasons was not flagged.)
        last_regular_season_week = 17 if year < 2021 else 18
        is_playoffs = week > last_regular_season_week

        game_urls = get_games_in_week(week_url)

        for game_url in game_urls:
            try:
                pbp_df = get_table_from_game_comments(game_url, 'pbp')
                losing_team = get_losing_team(pbp_df)
                punt_df = get_punt_plays_from_df(pbp_df)
                punters_df = get_table_from_game_comments(game_url, 'kicking', header_row=1)
                punt_df = assign_team_to_punts(punt_df, punters_df)
                punt_df = get_yrds_to_endzone(punt_df)
                punt_df = get_field_position_score(punt_df)
                punt_df = get_yrds_to_go_score(punt_df)
                punt_df = get_gamescore_multilpier(punt_df)
                punt_df = get_gameclock_multiplier(punt_df, year, is_playoffs)
                punt_df = get_surrender_index(punt_df)
            except ValueError:
                # Record the game that failed and move on to the next one.
                error_games_test.append(game_url)
                continue

            cowardly_punts = punt_df[punt_df['surrender_index'] > cowardly_punt_threshold]

            # We're counting team-games here, so each game counts
            # once for each team.
            games += 2
            # get_losing_team may return None (presumably for games with no
            # loser, e.g. ties -- confirm); such games add no losing game.
            if losing_team is not None:
                losing_games += 1

            if len(cowardly_punts) > 0:
                # Distinct teams that made at least one cowardly punt.
                cowardly_teams = cowardly_punts['Tm'].unique()
                games_with_cowardly_punts += len(cowardly_teams)

                if losing_team in cowardly_teams:
                    losing_games_with_cowardly_punts += 1
            


In [None]:
# Report how many test-season games were skipped because parsing raised.
n_failed_test = len(error_games_test)
print(f'{n_failed_test} could not be parsed')

In [None]:
# Display the total number of team-games counted (incremented by 2 per parsed game).
games 

In [None]:
# Display the number of parsed games for which a losing team was identified.
losing_games 

In [None]:
# Display the number of games in which the losing team made at least one
# punt above the cowardly-punt threshold.
losing_games_with_cowardly_punts 

In [None]:
# Display the number of team-games containing at least one punt above the
# cowardly-punt threshold.
games_with_cowardly_punts 

In [None]:
# The 0th quantile is the smallest surrender index in the baseline sample.
baseline_punt_df['surrender_index'].quantile(q=0.0)