In [1]:
from parsing_functions import get_table_from_game_comments, get_punt_plays_from_df, get_team_punters

In [73]:
game_url = 'https://www.pro-football-reference.com/boxscores/202109090tam.htm'

pbp_df = get_table_from_game_comments(game_url, 'pbp')
punt_df = get_punt_plays_from_df(pbp_df)
punters_df = get_table_from_game_comments(game_url, 'kicking', header_row=1)

In [74]:
get_team_punters(punters_df, 'TAM')

array(['Ryan Succop', 'Bradley Pinion'], dtype=object)

In [104]:
def assign_team_to_punts(punt_df, punters_df):
    """Given a dataframe of punt plays and another of each teams punters, assign a team to each play"""
    
    teams = punt_df.columns[6:8]
    if any([len(team)!=3 for team in teams]):
        raise ValueError(f'Unexpected teams {teams}')
        
    for team in teams:
        punters = get_team_punters(punters_df, team)
        
        # Assign team to punts based on who is kicking
        for punter in punters:
            punt_df.loc[punt_df['Detail'].str.startswith(punter), 'Tm'] = team
            
    if punt_df['Tm'].isna().any():
        raise ValueError(f'Missing team! {punt_df}')
            
    return punt_df

def get_yrds_to_endzone(punt_df):
    """Given a dataframe of punts with team assigned, extract yards to the endzone"""
    if 'Tm' not in punt_df.columns:
        raise ValueError('No team found, please add teams to dataframe of punts')
        
    punt_df.loc[:, 'field_side'] = punt_df['Location'].str.split(' ').apply(lambda x: x[0])
    punt_df.loc[:, 'yard_start'] = punt_df['Location'].str.split(' ').apply(lambda x: x[1]).astype(int)
    punt_df.loc[:,'yrds_to_endzone'] =  (punt_df['Tm']==punt_df['field_side'])*(100-2*punt_df['yard_start']) +\
                                         punt_df['yard_start']
    
    return punt_df

def get_field_position_score(punt_df):
    """Given a punt dataframe, calculate the field position component of surrender index"""
    
    if 'yrds_to_endzone' not in punt_df.columns:
        raise ValueError('Missing yards to endzone, please add this to dataframe first')
        
    # If in own teritory, score is 1 + 10% for each yard after 40
    own_filter = punt_df['yrds_to_endzone']>=50
    own_function = lambda yrds: max(1, 1.1**((100-yrds)-40))
    punt_df.loc[own_filter, 'field_pos_score'] = punt_df.loc[own_filter,
                                                            'yrds_to_endzone'].apply(own_function)
    
     # In opponent territory, score goes up 20% per yard
    opp_filter = punt_df['yrds_to_endzone']<50
    opp_function = lambda yrds: (1.2)**(50 - yrds) * ((1.1)**(10))
    punt_df.loc[opp_filter, 'field_pos_score'] = punt_df.loc[opp_filter,
                                                            'yrds_to_endzone'].apply(opp_function)
    
    return punt_df

def get_yrds_to_go_score(punt_df):
    """Calculate the yards to make 1st down discount of surrender index"""
    punt_df.loc[:, 'ToGo'] = punt_df['ToGo'].astype(int)
    
    # If its hard to get a first down, surrender index goes down
    punt_df.loc[punt_df['ToGo']>=10, 'to_go_discount'] = 0.2
    punt_df.loc[punt_df['ToGo']<10, 'to_go_discount'] = 0.4
    punt_df.loc[punt_df['ToGo']<6, 'to_go_discount'] = 0.6
    punt_df.loc[punt_df['ToGo']<4, 'to_go_discount'] = 0.8
    punt_df.loc[punt_df['ToGo']<2, 'to_go_discount'] = 1
    
    return punt_df

def get_gamescore_multilpier(punt_df):
    '''Calculate multiplier based on gamescore. If you're trailing by 1, go for it!'''
    
    teams = punt_df['Tm'].unique()
    
    for team in teams:
        team_filter = punt_df['Tm']==team
        opp = [other_team for other_team in teams if other_team != team][0]
        
        # make scores ints
        punt_df.loc[:, team] = punt_df[team].astype(int)
        punt_df.loc[:, opp] = punt_df[opp].astype(int)
        
        punt_df.loc[team_filter, 'current_lead'] = punt_df.loc[team_filter, team] - punt_df.loc[team_filter, opp]
        
    # losing by 2+ scores multiplier = 3x
    punt_df.loc[punt_df['current_lead']<-8, 'score_multiplier'] = 3
    # losing by 1 score multiplier = 4x
    punt_df.loc[punt_df['current_lead']>=-8, 'score_multiplier'] = 4
    # Winning multiplier = 1
    punt_df.loc[punt_df['current_lead']>0, 'score_multiplier'] = 1
    # tied multiplier = 2
    punt_df.loc[punt_df['current_lead']==0, 'score_multiplier'] = 2
    
    return punt_df

def get_gameclock_multiplier(punt_df):
    '''Only applies if tied or losing after halftime. Cubic function'''
    
    punt_df.loc[:,'Quarter'] =punt_df['Quarter'].astype(int)
    after_halftime_filter = punt_df['Quarter']>=3
    winning_or_tied_filter = punt_df['current_lead'] >=0
    gameclock_filter = after_halftime_filter & winning_or_tied_filter
    
    punt_df.loc[:,'gameclock_minutes'] = punt_df['Time'].str.split(':').apply(lambda x: x[0]).astype(int)
    punt_df.loc[:,'gameclock_seconds'] = punt_df['Time'].str.split(':').apply(lambda x: x[1]).astype(int)
    
    punt_df.loc[:, 'seconds_since_halftime'] = 0
    punt_df.loc[after_halftime_filter, 'seconds_since_halftime'] = (punt_df.loc[after_halftime_filter,'Quarter']-3)*15*60 + \
                                                                   (punt_df.loc[after_halftime_filter,'gameclock_minutes'])*60 + \
                                                                   (punt_df.loc[after_halftime_filter,'gameclock_seconds'])
    punt_df.loc[:,'gameclock_multiplier'] = 1
    punt_df.loc[gameclock_filter, 'gameclock_multiplier'] = (punt_df.loc[after_halftime_filter, 'seconds_since_halftime']*0.001)**3+1
    
    return punt_df

def get_surrender_index(punt_df):
    
    punt_df.loc[:,'surrender_index'] = punt_df[]
    
    


In [105]:
punt_df = assign_team_to_punts(punt_df, punters_df)
punt_df = get_yrds_to_endzone(punt_df)
punt_df = get_field_position_score(punt_df)
punt_df = get_yrds_to_go_score(punt_df)
punt_df = get_gamescore_multilpier(punt_df)
punt_df = get_gameclock_multiplier(punt_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [106]:
punt_df

Unnamed: 0,Quarter,Time,Down,ToGo,Location,Detail,DAL,TAM,EPB,EPA,...,yard_start,yrds_to_endzone,field_pos_score,to_go_discount,current_lead,score_multiplier,gameclock_minutes,gameclock_seconds,seconds_since_halftime,gameclock_multiplier
6,1,13:26,4,2,TAM 33,Bradley Pinion punts 65 yards out of bounds,0,0,-1.18,0.38,...,33,67,1.0,0.8,0.0,2.0,13,26,0,1.0
17,1,9:42,4,15,TAM 44,Bryan Anger punts 38 yards out of bounds,0,0,0.33,0.38,...,44,44,7.744873,0.2,0.0,2.0,9,42,0,1.0
45,2,15:00,4,5,DAL 40,Bradley Pinion punts downed by Jaydon Micken...,7,7,0.59,-0.06,...,40,40,16.05977,0.6,0.0,2.0,15,0,0,1.0
46,2,14:53,4,15,DAL 50,"Bradley Pinion punts 42 yards, returned by C...",7,7,-0.06,0.38,...,50,50,2.593742,0.2,0.0,2.0,14,53,0,1.0
51,2,12:55,4,8,DAL 7,"Bryan Anger punts 47 yards, returned by Jayd...",7,7,-2.49,-2.26,...,7,93,1.0,0.4,0.0,2.0,12,55,0,1.0
124,3,8:55,4,5,TAM 38,"Bradley Pinion punts 49 yards, returned by C...",19,21,-0.85,-0.61,...,38,62,1.0,0.6,2.0,1.0,8,55,535,1.15313
154,4,12:00,4,10,DAL 47,"Bradley Pinion punts 41 yards, fair catch by ...",26,28,0.13,0.38,...,47,47,4.481987,0.2,2.0,1.0,12,0,1620,5.251528
158,4,11:05,4,8,DAL 8,"Bryan Anger punts 62 yards, returned by Jayd...",26,28,-2.49,-1.4,...,8,92,1.0,0.4,-2.0,4.0,11,5,1565,1.0
