In [1]:
import requests

import pandas as pd
from bs4 import BeautifulSoup
from bs4 import Comment




In [2]:
# Box-score page on pro-football-reference.com that the rest of the notebook scrapes
game_url = 'https://www.pro-football-reference.com/boxscores/202109090tam.htm'



In [3]:
# Download the box-score page once for the interactive exploration below
r = requests.get(game_url)
# Fail loudly on an HTTP error (e.g. 404 or rate limiting) instead of
# silently parsing an error page as if it were the game page
r.raise_for_status()
soup = BeautifulSoup(r.content, 'lxml')

In [6]:
# A direct lookup does not reach the table: its markup is hidden inside an
# HTML comment, so the parser sees it as comment text rather than elements
soup.find('table',id='kicking')
# Collect every comment node in the document
comments = soup.find_all(string=lambda text: isinstance(text, Comment))

# NOTE: despite its name, pbp_table holds the *kicking* table; the name is kept
# so any other cell that reads it keeps working.
# Start from None so the name is always bound, even when nothing matches
pbp_table = None
for comment in comments:
    # Find the comment containing the kicking table
    if 'id="kicking"' in comment:
        # parse the contents of that comment as its own HTML document
        comment_soup = BeautifulSoup(comment, 'lxml')
        # pull the kicking table out of the re-parsed comment
        pbp_table = comment_soup.find('table', id='kicking')

In [29]:
def get_table_from_game_comments(game_url, table_id, header_row=0):
    """Fetch a game page and return a table that is hidden in an HTML comment.

    The desired table is wrapped in an HTML comment on the page, so a plain
    ``soup.find('table', ...)`` cannot reach it. Every comment node is scanned
    and the first one that actually contains ``<table id="table_id">`` is
    re-parsed and handed to pandas.

    Args:
        game_url (str): URL to game on pro-football-reference.com
        table_id (str): HTML id attribute for the desired table
        header_row (int): Optional row of the table to use as the header;
            passed to ``pd.read_html`` as ``header``
    Returns:
        pd.DataFrame: Pandas dataframe of target table. Probably has messy rows
            from extra headers
    Raises:
        requests.HTTPError: If the page request returns an error status
        ValueError: If no comment contains the requested table, or the parsed
            table has no rows
    """
    # Local import keeps this definition self-contained
    from io import StringIO

    r = requests.get(game_url)
    # Do not try to parse an error / rate-limit page as if it were the game
    r.raise_for_status()
    soup = BeautifulSoup(r.content, 'lxml')
    # Find all comment nodes in the html
    comments = soup.find_all(string=lambda text: isinstance(text, Comment))

    # Stays None unless a comment really contains the requested table, so a
    # missing table produces the ValueError below rather than an
    # UnboundLocalError
    table = None
    for comment in comments:
        # Cheap substring test before paying for a second parse
        if f'id="{table_id}"' in comment:
            comment_soup = BeautifulSoup(comment, 'lxml')
            candidate = comment_soup.find('table', id=table_id)
            if candidate is not None:
                table = candidate
                break

    if table is None:
        raise ValueError('No table found!')

    # Wrap the markup in StringIO: passing literal HTML strings to read_html
    # is deprecated in recent pandas versions
    df = pd.read_html(StringIO(table.prettify()), header=header_row,
                      flavor='bs4')[0]

    if len(df) == 0:
        raise ValueError('No table found!')

    return df

def get_punt_plays_from_df(play_df):
    """Filter a play dataframe down to just the punts.

    Args:
        play_df (pd.DataFrame): Play data with a ``Detail`` text column
    Returns:
        pd.DataFrame: The rows of ``play_df`` whose ``Detail`` contains
            ``'punts'``, with the original index preserved
    """
    # na=False treats rows with a missing Detail as "not a punt"; without it
    # the mask contains NaN and boolean indexing raises
    is_punt = play_df['Detail'].str.contains('punts', na=False)
    return play_df[is_punt]

def get_team_punters(punt_df, team):
    """Return the distinct players listed for one team in a kicking table.

    Args:
        punt_df (pd.DataFrame): Kicking data with a ``Player`` column and a
            ``Tm`` column
        team (str): Team abbreviation; it is upper-cased before being compared
            with ``Tm``
    Returns:
        Array of unique player names (the result of ``Series.unique()``) for
            rows whose ``Tm`` matches. These are players who could possibly
            punt for the team, so it will include players who did not punt.
    Raises:
        ValueError: If no row matches the team
    """
    team_code = team.upper()
    on_team = punt_df['Tm'] == team_code
    team_rows = punt_df[on_team]
    names = team_rows.Player.unique()

    # An empty result means the abbreviation matched nothing in the table
    if not len(names):
        raise ValueError('No punters found!')

    return names

In [30]:
# Each get_table_from_game_comments call issues its own request for game_url.
# Play-by-play table, using the default header row
pbp_df = get_table_from_game_comments(game_url, 'pbp')
# Keep only the play-by-play rows whose Detail text mentions 'punts'
punt_df = get_punt_plays_from_df(pbp_df)
# Kicking table; header_row=1 takes the header from the second row
# (presumably the first row is a grouping header -- TODO confirm)
punters_df = get_table_from_game_comments(game_url, 'kicking', header_row=1)

In [22]:
# Players listed under Tm == 'DAL' in the kicking table
get_team_punters(punters_df, 'DAL')

array(['Greg Zuerlein', 'Bryan Anger'], dtype=object)

In [31]:
# Players listed under Tm == 'TAM' in the kicking table
get_team_punters(punters_df, 'TAM')

array(['Ryan Succop', 'Bradley Pinion'], dtype=object)

In [32]:
# Show the punt plays filtered out of the play-by-play table
punt_df

Unnamed: 0,Quarter,Time,Down,ToGo,Location,Detail,DAL,TAM,EPB,EPA
6,1,13:26,4,2,TAM 33,Bradley Pinion punts 65 yards out of bounds,0,0,-1.18,0.38
17,1,9:42,4,15,TAM 44,Bryan Anger punts 38 yards out of bounds,0,0,0.33,0.38
45,2,15:00,4,5,DAL 40,Bradley Pinion punts downed by Jaydon Micken...,7,7,0.59,-0.06
46,2,14:53,4,15,DAL 50,"Bradley Pinion punts 42 yards, returned by C...",7,7,-0.06,0.38
51,2,12:55,4,8,DAL 7,"Bryan Anger punts 47 yards, returned by Jayd...",7,7,-2.49,-2.26
124,3,8:55,4,5,TAM 38,"Bradley Pinion punts 49 yards, returned by C...",19,21,-0.85,-0.61
154,4,12:00,4,10,DAL 47,"Bradley Pinion punts 41 yards, fair catch by ...",26,28,0.13,0.38
158,4,11:05,4,8,DAL 8,"Bryan Anger punts 62 yards, returned by Jayd...",26,28,-2.49,-1.4


In [34]:
# Scratch check: building a set collapses the repeated entries
set(['a', 'a', 'b', 'c', 'a'])

{'a', 'b', 'c'}

In [35]:
# Show the full play-by-play table; note it still contains non-play rows
# such as the quarter header and the coin-toss line
pbp_df

Unnamed: 0,Quarter,Time,Down,ToGo,Location,Detail,DAL,TAM,EPB,EPA
0,1st Quarter,1st Quarter,1st Quarter,1st Quarter,1st Quarter,1st Quarter,1st Quarter,1st Quarter,1st Quarter,1st Quarter
1,,,,,,"Cowboys won the coin toss and deferred, Buccan...",,,,
2,1,15:00,,,DAL 35,"Greg Zuerlein kicks off 65 yards, touchback.",0,0,0.000,0.610
3,1,15:00,1,10,TAM 25,Leonard Fournette left tackle for 5 yards (ta...,0,0,0.610,0.740
4,1,14:17,2,5,TAM 30,Leonard Fournette right guard for 3 yards (ta...,0,0,0.740,0.430
...,...,...,...,...,...,...,...,...,...,...
195,4,0:10,3,10,DAL 18,Tom Brady pass incomplete short left,29,28,3.050,2.250
196,4,0:07,4,10,DAL 18,Ryan Succop 36 yard field goal good,29,31,2.250,3.000
197,4,0:02,,,TAM 35,"Bradley Pinion kicks off 65 yards, touchback.",29,31,0.000,0.610
198,4,0:02,1,10,DAL 25,Tony Pollard right end for 11 yards (tackle b...,29,31,0.610,1.330
