# NFL Project

The `ScrapePFR_*` functions below take a box-score URL and parse all the data tables I want from it. This works for all but 3 URLs from 2018, 2019, and 2020.

## Problematic URLs

https://www.pro-football-reference.com/boxscores/201909080min.htm - Advanced defense table format is malformed (Vikings/Falcons, Week 1 of 2019).

https://www.pro-football-reference.com/boxscores/202112050mia.htm - Officials table missing

https://www.pro-football-reference.com/boxscores/202112180clt.htm - Officials table missing

In [1]:
from bs4 import BeautifulSoup
from selenium import webdriver
import pandas as pd
import itertools
import numpy as np

In [2]:
def ScrapePFR_All_Tables(url):
    """Scrape the score, basic, advanced and starter tables from one box-score page.

    The page is loaded in Chrome through Selenium, the rendered body HTML is
    parsed with BeautifulSoup, and the ``<table class="stats_table">`` elements
    are read by position: 0 (score), 6 (offense), 7 (defense), 8 (kick/punt
    returns), 9 (kicking/punting), 10-13 (advanced passing, rushing, receiving,
    defense), 14 (home starters) and 15 (away starters).

    Player names live in ``<th>`` cells while the stats live in ``<td>`` cells,
    so each table is rebuilt by pairing the header texts that fall inside the
    two team sections with the rows that have a full set of ``<td>`` values.

    Args:
        url: Address of the box-score page to scrape.

    Returns:
        An 11-tuple of DataFrames: ``(df_Score, df_Offense, df_Defense,
        df_KickReturns, df_Kicking, df_AdvancedPassing, df_AdvancedRushing,
        df_AdvancedReceiving, df_AdvancedDefense, df_HomeStarters,
        df_AwayStarters)``.

    Raises:
        Exception: Whatever went wrong while loading or parsing the page. The
            URL is printed first so the failing page can be identified, then
            the original error is re-raised (previously the real error was
            swallowed and an ``UnboundLocalError`` surfaced at ``return``).
    """
    offense_cols = ['Tm','Cmp','Pass_Att','Pass_Yds','Pass_TD','Int','Sk','Sk_Yds','Pass_Lng','Rate',
                    'Rush_Att','Rush_Yds','Rush_TD','Rush_Lng','Tgt','Rec','Rec_Yds','Rec_TD','Rec_Lng',
                    'Fmb','FL']
    defense_cols = ['Tm','Def_Ints','Def_Int_Yds','Def_Int_TD','Def_Int_Lng','Def_Pass_Defended','Def_Sack',
                    'Def_Combined_Tackles','Def_Solo_Tackle','Def_Assisted_Tackles','Def_TFL','Def_QB_Hits',
                    'Def_FR','Def_FR_Yds','Def_FR_TD','Def_FF']
    kick_return_cols = ['Tm','Kick_Returns','Kick_Return_Yds','Yards_Per_Kick_Return','Kick_Return_TDs',
                        'Kick_Return_Long','Punt_Returns','Punt_Return_Yds','Yards_Per_Punt_Return',
                        'Punt_Return_TDs','Punt_Return_Long']
    kicking_cols = ['Tm','XPM','XPA','FGM','FGA','Punts','Punt_Yds','Yards_Per_Punt','Punt_Lng']
    adv_passing_cols = ['Tm','Cmp','Att','Yds','1D','1D%','IAY','IAY/PA','CAY','CAY/Cmp','CAY/PA','YAC',
                        'YAC/Cmp','Drops','Drop%','BadTh','Bad%','Sk','Bltz','Hrry','Hits','Prss','Prss%',
                        'Scrm','Yds/Scr']
    adv_rushing_cols = ['Tm','Att','Yds','1D','YBC','YBC/Att','YAC','YAC/Att','BrkTkl','Att/Br']
    adv_receiving_cols = ['Tm','Tgt','Rec','Yds','TD','1D','YBC','YBC/R','YAC','YAC/R','ADOT','BrkTkl',
                          'Rec/Br','Drop','Drop%','Int','Rat']
    adv_defense_cols = ['Tm','Int','Tgt','Cmp','Cmp%','Yds','Yds/Cmp','Yds/Tgt','TD','Rat','DADOT','Air',
                        'YAC','Blitz','Hrry','QBKD','Sk','Prss','Comb','MTkl','MTkl%']

    def th_texts(table):
        """Return the text of every <th> cell in the table, in document order."""
        return [th.text for th in table.find_all('th')]

    def td_frame(table):
        """Build one row per <tr> from its <td> texts; short rows get missing values."""
        # Built in one shot: DataFrame.append was removed in pandas 2.0 and
        # growing a frame row by row is quadratic anyway.
        return pd.DataFrame(
            [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]
        )

    def split_players(headers, away_start, away_stop, home_start):
        """Slice the away and home player names out of the flat header list.

        ``away_stop`` and ``home_start`` are ``(label, search_from, shift)``
        triples: the boundary is ``headers.index(label, search_from) + shift``.
        """
        away_label, away_from, away_shift = away_stop
        home_label, home_from, home_shift = home_start
        away_end = headers.index(away_label, away_from) + away_shift
        home_begin = headers.index(home_label, home_from) + home_shift
        return headers[away_start:away_end] + headers[home_begin:]

    def stat_frame(table, stat_cols, players, omit=(), blank_to_zero=True):
        """Label the <td> grid, keep only complete rows and attach player names."""
        frame = td_frame(table)
        frame.columns = stat_cols
        # Header/separator rows have no (or too few) <td> cells, so they are NaN here.
        frame = frame.dropna()
        frame['Players'] = players
        frame = frame[['Players'] + [col for col in stat_cols if col not in omit]]
        frame = frame.reset_index(drop=True)
        if blank_to_zero:
            # Empty / whitespace-only cells become the integer 0.
            frame = frame.replace(r'^\s*$', 0, regex=True)
        return frame

    def two_team_table(table, stat_cols, away_start, away_stop, home_start,
                       omit=(), blank_to_zero=True):
        """Parse a table that lists away players first and home players second."""
        players = split_players(th_texts(table), away_start, away_stop, home_start)
        return stat_frame(table, stat_cols, players, omit, blank_to_zero)

    def starters_table(table):
        """Parse a starters table: names follow the first two header cells."""
        return stat_frame(table, ['Pos'], th_texts(table)[2:])

    try:
        # NOTE(review): `executable_path` is the older Selenium calling
        # convention; confirm the installed Selenium version still accepts it.
        driver = webdriver.Chrome(executable_path='chromedriver.exe')
        try:
            driver.get(url)
            page = driver.execute_script('return document.body.innerHTML')
        finally:
            # Always close the browser; otherwise every call leaks a Chrome process.
            driver.quit()

        soup = BeautifulSoup(page, 'html.parser')
        tables = soup.find_all('table', {'class': 'stats_table'})

        #Score Table
        score_cells = td_frame(tables[0]).dropna()
        df_Score = pd.DataFrame()
        df_Score['Tm'] = score_cells.iloc[:, 1]
        df_Score['Final'] = score_cells.iloc[:, -1]
        df_Score['Home/Away'] = ['Away', 'Home']

        #Offense Table ('Sk_Yds' is parsed but not part of the output; blanks are kept)
        df_Offense = two_team_table(tables[6], offense_cols, 27, ('Passing', 2, -1), ('FL', 37, 1),
                                    omit=('Sk_Yds',), blank_to_zero=False)

        #Defense Table (blanks are kept)
        df_Defense = two_team_table(tables[7], defense_cols, 22, ('Def Interceptions', 3, -1),
                                    ('FF', 23, 1), blank_to_zero=False)

        #Kick Returns Table
        kick_table = tables[8]
        kick_headers = th_texts(kick_table)
        try:
            kick_players = split_players(kick_headers, 15, ('Kick Returns', 3, -1), ('Y/R', 16, 3))
        except ValueError:
            # A section marker was not found (presumably only one team has
            # returners listed): take everything after the first header block.
            kick_players = kick_headers[kick_headers.index('Y/R') + 3:]
        df_KickReturns = stat_frame(kick_table, kick_return_cols, kick_players)

        #Kicking Table
        df_Kicking = two_team_table(tables[9], kicking_cols, 13, ('Scoring', 3, -1), ('Lng', 13, 1))

        #Advanced Passing Table
        df_AdvancedPassing = two_team_table(tables[10], adv_passing_cols, 26, ('Player', 3, 0),
                                            ('Yds/Scr', 26, 1))

        #Advanced Rushing Table
        df_AdvancedRushing = two_team_table(tables[11], adv_rushing_cols, 11, ('Player', 3, 0),
                                            ('Att/Br', 11, 1))

        #Advanced Receiving Table
        df_AdvancedReceiving = two_team_table(tables[12], adv_receiving_cols, 18, ('Player', 3, 0),
                                              ('Rat', 18, 1))

        #Advanced Defense
        df_AdvancedDefense = two_team_table(tables[13], adv_defense_cols, 22, ('Player', 3, 0),
                                            ('MTkl%', 22, 1))

        #Home Starters
        df_HomeStarters = starters_table(tables[14])

        #AwayStarters
        df_AwayStarters = starters_table(tables[15])
    except Exception:
        # Report which page failed, then surface the real error to the caller.
        print(url)
        raise

    return df_Score, df_Offense, df_Defense, df_KickReturns, df_Kicking, df_AdvancedPassing, df_AdvancedRushing, df_AdvancedReceiving, df_AdvancedDefense,df_HomeStarters,df_AwayStarters

In [3]:
def ScrapePFR_Simple_Tables(url):
    """Scrape the score, basic stat and starter tables from one box-score page.

    Intended for pages without the advanced-stat tables: the
    ``<table class="stats_table">`` elements are read by position as 0 (score),
    6 (offense), 7 (defense), 8 (kick/punt returns), 9 (kicking/punting),
    10 (home starters) and 11 (away starters).

    Player names live in ``<th>`` cells while the stats live in ``<td>`` cells,
    so each table is rebuilt by pairing the header texts that fall inside the
    two team sections with the rows that have a full set of ``<td>`` values.

    Args:
        url: Address of the box-score page to scrape.

    Returns:
        A 7-tuple of DataFrames: ``(df_Score, df_Offense, df_Defense,
        df_KickReturns, df_Kicking, df_HomeStarters, df_AwayStarters)``.

    Raises:
        Exception: Whatever went wrong while loading or parsing the page. The
            URL is printed first so the failing page can be identified, then
            the original error is re-raised (previously the real error was
            swallowed and an ``UnboundLocalError`` surfaced at ``return``).
    """
    offense_cols = ['Tm','Cmp','Pass_Att','Pass_Yds','Pass_TD','Int','Sk','Sk_Yds','Pass_Lng','Rate',
                    'Rush_Att','Rush_Yds','Rush_TD','Rush_Lng','Tgt','Rec','Rec_Yds','Rec_TD','Rec_Lng',
                    'Fmb','FL']
    defense_cols = ['Tm','Def_Ints','Def_Int_Yds','Def_Int_TD','Def_Int_Lng','Def_Pass_Defended','Def_Sack',
                    'Def_Combined_Tackles','Def_Solo_Tackle','Def_Assisted_Tackles','Def_TFL','Def_QB_Hits',
                    'Def_FR','Def_FR_Yds','Def_FR_TD','Def_FF']
    kick_return_cols = ['Tm','Kick_Returns','Kick_Return_Yds','Yards_Per_Kick_Return','Kick_Return_TDs',
                        'Kick_Return_Long','Punt_Returns','Punt_Return_Yds','Yards_Per_Punt_Return',
                        'Punt_Return_TDs','Punt_Return_Long']
    kicking_cols = ['Tm','XPM','XPA','FGM','FGA','Punts','Punt_Yds','Yards_Per_Punt','Punt_Lng']

    def th_texts(table):
        """Return the text of every <th> cell in the table, in document order."""
        return [th.text for th in table.find_all('th')]

    def td_frame(table):
        """Build one row per <tr> from its <td> texts; short rows get missing values."""
        # Built in one shot: DataFrame.append was removed in pandas 2.0 and
        # growing a frame row by row is quadratic anyway.
        return pd.DataFrame(
            [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]
        )

    def split_players(headers, away_start, away_stop, home_start):
        """Slice the away and home player names out of the flat header list.

        ``away_stop`` and ``home_start`` are ``(label, search_from, shift)``
        triples: the boundary is ``headers.index(label, search_from) + shift``.
        """
        away_label, away_from, away_shift = away_stop
        home_label, home_from, home_shift = home_start
        away_end = headers.index(away_label, away_from) + away_shift
        home_begin = headers.index(home_label, home_from) + home_shift
        return headers[away_start:away_end] + headers[home_begin:]

    def stat_frame(table, stat_cols, players, omit=(), blank_to_zero=True):
        """Label the <td> grid, keep only complete rows and attach player names."""
        frame = td_frame(table)
        frame.columns = stat_cols
        # Header/separator rows have no (or too few) <td> cells, so they are NaN here.
        frame = frame.dropna()
        frame['Players'] = players
        frame = frame[['Players'] + [col for col in stat_cols if col not in omit]]
        frame = frame.reset_index(drop=True)
        if blank_to_zero:
            # Empty / whitespace-only cells become the integer 0.
            frame = frame.replace(r'^\s*$', 0, regex=True)
        return frame

    def two_team_table(table, stat_cols, away_start, away_stop, home_start,
                       omit=(), blank_to_zero=True):
        """Parse a table that lists away players first and home players second."""
        players = split_players(th_texts(table), away_start, away_stop, home_start)
        return stat_frame(table, stat_cols, players, omit, blank_to_zero)

    def starters_table(table):
        """Parse a starters table: names follow the first two header cells."""
        return stat_frame(table, ['Pos'], th_texts(table)[2:])

    try:
        # NOTE(review): `executable_path` is the older Selenium calling
        # convention; confirm the installed Selenium version still accepts it.
        driver = webdriver.Chrome(executable_path='chromedriver.exe')
        try:
            driver.get(url)
            page = driver.execute_script('return document.body.innerHTML')
        finally:
            # Always close the browser; otherwise every call leaks a Chrome process.
            driver.quit()

        soup = BeautifulSoup(page, 'html.parser')
        tables = soup.find_all('table', {'class': 'stats_table'})

        #Score Table
        score_cells = td_frame(tables[0]).dropna()
        df_Score = pd.DataFrame()
        df_Score['Tm'] = score_cells.iloc[:, 1]
        df_Score['Final'] = score_cells.iloc[:, -1]
        df_Score['Home/Away'] = ['Away', 'Home']

        #Offense Table ('Sk_Yds' is parsed but not part of the output; blanks are kept)
        df_Offense = two_team_table(tables[6], offense_cols, 27, ('Passing', 2, -1), ('FL', 37, 1),
                                    omit=('Sk_Yds',), blank_to_zero=False)

        #Defense Table (blanks are kept)
        df_Defense = two_team_table(tables[7], defense_cols, 22, ('Def Interceptions', 3, -1),
                                    ('FF', 23, 1), blank_to_zero=False)

        #Kick Returns Table
        kick_table = tables[8]
        kick_headers = th_texts(kick_table)
        try:
            kick_players = split_players(kick_headers, 15, ('Kick Returns', 3, -1), ('Y/R', 16, 3))
        except ValueError:
            # A section marker was not found (presumably only one team has
            # returners listed): take everything after the first header block.
            kick_players = kick_headers[kick_headers.index('Y/R') + 3:]
        df_KickReturns = stat_frame(kick_table, kick_return_cols, kick_players)

        #Kicking Table
        df_Kicking = two_team_table(tables[9], kicking_cols, 13, ('Scoring', 3, -1), ('Lng', 13, 1))

        #Home Starters
        df_HomeStarters = starters_table(tables[10])

        #AwayStarters
        df_AwayStarters = starters_table(tables[11])
    except Exception:
        # Report which page failed, then surface the real error to the caller.
        print(url)
        raise

    return df_Score, df_Offense, df_Defense, df_KickReturns, df_Kicking, df_HomeStarters, df_AwayStarters

In [4]:
def ScrapePFRExceptions(url):
    """Scrape all tables from a box-score page whose tables sit one slot earlier.

    Same output as the full scraper, but the ``<table class="stats_table">``
    elements after the score table are read one position lower: 0 (score),
    5 (offense), 6 (defense), 7 (kick/punt returns), 8 (kicking/punting),
    9-12 (advanced passing, rushing, receiving, defense), 13 (home starters)
    and 14 (away starters). Presumably this is for pages that lack one of the
    leading tables (e.g. the officials table) -- confirm against the pages
    this is used for.

    Player names live in ``<th>`` cells while the stats live in ``<td>`` cells,
    so each table is rebuilt by pairing the header texts that fall inside the
    two team sections with the rows that have a full set of ``<td>`` values.

    Args:
        url: Address of the box-score page to scrape.

    Returns:
        An 11-tuple of DataFrames: ``(df_Score, df_Offense, df_Defense,
        df_KickReturns, df_Kicking, df_AdvancedPassing, df_AdvancedRushing,
        df_AdvancedReceiving, df_AdvancedDefense, df_HomeStarters,
        df_AwayStarters)``.

    Raises:
        Exception: Whatever went wrong while loading or parsing the page. The
            URL is printed first so the failing page can be identified, then
            the original error is re-raised (previously the real error was
            swallowed and an ``UnboundLocalError`` surfaced at ``return``).
    """
    offense_cols = ['Tm','Cmp','Pass_Att','Pass_Yds','Pass_TD','Int','Sk','Sk_Yds','Pass_Lng','Rate',
                    'Rush_Att','Rush_Yds','Rush_TD','Rush_Lng','Tgt','Rec','Rec_Yds','Rec_TD','Rec_Lng',
                    'Fmb','FL']
    defense_cols = ['Tm','Def_Ints','Def_Int_Yds','Def_Int_TD','Def_Int_Lng','Def_Pass_Defended','Def_Sack',
                    'Def_Combined_Tackles','Def_Solo_Tackle','Def_Assisted_Tackles','Def_TFL','Def_QB_Hits',
                    'Def_FR','Def_FR_Yds','Def_FR_TD','Def_FF']
    kick_return_cols = ['Tm','Kick_Returns','Kick_Return_Yds','Yards_Per_Kick_Return','Kick_Return_TDs',
                        'Kick_Return_Long','Punt_Returns','Punt_Return_Yds','Yards_Per_Punt_Return',
                        'Punt_Return_TDs','Punt_Return_Long']
    kicking_cols = ['Tm','XPM','XPA','FGM','FGA','Punts','Punt_Yds','Yards_Per_Punt','Punt_Lng']
    adv_passing_cols = ['Tm','Cmp','Att','Yds','1D','1D%','IAY','IAY/PA','CAY','CAY/Cmp','CAY/PA','YAC',
                        'YAC/Cmp','Drops','Drop%','BadTh','Bad%','Sk','Bltz','Hrry','Hits','Prss','Prss%',
                        'Scrm','Yds/Scr']
    adv_rushing_cols = ['Tm','Att','Yds','1D','YBC','YBC/Att','YAC','YAC/Att','BrkTkl','Att/Br']
    adv_receiving_cols = ['Tm','Tgt','Rec','Yds','TD','1D','YBC','YBC/R','YAC','YAC/R','ADOT','BrkTkl',
                          'Rec/Br','Drop','Drop%','Int','Rat']
    adv_defense_cols = ['Tm','Int','Tgt','Cmp','Cmp%','Yds','Yds/Cmp','Yds/Tgt','TD','Rat','DADOT','Air',
                        'YAC','Blitz','Hrry','QBKD','Sk','Prss','Comb','MTkl','MTkl%']

    def th_texts(table):
        """Return the text of every <th> cell in the table, in document order."""
        return [th.text for th in table.find_all('th')]

    def td_frame(table):
        """Build one row per <tr> from its <td> texts; short rows get missing values."""
        # Built in one shot: DataFrame.append was removed in pandas 2.0 and
        # growing a frame row by row is quadratic anyway.
        return pd.DataFrame(
            [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]
        )

    def split_players(headers, away_start, away_stop, home_start):
        """Slice the away and home player names out of the flat header list.

        ``away_stop`` and ``home_start`` are ``(label, search_from, shift)``
        triples: the boundary is ``headers.index(label, search_from) + shift``.
        """
        away_label, away_from, away_shift = away_stop
        home_label, home_from, home_shift = home_start
        away_end = headers.index(away_label, away_from) + away_shift
        home_begin = headers.index(home_label, home_from) + home_shift
        return headers[away_start:away_end] + headers[home_begin:]

    def stat_frame(table, stat_cols, players, omit=(), blank_to_zero=True):
        """Label the <td> grid, keep only complete rows and attach player names."""
        frame = td_frame(table)
        frame.columns = stat_cols
        # Header/separator rows have no (or too few) <td> cells, so they are NaN here.
        frame = frame.dropna()
        frame['Players'] = players
        frame = frame[['Players'] + [col for col in stat_cols if col not in omit]]
        frame = frame.reset_index(drop=True)
        if blank_to_zero:
            # Empty / whitespace-only cells become the integer 0.
            frame = frame.replace(r'^\s*$', 0, regex=True)
        return frame

    def two_team_table(table, stat_cols, away_start, away_stop, home_start,
                       omit=(), blank_to_zero=True):
        """Parse a table that lists away players first and home players second."""
        players = split_players(th_texts(table), away_start, away_stop, home_start)
        return stat_frame(table, stat_cols, players, omit, blank_to_zero)

    def starters_table(table):
        """Parse a starters table: names follow the first two header cells."""
        return stat_frame(table, ['Pos'], th_texts(table)[2:])

    try:
        # NOTE(review): `executable_path` is the older Selenium calling
        # convention; confirm the installed Selenium version still accepts it.
        driver = webdriver.Chrome(executable_path='chromedriver.exe')
        try:
            driver.get(url)
            page = driver.execute_script('return document.body.innerHTML')
        finally:
            # Always close the browser; otherwise every call leaks a Chrome process.
            driver.quit()

        soup = BeautifulSoup(page, 'html.parser')
        tables = soup.find_all('table', {'class': 'stats_table'})

        #Score Table
        score_cells = td_frame(tables[0]).dropna()
        df_Score = pd.DataFrame()
        df_Score['Tm'] = score_cells.iloc[:, 1]
        df_Score['Final'] = score_cells.iloc[:, -1]
        df_Score['Home/Away'] = ['Away', 'Home']

        #Offense Table ('Sk_Yds' is parsed but not part of the output; blanks are kept)
        df_Offense = two_team_table(tables[5], offense_cols, 27, ('Passing', 2, -1), ('FL', 37, 1),
                                    omit=('Sk_Yds',), blank_to_zero=False)

        #Defense Table (blanks are kept)
        df_Defense = two_team_table(tables[6], defense_cols, 22, ('Def Interceptions', 3, -1),
                                    ('FF', 23, 1), blank_to_zero=False)

        #Kick Returns Table
        kick_table = tables[7]
        kick_headers = th_texts(kick_table)
        try:
            kick_players = split_players(kick_headers, 15, ('Kick Returns', 3, -1), ('Y/R', 16, 3))
        except ValueError:
            # A section marker was not found (presumably only one team has
            # returners listed): take everything after the first header block.
            kick_players = kick_headers[kick_headers.index('Y/R') + 3:]
        df_KickReturns = stat_frame(kick_table, kick_return_cols, kick_players)

        #Kicking Table
        df_Kicking = two_team_table(tables[8], kicking_cols, 13, ('Scoring', 3, -1), ('Lng', 13, 1))

        #Advanced Passing Table
        df_AdvancedPassing = two_team_table(tables[9], adv_passing_cols, 26, ('Player', 3, 0),
                                            ('Yds/Scr', 26, 1))

        #Advanced Rushing Table
        df_AdvancedRushing = two_team_table(tables[10], adv_rushing_cols, 11, ('Player', 3, 0),
                                            ('Att/Br', 11, 1))

        #Advanced Receiving Table
        df_AdvancedReceiving = two_team_table(tables[11], adv_receiving_cols, 18, ('Player', 3, 0),
                                              ('Rat', 18, 1))

        #Advanced Defense
        df_AdvancedDefense = two_team_table(tables[12], adv_defense_cols, 22, ('Player', 3, 0),
                                            ('MTkl%', 22, 1))

        #Home Starters
        df_HomeStarters = starters_table(tables[13])

        #AwayStarters
        df_AwayStarters = starters_table(tables[14])
    except Exception:
        # Report which page failed, then surface the real error to the caller.
        print(url)
        raise

    return df_Score, df_Offense, df_Defense, df_KickReturns, df_Kicking, df_AdvancedPassing, df_AdvancedRushing, df_AdvancedReceiving, df_AdvancedDefense,df_HomeStarters,df_AwayStarters

## Read in csv of URLs

In [5]:
import pandas as pd

# Master list of box-score pages; the columns used below are 'Year' and
# 'Unnamed: 0', and the scraping cells additionally read 'URLs' and 'Week'.
df = pd.read_csv('URL_List.csv')


def _season_urls(all_urls, season):
    """Return the rows of ``all_urls`` whose 'Year' equals ``season``.

    The result is relabelled 0..n-1 (the old labels are discarded) and the
    'Unnamed: 0' column is removed.
    """
    picked = all_urls[all_urls['Year'] == season]
    return picked.reset_index().drop(columns=['index', 'Unnamed: 0'])


df_2012 = _season_urls(df, 2012)
df_2013 = _season_urls(df, 2013)
df_2014 = _season_urls(df, 2014)
df_2015 = _season_urls(df, 2015)
df_2016 = _season_urls(df, 2016)
df_2017 = _season_urls(df, 2017)
# The advanced-table cells further down iterate df_2019, df_2020 and df_2021,
# which were previously never created in this notebook; build every season
# through 2021 so those cells also work after a fresh kernel start.
# NOTE(review): assumes URL_List.csv carries rows for 2018-2021; a season with
# no rows simply yields an empty frame.
df_2018 = _season_urls(df, 2018)
df_2019 = _season_urls(df, 2019)
df_2020 = _season_urls(df, 2020)
df_2021 = _season_urls(df, 2021)

# Run functions

In [6]:
# Empty accumulators, one per scraped table. The scraping loop concatenates
# each game's rows onto these. Every player-level table starts with the same
# four key columns.
_player_keys = ['Year', 'Week', 'Players', 'Tm']

df_Score = pd.DataFrame(columns=['Year', 'Week', 'Tm', 'Final', 'Home/Away'])
df_Offense = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Pass_Att', 'Pass_Yds', 'Pass_TD', 'Int', 'Sk', 'Pass_Lng', 'Rate',
    'Rush_Att', 'Rush_Yds', 'Rush_TD', 'Rush_Lng',
    'Tgt', 'Rec', 'Rec_Yds', 'Rec_TD', 'Rec_Lng',
    'Fmb', 'FL',
])
df_Defense = pd.DataFrame(columns=_player_keys + [
    'Def_Ints', 'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_Lng',
    'Def_Pass_Defended', 'Def_Sack',
    'Def_Combined_Tackles', 'Def_Solo_Tackle', 'Def_Assisted_Tackles',
    'Def_TFL', 'Def_QB_Hits',
    'Def_FR', 'Def_FR_Yds', 'Def_FR_TD', 'Def_FF',
])
df_KickReturns = pd.DataFrame(columns=_player_keys + [
    'Kick_Returns', 'Kick_Return_Yds', 'Yards_Per_Kick_Return',
    'Kick_Return_TDs', 'Kick_Return_Long',
    'Punt_Returns', 'Punt_Return_Yds', 'Yards_Per_Punt_Return',
    'Punt_Return_TDs', 'Punt_Return_Long',
])
df_Kicking = pd.DataFrame(columns=_player_keys + [
    'XPM', 'XPA', 'FGM', 'FGA',
    'Punts', 'Punt_Yds', 'Yards_Per_Punt', 'Punt_Lng',
])
df_AdvancedPassing = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Att', 'Yds', '1D', '1D%', 'IAY', 'IAY/PA',
    'CAY', 'CAY/Cmp', 'CAY/PA', 'YAC', 'YAC/Cmp',
    'Drops', 'Drop%', 'BadTh', 'Bad%',
    'Sk', 'Bltz', 'Hrry', 'Hits', 'Prss', 'Prss%', 'Scrm', 'Yds/Scr',
])
df_AdvancedRushing = pd.DataFrame(columns=_player_keys + [
    'Att', 'Yds', '1D', 'YBC', 'YBC/Att', 'YAC', 'YAC/Att', 'BrkTkl', 'Att/Br',
])
df_AdvancedReceiving = pd.DataFrame(columns=_player_keys + [
    'Tgt', 'Rec', 'Yds', 'TD', '1D', 'YBC', 'YBC/R',
    'YAC', 'YAC/R', 'ADOT', 'BrkTkl', 'Rec/Br',
    'Drop', 'Drop%', 'Int', 'Rat',
])
df_AdvancedDefense = pd.DataFrame(columns=_player_keys + [
    'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat',
    'DADOT', 'Air', 'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk',
    'Prss', 'Comb', 'MTkl', 'MTkl%',
])
# The starter tables carry a position instead of a team column.
df_HomeStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])
df_AwayStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])

In [4]:
# Two 2021 box scores that are scraped separately from the main season run.
df_exceptions = pd.DataFrame({
    'URLs': [
        'https://www.pro-football-reference.com/boxscores/202112180clt.htm',
        'https://www.pro-football-reference.com/boxscores/202112050mia.htm',
    ],
    'Year': [2021, 2021],
    'Week': [15, 13],
})

# Single-game work list with the same three columns.
df_okay = pd.DataFrame({
    'URLs': ['https://www.pro-football-reference.com/boxscores/201912230min.htm'],
    'Year': [2019],
    'Week': [16],
})

In [9]:
%%time

df_okay = df_2012

# Scrape every game in df_okay and append its tables to the running
# accumulators (df_Score, df_Offense, ...). Only seven tables are unpacked
# here; the advanced-stat tables are not collected in this cell.
# NOTE(review): the accumulators are not reset in this cell, so rows gathered
# by an earlier season's run stay in them; confirm that is intended.
for url, week, year in zip(df_okay['URLs'], df_okay['Week'], df_okay['Year']):
    try:
        df_1, df_2, df_3, df_4, df_5, df_10, df_11 = ScrapePFR_Simple_Tables(url)

        # Stamp every table of this game with its week and season.
        for game_table in (df_1, df_2, df_3, df_4, df_5, df_10, df_11):
            game_table['Week'] = int(week)
            game_table['Year'] = int(year)

        df_Score = pd.concat([df_Score, df_1])
        df_Offense = pd.concat([df_Offense, df_2])
        df_Defense = pd.concat([df_Defense, df_3])
        df_KickReturns = pd.concat([df_KickReturns, df_4])
        df_Kicking = pd.concat([df_Kicking, df_5])
        df_HomeStarters = pd.concat([df_HomeStarters, df_10])
        df_AwayStarters = pd.concat([df_AwayStarters, df_11])
    except Exception:
        # A single bad page must not abort the run, but a bare ``except``
        # would also swallow KeyboardInterrupt and make the loop unstoppable.
        print('----------------------------------------------')

https://www.pro-football-reference.com/boxscores/201211040cle.htm
----------------------------------------------
https://www.pro-football-reference.com/boxscores/201212060rai.htm
----------------------------------------------
https://www.pro-football-reference.com/boxscores/201212300clt.htm
----------------------------------------------
Wall time: 3h 56min 55s


In [10]:
# Write each accumulated table to its own CSV for the 2012 run. The four
# advanced-stat tables are not written for this season.
for _table, _csv_name in (
    (df_Score, 'Score_2012.csv'),
    (df_Offense, 'Offense_2012.csv'),
    (df_Defense, 'Defense_2012.csv'),
    (df_KickReturns, 'KickReturns_2012.csv'),
    (df_Kicking, 'Kicking_2012.csv'),
    (df_HomeStarters, 'HomeStarters_2012.csv'),
    (df_AwayStarters, 'AwayStarters_2012.csv'),
):
    _table.to_csv(_csv_name)

In [11]:
%%time

df_okay = df_2013

# Scrape every game in df_okay and append its tables to the running
# accumulators (df_Score, df_Offense, ...). Only seven tables are unpacked
# here; the advanced-stat tables are not collected in this cell.
# NOTE(review): the accumulators are not reset in this cell, so rows gathered
# by an earlier season's run stay in them; confirm that is intended.
for url, week, year in zip(df_okay['URLs'], df_okay['Week'], df_okay['Year']):
    try:
        df_1, df_2, df_3, df_4, df_5, df_10, df_11 = ScrapePFR_Simple_Tables(url)

        # Stamp every table of this game with its week and season.
        for game_table in (df_1, df_2, df_3, df_4, df_5, df_10, df_11):
            game_table['Week'] = int(week)
            game_table['Year'] = int(year)

        df_Score = pd.concat([df_Score, df_1])
        df_Offense = pd.concat([df_Offense, df_2])
        df_Defense = pd.concat([df_Defense, df_3])
        df_KickReturns = pd.concat([df_KickReturns, df_4])
        df_Kicking = pd.concat([df_Kicking, df_5])
        df_HomeStarters = pd.concat([df_HomeStarters, df_10])
        df_AwayStarters = pd.concat([df_AwayStarters, df_11])
    except Exception:
        # A single bad page must not abort the run, but a bare ``except``
        # would also swallow KeyboardInterrupt and make the loop unstoppable.
        print('----------------------------------------------')

https://www.pro-football-reference.com/boxscores/201312220cin.htm
----------------------------------------------
Wall time: 4h 26min 49s


In [12]:
# Write each accumulated table to its own CSV for the 2013 run. The four
# advanced-stat tables are not written for this season.
for _table, _csv_name in (
    (df_Score, 'Score_2013.csv'),
    (df_Offense, 'Offense_2013.csv'),
    (df_Defense, 'Defense_2013.csv'),
    (df_KickReturns, 'KickReturns_2013.csv'),
    (df_Kicking, 'Kicking_2013.csv'),
    (df_HomeStarters, 'HomeStarters_2013.csv'),
    (df_AwayStarters, 'AwayStarters_2013.csv'),
):
    _table.to_csv(_csv_name)

In [None]:
%%time

df_okay = df_2014

# Scrape every game in df_okay and append its tables to the running
# accumulators (df_Score, df_Offense, ...). Only seven tables are unpacked
# here; the advanced-stat tables are not collected in this cell.
# NOTE(review): the accumulators are not reset in this cell, so rows gathered
# by an earlier season's run stay in them; confirm that is intended.
for url, week, year in zip(df_okay['URLs'], df_okay['Week'], df_okay['Year']):
    try:
        df_1, df_2, df_3, df_4, df_5, df_10, df_11 = ScrapePFR_Simple_Tables(url)

        # Stamp every table of this game with its week and season.
        for game_table in (df_1, df_2, df_3, df_4, df_5, df_10, df_11):
            game_table['Week'] = int(week)
            game_table['Year'] = int(year)

        df_Score = pd.concat([df_Score, df_1])
        df_Offense = pd.concat([df_Offense, df_2])
        df_Defense = pd.concat([df_Defense, df_3])
        df_KickReturns = pd.concat([df_KickReturns, df_4])
        df_Kicking = pd.concat([df_Kicking, df_5])
        df_HomeStarters = pd.concat([df_HomeStarters, df_10])
        df_AwayStarters = pd.concat([df_AwayStarters, df_11])
    except Exception:
        # A single bad page must not abort the run, but a bare ``except``
        # would also swallow KeyboardInterrupt and make the loop unstoppable.
        print('----------------------------------------------')

https://www.pro-football-reference.com/boxscores/201410190clt.htm
----------------------------------------------
https://www.pro-football-reference.com/boxscores/201410260nor.htm
----------------------------------------------


In [None]:
# Write each accumulated table to its own CSV for the 2014 run. The four
# advanced-stat tables are not written for this season.
for _table, _csv_name in (
    (df_Score, 'Score_2014.csv'),
    (df_Offense, 'Offense_2014.csv'),
    (df_Defense, 'Defense_2014.csv'),
    (df_KickReturns, 'KickReturns_2014.csv'),
    (df_Kicking, 'Kicking_2014.csv'),
    (df_HomeStarters, 'HomeStarters_2014.csv'),
    (df_AwayStarters, 'AwayStarters_2014.csv'),
):
    _table.to_csv(_csv_name)

In [None]:
%%time

df_okay = df_2015

# Scrape every game in df_okay and append its tables to the running
# accumulators (df_Score, df_Offense, ...). Only seven tables are unpacked
# here; the advanced-stat tables are not collected in this cell.
# NOTE(review): the accumulators are not reset in this cell, so rows gathered
# by an earlier season's run stay in them; confirm that is intended.
for url, week, year in zip(df_okay['URLs'], df_okay['Week'], df_okay['Year']):
    try:
        df_1, df_2, df_3, df_4, df_5, df_10, df_11 = ScrapePFR_Simple_Tables(url)

        # Stamp every table of this game with its week and season.
        for game_table in (df_1, df_2, df_3, df_4, df_5, df_10, df_11):
            game_table['Week'] = int(week)
            game_table['Year'] = int(year)

        df_Score = pd.concat([df_Score, df_1])
        df_Offense = pd.concat([df_Offense, df_2])
        df_Defense = pd.concat([df_Defense, df_3])
        df_KickReturns = pd.concat([df_KickReturns, df_4])
        df_Kicking = pd.concat([df_Kicking, df_5])
        df_HomeStarters = pd.concat([df_HomeStarters, df_10])
        df_AwayStarters = pd.concat([df_AwayStarters, df_11])
    except Exception:
        # A single bad page must not abort the run, but a bare ``except``
        # would also swallow KeyboardInterrupt and make the loop unstoppable.
        print('----------------------------------------------')

In [None]:
# Write each accumulated table to its own CSV for the 2015 run. The four
# advanced-stat tables are not written for this season.
for _table, _csv_name in (
    (df_Score, 'Score_2015.csv'),
    (df_Offense, 'Offense_2015.csv'),
    (df_Defense, 'Defense_2015.csv'),
    (df_KickReturns, 'KickReturns_2015.csv'),
    (df_Kicking, 'Kicking_2015.csv'),
    (df_HomeStarters, 'HomeStarters_2015.csv'),
    (df_AwayStarters, 'AwayStarters_2015.csv'),
):
    _table.to_csv(_csv_name)

In [7]:
# Start from empty accumulators before the next season's scrape. Every
# player-level table starts with the same four key columns.
_player_keys = ['Year', 'Week', 'Players', 'Tm']

df_Score = pd.DataFrame(columns=['Year', 'Week', 'Tm', 'Final', 'Home/Away'])
df_Offense = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Pass_Att', 'Pass_Yds', 'Pass_TD', 'Int', 'Sk', 'Pass_Lng', 'Rate',
    'Rush_Att', 'Rush_Yds', 'Rush_TD', 'Rush_Lng',
    'Tgt', 'Rec', 'Rec_Yds', 'Rec_TD', 'Rec_Lng',
    'Fmb', 'FL',
])
df_Defense = pd.DataFrame(columns=_player_keys + [
    'Def_Ints', 'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_Lng',
    'Def_Pass_Defended', 'Def_Sack',
    'Def_Combined_Tackles', 'Def_Solo_Tackle', 'Def_Assisted_Tackles',
    'Def_TFL', 'Def_QB_Hits',
    'Def_FR', 'Def_FR_Yds', 'Def_FR_TD', 'Def_FF',
])
df_KickReturns = pd.DataFrame(columns=_player_keys + [
    'Kick_Returns', 'Kick_Return_Yds', 'Yards_Per_Kick_Return',
    'Kick_Return_TDs', 'Kick_Return_Long',
    'Punt_Returns', 'Punt_Return_Yds', 'Yards_Per_Punt_Return',
    'Punt_Return_TDs', 'Punt_Return_Long',
])
df_Kicking = pd.DataFrame(columns=_player_keys + [
    'XPM', 'XPA', 'FGM', 'FGA',
    'Punts', 'Punt_Yds', 'Yards_Per_Punt', 'Punt_Lng',
])
df_AdvancedPassing = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Att', 'Yds', '1D', '1D%', 'IAY', 'IAY/PA',
    'CAY', 'CAY/Cmp', 'CAY/PA', 'YAC', 'YAC/Cmp',
    'Drops', 'Drop%', 'BadTh', 'Bad%',
    'Sk', 'Bltz', 'Hrry', 'Hits', 'Prss', 'Prss%', 'Scrm', 'Yds/Scr',
])
df_AdvancedRushing = pd.DataFrame(columns=_player_keys + [
    'Att', 'Yds', '1D', 'YBC', 'YBC/Att', 'YAC', 'YAC/Att', 'BrkTkl', 'Att/Br',
])
df_AdvancedReceiving = pd.DataFrame(columns=_player_keys + [
    'Tgt', 'Rec', 'Yds', 'TD', '1D', 'YBC', 'YBC/R',
    'YAC', 'YAC/R', 'ADOT', 'BrkTkl', 'Rec/Br',
    'Drop', 'Drop%', 'Int', 'Rat',
])
df_AdvancedDefense = pd.DataFrame(columns=_player_keys + [
    'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat',
    'DADOT', 'Air', 'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk',
    'Prss', 'Comb', 'MTkl', 'MTkl%',
])
# The starter tables carry a position instead of a team column.
df_HomeStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])
df_AwayStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])

In [8]:
%%time

# Scrape every 2019 game, including the advanced-stat tables, and append the
# results to the running accumulators (df_Score, df_Offense, ...).
for url, week, year in zip(df_2019['URLs'], df_2019['Week'], df_2019['Year']):
    try:
        (df_1, df_2, df_3, df_4, df_5, df_6,
         df_7, df_8, df_9, df_10, df_11) = ScrapePFR(url)

        # Stamp every table of this game with its week and season.
        for game_table in (df_1, df_2, df_3, df_4, df_5, df_6,
                           df_7, df_8, df_9, df_10, df_11):
            game_table['Week'] = int(week)
            game_table['Year'] = int(year)

        df_Score = pd.concat([df_Score, df_1])
        df_Offense = pd.concat([df_Offense, df_2])
        df_Defense = pd.concat([df_Defense, df_3])
        df_KickReturns = pd.concat([df_KickReturns, df_4])
        df_Kicking = pd.concat([df_Kicking, df_5])
        df_AdvancedPassing = pd.concat([df_AdvancedPassing, df_6])
        df_AdvancedRushing = pd.concat([df_AdvancedRushing, df_7])
        df_AdvancedReceiving = pd.concat([df_AdvancedReceiving, df_8])
        df_AdvancedDefense = pd.concat([df_AdvancedDefense, df_9])
        df_HomeStarters = pd.concat([df_HomeStarters, df_10])
        df_AwayStarters = pd.concat([df_AwayStarters, df_11])
    except Exception:
        # A single bad page must not abort the run, but a bare ``except``
        # would also swallow KeyboardInterrupt and make the loop unstoppable.
        print('----------------------------------------------')

https://www.pro-football-reference.com/boxscores/201909080min.htm
----------------------------------------------
https://www.pro-football-reference.com/boxscores/201912230min.htm
----------------------------------------------
Wall time: 2h 11min 41s


In [9]:
# Write every accumulated table, advanced stats included, to its 2019 CSV.
for _table, _csv_name in (
    (df_Score, 'Score_2019.csv'),
    (df_Offense, 'Offense_2019.csv'),
    (df_Defense, 'Defense_2019.csv'),
    (df_KickReturns, 'KickReturns_2019.csv'),
    (df_Kicking, 'Kicking_2019.csv'),
    (df_AdvancedPassing, 'AdvancedPassing_2019.csv'),
    (df_AdvancedRushing, 'AdvancedRushing_2019.csv'),
    (df_AdvancedReceiving, 'AdvancedReceiving_2019.csv'),
    (df_AdvancedDefense, 'AdvancedDefense_2019.csv'),
    (df_HomeStarters, 'HomeStarters_2019.csv'),
    (df_AwayStarters, 'AwayStarters_2019.csv'),
):
    _table.to_csv(_csv_name)

In [10]:
# Start from empty accumulators before the next season's scrape. Every
# player-level table starts with the same four key columns.
_player_keys = ['Year', 'Week', 'Players', 'Tm']

df_Score = pd.DataFrame(columns=['Year', 'Week', 'Tm', 'Final', 'Home/Away'])
df_Offense = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Pass_Att', 'Pass_Yds', 'Pass_TD', 'Int', 'Sk', 'Pass_Lng', 'Rate',
    'Rush_Att', 'Rush_Yds', 'Rush_TD', 'Rush_Lng',
    'Tgt', 'Rec', 'Rec_Yds', 'Rec_TD', 'Rec_Lng',
    'Fmb', 'FL',
])
df_Defense = pd.DataFrame(columns=_player_keys + [
    'Def_Ints', 'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_Lng',
    'Def_Pass_Defended', 'Def_Sack',
    'Def_Combined_Tackles', 'Def_Solo_Tackle', 'Def_Assisted_Tackles',
    'Def_TFL', 'Def_QB_Hits',
    'Def_FR', 'Def_FR_Yds', 'Def_FR_TD', 'Def_FF',
])
df_KickReturns = pd.DataFrame(columns=_player_keys + [
    'Kick_Returns', 'Kick_Return_Yds', 'Yards_Per_Kick_Return',
    'Kick_Return_TDs', 'Kick_Return_Long',
    'Punt_Returns', 'Punt_Return_Yds', 'Yards_Per_Punt_Return',
    'Punt_Return_TDs', 'Punt_Return_Long',
])
df_Kicking = pd.DataFrame(columns=_player_keys + [
    'XPM', 'XPA', 'FGM', 'FGA',
    'Punts', 'Punt_Yds', 'Yards_Per_Punt', 'Punt_Lng',
])
df_AdvancedPassing = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Att', 'Yds', '1D', '1D%', 'IAY', 'IAY/PA',
    'CAY', 'CAY/Cmp', 'CAY/PA', 'YAC', 'YAC/Cmp',
    'Drops', 'Drop%', 'BadTh', 'Bad%',
    'Sk', 'Bltz', 'Hrry', 'Hits', 'Prss', 'Prss%', 'Scrm', 'Yds/Scr',
])
df_AdvancedRushing = pd.DataFrame(columns=_player_keys + [
    'Att', 'Yds', '1D', 'YBC', 'YBC/Att', 'YAC', 'YAC/Att', 'BrkTkl', 'Att/Br',
])
df_AdvancedReceiving = pd.DataFrame(columns=_player_keys + [
    'Tgt', 'Rec', 'Yds', 'TD', '1D', 'YBC', 'YBC/R',
    'YAC', 'YAC/R', 'ADOT', 'BrkTkl', 'Rec/Br',
    'Drop', 'Drop%', 'Int', 'Rat',
])
df_AdvancedDefense = pd.DataFrame(columns=_player_keys + [
    'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat',
    'DADOT', 'Air', 'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk',
    'Prss', 'Comb', 'MTkl', 'MTkl%',
])
# The starter tables carry a position instead of a team column.
df_HomeStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])
df_AwayStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])

In [11]:
%%time

# Scrape every 2020 game, including the advanced-stat tables, and append the
# results to the running accumulators (df_Score, df_Offense, ...).
for url, week, year in zip(df_2020['URLs'], df_2020['Week'], df_2020['Year']):
    try:
        (df_1, df_2, df_3, df_4, df_5, df_6,
         df_7, df_8, df_9, df_10, df_11) = ScrapePFR(url)

        # Stamp every table of this game with its week and season.
        for game_table in (df_1, df_2, df_3, df_4, df_5, df_6,
                           df_7, df_8, df_9, df_10, df_11):
            game_table['Week'] = int(week)
            game_table['Year'] = int(year)

        df_Score = pd.concat([df_Score, df_1])
        df_Offense = pd.concat([df_Offense, df_2])
        df_Defense = pd.concat([df_Defense, df_3])
        df_KickReturns = pd.concat([df_KickReturns, df_4])
        df_Kicking = pd.concat([df_Kicking, df_5])
        df_AdvancedPassing = pd.concat([df_AdvancedPassing, df_6])
        df_AdvancedRushing = pd.concat([df_AdvancedRushing, df_7])
        df_AdvancedReceiving = pd.concat([df_AdvancedReceiving, df_8])
        df_AdvancedDefense = pd.concat([df_AdvancedDefense, df_9])
        df_HomeStarters = pd.concat([df_HomeStarters, df_10])
        df_AwayStarters = pd.concat([df_AwayStarters, df_11])
    except Exception:
        # A single bad page must not abort the run, but a bare ``except``
        # would also swallow KeyboardInterrupt and make the loop unstoppable.
        print('----------------------------------------------')

Wall time: 3h 22min 40s


In [12]:
# Write every accumulated table, advanced stats included, to its 2020 CSV.
for _table, _csv_name in (
    (df_Score, 'Score_2020.csv'),
    (df_Offense, 'Offense_2020.csv'),
    (df_Defense, 'Defense_2020.csv'),
    (df_KickReturns, 'KickReturns_2020.csv'),
    (df_Kicking, 'Kicking_2020.csv'),
    (df_AdvancedPassing, 'AdvancedPassing_2020.csv'),
    (df_AdvancedRushing, 'AdvancedRushing_2020.csv'),
    (df_AdvancedReceiving, 'AdvancedReceiving_2020.csv'),
    (df_AdvancedDefense, 'AdvancedDefense_2020.csv'),
    (df_HomeStarters, 'HomeStarters_2020.csv'),
    (df_AwayStarters, 'AwayStarters_2020.csv'),
):
    _table.to_csv(_csv_name)

In [4]:
# Start from empty accumulators before the next season's scrape. Every
# player-level table starts with the same four key columns.
_player_keys = ['Year', 'Week', 'Players', 'Tm']

df_Score = pd.DataFrame(columns=['Year', 'Week', 'Tm', 'Final', 'Home/Away'])
df_Offense = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Pass_Att', 'Pass_Yds', 'Pass_TD', 'Int', 'Sk', 'Pass_Lng', 'Rate',
    'Rush_Att', 'Rush_Yds', 'Rush_TD', 'Rush_Lng',
    'Tgt', 'Rec', 'Rec_Yds', 'Rec_TD', 'Rec_Lng',
    'Fmb', 'FL',
])
df_Defense = pd.DataFrame(columns=_player_keys + [
    'Def_Ints', 'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_Lng',
    'Def_Pass_Defended', 'Def_Sack',
    'Def_Combined_Tackles', 'Def_Solo_Tackle', 'Def_Assisted_Tackles',
    'Def_TFL', 'Def_QB_Hits',
    'Def_FR', 'Def_FR_Yds', 'Def_FR_TD', 'Def_FF',
])
df_KickReturns = pd.DataFrame(columns=_player_keys + [
    'Kick_Returns', 'Kick_Return_Yds', 'Yards_Per_Kick_Return',
    'Kick_Return_TDs', 'Kick_Return_Long',
    'Punt_Returns', 'Punt_Return_Yds', 'Yards_Per_Punt_Return',
    'Punt_Return_TDs', 'Punt_Return_Long',
])
df_Kicking = pd.DataFrame(columns=_player_keys + [
    'XPM', 'XPA', 'FGM', 'FGA',
    'Punts', 'Punt_Yds', 'Yards_Per_Punt', 'Punt_Lng',
])
df_AdvancedPassing = pd.DataFrame(columns=_player_keys + [
    'Cmp', 'Att', 'Yds', '1D', '1D%', 'IAY', 'IAY/PA',
    'CAY', 'CAY/Cmp', 'CAY/PA', 'YAC', 'YAC/Cmp',
    'Drops', 'Drop%', 'BadTh', 'Bad%',
    'Sk', 'Bltz', 'Hrry', 'Hits', 'Prss', 'Prss%', 'Scrm', 'Yds/Scr',
])
df_AdvancedRushing = pd.DataFrame(columns=_player_keys + [
    'Att', 'Yds', '1D', 'YBC', 'YBC/Att', 'YAC', 'YAC/Att', 'BrkTkl', 'Att/Br',
])
df_AdvancedReceiving = pd.DataFrame(columns=_player_keys + [
    'Tgt', 'Rec', 'Yds', 'TD', '1D', 'YBC', 'YBC/R',
    'YAC', 'YAC/R', 'ADOT', 'BrkTkl', 'Rec/Br',
    'Drop', 'Drop%', 'Int', 'Rat',
])
df_AdvancedDefense = pd.DataFrame(columns=_player_keys + [
    'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat',
    'DADOT', 'Air', 'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk',
    'Prss', 'Comb', 'MTkl', 'MTkl%',
])
# The starter tables carry a position instead of a team column.
df_HomeStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])
df_AwayStarters = pd.DataFrame(columns=['Year', 'Week', 'Players', 'Pos'])

In [5]:
%%time

# Scrape every 2021 game, including the advanced-stat tables, and append the
# results to the running accumulators (df_Score, df_Offense, ...).
for url, week, year in zip(df_2021['URLs'], df_2021['Week'], df_2021['Year']):
    try:
        (df_1, df_2, df_3, df_4, df_5, df_6,
         df_7, df_8, df_9, df_10, df_11) = ScrapePFR(url)

        # Stamp every table of this game with its week and season.
        for game_table in (df_1, df_2, df_3, df_4, df_5, df_6,
                           df_7, df_8, df_9, df_10, df_11):
            game_table['Week'] = int(week)
            game_table['Year'] = int(year)

        df_Score = pd.concat([df_Score, df_1])
        df_Offense = pd.concat([df_Offense, df_2])
        df_Defense = pd.concat([df_Defense, df_3])
        df_KickReturns = pd.concat([df_KickReturns, df_4])
        df_Kicking = pd.concat([df_Kicking, df_5])
        df_AdvancedPassing = pd.concat([df_AdvancedPassing, df_6])
        df_AdvancedRushing = pd.concat([df_AdvancedRushing, df_7])
        df_AdvancedReceiving = pd.concat([df_AdvancedReceiving, df_8])
        df_AdvancedDefense = pd.concat([df_AdvancedDefense, df_9])
        df_HomeStarters = pd.concat([df_HomeStarters, df_10])
        df_AwayStarters = pd.concat([df_AwayStarters, df_11])
    except Exception:
        # A single bad page must not abort the run, but a bare ``except``
        # would also swallow KeyboardInterrupt and make the loop unstoppable.
        print('----------------------------------------------')

https://www.pro-football-reference.com/boxscores/202112050mia.htm
----------------------------------------------
https://www.pro-football-reference.com/boxscores/202112180clt.htm
----------------------------------------------
Wall time: 2h 40min 58s


In [6]:
# Write every accumulated table, advanced stats included, to its 2021 CSV.
for _table, _csv_name in (
    (df_Score, 'Score_2021.csv'),
    (df_Offense, 'Offense_2021.csv'),
    (df_Defense, 'Defense_2021.csv'),
    (df_KickReturns, 'KickReturns_2021.csv'),
    (df_Kicking, 'Kicking_2021.csv'),
    (df_AdvancedPassing, 'AdvancedPassing_2021.csv'),
    (df_AdvancedRushing, 'AdvancedRushing_2021.csv'),
    (df_AdvancedReceiving, 'AdvancedReceiving_2021.csv'),
    (df_AdvancedDefense, 'AdvancedDefense_2021.csv'),
    (df_HomeStarters, 'HomeStarters_2021.csv'),
    (df_AwayStarters, 'AwayStarters_2021.csv'),
):
    _table.to_csv(_csv_name)

In [58]:
# Write every accumulated table to its '*_Exceptions.csv' file.
for _table, _csv_name in (
    (df_Score, 'Score_Exceptions.csv'),
    (df_Offense, 'Offense_Exceptions.csv'),
    (df_Defense, 'Defense_Exceptions.csv'),
    (df_KickReturns, 'KickReturns_Exceptions.csv'),
    (df_Kicking, 'Kicking_Exceptions.csv'),
    (df_AdvancedPassing, 'AdvancedPassing_Exceptions.csv'),
    (df_AdvancedRushing, 'AdvancedRushing_Exceptions.csv'),
    (df_AdvancedReceiving, 'AdvancedReceiving_Exceptions.csv'),
    (df_AdvancedDefense, 'AdvancedDefense_Exceptions.csv'),
    (df_HomeStarters, 'HomeStarters_Exceptions.csv'),
    (df_AwayStarters, 'AwayStarters_Exceptions.csv'),
):
    _table.to_csv(_csv_name)

## Individual functions 

## Score Table

In [5]:
def GetScore(url):
    """Scrape the final score of one box-score page.

    Parameters
    ----------
    url : str
        Address of the box-score page to load in Chrome.

    Returns
    -------
    pandas.DataFrame
        Columns 'Tm', 'Final' and 'Home/Away'. The first row is labelled
        'Away' and the second 'Home'.

    Raises
    ------
    IndexError
        If the page has no table with class 'stats_table'.
    ValueError
        If the score table does not yield exactly two complete rows.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Always shut the browser down; otherwise every call leaves a Chrome
        # process behind.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[0]

    # One list of cell texts per <tr>, built into a frame in a single step
    # (DataFrame.append was removed in pandas 2.0). Rows without <td> cells
    # come out all-missing and are discarded by dropna().
    cell_rows = [[td.text for td in tr.find_all('td')]
                 for tr in table.find_all('tr')]
    df = pd.DataFrame(cell_rows).dropna()

    scores = pd.DataFrame()
    scores['Tm'] = df.iloc[:, 1]
    scores['Final'] = df.iloc[:, -1]  # the last column holds the final score
    scores['Home/Away'] = ['Away', 'Home']
    return scores

## Offense Table

In [6]:
def GetOffense(url):
    """Scrape the player offense table of one box-score page.

    Parameters
    ----------
    url : str
        Address of the box-score page to load in Chrome.

    Returns
    -------
    pandas.DataFrame
        One row per player, indexed 0..n-1, with 'Players', 'Tm' and the
        passing, rushing, receiving and fumble columns ('Sk_Yds' is read from
        the page but not returned).

    Raises
    ------
    IndexError
        If the page has fewer than seven 'stats_table' tables.
    ValueError
        If the expected header labels are missing, or the number of player
        names or columns does not match the data rows.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Always shut the browser down; otherwise every call leaves a Chrome
        # process behind.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[6]
    header_cells = [th.text for th in table.find_all('th')]

    # One list of cell texts per <tr>, built into a frame in a single step
    # (DataFrame.append was removed in pandas 2.0). Rows without <td> cells
    # come out all-missing and are discarded by dropna() below.
    cell_rows = [[td.text for td in tr.find_all('td')]
                 for tr in table.find_all('tr')]
    df = pd.DataFrame(cell_rows)

    # Player names sit in <th> cells. Away names run from position 27 up to
    # one cell before the next 'Passing' header; home names follow the next
    # 'FL' header.
    # NOTE(review): 27 and 37 are fixed offsets into the header cells,
    # presumably the size of the column-header block; confirm against the page.
    away_players = header_cells[27:header_cells.index('Passing', 2) - 1]
    home_players = header_cells[header_cells.index('FL', 37) + 1:]

    df.columns = ['Tm', 'Cmp', 'Pass_Att', 'Pass_Yds', 'Pass_TD', 'Int', 'Sk',
                  'Sk_Yds', 'Pass_Lng', 'Rate', 'Rush_Att', 'Rush_Yds',
                  'Rush_TD', 'Rush_Lng', 'Tgt', 'Rec', 'Rec_Yds', 'Rec_TD',
                  'Rec_Lng', 'Fmb', 'FL']
    df = df.dropna()
    df['Players'] = away_players + home_players

    df = df[['Players', 'Tm', 'Cmp', 'Pass_Att', 'Pass_Yds', 'Pass_TD', 'Int',
             'Sk', 'Pass_Lng', 'Rate', 'Rush_Att', 'Rush_Yds', 'Rush_TD',
             'Rush_Lng', 'Tgt', 'Rec', 'Rec_Yds', 'Rec_TD', 'Rec_Lng', 'Fmb',
             'FL']]

    return df.reset_index(drop=True)

## Defense Table

In [7]:
def GetDefense(url):
    """Scrape the defense stats table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 7 among the page's ``stats_table`` tables is parsed.  Stat values
    are read from the ``<td>`` cells of each row, player names from the
    table's ``<th>`` cells.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the
        ``Def_*`` columns.  Values are the raw cell strings.

    Raises:
        IndexError: The page has fewer than 8 ``stats_table`` tables.
        ValueError: A header marker is missing, or the number of parsed
            columns / players does not match what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[7]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # Away players start at header index 22 and stop one cell before the
    # next 'Def Interceptions' header; home players follow the first 'FF'
    # found at or after index 23.
    away_players = cols[22:cols.index('Def Interceptions', 3) - 1]
    home_players = cols[cols.index('FF', 23) + 1:]
    players = away_players + home_players

    df.columns = ['Tm', 'Def_Ints', 'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_Lng', 'Def_Pass_Defended', 'Def_Sack',
                  'Def_Combined_Tackles', 'Def_Solo_Tackle', 'Def_Assisted_Tackles', 'Def_TFL', 'Def_QB_Hits',
                  'Def_FR', 'Def_FR_Yds', 'Def_FR_TD', 'Def_FF']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'Def_Ints', 'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_Lng', 'Def_Pass_Defended',
             'Def_Sack', 'Def_Combined_Tackles', 'Def_Solo_Tackle', 'Def_Assisted_Tackles', 'Def_TFL',
             'Def_QB_Hits', 'Def_FR', 'Def_FR_Yds', 'Def_FR_TD', 'Def_FF']]

    return df.reset_index(drop=True)

## Kick Returns Table

In [8]:
def GetKickReturns(url):
    """Scrape the kick/punt returns table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 8 among the page's ``stats_table`` tables is parsed.  Stat values
    are read from the ``<td>`` cells of each row, player names from the
    table's ``<th>`` cells.  Blank cells are replaced by ``0``.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the kick
        and punt return columns.

    Raises:
        IndexError: The page has fewer than 9 ``stats_table`` tables.
        ValueError: No 'Y/R' header exists at all, or the number of parsed
            columns / players does not match what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[8]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    try:
        # Regular layout: away players start at header index 15 and stop one
        # cell before the next 'Kick Returns' header; home players start
        # three cells after the first 'Y/R' found at or after index 16.
        away_players = cols[15:cols.index('Kick Returns', 3) - 1]
        home_players = cols[cols.index('Y/R', 16) + 3:]
        players = away_players + home_players
    except ValueError:
        # list.index() found no such marker (presumably only one block of
        # players is present -- TODO confirm): take everything three cells
        # after the first 'Y/R' header as the player list.
        players = cols[cols.index('Y/R') + 3:]

    df.columns = ['Tm', 'Kick_Returns', 'Kick_Return_Yds', 'Yards_Per_Kick_Return', 'Kick_Return_TDs',
                  'Kick_Return_Long', 'Punt_Returns', 'Punt_Return_Yds', 'Yards_Per_Punt_Return',
                  'Punt_Return_TDs', 'Punt_Return_Long']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'Kick_Returns', 'Kick_Return_Yds', 'Yards_Per_Kick_Return', 'Kick_Return_TDs',
             'Kick_Return_Long', 'Punt_Returns', 'Punt_Return_Yds', 'Yards_Per_Punt_Return',
             'Punt_Return_TDs', 'Punt_Return_Long']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

## Kicking Table

In [9]:
def GetKicking(url):
    """Scrape the kicking/punting table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 9 among the page's ``stats_table`` tables is parsed.  Stat values
    are read from the ``<td>`` cells of each row, player names from the
    table's ``<th>`` cells.  Blank cells are replaced by ``0``.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the
        extra-point, field-goal and punt columns.

    Raises:
        IndexError: The page has fewer than 10 ``stats_table`` tables.
        ValueError: A header marker is missing, or the number of parsed
            columns / players does not match what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[9]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # Away players start at header index 13 and stop one cell before the
    # next 'Scoring' header; home players follow the first 'Lng' found at or
    # after index 13.
    away_players = cols[13:cols.index('Scoring', 3) - 1]
    home_players = cols[cols.index('Lng', 13) + 1:]
    players = away_players + home_players

    df.columns = ['Tm', 'XPM', 'XPA', 'FGM', 'FGA', 'Punts', 'Punt_Yds', 'Yards_Per_Punt', 'Punt_Lng']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'XPM', 'XPA', 'FGM', 'FGA', 'Punts', 'Punt_Yds', 'Yards_Per_Punt', 'Punt_Lng']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

## Advanced Passing Table

In [10]:
def GetAdvancedPassing(url):
    """Scrape the advanced passing table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 10 among the page's ``stats_table`` tables is parsed.  Stat
    values are read from the ``<td>`` cells of each row, player names from
    the table's ``<th>`` cells.  Blank cells are replaced by ``0``.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the
        advanced passing columns.

    Raises:
        IndexError: The page has fewer than 11 ``stats_table`` tables.
        ValueError: A header marker is missing, or the number of parsed
            columns / players does not match what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[10]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # Away players start at header index 26 and stop at the next 'Player'
    # header (searched from index 3); home players follow the first
    # 'Yds/Scr' found at or after index 26.
    away_players = cols[26:cols.index('Player', 3)]
    home_players = cols[cols.index('Yds/Scr', 26) + 1:]
    players = away_players + home_players

    df.columns = ['Tm', 'Cmp', 'Att', 'Yds', '1D', '1D%', 'IAY', 'IAY/PA', 'CAY', 'CAY/Cmp', 'CAY/PA', 'YAC',
                  'YAC/Cmp', 'Drops', 'Drop%', 'BadTh', 'Bad%', 'Sk', 'Bltz', 'Hrry', 'Hits', 'Prss', 'Prss%',
                  'Scrm', 'Yds/Scr']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'Cmp', 'Att', 'Yds', '1D', '1D%', 'IAY', 'IAY/PA', 'CAY', 'CAY/Cmp', 'CAY/PA',
             'YAC', 'YAC/Cmp', 'Drops', 'Drop%', 'BadTh', 'Bad%', 'Sk', 'Bltz', 'Hrry', 'Hits', 'Prss',
             'Prss%', 'Scrm', 'Yds/Scr']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

## Advanced Rushing Table

In [11]:
def GetAdvancedRushing(url):
    """Scrape the advanced rushing table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 11 among the page's ``stats_table`` tables is parsed.  Stat
    values are read from the ``<td>`` cells of each row, player names from
    the table's ``<th>`` cells.  Blank cells are replaced by ``0``.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the
        advanced rushing columns.

    Raises:
        IndexError: The page has fewer than 12 ``stats_table`` tables.
        ValueError: A header marker is missing, or the number of parsed
            columns / players does not match what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[11]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # Away players start at header index 11 and stop at the next 'Player'
    # header (searched from index 3); home players follow the first
    # 'Att/Br' found at or after index 11.
    away_players = cols[11:cols.index('Player', 3)]
    home_players = cols[cols.index('Att/Br', 11) + 1:]
    players = away_players + home_players

    df.columns = ['Tm', 'Att', 'Yds', '1D', 'YBC', 'YBC/Att', 'YAC', 'YAC/Att', 'BrkTkl', 'Att/Br']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'Att', 'Yds', '1D', 'YBC', 'YBC/Att', 'YAC', 'YAC/Att', 'BrkTkl', 'Att/Br']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

## Advanced Receiving Table

In [12]:
def GetAdvancedReceiving(url):
    """Scrape the advanced receiving table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 12 among the page's ``stats_table`` tables is parsed.  Stat
    values are read from the ``<td>`` cells of each row, player names from
    the table's ``<th>`` cells.  Blank cells are replaced by ``0``.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the
        advanced receiving columns.

    Raises:
        IndexError: The page has fewer than 13 ``stats_table`` tables.
        ValueError: A header marker is missing, or the number of parsed
            columns / players does not match what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[12]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # Away players start at header index 18 and stop at the next 'Player'
    # header (searched from index 3); home players follow the first 'Rat'
    # found at or after index 18.
    away_players = cols[18:cols.index('Player', 3)]
    home_players = cols[cols.index('Rat', 18) + 1:]
    players = away_players + home_players

    df.columns = ['Tm', 'Tgt', 'Rec', 'Yds', 'TD', '1D', 'YBC', 'YBC/R', 'YAC', 'YAC/R', 'ADOT', 'BrkTkl',
                  'Rec/Br', 'Drop', 'Drop%', 'Int', 'Rat']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'Tgt', 'Rec', 'Yds', 'TD', '1D', 'YBC', 'YBC/R', 'YAC', 'YAC/R', 'ADOT',
             'BrkTkl', 'Rec/Br', 'Drop', 'Drop%', 'Int', 'Rat']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

## Advanced Defense Table

In [13]:
def GetAdvancedDefense(url):
    """Scrape the advanced defense table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 13 among the page's ``stats_table`` tables is parsed.  Stat
    values are read from the ``<td>`` cells of each row, player names from
    the table's ``<th>`` cells.  Blank cells are replaced by ``0``.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the
        advanced defense columns.

    Raises:
        IndexError: The page has fewer than 14 ``stats_table`` tables.
        ValueError: A header marker is missing, or the number of parsed
            columns / players does not match what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[13]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # Away players start at header index 22 and stop at the next 'Player'
    # header (searched from index 3); home players follow the first 'MTkl%'
    # found at or after index 22.
    away_players = cols[22:cols.index('Player', 3)]
    home_players = cols[cols.index('MTkl%', 22) + 1:]
    players = away_players + home_players

    df.columns = ['Tm', 'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat', 'DADOT', 'Air',
                  'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk', 'Prss', 'Comb', 'MTkl', 'MTkl%']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat', 'DADOT',
             'Air', 'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk', 'Prss', 'Comb', 'MTkl', 'MTkl%']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

In [83]:
def GetAdvancedDefenseException(url):
    """Scrape an irregularly laid-out advanced defense table into a DataFrame.

    Same table (position 13 among the page's ``stats_table`` tables) and same
    output columns as the regular advanced defense scraper, but the player
    names are taken from four fixed ``<th>`` slices instead of being located
    through header markers.  The slice positions are hard-coded and appear
    to be tailored to the single box score this notebook calls it with
    (``201909080min``), so they should not be expected to fit other pages.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with ``Players``, ``Tm`` and the
        advanced defense columns; blank cells are replaced by ``0``.

    Raises:
        IndexError: The page has fewer than 14 ``stats_table`` tables.
        ValueError: The number of parsed columns / players does not match
            what is expected.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[13]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # Players are split over four header runs; they are concatenated in the
    # order away, home, away, home, which must match the order of the data
    # rows that survive dropna().
    away_players_1 = cols[22:24]
    home_players_1 = cols[46:50]
    away_players_2 = cols[72:83]
    home_players_2 = cols[105:]
    players = away_players_1 + home_players_1 + away_players_2 + home_players_2

    df.columns = ['Tm', 'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat', 'DADOT', 'Air',
                  'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk', 'Prss', 'Comb', 'MTkl', 'MTkl%']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Tm', 'Int', 'Tgt', 'Cmp', 'Cmp%', 'Yds', 'Yds/Cmp', 'Yds/Tgt', 'TD', 'Rat', 'DADOT',
             'Air', 'YAC', 'Blitz', 'Hrry', 'QBKD', 'Sk', 'Prss', 'Comb', 'MTkl', 'MTkl%']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

## Home Starters Table

In [14]:
def GetHomeStarters(url):
    """Scrape the home starters table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 14 among the page's ``stats_table`` tables is parsed.  Each data
    row supplies one ``<td>`` (the position); player names are the table's
    ``<th>`` cells from index 2 onward.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with the columns ``Players`` and
        ``Pos``; blank cells are replaced by ``0``.

    Raises:
        IndexError: The page has fewer than 15 ``stats_table`` tables.
        ValueError: The table does not have exactly one data column, or the
            number of players does not match the number of data rows.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[14]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # The first two <th> cells are skipped (presumably the column titles);
    # the rest are taken as player names.
    players = cols[2:]

    df.columns = ['Pos']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Pos']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

## Away Starters Table

In [15]:
def GetAwayStarters(url):
    """Scrape the away starters table of a box-score page into a DataFrame.

    The page at ``url`` is loaded in Chrome through Selenium and the table at
    position 15 among the page's ``stats_table`` tables is parsed.  Each data
    row supplies one ``<td>`` (the position); player names are the table's
    ``<th>`` cells from index 2 onward.

    Args:
        url: Address of the box-score page to load.

    Returns:
        A DataFrame re-indexed from 0 with the columns ``Players`` and
        ``Pos``; blank cells are replaced by ``0``.

    Raises:
        IndexError: The page has fewer than 16 ``stats_table`` tables.
        ValueError: The table does not have exactly one data column, or the
            number of players does not match the number of data rows.
    """
    driver = webdriver.Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        page = driver.execute_script('return document.body.innerHTML')
    finally:
        # Close the browser even when loading fails; otherwise every call
        # leaves a Chrome/chromedriver process running.
        driver.quit()
    soup = BeautifulSoup(page, 'html.parser')

    table = soup.find_all('table', {'class': 'stats_table'})[15]
    cols = [each.text for each in table.find_all('th')]

    # One list of <td> texts per <tr>; rows without <td> cells become
    # all-missing rows that dropna() removes below.  Built in one call
    # because DataFrame.append no longer exists in pandas 2.x.
    df = pd.DataFrame([[cell.text for cell in row.find_all('td')]
                       for row in table.find_all('tr')])

    # The first two <th> cells are skipped (presumably the column titles);
    # the rest are taken as player names.
    players = cols[2:]

    df.columns = ['Pos']
    df = df.dropna()
    df['Players'] = players

    df = df[['Players', 'Pos']]

    df = df.reset_index(drop=True)
    # Empty or whitespace-only cells become the integer 0.
    df = df.replace(r'^\s*$', 0, regex=True)

    return df

In [85]:
# One-row schedule frame: the box-score URL to scrape plus its season and
# week.  The scalar Year/Week values are broadcast to the single url row.
df = pd.DataFrame({
    'url': ['https://www.pro-football-reference.com/boxscores/201909080min.htm'],
    'Year': 2019,
    'Week': 1,
})
df

Unnamed: 0,url,Year,Week
0,https://www.pro-football-reference.com/boxscor...,2019,1


## Scoring Table

In [48]:
%%time 

#df_Score = pd.DataFrame(columns =  ['Year','Week','Tm','Final','Home/Away'])

# Scrape the score table of every game listed in df and add it to df_Score
# (which must already exist).  The rows are walked positionally so the loop
# does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetScore(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_Score = pd.concat([df_Score, df_new])

df_Score

Wall time: 10.4 s


Unnamed: 0,Year,Week,Tm,Final,Home/Away
1,2021,15,New England Patriots,17,Away
2,2021,15,Indianapolis Colts,27,Home
1,2021,13,New York Giants,9,Away
2,2021,13,Miami Dolphins,20,Home
1,2019,16,Green Bay Packers,23,Away
2,2019,16,Minnesota Vikings,10,Home
1,2019,1,Atlanta Falcons,12,Away
2,2019,1,Minnesota Vikings,28,Home


## Offense Table

In [49]:
%%time 

#df_Offense = pd.DataFrame(columns =  ['Year','Week','Players','Tm','Cmp','Pass_Att','Pass_Yds','Pass_TD','Int','Sk','Pass_Lng','Rate',
#         'Rush_Att','Rush_Yds','Rush_TD','Rush_Lng','Tgt','Rec','Rec_Yds','Rec_TD','Rec_Lng','Fmb','FL'])

# Scrape the offense table of every game listed in df and add it to
# df_Offense (which must already exist).  The rows are walked positionally
# so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetOffense(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_Offense = pd.concat([df_Offense, df_new])

Wall time: 14.3 s


## Defense Table

In [50]:
%%time 

#df_Defense = pd.DataFrame(columns =  ['Year','Week','Players','Tm','Def_Ints','Def_Int_Yds','Def_Int_TD','Def_Int_Lng',
#                                     'Def_Pass_Defended','Def_Sack','Def_Combined_Tackles','Def_Solo_Tackle',
#                                     'Def_Assisted_Tackles','Def_TFL','Def_QB_Hits','Def_FR','Def_FR_Yds','Def_FR_TD',
#                                     'Def_FF'])

# Scrape the defense table of every game listed in df and add it to
# df_Defense (which must already exist).  The rows are walked positionally
# so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetDefense(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_Defense = pd.concat([df_Defense, df_new])

Wall time: 38.3 s


## Kick Returns

In [51]:
%%time 

#df_KickReturns = pd.DataFrame(columns =  ['Year','Week','Players','Tm','Kick_Returns','Kick_Return_Yds',
#                                          'Yards_Per_Kick_Return','Kick_Return_TDs','Kick_Return_Long',
#                                          'Punt_Returns','Punt_Return_Yds','Yards_Per_Punt_Return','Punt_Return_TDs',
#                                          'Punt_Return_Long'])

# Scrape the kick/punt returns table of every game listed in df and add it
# to df_KickReturns (which must already exist).  The rows are walked
# positionally so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetKickReturns(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_KickReturns = pd.concat([df_KickReturns, df_new])

Wall time: 19.8 s


## Kicking

In [52]:
%%time 

#df_Kicking = pd.DataFrame(columns =  ['Year','Week','Players','Tm','XPM','XPA','FGM','FGA','Punts','Punt_Yds',
#                                      'Yards_Per_Punt','Punt_Lng'])

# Scrape the kicking table of every game listed in df and add it to
# df_Kicking (which must already exist).  The rows are walked positionally
# so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetKicking(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_Kicking = pd.concat([df_Kicking, df_new])

df_Kicking

Wall time: 15.8 s


Unnamed: 0,Year,Week,Players,Tm,XPM,XPA,FGM,FGA,Punts,Punt_Yds,Yards_Per_Punt,Punt_Lng
0,2021,15,Nick Folk,NWE,2,2,1,1,0,0,0.0,0
1,2021,15,Jake Bailey,NWE,0,0,0,0,2,80,40.0,46
2,2021,15,Michael Badgley,IND,3,3,2,3,0,0,0.0,0
3,2021,15,Rigoberto Sanchez,IND,0,0,0,0,2,87,43.5,46
0,2021,13,Riley Dixon,NYG,0,0,0,0,6,248,41.3,46
1,2021,13,Graham Gano,NYG,0,0,3,4,0,0,0.0,0
2,2021,13,Jason Sanders,MIA,2,2,2,3,0,0,0.0,0
3,2021,13,Michael Palardy,MIA,0,0,0,0,6,290,48.3,65
0,2019,16,Mason Crosby,GNB,0,1,3,3,0,0,0.0,0
1,2019,16,JK Scott,GNB,0,0,0,0,5,234,46.8,39


## Advanced Passing

In [53]:
%%time 

#df_AdvancedPassing = pd.DataFrame(columns =  ['Year','Week','Players','Tm','Cmp','Att','Yds','1D','1D%','IAY','IAY/PA',
#                                              'CAY','CAY/Cmp','CAY/PA','YAC','YAC/Cmp','Drops','Drop%','BadTh','Bad%',
#                                              'Sk','Bltz','Hrry','Hits','Prss','Prss%','Scrm','Yds/Scr'])

# Scrape the advanced passing table of every game listed in df and add it to
# df_AdvancedPassing (which must already exist).  The rows are walked
# positionally so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetAdvancedPassing(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_AdvancedPassing = pd.concat([df_AdvancedPassing, df_new])

df_AdvancedPassing

Wall time: 14.1 s


Unnamed: 0,Year,Week,Players,Tm,Cmp,Att,Yds,1D,1D%,IAY,...,BadTh,Bad%,Sk,Bltz,Hrry,Hits,Prss,Prss%,Scrm,Yds/Scr
0,2021,15,Mac Jones,NWE,26,45,299,15,32.6,478,...,13,29.5%,1,14,0,4,5,10.6%,1,12.0
1,2021,15,Carson Wentz,IND,5,12,57,3,23.1,112,...,3,25.0%,1,4,0,0,1,7.1%,1,11.0
0,2021,13,Mike Glennon,NYG,23,44,187,10,21.3,333,...,6,15.8%,3,16,2,5,10,21.3%,0,0.0
1,2021,13,Tua Tagovailoa,MIA,30,41,244,17,39.5,280,...,5,12.2%,2,11,2,1,5,11.4%,1,2.0
0,2019,16,Aaron Rodgers,GNB,26,40,216,14,32.6,266,...,7,18.4%,3,7,1,2,6,14.0%,0,0.0
1,2019,16,Kirk Cousins,MIN,16,31,122,6,16.7,239,...,8,27.6%,5,4,3,1,9,25.0%,0,0.0
2,2019,16,Stefon Diggs,MIN,0,1,0,0,0.0,12,...,1,100.0%,0,0,0,0,0,0.0%,0,0.0
0,2019,1,Matt Ryan,ATL,33,46,304,16,32.0,360,...,6,13.3%,4,10,7,3,14,26.9%,2,12.0
1,2019,1,Kirk Cousins,MIN,8,10,98,4,36.4,51,...,1,11.1%,1,8,2,2,5,45.5%,0,0.0


## Advanced Rushing

In [54]:
%%time 

#df_AdvancedRushing = pd.DataFrame(columns =  ['Year','Week','Players','Tm','Att','Yds','1D','YBC','YBC/Att','YAC',
#                                              'YAC/Att','BrkTkl','Att/Br'])

# Scrape the advanced rushing table of every game listed in df and add it to
# df_AdvancedRushing (which must already exist).  The rows are walked
# positionally so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetAdvancedRushing(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_AdvancedRushing = pd.concat([df_AdvancedRushing, df_new])

df_AdvancedRushing

Wall time: 20.3 s


Unnamed: 0,Year,Week,Players,Tm,Att,Yds,1D,YBC,YBC/Att,YAC,YAC/Att,BrkTkl,Att/Br
0,2021,15,Rhamondre Stevenson,NWE,10,36,1,19,1.9,17,1.7,0,0.0
1,2021,15,Brandon Bolden,NWE,4,3,0,-2,-0.5,5,1.3,0,0.0
2,2021,15,Kendrick Bourne,NWE,2,19,1,15,7.5,4,2.0,0,0.0
3,2021,15,Mac Jones,NWE,1,12,1,0,0.0,12,12.0,0,0.0
4,2021,15,Jakobi Meyers,NWE,1,9,0,8,8.0,1,1.0,0,0.0
5,2021,15,Jonnu Smith,NWE,1,2,0,2,2.0,0,0.0,0,0.0
6,2021,15,Jonathan Taylor,IND,29,170,7,104,3.6,66,2.3,0,0.0
7,2021,15,Carson Wentz,IND,8,17,4,12,1.5,5,0.6,0,0.0
8,2021,15,Ashton Dulin,IND,2,39,1,34,17.0,5,2.5,0,0.0
0,2021,13,Saquon Barkley,NYG,11,55,5,35,3.2,20,1.8,0,0.0


## Advanced Receiving

In [55]:
%%time 

#df_AdvancedReceiving = pd.DataFrame(columns =  ['Year','Week','Players','Tm','Tgt','Rec','Yds','TD','1D','YBC','YBC/R',
#                                                'YAC','YAC/R','ADOT','BrkTkl','Rec/Br','Drop','Drop%','Int','Rat'])

# Scrape the advanced receiving table of every game listed in df and add it
# to df_AdvancedReceiving (which must already exist).  The rows are walked
# positionally so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetAdvancedReceiving(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_AdvancedReceiving = pd.concat([df_AdvancedReceiving, df_new])

df_AdvancedReceiving

Wall time: 16.8 s


Unnamed: 0,Year,Week,Players,Tm,Tgt,Rec,Yds,TD,1D,YBC,YBC/R,YAC,YAC/R,ADOT,BrkTkl,Rec/Br,Drop,Drop%,Int,Rat
0,2021,15,Hunter Henry,NWE,8,6,77,2,6,62,10.3,15,2.5,9.8,0,0,0,0.0,1,104.7
1,2021,15,Jakobi Meyers,NWE,12,6,44,0,3,32,5.3,12,2.0,10.2,0,0,1,8.3,0,59.0
2,2021,15,Nelson Agholor,NWE,6,4,34,0,1,34,8.5,0,0.0,14.3,0,0,0,0.0,0,81.2
3,2021,15,Brandon Bolden,NWE,5,3,41,0,2,-4,-1.3,45,15.0,0.6,1,3.0,0,0.0,1,46.7
4,2021,15,Kendrick Bourne,NWE,3,3,44,0,2,41,13.7,3,1.0,13.7,0,0,0,0.0,0,118.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,2019,1,Adam Thielen,MIN,3,3,43,1,2,25,8.3,18,6.0,8.3,1,3.0,0,0.0,0,158.3
10,2019,1,Dalvin Cook,MIN,2,2,9,0,0,2,1.0,7,3.5,1.0,0,0,0,0.0,0,85.4
11,2019,1,Stefon Diggs,MIN,2,2,37,0,1,29,14.5,8,4.0,14.5,0,0,0,0.0,0,118.7
12,2019,1,Chad Beebe,MIN,1,1,9,0,1,0,0.0,9,9.0,0.0,0,0,0,0.0,0,104.2


## Advanced Defense

In [86]:
%%time 

#df_AdvancedDefense = pd.DataFrame(columns =  ['Year','Week','Players','Tm','Int','Tgt','Cmp','Cmp%','Yds','Yds/Cmp',
#                                              'Yds/Tgt','TD','Rat','DADOT','Air','YAC','Blitz','Hrry','QBKD','Sk',
#                                              'Prss','Comb','MTkl','MTkl%'])

# Scrape the advanced defense table of every game listed in df with the
# fixed-offset variant of the scraper and add it to df_AdvancedDefense
# (which must already exist).  The rows are walked positionally so the loop
# does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetAdvancedDefenseException(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_AdvancedDefense = pd.concat([df_AdvancedDefense, df_new])
    # Progress trace: printed only after the game was scraped and stored.
    print(url)

df_AdvancedDefense

https://www.pro-football-reference.com/boxscores/201909080min.htm
Wall time: 10.5 s


Unnamed: 0,Year,Week,Players,Tm,Int,Tgt,Cmp,Cmp%,Yds,Yds/Cmp,...,Air,YAC,Blitz,Hrry,QBKD,Sk,Prss,Comb,MTkl,MTkl%
0,2021,15,J.C. Jackson,NWE,0,5,2,40.0%,42,21.0,...,26,16,0,0,0,0.0,0,3,2,40.0%
1,2021,15,Kyle Dugger,NWE,0,4,1,25.0%,0,0.0,...,0,0,0,0,0,0.0,0,5,0,0.0%
2,2021,15,Devin McCourty,NWE,1,1,1,100.0%,7,7.0,...,5,2,0,0,0,0.0,0,6,0,0.0%
3,2021,15,Deatrich Wise Jr.,NWE,0,0,0,0,0,0,...,0,0,0,0,0,1.0,1,2,0,0.0%
4,2021,15,Myles Bryant,NWE,0,1,0,0.0%,0,0,...,0,0,0,0,0,0.0,0,1,0,0.0%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26,2019,1,Danielle Hunter,MIN,0,0,0,0,0,0,...,0,0,0,3,0,1.0,4,4,0,0.0%
27,2019,1,Everson Griffen,MIN,0,0,0,0,0,0,...,0,0,0,1,1,1.0,3,1,2,66.7%
28,2019,1,Linval Joseph,MIN,0,0,0,0,0,0,...,0,0,0,0,0,1.0,1,4,0,0.0%
29,2019,1,Ben Gedeon,MIN,0,0,0,0,0,0,...,0,0,1,0,0,0.0,0,1,1,50.0%


## Home Starters

In [56]:
%%time 

#df_HomeStarters = pd.DataFrame(columns =  ['Year','Week','Players','Pos'])

# Scrape the home starters table of every game listed in df and add it to
# df_HomeStarters (which must already exist).  The rows are walked
# positionally so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetHomeStarters(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_HomeStarters = pd.concat([df_HomeStarters, df_new])

df_HomeStarters

Wall time: 16.3 s


Unnamed: 0,Year,Week,Players,Pos
0,2021,15,Carson Wentz,QB
1,2021,15,Jonathan Taylor,RB
2,2021,15,T.Y. Hilton,WR
3,2021,15,Michael Pittman Jr.,WR
4,2021,15,Jack Doyle,TE
...,...,...,...,...
17,2019,1,Ben Gedeon,LB
18,2019,1,Xavier Rhodes,CB
19,2019,1,Trae Waynes,CB
20,2019,1,Harrison Smith,S


## Away Starters

In [57]:
%%time 

#df_AwayStarters = pd.DataFrame(columns =  ['Year','Week','Players','Pos'])

# Scrape the away starters table of every game listed in df and add it to
# df_AwayStarters (which must already exist).  The rows are walked
# positionally so the loop does not rely on df having a 0..n-1 index.
for url, week, year in zip(df['url'], df['Week'], df['Year']):
    df_new = GetAwayStarters(url)
    # Tag the scraped rows with the game they belong to.
    df_new['Week'] = int(week)
    df_new['Year'] = int(year)
    # Concatenate per game so rows gathered so far survive a failure on a
    # later URL.
    df_AwayStarters = pd.concat([df_AwayStarters, df_new])

df_AwayStarters

Wall time: 22.8 s


Unnamed: 0,Year,Week,Players,Pos
0,2021,15,Mac Jones,QB
1,2021,15,Rhamondre Stevenson,RB
2,2021,15,Jakob Johnson,FB
3,2021,15,Nelson Agholor,WR
4,2021,15,Jakobi Meyers,WR
...,...,...,...,...
17,2019,1,De'Vondre Campbell,LB
18,2019,1,Isaiah Oliver,CB
19,2019,1,Desmond Trufant,CB
20,2019,1,Keanu Neal,S
