# Webscraping ESPN.com for Quarterback Rating (QBR) Statistics
## By: Nick Bruno

### Import Libraries

In [1]:
import pandas as pd
import re
import requests
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Reworking script to use selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

### Set Driver

In [3]:
# Set driver options
options = Options()
# Run Chrome in headless mode (no visible browser window)
options.add_argument('--headless')
# Module-level driver; the scraping cells below call driver.get() on it directly.
# NOTE(review): no driver.quit() appears in the cells shown here - confirm the
# browser process is closed once scraping is finished.
driver = webdriver.Chrome(options=options)

## 2023 Regular Season Example

In [8]:
# Season-total QBR page for the 2023 season, seasontype 2 (regular season)
url = "https://www.espn.com/nfl/qbr/_/season/2023/seasontype/2"
# Load the page in the shared Selenium driver
driver.get(url)

In [9]:
# Collect every <table> element on the page.
# tables[0] shows rank and name and tables[1] shows all of the stats
tables = driver.find_elements(By.CSS_SELECTOR, "table")

In [10]:
# QB Name table: build one list of cell texts per <tr> in the first table's <tbody>
rows = tables[0].find_elements(By.CSS_SELECTOR, "tbody tr")
data = []
for row in rows:  # every tbody row is visited; no row is skipped by position
    cells = row.find_elements(By.TAG_NAME, "td")
    if cells:  # rows without any <td> cells contribute nothing
        data.append([cell.text.strip() for cell in cells])
df1 = pd.DataFrame(data)
df1.columns = ['SEASON_RANK','NAME']
# The NAME cell text is split on the newline into separate NAME and TEAM columns
df1[['NAME', 'TEAM']] = df1.NAME.str.split('\n', expand=True)

In [11]:
# Stats table: build one list of cell texts per <tr> in the second table's <tbody>
rows = tables[1].find_elements(By.CSS_SELECTOR, "tbody tr")
data = []
for row in rows:  # every tbody row is visited; no row is skipped by position
    cells = row.find_elements(By.TAG_NAME, "td")
    if cells:  # rows without any <td> cells contribute nothing
        data.append([cell.text.strip() for cell in cells])
df2 = pd.DataFrame(data)
# Assigning nine names requires the scraped table to have exactly nine columns
df2.columns = ['QBR','PAA','PLAYS','EPA','PASS','RUN','SACK','PEN','RAW']

In [12]:
# Place the name columns (df1) and stat columns (df2) side by side; rows are
# aligned on the default integer index that both frames share.
df = pd.concat([df1,df2],axis=1)

In [14]:
# Preview the first two rows of the combined frame
df.head(2)

Unnamed: 0,SEASON_RANK,NAME,TEAM,QBR,PAA,PLAYS,EPA,PASS,RUN,SACK,PEN,RAW
0,1,B. Purdy,SF,73.4,37.2,530,76.9,66.8,9.1,-10.2,1.0,73.0
1,2,D. Prescott,DAL,73.4,57.0,724,106.7,91.6,10.6,-16.3,4.5,75.3


This works. Next, we wrap it in a loop over years (2006-2024) and season types (2 and 3). Season type 2 is the regular season and season type 3 is the postseason. Note: the URL for 2024 omits the year because 2024 is the current season.

## Scrape season summary QBR Stats

In [4]:
def _table_rows(table):
    """Return the stripped text of every ``td`` cell, one list per ``tbody tr`` row.

    Rows that contain no ``td`` cells are left out of the result.
    """
    extracted = []
    for tr in table.find_elements(By.CSS_SELECTOR, "tbody tr"):
        cells = tr.find_elements(By.TAG_NAME, "td")
        if cells:
            extracted.append([cell.text.strip() for cell in cells])
    return extracted


def _season_labels(url):
    """Return ``(year, season_type)`` read from the named segments of ``url``.

    ``year`` is the four digits following ``/season/`` (as a string) and
    ``season_type`` is 'REGULAR' for ``/seasontype/2`` or 'POST' for
    ``/seasontype/3``. Either value is None when the URL does not carry it.
    """
    year_match = re.search(r'/season/(\d{4})', url)
    type_match = re.search(r'/seasontype/(\d+)', url)
    year = year_match.group(1) if year_match else None
    season_type = None
    if type_match:
        season_type = {'2': 'REGULAR', '3': 'POST'}.get(type_match.group(1))
    return year, season_type


def scrape_qbr_season(url):
    """Scrape one ESPN season-total QBR page into a DataFrame.

    The page is loaded with the module-level ``driver``. The QBR grid is read
    from the first two ``<table>`` elements on the page: the first holds the
    rank and a "name, newline, team" cell, the second holds the nine stat columns.

    Parameters
    ----------
    url : str
        Page to scrape, e.g.
        ``https://www.espn.com/nfl/qbr/_/season/2023/seasontype/2``.

    Returns
    -------
    pandas.DataFrame
        Columns SEASON_RANK, NAME, TEAM, QBR, PAA, PLAYS, EPA, PASS, RUN,
        SACK, PEN, RAW, YEAR, SEASON_TYPE. Scraped values are kept as text.
        YEAR / SEASON_TYPE are None when the URL has no ``/season/<year>`` /
        ``/seasontype/<2|3>`` segment.

    Raises
    ------
    ValueError
        If the stats table does not have exactly nine columns (for example the
        weekly view, which returns eleven).
    selenium.common.exceptions.TimeoutException
        If fewer than two tables appear within ten seconds.
    """
    name_columns = ['SEASON_RANK', 'NAME']
    stat_columns = ['QBR', 'PAA', 'PLAYS', 'EPA', 'PASS', 'RUN', 'SACK', 'PEN', 'RAW']

    driver.get(url)

    def _both_tables(drv):
        # WebDriverWait keeps polling until this returns something truthy.
        found = drv.find_elements(By.CSS_SELECTOR, "table")
        return found if len(found) >= 2 else False

    # Wait for both halves of the grid instead of indexing tables[0]/tables[1]
    # on whatever happens to be in the DOM the moment get() returns.
    tables = WebDriverWait(driver, 10).until(_both_tables)

    # Left half of the grid: rank plus the combined name/team cell.
    names = pd.DataFrame(_table_rows(tables[0]), columns=name_columns)
    # n=1 plus reindex guarantees exactly two parts, so an empty table or a
    # name cell without a team line yields a missing TEAM instead of an error.
    name_team = names['NAME'].str.split('\n', n=1, expand=True).reindex(columns=[0, 1])
    names['NAME'] = name_team[0]
    names['TEAM'] = name_team[1]

    # Right half of the grid: the stat columns.
    stat_rows = _table_rows(tables[1])
    if stat_rows and len(stat_rows[0]) != len(stat_columns):
        raise ValueError(
            "Expected {} stat columns {} but the table at {} has {}".format(
                len(stat_columns), stat_columns, url, len(stat_rows[0])
            )
        )
    stats = pd.DataFrame(stat_rows, columns=stat_columns)

    # Both frames carry the same default integer index, so axis=1 lines rows up.
    df = pd.concat([names, stats], axis=1)

    # Label rows from the named URL segments rather than from segment positions,
    # which shift when the year is omitted or a /week/ segment is appended.
    year, season_type = _season_labels(url)
    df['YEAR'] = year
    df['SEASON_TYPE'] = season_type

    return df

In [6]:
# Scrape the season totals for 2006 through 2023, for both the regular season
# (seasontype '2') and the postseason (seasontype '3').
# The frames are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop copies every previously scraped row again on each pass.
# If a scrape fails part-way, the frames gathered so far remain in season_frames.
season_frames = []
for year in range(2006, 2024):
    for season_type in ['2', '3']:
        url = f"https://www.espn.com/nfl/qbr/_/season/{year}/seasontype/{season_type}"
        df = scrape_qbr_season(url)
        season_frames.append(df)
seasons_df = pd.concat(season_frames)

In [9]:
# Add 2024 data (doesn't include the 'year' in the url)
for season_type in ['2','3']:
    url = "https://www.espn.com/nfl/qbr/_/seasontype/{}".format(season_type)
    df = scrape_qbr_season(url)
    # This URL has no /season/<year>/ segment, so the season cannot be derived
    # from it; label the rows explicitly so YEAR is consistent with 2006-2023.
    # NOTE(review): the year-less URL serves whatever season is current when the
    # cell runs - update this label if the notebook is re-run in a later season.
    df['YEAR'] = '2024'
    seasons_df = pd.concat([seasons_df,df])

In [10]:
# (rows, columns) of the combined season-total frame
seasons_df.shape

(821, 14)

In [11]:
# Preview the first two rows of the combined season-total frame
seasons_df.head(2)

Unnamed: 0,SEASON_RANK,NAME,TEAM,QBR,PAA,PLAYS,EPA,PASS,RUN,SACK,PEN,RAW,YEAR,SEASON_TYPE
0,1,P. Manning,IND,86.4,85.5,624,108.8,96.0,6.8,-5.0,1.1,87.4,2006,REGULAR
1,2,T. Brady,NE,68.6,30.9,610,57.9,38.8,4.3,-12.0,2.8,67.2,2006,REGULAR


In [14]:
# Write to .csv
import os

# Create the output folder first so a missing ./data directory does not discard
# the results of the scraping run with an OSError.
os.makedirs('./data', exist_ok=True)
# index=False is the documented way to leave the row index out of the file.
seasons_df.to_csv('./data/qbr_season_totals_df.csv', index=False)

## Scrape Weekly Results

### Regular Season Scrape

##### 2006 - 2021 Scrape

The weekly scrape needs to be split up because the NFL expanded to 18 weeks in the 2022 season. Season type = 2 stays fixed so that only regular season games are included. The postseason will need separate handling, since its "weeks" are labeled by name, not by number. The weekly table also includes the week number and the result of the game, so we will need to adjust the column names for that by creating a new function.

In [15]:
# NOTE(review): this cell cannot complete yet - scrape_qbr_season expects nine stat
# columns, while the weekly view returns eleven (see the ValueError output below),
# so a weekly-aware scraper is needed before this loop produces data.
# Years stop at 2021 to match this section: weeks 1-17 only cover the seasons
# before the 18-week schedule.
# NOTE(review): confirm which season first had 18 weeks before relying on this range.
weekly_frames = []
for year in range(2006, 2022):
    for week in range(1, 18):
        url = f"https://www.espn.com/nfl/qbr/_/view/weekly/season/{year}/seasontype/2/week/{week}"
        df = scrape_qbr_season(url)
        weekly_frames.append(df)
# Concatenate once instead of re-copying all earlier rows on every iteration.
regular_seasons_weeks_df = pd.concat(weekly_frames)

ValueError: Length mismatch: Expected axis has 11 elements, new values have 9 elements

In [None]:
https://www.espn.com/nfl/qbr/_/view/weekly/season/2023/seasontype/2/week/1

In [None]:
https://www.espn.com/nfl/qbr/_/view/weekly/season/2023/seasontype/2

### Summary

This Jupyter Notebook has shown how to scrape NFL quarterback QBR statistics from ESPN.com. I have presented multiple functions that scrape season-total QBR stats as well as weekly QBR stats. This scraping was done in an effort to compare QBR stats with the Passer Rating metric, and that analysis will be conducted in a future blog post on btc.bashingbitcoin.com. Gathering the data is the first step in this analysis process. The scraped ESPN data will be combined with the Quarterback Rating statistics that were scraped from pro-football-reference.com in an earlier blog post.