# Project: Levels of Football

Kaleb Sailer

This notebook scrapes 2023 NFL team statistics from Pro Football Reference and saves them to `nfl_df_raw.csv`.

### Packages

In [48]:
import pandas as pd 
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import random

## NFL Data: Pro Football Reference

In [50]:
# Team abbreviations as they appear in the team page URLs (four per row).
team_abbrev = [
    'buf', 'mia', 'nyj', 'nwe',
    'rav', 'cle', 'pit', 'cin',
    'htx', 'jax', 'clt', 'oti',
    'kan', 'rai', 'den', 'sdg',
    'dal', 'phi', 'nyg', 'was',
    'det', 'gnb', 'min', 'chi',
    'tam', 'nor', 'atl', 'car',
    'sfo', 'ram', 'sea', 'crd',
]

# One accumulator list per tracked stat; the scraping loop appends one value
# per team to each of them.
team_names_nfl, wins_nfl, losses_nfl = [], [], []
ppg_nfl, ppg_a_nfl, ppg_diff_nfl = [], [], []
pass_ypg_nfl, rush_ypg_nfl = [], []
tds_nfl, rush_tds_nfl, pass_tds_nfl = [], [], []

In [51]:
# Season to scrape and the number of games used to convert season totals
# into per-game averages.
SEASON = 2023
GAMES_PLAYED = 17

# Bounds (seconds) for the randomised pause between page loads, so the site
# is not hit with a rapid burst of requests.
MIN_DELAY, MAX_DELAY = 3.0, 6.0

# Start from empty lists so re-running this cell does not append a second
# copy of every team to the results.
for stat_list in (team_names_nfl, wins_nfl, losses_nfl, ppg_nfl, ppg_a_nfl,
                  ppg_diff_nfl, pass_ypg_nfl, rush_ypg_nfl, tds_nfl,
                  rush_tds_nfl, pass_tds_nfl):
    stat_list.clear()


def _stat_text(driver, row, col):
    """Return the text of cell `col` in body row `row` of the `team_stats` table."""
    cell = driver.find_element(
        By.XPATH, f'//*[@id="team_stats"]/tbody/tr[{row}]/td[{col}]')
    return cell.text


service = Service(ChromeDriverManager().install())
browser = webdriver.Chrome(service=service)

# try/finally guarantees the browser is closed even if a lookup fails midway.
try:
    for idx, nfl_team in enumerate(team_abbrev):
        # Wait between teams (no need to wait before the very first request).
        if idx:
            time.sleep(random.uniform(MIN_DELAY, MAX_DELAY))

        url = f'https://www.pro-football-reference.com/teams/{nfl_team}/{SEASON}.htm'
        browser.get(url)

        print(f'Accessing page for {nfl_team}...')

        # team name
        team_name = browser.find_element(
            By.XPATH, '//*[@id="meta"]/div[2]/h1/span[2]').text

        # win/loss record: drop the "Record: " label, then split on '-'
        # and keep the first two fields as wins and losses
        record_text = browser.find_element(
            By.XPATH, '//*[@id="meta"]/div[2]/p[1]').text.strip()
        record_parts = record_text.replace("Record: ", "").split('-')
        win_count = int(record_parts[0])
        loss_count = int(record_parts[1])

        # points for (row 1) and points against (row 2), per game;
        # ppg_diff is derived from the two rounded values
        ppg = round(float(_stat_text(browser, 1, 1)) / GAMES_PLAYED, 1)
        ppg_a = round(float(_stat_text(browser, 2, 1)) / GAMES_PLAYED, 1)
        ppg_diff = round(ppg - ppg_a, 1)

        # passing stats
        pass_ypg = round(float(_stat_text(browser, 1, 10)) / GAMES_PLAYED, 1)
        pass_tds = int(_stat_text(browser, 1, 11))

        # rushing stats
        rush_ypg = round(float(_stat_text(browser, 1, 16)) / GAMES_PLAYED, 1)
        rush_tds = int(_stat_text(browser, 1, 17))

        # total offensive touchdowns (passing + rushing)
        tds = pass_tds + rush_tds

        # Append only after every value was read, so the lists stay the same
        # length even if a page fails partway through.
        team_names_nfl.append(team_name)
        wins_nfl.append(win_count)
        losses_nfl.append(loss_count)
        ppg_nfl.append(ppg)
        ppg_a_nfl.append(ppg_a)
        ppg_diff_nfl.append(ppg_diff)
        pass_ypg_nfl.append(pass_ypg)
        pass_tds_nfl.append(pass_tds)
        rush_ypg_nfl.append(rush_ypg)
        rush_tds_nfl.append(rush_tds)
        tds_nfl.append(tds)

        print(f'Completed scraping for {nfl_team}')
finally:
    browser.quit()

Accessing page for buf...
Completed scraping for buf
Accessing page for mia...
Completed scraping for mia
Accessing page for nyj...
Completed scraping for nyj
Accessing page for nwe...
Completed scraping for nwe
Accessing page for rav...
Completed scraping for rav
Accessing page for cle...
Completed scraping for cle
Accessing page for pit...
Completed scraping for pit
Accessing page for cin...
Completed scraping for cin
Accessing page for htx...
Completed scraping for htx
Accessing page for jax...
Completed scraping for jax
Accessing page for clt...
Completed scraping for clt
Accessing page for oti...
Completed scraping for oti
Accessing page for kan...
Completed scraping for kan
Accessing page for rai...
Completed scraping for rai
Accessing page for den...
Completed scraping for den
Accessing page for sdg...
Completed scraping for sdg
Accessing page for dal...
Completed scraping for dal
Accessing page for phi...
Completed scraping for phi
Accessing page for nyg...
Completed scraping f

In [52]:
# Combine the per-team lists into one table; each keyword becomes a column,
# in the order given here.
nfl_df = pd.DataFrame(dict(
    team_name=team_names_nfl,
    W=wins_nfl,
    L=losses_nfl,
    ppg=ppg_nfl,
    ppg_a=ppg_a_nfl,
    ppg_diff=ppg_diff_nfl,
    pass_ypg=pass_ypg_nfl,
    rush_ypg=rush_ypg_nfl,
    off_tds=tds_nfl,
    rush_tds=rush_tds_nfl,
    pass_tds=pass_tds_nfl,
))

In [53]:
# Write the raw scrape to disk without the DataFrame's row index.
nfl_df.to_csv(path_or_buf='nfl_df_raw.csv', index=False)