In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [2]:
# URL template for one team's salary-cap page on Spotrac. The `{team}` and
# `{year}` placeholders are filled in with str.format() for each request.
base_url = (
    'https://www.spotrac.com/nfl/{team}/cap/_/year/{year}'
    '/sort/cap_total_top51/dir/desc'
)

In [3]:
# Team slugs, in the form they are substituted into the URL template
teams = [
    'arizona-cardinals',
    'atlanta-falcons',
    'baltimore-ravens',
    'buffalo-bills',
    'carolina-panthers',
    'chicago-bears',
    'cincinnati-bengals',
    'cleveland-browns',
    'dallas-cowboys',
    'denver-broncos',
    'detroit-lions',
    'green-bay-packers',
    'houston-texans',
    'indianapolis-colts',
    'jacksonville-jaguars',
    'kansas-city-chiefs',
    'los-angeles-chargers',
    'los-angeles-rams',
    'las-vegas-raiders',
    'miami-dolphins',
    'minnesota-vikings',
    'new-england-patriots',
    'new-orleans-saints',
    'new-york-giants',
    'new-york-jets',
    'philadelphia-eagles',
    'pittsburgh-steelers',
    'seattle-seahawks',
    'san-francisco-49ers',
    'tampa-bay-buccaneers',
    'tennessee-titans',
    'washington-commanders',
]

# Seasons to request: 2011 through 2023 inclusive
years = list(range(2011, 2024))

In [4]:
# Scrape one salary-cap table per (year, team) page and collect every player
# row into `all_data` as a flat list of 16 values: the 14 table cells followed
# by the team slug and the year.

# Seconds to wait for a response before giving up on a page. Without a timeout
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Number of <td> cells a player row must contain (the highest index read is 13)
EXPECTED_CELLS = 14


def _cell_text(cell, tag_name):
    """Return the stripped text of the first `tag_name` tag inside `cell`.

    Falls back to the cell's own text when the tag is absent, so a cell with
    unexpected markup does not raise AttributeError.
    """
    tag = cell.find(tag_name)
    return (tag if tag is not None else cell).text.strip()


# Initialize empty list for storing data
all_data = []

# Most recent table that was found on a page; kept at module level because the
# column names are read from its header cells after the loop.
table = None

# Loop through each year and team
for year in years:
    for team in teams:
        url = base_url.format(team=team, year=year)
        print(f'Fetching data for {team} in {year} from URL: {url}')

        # Fetch the page; skip it (with a message) on network or HTTP errors
        # instead of parsing an error page.
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'  Skipping {team} {year}: request failed ({exc})')
            continue

        # Parse through the webpage content with Beautiful Soup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Select the target table; find() returns None when it is missing
        page_table = soup.find('table', class_='table table-internal-sort rounded-top mt-2 mb-0')
        if page_table is None:
            print(f'  Skipping {team} {year}: cap table not found on page')
            continue
        table = page_table

        # Loop through each row and extract the data
        for row in table.find_all('tr'):
            cells = row.find_all('td')

            # Rows without <td> cells, and rows too short to hold all 14
            # fields, are not player rows.
            if len(cells) < EXPECTED_CELLS:
                continue

            # The first cell holds the player name inside a link; the other
            # thirteen cells hold their value inside a <span>.
            player = _cell_text(cells[0], 'a')
            values = [_cell_text(cell, 'span') for cell in cells[1:EXPECTED_CELLS]]

            all_data.append([player, *values, team, year])

Fetching data for arizona-cardinals in 2011 from URL: https://www.spotrac.com/nfl/arizona-cardinals/cap/_/year/2011/sort/cap_total_top51/dir/desc
Fetching data for atlanta-falcons in 2011 from URL: https://www.spotrac.com/nfl/atlanta-falcons/cap/_/year/2011/sort/cap_total_top51/dir/desc
Fetching data for baltimore-ravens in 2011 from URL: https://www.spotrac.com/nfl/baltimore-ravens/cap/_/year/2011/sort/cap_total_top51/dir/desc
Fetching data for buffalo-bills in 2011 from URL: https://www.spotrac.com/nfl/buffalo-bills/cap/_/year/2011/sort/cap_total_top51/dir/desc
Fetching data for carolina-panthers in 2011 from URL: https://www.spotrac.com/nfl/carolina-panthers/cap/_/year/2011/sort/cap_total_top51/dir/desc
Fetching data for chicago-bears in 2011 from URL: https://www.spotrac.com/nfl/chicago-bears/cap/_/year/2011/sort/cap_total_top51/dir/desc
Fetching data for cincinnati-bengals in 2011 from URL: https://www.spotrac.com/nfl/cincinnati-bengals/cap/_/year/2011/sort/cap_total_top51/dir/des

In [5]:
# Build the DataFrame column names from the <th> cells of `table` (the table
# object left over from the scraping loop), then assemble the DataFrame.

# Collect the header cells; an unavailable table yields no headers
th_tags = table.find_all('th') if table else []

# Keep only the first line of each header cell's text
columns = [th.text.strip().split('\n')[0] for th in th_tags]

# Fail with a clear message instead of an IndexError on `columns[0]` below
if not columns:
    raise ValueError(
        'No header cells were found in the scraped table; '
        'cannot derive the DataFrame column names'
    )

# Keep only the first space-separated word of the first column name
columns[0] = columns[0].split(' ')[0]

# Add 'Team' and 'Year' columns, matching the two values appended to each row
columns.extend(['Team', 'Year'])

# Create DataFrame using scraped data
df = pd.DataFrame(all_data, columns=columns)

In [6]:
# Write the scraped data to an Excel workbook, leaving out the index column
df.to_excel(excel_writer='nfl_team_cap_data.xlsx', index=False)