# <center>Scraping NFL Vegas Line Data (2025 Season)</center>

In [1]:
# Import Python Libraries

import numpy as np
import pandas as pd
import random
import time

In [2]:
# Team codes for the Pro-Football-Reference data pull.
# Each code is substituted into the per-team URL path when scraping,
# so the spelling must match what the site uses.

teams = [
    'crd', 'atl', 'rav', 'buf',
    'car', 'chi', 'cin', 'cle',
    'dal', 'den', 'det', 'gnb',
    'htx', 'clt', 'jax', 'kan',
    'sdg', 'ram', 'rai', 'mia',
    'min', 'nwe', 'nor', 'nyg',
    'nyj', 'phi', 'pit', 'sfo',
    'sea', 'tam', 'oti', 'was',
]

# Sanity check: report how many team codes are in the list
print(f'number of teams = {len(teams)}')

number of teams = 32


In [5]:
# Scrape the NFL Vegas line Data
#
# Requests one '<season>_lines.htm' page per team and season from
# Pro-Football-Reference, tags each table with its Season and Team, and
# stacks everything into the aggregate dataframe `veg_df`.
# Requires `teams` (the list of team codes used in the URL) to be defined.

# Get the starting time
start_time = time.time()

# Create the list of seasons to download - change to 2026 when wanting to grab 2025 season
# (range() excludes its stop value, so this covers 2015 through 2024)
seasons = range(2015, 2025)

# Collect one dataframe per team-season. Concatenating once after the loop
# avoids re-copying the ever-growing aggregate on every iteration.
team_season_frames = []

# Iterate through the seasons
for season in seasons:
    # Iterate through the teams
    for team in teams:
        # Set URL
        url = 'https://www.pro-football-reference.com/teams/' + team + '/' + str(season) + '_lines.htm'
        print(url)

        # Get the Vegas Lines (from 'vegas_lines' table)
        lines_df = pd.read_html(url, header=0, attrs={'id':'vegas_lines'})[0]

        # Insert the Season and Team Columns
        lines_df.insert(loc=0, column='Season', value=season)
        lines_df.insert(loc=1, column='Team', value=team.upper())

        # Keep this team-season table for the single concat below
        team_season_frames.append(lines_df)

        # Pause 4-5 seconds between requests (at most ~15 requests per minute)
        # to respect the website's rate limits. The site's bot policy quotes
        # 10 requests per minute for FBref and Stathead.
        # NOTE(review): confirm the current limit for pro-football-reference.com.
        time.sleep(random.randint(4, 5))

# Concatenate all team-season tables along rows (axis = 0).
# Fall back to an empty dataframe if nothing was requested.
veg_df = (
    pd.concat(team_season_frames, ignore_index=True)
    if team_season_frames
    else pd.DataFrame()
)

# Get the ending time
end_time = time.time()

# Display the elapsed time
elapsed_time = end_time - start_time
print(f'elapsed time = {elapsed_time / 60:.1f} minutes')

# Display the aggregate dataframe information
# (info() prints its own report and returns None, so it is not wrapped in print())
veg_df.info()

https://www.pro-football-reference.com/teams/crd/2015_lines.htm
https://www.pro-football-reference.com/teams/atl/2015_lines.htm
https://www.pro-football-reference.com/teams/rav/2015_lines.htm
https://www.pro-football-reference.com/teams/buf/2015_lines.htm
https://www.pro-football-reference.com/teams/car/2015_lines.htm
https://www.pro-football-reference.com/teams/chi/2015_lines.htm
https://www.pro-football-reference.com/teams/cin/2015_lines.htm
https://www.pro-football-reference.com/teams/cle/2015_lines.htm
https://www.pro-football-reference.com/teams/dal/2015_lines.htm
https://www.pro-football-reference.com/teams/den/2015_lines.htm
https://www.pro-football-reference.com/teams/det/2015_lines.htm
https://www.pro-football-reference.com/teams/gnb/2015_lines.htm
https://www.pro-football-reference.com/teams/htx/2015_lines.htm
https://www.pro-football-reference.com/teams/clt/2015_lines.htm
https://www.pro-football-reference.com/teams/jax/2015_lines.htm
https://www.pro-football-reference.com/t

In [6]:
# Clean the data
#
# Reduces the scraped `veg_df` to the six columns used downstream and to
# regular-season games only. The steps are ordered so that re-running this
# cell on its own output gives the same result.

veg_df = (
    veg_df
    # Rename some columns
    .rename(columns={'G#':'Gtm', 'Over/Under':'Total'})
    # Keep only the needed columns, selected by name rather than by position
    # so a change in the scraped table layout raises a KeyError instead of
    # silently keeping the wrong columns
    .loc[:, ['Season', 'Team', 'Gtm', 'Opp', 'Spread', 'Total']]
    # Drop rows that are in the post-season: game numbers above 16 through the
    # 2020 season and above 17 from 2021 on (presumably the regular-season
    # length in each era - verify if the schedule format changes)
    .query('(Season <= 2020 and Gtm < 17) or (Season >=2021 and Gtm < 18)')
)

# Display the info
# (info() prints its own report and returns None, so wrapping it in print()
# would add a stray "None" line to the output)
veg_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5246 entries, 0 to 5482
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Season  5246 non-null   int64  
 1   Team    5246 non-null   object 
 2   Gtm     5246 non-null   int64  
 3   Opp     5246 non-null   object 
 4   Spread  5246 non-null   float64
 5   Total   5246 non-null   float64
dtypes: float64(2), int64(2), object(2)
memory usage: 286.9+ KB
None


In [7]:
# Write the `veg_df` dataframe to a CSV file (row index omitted)
output_csv = 'nfl_vegas_lines_2015-2024.csv'
veg_df.to_csv(output_csv, index=False)