In [30]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def fetch_defense_data_for_year(year, timeout=30):
    """Scrape the ``team_stats`` table of one season's opposition page.

    Downloads ``https://www.pro-football-reference.com/years/{year}/opp.htm``,
    finds the ``<table id="team_stats">`` element and turns it into a DataFrame.

    Parameters
    ----------
    year : int
        Season to fetch. It is interpolated into the URL and stored in the
        ``Year`` column of the result.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per body row that contains ``<td>`` cells. Columns are named
        after the last row of the table header, plus a ``Year`` column. Cell
        values are the raw cell text; no numeric conversion is performed here.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns a non-2xx status.
    ValueError
        If the page has no ``team_stats`` table, or the table is missing its
        header or body section.
    """
    url = f'https://www.pro-football-reference.com/years/{year}/opp.htm'

    # Fail loudly on HTTP errors instead of parsing an error page as if it
    # were the stats page.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', id='team_stats')
    if table is None:
        raise ValueError(f"No 'team_stats' table found at {url}")

    thead = table.find('thead')
    tbody = table.find('tbody')
    if thead is None or tbody is None:
        raise ValueError(f"'team_stats' table at {url} has no <thead>/<tbody> section")

    header_rows = thead.find_all('tr')
    if not header_rows:
        raise ValueError(f"'team_stats' table at {url} has an empty header")

    # Column names come from the last header row.
    headers = [th.get_text() for th in header_rows[-1].find_all('th')]
    # NOTE(review): presumably the rank cell of each body row is a <th>, so
    # dropping 'Rk' keeps the headers aligned with the <td> cells collected
    # below -- verify against the live page.
    if headers[:1] == ['Rk']:
        headers = headers[1:]

    # Keep only rows that actually contain data cells.
    rows = [
        [cell.get_text() for cell in cells]
        for cells in (tr.find_all('td') for tr in tbody.find_all('tr'))
        if cells
    ]

    defense_data = pd.DataFrame(rows, columns=headers)
    defense_data['Year'] = year  # tag every row with its season
    return defense_data

# Seasons to scrape
years = range(2016, 2024)

# Collect each season's frame in a list and concatenate once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every pass.
yearly_frames = []
for year in years:
    year_data = fetch_defense_data_for_year(year)
    yearly_frames.append(year_data)
    time.sleep(3)  # wait 3 seconds between successive requests

dst_data = pd.concat(yearly_frames, ignore_index=True)
dst_data = dst_data.rename(columns={'Tm': 'team', 'Year': 'season'})
dst_data


Unnamed: 0,team,G,PA,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,TD,Y/A,1stD.1,Pen,Yds.1,1stPy,Sc%,TO%,EXP,season
0,New England Patriots,16,250,5223,998,5.2,23,10,294,368,...,6,3.9,81,113,930,23,26.8,11.2,108.15,2016
1,New York Giants,16,284,5435,1062,5.1,25,8,298,369,...,10,3.6,82,104,853,25,29.9,12.2,98.26,2016
2,Seattle Seahawks,16,292,5099,1020,5.0,19,8,307,331,...,16,3.4,94,103,925,33,28.3,10.3,69.70,2016
3,Denver Broncos,16,297,5057,1076,4.7,27,13,306,306,...,15,4.3,101,110,990,36,29.4,12.2,144.49,2016
4,Dallas Cowboys,16,306,5503,1009,5.5,20,11,330,425,...,9,3.9,79,90,784,25,35.1,11.3,-27.42,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,Indianapolis Colts,17,415,5947,1137,5.2,24,9,346,370,...,22,4.1,123,100,843,31,38.9,11.8,-38.18,2023
252,Carolina Panthers,17,416,4997,1016,4.9,11,3,311,309,...,25,4.1,122,89,679,27,37.7,5.5,-56.65,2023
253,Philadelphia Eagles,17,428,6054,1105,5.5,18,9,364,425,...,13,4.3,111,107,810,31,41.1,9.7,-121.62,2023
254,Arizona Cardinals,17,455,6047,1060,5.7,17,6,369,347,...,19,4.7,141,92,765,30,44.0,8.6,-160.50,2023


In [31]:
import requests
from bs4 import BeautifulSoup

# Maps full team names (including former names of the same franchise)
# to the short lowercase team code used as the merge key.
corrections = {
    'Indianapolis Colts': 'clt',
    'New England Patriots': 'nwe',
    'Tampa Bay Buccaneers': 'tam',
    'New Orleans Saints': 'nor',
    'Los Angeles Rams': 'ram',
    'St. Louis Rams': 'ram',
    'Arizona Cardinals': 'crd',
    'Green Bay Packers': 'gnb',
    'Houston Texans': 'htx',
    'Las Vegas Raiders': 'rai',
    'Oakland Raiders': 'rai',
    'Baltimore Ravens': 'rav',
    'Los Angeles Chargers': 'sdg',
    'San Diego Chargers': 'sdg',
    'Kansas City Chiefs': 'kan',
    'Tennessee Titans': 'oti',
    'San Francisco 49ers': 'sfo',
    'Atlanta Falcons': 'atl',
    'Buffalo Bills': 'buf',
    'Carolina Panthers': 'car',
    'Chicago Bears': 'chi',
    'Cincinnati Bengals': 'cin',
    'Cleveland Browns': 'cle',
    'Dallas Cowboys': 'dal',
    'Denver Broncos': 'den',
    'Detroit Lions': 'det',
    'Jacksonville Jaguars': 'jax',
    'Miami Dolphins': 'mia',
    'Minnesota Vikings': 'min',
    'New York Giants': 'nyg',
    'New York Jets': 'nyj',
    'Philadelphia Eagles': 'phi',
    'Pittsburgh Steelers': 'pit',
    'Seattle Seahawks': 'sea',
    'Washington Commanders': 'was',
    'Washington Redskins': 'was',
    'Washington Football Team': 'was',
}

def _parse_score(text):
    """Convert a score cell's text to float; return None when it is not numeric."""
    cleaned = text.strip().replace(',', '')
    try:
        return float(cleaned)
    except ValueError:
        return None


def fetch_dst_total_scores(year, timeout=30):
    """Scrape per-team D/ST fantasy totals for one season.

    Downloads ``https://www.fantasypros.com/nfl/reports/leaders/dst.php?year={year}``
    and reads the first ``<table>`` on the page. For every body row that has
    ``<td>`` cells, the team is taken from the ``<a class="player-name">``
    link and the score from the row's last cell.

    Parameters
    ----------
    year : int
        Season to fetch. It is interpolated into the URL and stored in the
        ``season`` column.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Columns ``season``, ``team`` and ``fantasy_points``. ``team`` is
        mapped through the module-level ``corrections`` dict (unknown names
        are kept as-is; ``None`` when the row has no player-name link).
        ``fantasy_points`` is a float, or ``None`` when the last cell is not
        numeric. An empty DataFrame (no columns) is returned when the request
        fails or the page contains no table; the reason is printed.
    """
    url = f'https://www.fantasypros.com/nfl/reports/leaders/dst.php?year={year}'
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises HTTPError for bad responses
        data = response.text
    except requests.RequestException as e:
        print(f"Error fetching data for {year}: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on error

    soup = BeautifulSoup(data, 'html.parser')
    table = soup.find('table')
    if table is None:
        print(f"No table found for {year}")
        return pd.DataFrame()

    # Fall back to the table itself when the markup has no explicit <tbody>.
    body = table.find('tbody')
    if body is None:
        body = table

    years, teams, total_scores = [], [], []
    for row in body.find_all('tr'):
        cols = row.find_all('td')
        if not cols:
            continue  # header/separator rows carry no data cells

        team_link = row.find('a', class_='player-name')
        if team_link is None:
            team_name = None
        else:
            raw_name = team_link.text.strip()
            team_name = corrections.get(raw_name, raw_name)

        years.append(year)
        teams.append(team_name)
        # float() accepts decimals and negative totals, which the previous
        # str.isdigit() check silently turned into None.
        total_scores.append(_parse_score(cols[-1].text))

    year_df = pd.DataFrame({
        'season': years,
        'team': teams,
        'fantasy_points': total_scores
    })
    return year_df

# Fetch every season first, then combine the per-season frames with a single
# concat (concatenating inside the loop re-copies all earlier rows each pass).
fantasy_frames = []
for year in range(2016, 2024):
    yearly_data = fetch_dst_total_scores(year)
    fantasy_frames.append(yearly_data)

all_years_data = pd.concat(fantasy_frames, ignore_index=True)

# Display initial data
print(all_years_data.head())

def apply_corrections(df, corrections_dict):
    """Rewrite the 'team' column of ``df`` using ``corrections_dict``.

    Each value found as a key in ``corrections_dict`` is replaced by the
    mapped value; values without an entry are left unchanged. The column is
    overwritten on ``df`` itself and that same object is returned.
    """
    def _canonical(name):
        # Fall back to the original value when there is no correction for it
        return corrections_dict.get(name, name)

    corrected = df['team'].apply(_canonical)
    df['team'] = corrected
    return df

# Normalise team names on both frames so the merge keys line up
all_years_data = apply_corrections(all_years_data, corrections)
dst_data = apply_corrections(dst_data, corrections)

# Drop any 'fantasy_points' column left by a previous run of this cell first;
# otherwise re-running merges again and produces suffixed
# fantasy_points_x / fantasy_points_y columns instead of a single column.
dst_data = pd.merge(
    dst_data.drop(columns=['fantasy_points'], errors='ignore'),
    all_years_data,
    on=['team', 'season'],
    how='left',
)
print(dst_data.head())

   season team  fantasy_points
0    2016  den           152.0
1    2016  kan           148.0
2    2016  crd           146.0
3    2016  nyg           141.0
4    2016  min           139.0
  team   G   PA   Yds   Ply  Y/P  TO  FL 1stD  Cmp  ...  Y/A 1stD  Pen  Yds  \
0  nwe  16  250  5223   998  5.2  23  10  294  368  ...  3.9   81  113  930   
1  nyg  16  284  5435  1062  5.1  25   8  298  369  ...  3.6   82  104  853   
2  sea  16  292  5099  1020  5.0  19   8  307  331  ...  3.4   94  103  925   
3  den  16  297  5057  1076  4.7  27  13  306  306  ...  4.3  101  110  990   
4  dal  16  306  5503  1009  5.5  20  11  330  425  ...  3.9   79   90  784   

  1stPy   Sc%   TO%     EXP season fantasy_points  
0    23  26.8  11.2  108.15   2016          126.0  
1    25  29.9  12.2   98.26   2016          141.0  
2    33  28.3  10.3   69.70   2016          116.0  
3    36  29.4  12.2  144.49   2016          152.0  
4    25  35.1  11.3  -27.42   2016           91.0  

[5 rows x 29 columns]


In [32]:
# Save the merged table to disk, then show the frame as the cell's output.
# No `index` argument is passed, so pandas' default (index=True) also writes
# the row index as the first CSV column.
dst_data.to_csv(path_or_buf="dst_data.csv")
dst_data

Unnamed: 0,team,G,PA,Yds,Ply,Y/P,TO,FL,1stD,Cmp,...,Y/A,1stD.1,Pen,Yds.1,1stPy,Sc%,TO%,EXP,season,fantasy_points
0,nwe,16,250,5223,998,5.2,23,10,294,368,...,3.9,81,113,930,23,26.8,11.2,108.15,2016,126.0
1,nyg,16,284,5435,1062,5.1,25,8,298,369,...,3.6,82,104,853,25,29.9,12.2,98.26,2016,141.0
2,sea,16,292,5099,1020,5.0,19,8,307,331,...,3.4,94,103,925,33,28.3,10.3,69.70,2016,116.0
3,den,16,297,5057,1076,4.7,27,13,306,306,...,4.3,101,110,990,36,29.4,12.2,144.49,2016,152.0
4,dal,16,306,5503,1009,5.5,20,11,330,425,...,3.9,79,90,784,25,35.1,11.3,-27.42,2016,91.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,clt,17,415,5947,1137,5.2,24,9,346,370,...,4.1,123,100,843,31,38.9,11.8,-38.18,2023,139.0
252,car,17,416,4997,1016,4.9,11,3,311,309,...,4.1,122,89,679,27,37.7,5.5,-56.65,2023,71.0
253,phi,17,428,6054,1105,5.5,18,9,364,425,...,4.3,111,107,810,31,41.1,9.7,-121.62,2023,101.0
254,crd,17,455,6047,1060,5.7,17,6,369,347,...,4.7,141,92,765,30,44.0,8.6,-160.50,2023,65.0
