In [22]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

# Team abbreviations as they appear in the site's URL paths, in scrape order
teams = [
    "crd", "atl", "rav", "buf", "car", "chi", "cin", "cle",
    "dal", "den", "det", "gnb", "htx", "clt", "jax", "kan",
    "rai", "sdg", "ram", "mia", "min", "nwe", "nor", "nyg",
    "nyj", "phi", "pit", "sfo", "sea", "tam", "oti", "was",
]
# Seasons 2013 through 2023, kept as strings because they are spliced into the URL
years = [str(season) for season in range(2013, 2024)]

# Function to scrape data
def scrape_data(team, year, timeout=30):
    """Download one team-season page and extract a handful of team totals.

    Args:
        team: Team abbreviation placed in the URL path (e.g. "atl").
        year: Season placed in the URL path.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout a single stalled connection would block the
            whole scraping run indefinitely.

    Returns:
        A dict with the keys 'team', 'year', 'offensive_points_per_drive',
        'defensive_points_per_drive', 'total_offense_points',
        'team_rush_yards', 'team_total_yards' and 'team_passing_yards'.
        Statistic values are the raw cell text (strings); a statistic that
        could not be located on the page stays None.
        Returns None when the request fails or the status code is not 200.
    """
    # Adjust URL to include the team and year dynamically
    url = f"https://www.pro-football-reference.com/teams/{team}/{year}.htm"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network problems (timeouts, DNS, refused connections) are reported
        # like any other failed fetch so the caller can move on to the next page.
        print(f"Failed to retrieve data for {team} {year}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data for {team} {year}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    stats = {
        'team': team,
        'year': year,
        'offensive_points_per_drive': None,
        'defensive_points_per_drive': None,
        'total_offense_points': None,
        'team_rush_yards': None,
        'team_total_yards': None,
        'team_passing_yards': None
    }

    # Only the first <tbody> in the document is inspected.
    # NOTE(review): this assumes the team summary table comes first on the page - confirm.
    body = soup.find('tbody')
    if body is None:
        return stats

    for row in body.find_all('tr'):
        header = row.find('th', {"scope": "row"})
        label = header.text if header is not None else None
        if label == "Team Stats":
            stats['offensive_points_per_drive'] = _cell_text(row, 'points_avg')
            stats['total_offense_points'] = _cell_text(row, 'points')
            stats['team_total_yards'] = _cell_text(row, 'total_yards')
            stats['team_passing_yards'] = _cell_text(row, 'pass_yds')
            stats['team_rush_yards'] = _cell_text(row, 'rush_yds')
        elif label == "Opp. Stats":
            stats['defensive_points_per_drive'] = _cell_text(row, 'points_avg')

    return stats


def _cell_text(row, data_stat):
    """Return the text of the row's <td> with the given data-stat, or None if absent."""
    cell = row.find('td', {'data-stat': data_stat})
    return cell.text if cell is not None else None

# Visit every team/season combination (teams outermost) and keep each successful result
all_data = []
for team, year in ((club, season) for club in teams for season in years):
    print(f"Scraping data for {team} {year}...")
    data = scrape_data(team, year)
    if data:
        all_data.append(data)
    time.sleep(4)  # Pause between requests so the server is not hammered

# One row per scraped team-season
AVgrades = pd.DataFrame(all_data)

# Show the collected table
print(AVgrades)

# Optional: persist the raw scrape so it does not have to be repeated
# AVgrades.to_csv('NFL_team_stats.csv', index=False)


Scraping data for crd 2013...
Scraping data for crd 2014...
Scraping data for crd 2015...
Scraping data for crd 2016...
Scraping data for crd 2017...
Scraping data for crd 2018...
Scraping data for crd 2019...
Scraping data for crd 2020...
Scraping data for crd 2021...
Scraping data for crd 2022...
Scraping data for crd 2023...
Scraping data for atl 2013...
Scraping data for atl 2014...
Scraping data for atl 2015...
Scraping data for atl 2016...
Scraping data for atl 2017...
Scraping data for atl 2018...
Scraping data for atl 2019...
Scraping data for atl 2020...
Scraping data for atl 2021...
Scraping data for atl 2022...
Scraping data for atl 2023...
Scraping data for rav 2013...
Scraping data for rav 2014...
Scraping data for rav 2015...
Scraping data for rav 2016...
Scraping data for rav 2017...
Scraping data for rav 2018...
Scraping data for rav 2019...
Scraping data for rav 2020...
Scraping data for rav 2021...
Scraping data for rav 2022...
Scraping data for rav 2023...
Scraping d

KeyboardInterrupt: 

In [None]:
# Order rows by season first, then by team abbreviation
AVgrades = AVgrades.sort_values(by=['year', 'team'])

# Columns that must be numeric before any arithmetic is done on them
columns_to_convert = [
    'year',
    'offensive_points_per_drive',
    'defensive_points_per_drive',
    'total_offense_points',
    'team_rush_yards',
    'team_total_yards',
    'team_passing_yards',
]

# Values that cannot be parsed as numbers become NaN (errors='coerce')
AVgrades[columns_to_convert] = AVgrades[columns_to_convert].apply(pd.to_numeric, errors='coerce')

# Derived columns are added in order; each lambda receives the frame built so far,
# so a later column can depend on one defined just above it.
# NOTE(review): the weights 5/11, 0.22, 0.37, 0.26 and 0.74 are kept exactly as given;
# their origin is not documented in this notebook - confirm the source.
AVgrades = AVgrades.assign(
    # Per-season mean across all teams, broadcast back onto every row
    league_avg_offensive_points_per_drive=lambda df: df.groupby('year')['offensive_points_per_drive'].transform('mean'),
    league_avg_defensive_points_per_drive=lambda df: df.groupby('year')['defensive_points_per_drive'].transform('mean'),
    # Offense relative to the season average, scaled so that average == 100
    team_offense_points=lambda df: (100 * df['offensive_points_per_drive']) / df['league_avg_offensive_points_per_drive'],
    # Fixed share for the offensive line; the remainder goes to skill positions
    team_points_for_o_line=lambda df: (5/11) * df["team_offense_points"],
    team_points_for_skill_positions=lambda df: df["team_offense_points"] - df["team_points_for_o_line"],
    # Rushers' share scales with the fraction of total yards gained on the ground
    team_points_for_rushers=lambda df: df["team_points_for_skill_positions"] * 0.22 * (df["team_rush_yards"]/df["team_total_yards"]) / 0.37,
    # Whatever the rushers do not get is split 26% / 74% between passers and receivers
    team_points_for_passers=lambda df: (df["team_points_for_skill_positions"] - df["team_points_for_rushers"]) * 0.26,
    team_points_for_receivers=lambda df: (df["team_points_for_skill_positions"] - df["team_points_for_rushers"]) * 0.74,
    # M: points allowed per drive relative to the season average
    M=lambda df: df["defensive_points_per_drive"] / df['league_avg_defensive_points_per_drive'],
    team_defense_points=lambda df: 100 * ((1+2*df["M"]-df["M"]**2) / (2*df["M"])),
)

# Discard any column whose name contains 'Unnamed'
unnamed_cols = list(filter(lambda col: 'Unnamed' in col, AVgrades.columns))
AVgrades = AVgrades.drop(columns=unnamed_cols)

AVgrades


Unnamed: 0,team,year,offensive_points_per_drive,defensive_points_per_drive,total_offense_points,team_rush_yards,team_total_yards,team_passing_yards,league_avg_offensive_points_per_drive,league_avg_defensive_points_per_drive,team_offense_points,team_points_for_o_line,team_points_for_skill_positions,team_points_for_rushers,team_points_for_passers,team_points_for_receivers,M,team_defense_points
11,atl,2013,1.85,2.38,353,1247,5490,4243,1.81375,1.810000,101.998622,46.363010,55.635612,7.513944,12.511634,35.610034,1.314917,72.279354
33,buf,2013,1.48,1.68,339,2307,5410,3103,1.81375,1.810000,81.598897,37.090408,44.508489,11.285327,8.638022,24.585140,0.928177,107.460208
44,car,2013,1.99,1.33,366,2026,5069,3043,1.81375,1.810000,109.717436,49.871562,59.845874,14.222381,11.862108,33.761385,0.734807,131.304781
55,chi,2013,2.16,2.36,445,1828,6109,4281,1.81375,1.810000,119.090283,54.131947,64.958336,11.557447,13.884231,39.516658,1.303867,73.154087
66,cin,2013,1.91,1.39,430,1755,5891,4136,1.81375,1.810000,105.306685,47.866675,57.440010,10.174746,12.288969,34.976296,0.767956,126.710124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,sdg,2023,1.78,2.01,346,1642,5599,3957,1.89000,1.884688,94.179894,42.809043,51.370851,8.957780,11.027398,31.385673,1.066490,93.558283
318,sea,2023,1.93,2.20,364,1580,5490,3910,1.89000,1.884688,102.116402,46.416546,55.699856,9.531467,12.003781,34.164607,1.167302,84.468693
307,sfo,2023,2.70,1.71,491,2389,6773,4384,1.89000,1.884688,142.857143,64.935065,77.922078,16.342427,16.010709,45.568942,0.907312,109.742211
329,tam,2023,1.85,1.78,348,1509,5321,3812,1.89000,1.884688,97.883598,44.492544,53.391053,9.002962,11.540904,32.847187,0.944454,105.717977


In [None]:
# Receiving yards are taken to be identical to the team's passing yards
AVgrades = AVgrades.assign(team_receiving_yards=AVgrades["team_passing_yards"])

In [None]:
# Short position-group labels used in the exported files
position_group_names = {
    'team_points_for_o_line': 'oline',
    'team_points_for_rushers': 'rb',
    'team_points_for_passers': 'qb',
    'team_points_for_receivers': 'wrte',
    'team_defense_points': 'dst',
    'year': 'season',
}
AVgrades = AVgrades.rename(columns=position_group_names)

# Full table, written with pandas' default CSV options
AVgrades.to_csv("AVgrades.csv")

In [None]:
# Keep only the team, the per-position-group scores and the season
position_group_columns = ['team', 'oline', 'qb', 'rb', 'wrte', 'dst', 'season']
AVgrades1 = AVgrades.loc[:, position_group_columns]
AVgrades1.to_csv("AVbyPositionGroup.csv")