## Overview
This script analyzes how NFL offenses have changed over time and explores how different stats can impact a team's record. The data was collected from NFL Reference using web scraping techniques in Python, such as BeautifulSoup and Selenium web drivers.

Loading Team Data from JSON File

In [2]:
import json

# Read the scraped team statistics; later cells index the result as
# team_data_dict[season][team][stat].
data_file = 'team_stats_2003_2022.json'
# Decode explicitly as UTF-8 so the load does not depend on the platform's
# default text encoding.
with open(data_file, 'r', encoding='utf-8') as in_file:
    team_data_dict = json.load(in_file)

Passing to Rushing Ratio over last 20 years

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates

# For each season, average the per-team ratio of passing yards to rushing yards.
# The season labels come from the same keys that drive the loop, so every
# average stays paired with its own season regardless of the order or range of
# seasons stored in the file.
years = []
pass_to_rush_yds_ratio = []
for year, teams in team_data_dict.items():
    team_yds_ratio = [
        float(stats["pass_yds"]) / float(stats["rush_yds"])
        for stats in teams.values()
    ]
    # NOTE(review): assumes season keys are year strings such as "2003" -- confirm against the JSON.
    years.append(int(year))
    pass_to_rush_yds_ratio.append(sum(team_yds_ratio) / len(team_yds_ratio))
df = pd.DataFrame({"Years": years, "Pass:Rush Yds Ratio": pass_to_rush_yds_ratio})
# Convert to datetimes so matplotlib's date locator/formatter can place one tick per year.
df['Years'] = pd.to_datetime(df['Years'], format='%Y')

sns.set(style="whitegrid")
sns.lineplot(x="Years", y="Pass:Rush Yds Ratio", data=df, linewidth=2.5, marker='o', color='blue')
plt.title('Passing to Rushing Yard Ratio by Year', fontsize=15)
plt.xlabel('Year', fontsize=12)
plt.ylabel('Avg Pass:Rushing Yds', fontsize=12)
# Show each tick as a bare four-digit year, one tick per year.
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
plt.xticks(rotation=45)
plt.show()

In [None]:
import statsmodels.api as sm 

# Clear the current figure before drawing this cell's plot.
plt.clf()

# Flatten the nested {season: {team: stats}} mapping into one entry per team-season.
team_seasons = [
    (season, club, stats)
    for season, clubs in team_data_dict.items()
    for club, stats in clubs.items()
]
team_year = [f"{season} {club}" for season, club, _ in team_seasons]
team_yds_ratio = [
    float(stats["pass_yds"]) / float(stats["rush_yds"])
    for _, _, stats in team_seasons
]
team_win_perc = [float(stats["win_loss_perc"]) for _, _, stats in team_seasons]

df = pd.DataFrame(
    {
        "Team": team_year,
        "Pass:Rush Yds": team_yds_ratio,
        "Win %": team_win_perc,
    }
)

# Ordinary least squares of win percentage on the pass:rush ratio; add_constant
# supplies the intercept column.
X = sm.add_constant(df["Pass:Rush Yds"])
model = sm.OLS(df["Win %"], X)
results = model.fit()

print(results.summary())

# Scatter every team-season so the spread behind the regression is visible.
sns.set(style="whitegrid")
ax = sns.scatterplot(x="Pass:Rush Yds", y="Win %", data=df, color='blue')
ax.set_title('Pass:Rush vs Win Percentage', fontsize=15)
ax.set_xlabel('Pass:Rush Yds', fontsize=12)
ax.set_ylabel('Win %', fontsize=12)
plt.xticks(rotation=45)
plt.show()