# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 1. Load Data
# One CSV per season; both are read relative to the working directory.
df23 = pd.read_csv(filepath_or_buffer='data/nfl_2023_season.csv')
df24 = pd.read_csv(filepath_or_buffer='data/nfl_2024_season.csv')

# 2. Feature Engineering
def create_features(df):
    """Add derived season columns to ``df`` in place and return it.

    Reads the ``Points``, ``PointsAgainst``, ``Wins``, ``Losses`` and
    ``Ties`` columns and writes three new ones:

    * ``PointDifferential`` -- ``Points`` minus ``PointsAgainst``
    * ``WinPct`` -- ``Wins`` divided by the number of games played
    * ``TotalGames`` -- ``Wins`` + ``Losses`` + ``Ties``

    The same DataFrame object that was passed in is returned.
    """
    point_margin = df['Points'] - df['PointsAgainst']
    df['PointDifferential'] = point_margin

    # Games played is needed for both WinPct and TotalGames, so sum it once.
    games_played = df['Wins'] + df['Losses'] + df['Ties']
    df['WinPct'] = df['Wins'].div(games_played)
    df['TotalGames'] = games_played
    return df

# Add the derived columns to each season's table.
df23, df24 = create_features(df23), create_features(df24)

# Merge to track improvement
# Rows are matched on 'Team'; every other shared column gets a season suffix.
merged = df23.merge(df24, on='Team', suffixes=('_2023', '_2024'))
merged['YearOverYear_WinChange'] = merged['Wins_2024'].sub(merged['Wins_2023'])
merged['YearOverYear_PtDiffChange'] = merged['PointDifferential_2024'].sub(
    merged['PointDifferential_2023']
)

# 3. Prepare Training Data
# Feature columns, grouped by where they come from in the merged table.
prior_season_cols = [
    'Points_2023', 'PointsAgainst_2023', 'Yards_2023', 'Turnovers_2023', 'PointDifferential_2023', 'WinPct_2023',
]
current_season_cols = [
    'Points_2024', 'PointsAgainst_2024', 'Yards_2024', 'Turnovers_2024', 'PointDifferential_2024', 'WinPct_2024',
]
year_over_year_cols = ['YearOverYear_WinChange', 'YearOverYear_PtDiffChange']
features = prior_season_cols + current_season_cols + year_over_year_cols

# NOTE(review): 'WinPct_2024' and 'YearOverYear_WinChange' are both computed
# from 'Wins_2024', which is also the target selected below.
X = merged.loc[:, features]
y = merged.loc[:, 'Wins_2024']

# 4. EDA
# Histogram of the training target (2024 wins per team).
wins_fig, wins_ax = plt.subplots(figsize=(8,4))
sns.histplot(y, bins=8, ax=wins_ax)
wins_ax.set_title('2024 Team Wins Distribution')
plt.show()

# Pairwise correlations between the feature columns and the target.
corr_matrix = merged[features + ['Wins_2024']].corr()
corr_ax = sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
corr_ax.set_title('Feature Correlations')
plt.show()

# 5. Modeling
# 'WinPct_2024' and 'YearOverYear_WinChange' are both computed from
# 'Wins_2024', which is the target `y`. Training on them hands the model the
# answer, so they are excluded from the columns the forest is fitted on.
target_derived = {'WinPct_2024', 'YearOverYear_WinChange'}
model_features = [name for name in features if name not in target_derived]

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X[model_features], y)

# Example simulation for 2025 (small tweaks to stats)
# A seeded generator keeps the simulated season reproducible between runs,
# matching the fixed random_state used for the forest above.
rng = np.random.default_rng(42)
df25 = df24.copy()
n_teams = len(df25)
# Generator.integers excludes the upper bound by default, like
# np.random.randint, so the perturbation ranges are the same as before.
df25['Points'] += rng.integers(-20, 20, size=n_teams)
df25['PointsAgainst'] += rng.integers(-15, 15, size=n_teams)
df25['Yards'] += rng.integers(-80, 80, size=n_teams)
df25['Turnovers'] += rng.integers(-1, 2, size=n_teams)

df25 = create_features(df25)
# Wins are not perturbed above, so this column is zero for every team. It is
# kept on df25 for reference but is not a model input.
df25['YearOverYear_WinChange'] = df25['Wins'] - df24['Wins']
df25['YearOverYear_PtDiffChange'] = df25['PointDifferential'] - df24['PointDifferential']

# Build the projection inputs with exactly the column names and order used at
# fit time; scikit-learn validates feature names when predicting from a
# DataFrame. The training layout is (prior season, current season, change),
# so for 2025 the prior-season slots ('*_2023') take the actual 2024 values
# and the current-season slots ('*_2024') take the simulated 2025 values.
prior_suffix, current_suffix = '_2023', '_2024'
projection_inputs = {}
for name in model_features:
    if name.endswith(prior_suffix):
        projection_inputs[name] = df24[name[:-len(prior_suffix)]]
    elif name.endswith(current_suffix):
        projection_inputs[name] = df25[name[:-len(current_suffix)]]
    else:
        # Year-over-year columns carry the same name on df25.
        projection_inputs[name] = df25[name]
X25 = pd.DataFrame(projection_inputs, index=df25.index)[model_features]

df25['Projected_Wins_2025'] = rf.predict(X25)

# Visualization
# Rank teams once by projected wins (highest first); the chart and the
# top-5 table below both read from this ordering.
ranked = df25.sort_values('Projected_Wins_2025', ascending=False)

proj_fig, proj_ax = plt.subplots(figsize=(10,5))
sns.barplot(x='Team', y='Projected_Wins_2025', data=ranked, ax=proj_ax)
plt.xticks(rotation=75)
proj_ax.set_title('2025 NFL Projected Wins per Team')
proj_ax.set_ylabel('Projected Wins')
plt.show()

# Top 5
print("Top 5 Projected Teams for 2025:")
top_five = ranked[['Team', 'Projected_Wins_2025']].head(5)
print(top_five)
