<a href="https://colab.research.google.com/github/natelove02/NFL-Model/blob/main/NFL_Betting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from datetime import datetime

def convert_time_to_seconds(time_str):
    """Convert a ``MM:SS`` string to a total number of seconds.

    Args:
        time_str: Text such as ``"31:45"``.

    Returns:
        ``minutes * 60 + seconds`` as an int, or 0 when the value cannot be
        parsed (not a string, wrong number of ``:``-separated parts, or
        non-integer parts). The 0 fallback is kept so callers can apply this
        function to a whole column without guarding each cell.
    """
    try:
        minutes, seconds = map(int, time_str.split(':'))
    except (AttributeError, TypeError, ValueError):
        # Only parse failures are swallowed; a bare ``except`` would also
        # hide KeyboardInterrupt / SystemExit.
        return 0
    return minutes * 60 + seconds

def convert_game_time_to_hours(time_str):
    """Convert a 12-hour clock string to decimal hours on a 24-hour scale.

    Args:
        time_str: Text such as ``"4:25 PM"``; surrounding whitespace and
            letter case are ignored.

    Returns:
        Hour plus fractional minutes (``"4:25 PM"`` -> ``16.4166...``), or 0
        when the value cannot be parsed with the ``%I:%M %p`` format.
    """
    try:
        normalized = time_str.strip().upper()
        parsed = pd.to_datetime(normalized, format='%I:%M %p').time()
    except (AttributeError, TypeError, ValueError):
        # Non-string input or text that does not match the expected format.
        # Only parse failures are swallowed, not KeyboardInterrupt/SystemExit.
        return 0
    return parsed.hour + parsed.minute / 60.0

class TeamStatsProcessor:
    """Load historical game rows from a CSV and derive per-team aggregates.

    The CSV is read with ``pandas.read_csv`` and must provide the columns this
    class reads: ``date``, ``time_et``, ``away``, ``home``, ``score_away``,
    ``score_home`` and, for each offensive stat, a ``<stat>_away`` /
    ``<stat>_home`` pair (see ``calculate_team_stats``).

    Attributes set on the instance:
        data: The (optionally date-filtered) game DataFrame with the added
            ``game_hour`` and ``time_bracket`` columns.
        team_stats: ``{team: {stat_name: value}}`` filled by
            ``calculate_team_stats``.
    """

    # Lower-inclusive / upper-exclusive hour boundaries shared by the
    # historical bucketing in __init__ and the lookup in get_game_features,
    # so a 17:00 kickoff lands in the same bracket in both places.
    _TIME_BRACKET_EDGES = [0, 14, 17, 20, 24]
    _TIME_BRACKET_LABELS = ['morning', 'afternoon', 'evening', 'night']

    def __init__(self, csv_path, years_of_data=3):
        """Read ``csv_path``, keep the last ``years_of_data`` years, build stats.

        Args:
            csv_path: Path passed to ``pandas.read_csv``.
            years_of_data: Number of years (counted back from the latest game
                date in the file) to keep; a falsy value keeps everything.
        """
        self.data = pd.read_csv(csv_path)

        # Convert date and time columns
        self.data['date'] = pd.to_datetime(self.data['date'])
        self.data['game_hour'] = self.data['time_et'].apply(convert_game_time_to_hours)

        # right=False gives [0,14), [14,17), [17,20), [20,24) -- the same
        # boundaries get_game_features applies with its ``<`` comparisons.
        self.data['time_bracket'] = pd.cut(
            self.data['game_hour'],
            bins=self._TIME_BRACKET_EDGES,
            labels=self._TIME_BRACKET_LABELS,
            right=False,
        )

        # Filter for last N years if specified
        if years_of_data:
            latest_date = self.data['date'].max()
            cutoff_date = latest_date - pd.DateOffset(years=years_of_data)
            # .copy() so later column assignments modify an independent frame
            # rather than a slice of the full dataset.
            self.data = self.data[self.data['date'] >= cutoff_date].copy()
            print(f"Using data from {cutoff_date.strftime('%Y-%m-%d')} to {latest_date.strftime('%Y-%m-%d')}")

        print(f"Total games in dataset: {len(self.data)}")
        self.team_stats = {}
        self.calculate_team_stats()

    @classmethod
    def _bracket_for_hour(cls, hour):
        """Return the time-bracket label for a decimal hour.

        Hours at or beyond the last edge fall into the final bracket.
        """
        for upper_edge, label in zip(cls._TIME_BRACKET_EDGES[1:], cls._TIME_BRACKET_LABELS):
            if hour < upper_edge:
                return label
        return cls._TIME_BRACKET_LABELS[-1]

    @staticmethod
    def _win_pct(away_games, home_games):
        """Return the share of the given games won, or 0.5 when there are none."""
        total_games = len(away_games) + len(home_games)
        if total_games == 0:
            return 0.5
        away_wins = int((away_games['score_away'] > away_games['score_home']).sum())
        home_wins = int((home_games['score_home'] > home_games['score_away']).sum())
        return (away_wins + home_wins) / total_games

    @staticmethod
    def _mean_of_sides(away_value, home_value):
        """Average the away-side and home-side means, ignoring a missing side.

        A team with no away (or no home) games in the window has a NaN mean
        for that side; averaging it in would turn the whole stat into NaN.
        """
        present = [value for value in (away_value, home_value) if not pd.isna(value)]
        return float(np.mean(present)) if present else float('nan')

    def calculate_team_stats(self):
        """Calculate average statistics for each team from historical data.

        Fills ``self.team_stats[team]`` with a per-bracket win percentage,
        offensive averages, ``<stat>_allowed`` defensive averages and an
        overall ``win_pct``. Each average is the mean of the team's away-game
        mean and home-game mean.
        """
        # Convert possession time to seconds. Skipped when the column is
        # already numeric so calling this method again does not zero it out.
        for side in ('away', 'home'):
            column = f'possession_{side}'
            if not pd.api.types.is_numeric_dtype(self.data[column]):
                self.data[column] = self.data[column].apply(convert_time_to_seconds)

        offensive_stats = [
            'yards', 'pass_yards', 'rush_yards', 'first_downs',
            'third_down_comp', 'third_down_att', 'possession'
        ]
        defensive_stats = ['yards', 'pass_yards', 'rush_yards', 'first_downs']

        teams = pd.concat([self.data['away'], self.data['home']]).unique()
        print(f"\nTeams in dataset: {len(teams)}")

        for team in teams:
            stats = {}

            away_games = self.data[self.data['away'] == team]
            home_games = self.data[self.data['home'] == team]

            # Win percentage per kickoff-time bracket
            for time_bracket in self._TIME_BRACKET_LABELS:
                stats[f'{time_bracket}_win_pct'] = self._win_pct(
                    away_games[away_games['time_bracket'] == time_bracket],
                    home_games[home_games['time_bracket'] == time_bracket],
                )

            # Offensive stats: the team's own column on each side
            for stat in offensive_stats:
                stats[stat] = self._mean_of_sides(
                    away_games[f'{stat}_away'].mean(),
                    home_games[f'{stat}_home'].mean(),
                )

            # Defensive stats: the opponent's column on each side
            for stat in defensive_stats:
                stats[f'{stat}_allowed'] = self._mean_of_sides(
                    away_games[f'{stat}_home'].mean(),
                    home_games[f'{stat}_away'].mean(),
                )

            stats['win_pct'] = self._win_pct(away_games, home_games)
            self.team_stats[team] = stats

    def get_game_features(self, away_team, home_team, game_time):
        """Get features for a specific matchup including game time.

        Args:
            away_team: Team name as it appears in the ``away``/``home`` columns.
            home_team: Team name as it appears in the ``away``/``home`` columns.
            game_time: Kickoff time string, parsed by
                ``convert_game_time_to_hours``.

        Returns:
            A ``pandas.Series`` of named features for the matchup.

        Raises:
            ValueError: If either team has no entry in ``self.team_stats``.
        """
        if away_team not in self.team_stats or home_team not in self.team_stats:
            raise ValueError("One or both teams not found in historical data")

        # Convert game time to decimal hours and determine time bracket
        game_hour = convert_game_time_to_hours(game_time)
        time_bracket = self._bracket_for_hour(game_hour)

        away_stats = self.team_stats[away_team]
        home_stats = self.team_stats[home_team]

        feature_dict = {
            'game_hour': game_hour,
            'away_time_win_pct': away_stats[f'{time_bracket}_win_pct'],
            'home_time_win_pct': home_stats[f'{time_bracket}_win_pct'],
            'away_yards_per_game': away_stats['yards'],
            'home_yards_per_game': home_stats['yards'],
            'away_pass_yards_per_game': away_stats['pass_yards'],
            'home_pass_yards_per_game': home_stats['pass_yards'],
            'away_rush_yards_per_game': away_stats['rush_yards'],
            'home_rush_yards_per_game': home_stats['rush_yards'],
            'away_def_yards_per_game': away_stats['yards_allowed'],
            'home_def_yards_per_game': home_stats['yards_allowed'],
            'away_win_pct': away_stats['win_pct'],
            'home_win_pct': home_stats['win_pct'],
            # Ratio of average completions to average attempts; 0 when the
            # attempts average is zero or missing.
            'away_third_down_pct': away_stats['third_down_comp'] / away_stats['third_down_att'] if away_stats['third_down_att'] > 0 else 0,
            'home_third_down_pct': home_stats['third_down_comp'] / home_stats['third_down_att'] if home_stats['third_down_att'] > 0 else 0,
            # Possession is stored in seconds; expose it in minutes.
            'away_possession': away_stats['possession'] / 60,
            'home_possession': home_stats['possession'] / 60,
        }

        return pd.Series(feature_dict)

class NFLPredictor(nn.Module):
    """Small feed-forward binary classifier.

    Two hidden layers (64 and 32 units) with ReLU and 20% dropout, followed by
    a single sigmoid output in the open interval (0, 1).
    """

    def __init__(self, input_size):
        """Create the layers for inputs with ``input_size`` features."""
        super().__init__()
        self.layer1 = nn.Linear(input_size, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, 1)`` probabilities."""
        hidden = x
        # Both hidden layers share the same linear -> ReLU -> dropout pattern.
        for linear in (self.layer1, self.layer2):
            hidden = self.dropout(self.relu(linear(hidden)))
        logits = self.layer3(hidden)
        return self.sigmoid(logits)

def prepare_training_data(stats_processor):
    """Build the feature matrix and label vector from every historical game.

    For each row of ``stats_processor.data`` the matchup features are fetched
    with ``get_game_features``; rows for which that raises ``ValueError`` are
    skipped. The label is 1 when the away score is strictly greater than the
    home score, otherwise 0.

    Returns:
        ``(X, y)`` where ``X`` is the 2-D feature array and ``y`` the label
        array, in game order.
    """
    feature_rows, labels = [], []

    for game in stats_processor.data.to_dict('records'):
        try:
            row = stats_processor.get_game_features(
                game['away'], game['home'], game['time_et']
            )
        except ValueError:
            # Team missing from the aggregated stats: leave this game out.
            continue
        feature_rows.append(row)
        labels.append(int(game['score_away'] > game['score_home']))

    return pd.DataFrame(feature_rows).values, np.array(labels)

def train_model(X, y, epochs=1000, learning_rate=0.001):
    """Fit an ``NFLPredictor`` on ``X``/``y`` and report hold-out accuracy.

    The data is split 80/20 with a fixed ``random_state`` of 42, standardized
    with a ``StandardScaler`` fitted on the training part only, and trained
    full-batch with Adam and binary cross-entropy. The loss is printed every
    10 epochs and the test accuracy once at the end.

    Args:
        X: 2-D array-like of features.
        y: 1-D array-like of 0/1 labels.
        epochs: Number of full-batch optimization steps.
        learning_rate: Adam learning rate.

    Returns:
        ``(model, scaler)`` -- the trained network and the fitted scaler.
    """
    features = torch.FloatTensor(X)
    targets = torch.FloatTensor(y).reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    train_inputs = torch.FloatTensor(scaler.fit_transform(X_train))
    test_inputs = torch.FloatTensor(scaler.transform(X_test))

    model = NFLPredictor(train_inputs.shape[1])
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for step in range(1, epochs + 1):
        model.train()
        loss = criterion(model(train_inputs), y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 10 == 0:
            print(f'Epoch [{step}/{epochs}], Loss: {loss.item():.4f}')

    model.eval()
    with torch.no_grad():
        predicted_labels = (model(test_inputs) >= 0.5).float()
        accuracy = (predicted_labels == y_test).float().mean()
        print(f'Test Accuracy: {accuracy.item():.4f}')

    return model, scaler

def predict_game(model, scaler, stats_processor, away_team, home_team, game_time):
    """Predict the winner of one matchup with a trained model.

    The matchup features come from ``stats_processor.get_game_features`` and
    are passed through ``scaler.transform`` before the model is evaluated.
    The model output is read as the probability that the away team wins.

    Returns:
        A dict with ``winner``, ``win_probability`` (the winner's
        probability), ``away_win_probability`` and ``home_win_probability``.
    """
    matchup = stats_processor.get_game_features(away_team, home_team, game_time)
    scaled = torch.FloatTensor(scaler.transform([matchup]))

    model.eval()
    with torch.no_grad():
        away_probability = model(scaled).item()

    home_probability = 1 - away_probability
    away_favored = away_probability >= 0.5

    if away_favored:
        winner, winner_probability = away_team, away_probability
    else:
        winner, winner_probability = home_team, home_probability

    return {
        'winner': winner,
        'win_probability': winner_probability,
        'away_win_probability': away_probability,
        'home_win_probability': home_probability
    }

# Example usage
if __name__ == "__main__":
    # Initialize. The look-back window is named once so the stats heading
    # printed below always matches the data that was actually loaded.
    csv_path = "NFLstats.csv"
    years_of_data = 1
    stats_processor = TeamStatsProcessor(csv_path, years_of_data=years_of_data)

    # Prepare and train model
    X, y = prepare_training_data(stats_processor)
    model, scaler = train_model(X, y, epochs=500)

    stats_window = f"Last {years_of_data} Year{'' if years_of_data == 1 else 's'}"

    # Interactive prediction loop: typing 'quit' at any prompt exits.
    while True:
        print("\nEnter team names and game time (or 'quit' to exit):")
        # Inputs are stripped so stray spaces do not cause a failed team lookup.
        away_team = input("Away team: ").strip()
        if away_team.lower() == 'quit':
            break

        home_team = input("Home team: ").strip()
        if home_team.lower() == 'quit':
            break

        game_time = input("Game time (e.g., '1:00 PM', '4:25 PM', '8:20 PM'): ").strip()
        if game_time.lower() == 'quit':
            break

        try:
            prediction = predict_game(model, scaler, stats_processor, away_team, home_team, game_time)
        except ValueError as e:
            # Raised when a team name is not present in the historical data.
            print(f"Error: {e}")
            continue

        print(f"\nPrediction for {away_team} @ {home_team} ({game_time} ET):")
        print(f"Predicted Winner: {prediction['winner']}")
        print(f"Win Probability: {prediction['win_probability']:.2%}")
        print("\nDetailed Probabilities:")
        print(f"{away_team} Win Probability: {prediction['away_win_probability']:.2%}")
        print(f"{home_team} Win Probability: {prediction['home_win_probability']:.2%}")

        # Print team stats
        away_stats = stats_processor.team_stats[away_team]
        home_stats = stats_processor.team_stats[home_team]

        print(f"\nTeam Stats ({stats_window}):")
        print(f"{away_team}:")
        print(f"Win %: {away_stats['win_pct']:.1%}")
        print(f"Yards Per Game: {away_stats['yards']:.1f}")

        print(f"\n{home_team}:")
        print(f"Win %: {home_stats['win_pct']:.1%}")
        print(f"Yards Per Game: {home_stats['yards']:.1f}")

Using data from 2023-02-11 to 2024-02-11
Total games in dataset: 286

Teams in dataset: 32
Epoch [10/500], Loss: 0.6641
Epoch [20/500], Loss: 0.6281
Epoch [30/500], Loss: 0.5787
Epoch [40/500], Loss: 0.5413
Epoch [50/500], Loss: 0.5019
Epoch [60/500], Loss: 0.4576
Epoch [70/500], Loss: 0.4523
Epoch [80/500], Loss: 0.4160
Epoch [90/500], Loss: 0.4128
Epoch [100/500], Loss: 0.3882
Epoch [110/500], Loss: 0.3571
Epoch [120/500], Loss: 0.3666
Epoch [130/500], Loss: 0.3484
Epoch [140/500], Loss: 0.3229
Epoch [150/500], Loss: 0.3116
Epoch [160/500], Loss: 0.2984
Epoch [170/500], Loss: 0.2956
Epoch [180/500], Loss: 0.2890
Epoch [190/500], Loss: 0.2909
Epoch [200/500], Loss: 0.2790
Epoch [210/500], Loss: 0.2583
Epoch [220/500], Loss: 0.2406
Epoch [230/500], Loss: 0.2426
Epoch [240/500], Loss: 0.2365
Epoch [250/500], Loss: 0.2496
Epoch [260/500], Loss: 0.2106
Epoch [270/500], Loss: 0.2246
Epoch [280/500], Loss: 0.2033
Epoch [290/500], Loss: 0.1982
Epoch [300/500], Loss: 0.2312
Epoch [310/500], L

KeyboardInterrupt: Interrupted by user

In [None]:
import pandas as pd

# Build a small per-team defensive stats table by hand and write it to
# 'defense.csv'. Every list below has one entry per team, in the same order
# as "Tm" (32 entries each).
# NOTE(review): the column meanings are not stated here; "PA", "Tot Yds",
# "Ply" and "TO" look like per-game figures (e.g. 12.6, 303.8) -- confirm
# against the source table before dividing them by "G" elsewhere.
# NOTE(review): this file only contains the eight columns listed below, and
# total yards is named "Tot Yds" (not "Yds"); code that reads other column
# names from defense.csv will not find them here.
data = {
    "Rk": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
           21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32],
    "Tm": ["Los Angeles Chargers", "Pittsburgh Steelers", "Kansas City Chiefs", "Chicago Bears", "Detroit Lions",
           "Minnesota Vikings", "Philadelphia Eagles", "Denver Broncos", "Buffalo Bills", "San Francisco 49ers",
           "New York Jets", "Miami Dolphins", "Washington Commanders", "Indianapolis Colts", "Green Bay Packers",
           "Los Angeles Rams", "Houston Texans", "New York Giants", "Cleveland Browns", "Tennessee Titans",
           "Arizona Cardinals", "Atlanta Falcons", "New England Patriots", "Baltimore Ravens", "Seattle Seahawks",
           "Dallas Cowboys", "Cincinnati Bengals", "New Orleans Saints", "Tampa Bay Buccaneers", "Las Vegas Raiders",
           "Jacksonville Jaguars", "Carolina Panthers"],
    "G": [8, 8, 8, 8, 8, 8, 8, 9, 9, 8, 9, 8, 9, 9, 9, 8, 9, 9, 9, 8, 9, 9, 9, 9, 9, 8, 9, 9, 9, 9, 9, 9],
    "PA": [12.6, 14.9, 18.4, 18.5, 18.5, 18.8, 19.4, 17.9, 19.2, 22.8, 20.3, 23.4, 21.0, 21.4, 21.6, 24.3, 22.2, 22.4,
           23.7, 26.6, 23.8, 24.0, 24.1, 24.3, 24.6, 28.1, 25.2, 25.4, 27.0, 27.9, 28.0, 32.6],
    "Tot Yds": [303.8, 310.3, 293.6, 322.9, 357.1, 331.9, 290.1, 295.2, 333.3, 314.8, 290.8, 303.1, 326.3, 383.6,
                320.0, 360.8, 281.7, 334.1, 328.2, 269.1, 361.4, 351.4, 361.4, 356.6, 357.6, 364.8, 342.9, 376.4,
                386.7, 325.0, 389.3, 391.9],
    "Ply": [60.0, 59.0, 57.4, 60.1, 62.8, 64.3, 57.8, 62.7, 61.9, 59.6, 62.7, 56.8, 57.3, 68.2, 60.4, 62.4, 58.1, 59.1,
            59.7, 56.8, 64.0, 65.7, 63.3, 63.3, 64.6, 61.8, 66.2, 62.9, 67.0, 61.7, 64.7, 66.1],
    "Y/P": [5.1, 5.3, 5.1, 5.4, 5.7, 5.2, 5.0, 4.7, 5.4, 5.3, 4.6, 5.3, 5.7, 5.6, 5.3, 5.8, 4.8, 5.7, 5.5, 4.7, 5.6,
            5.4, 5.7, 5.6, 5.5, 5.9, 5.2, 6.0, 5.8, 5.3, 6.0, 5.9],
    "TO": [1.63, 1.88, 1.00, 1.88, 2.00, 2.13, 0.88, 1.33, 1.67, 1.88, 0.78, 0.75, 0.89, 1.67, 2.11, 1.38, 1.44, 0.78,
           0.56, 0.75, 1.11, 1.11, 0.89, 1.00, 1.00, 0.75, 1.11, 1.56, 1.22, 0.56, 0.56, 0.89],
    # Additional columns would be added here as needed...
}

# Convert the data to a DataFrame (one row per team, one column per key above)
df = pd.DataFrame(data)

# Save to CSV in the current working directory, without the row index
csv_path = 'defense.csv'
df.to_csv(csv_path, index=False)

In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

def get_team_averages(offense_df):
    """Return each team's scoring average with a +/-40% band around it.

    Args:
        offense_df: Frame with a ``Tm`` (team name) and ``PF`` column; ``PF``
            is used as-is as the points-per-game average.

    Returns:
        DataFrame with columns ``Team``, ``Avg_Points``, ``Max_Points``
        (140% of the average) and ``Min_Points`` (60% of the average).
    """
    averages = pd.DataFrame({
        'Team': offense_df['Tm'],
        'Avg_Points': offense_df['PF'],
    })
    # Ceiling and floor are expressed as fixed fractions of the average.
    for column, factor in (('Max_Points', 1.4), ('Min_Points', 0.6)):
        averages[column] = averages['Avg_Points'] * factor
    return averages

def prepare_data(offense_df, defense_df):
    """Combine offensive and defensive team tables into one feature table.

    Args:
        offense_df: One row per team with the columns ``Tm``, ``G``, ``PF``,
            ``Tot Yds``, ``Y/P``, ``Sc%``, ``Passing Yds``, ``Passing TD``,
            ``Passing Cmp``, ``Passing Att``, ``Passing Int``,
            ``Passing NY/A``, ``Rushing Yds``, ``Rushing TD``,
            ``Rushing Y/A``, ``1stD`` and ``TO%``.
        defense_df: One row per team with the columns ``Tm``, ``G``, ``PA``,
            ``Y/P``, ``Sc%``, ``TD``, ``Int``, ``NY/A``, ``Y/A``, ``1stD``,
            ``TO%``, ``EXP`` and a total-yards column named either ``Yds`` or
            ``Tot Yds``. ``Passing Yds`` / ``Rushing Yds`` are used for the
            pass/rush features when present.

    Returns:
        DataFrame with a ``Team`` column followed by the ``Off_*`` and
        ``Def_*`` features, containing only teams present in both inputs
        (inner merge on ``Team``).

    Raises:
        KeyError: If a required column is missing; the message lists every
            missing column of the offending table.
    """
    offense_required = [
        'Tm', 'G', 'PF', 'Tot Yds', 'Y/P', 'Sc%',
        'Passing Yds', 'Passing TD', 'Passing Cmp', 'Passing Att',
        'Passing Int', 'Passing NY/A',
        'Rushing Yds', 'Rushing TD', 'Rushing Y/A',
        '1stD', 'TO%',
    ]
    defense_required = [
        'Tm', 'G', 'PA', 'Y/P', 'Sc%', 'TD', 'Int', 'NY/A', 'Y/A',
        '1stD', 'TO%', 'EXP',
    ]

    def require(df, columns, label):
        # Fail once, up front, naming every missing column.
        missing = [name for name in columns if name not in df.columns]
        if missing:
            raise KeyError(f"{label} is missing required column(s): {missing}")

    require(offense_df, offense_required, 'offense_df')
    require(defense_df, defense_required, 'defense_df')

    # The total-yards column is 'Yds' in some exports and 'Tot Yds' in others.
    if 'Yds' in defense_df.columns:
        def_total_yds = 'Yds'
    elif 'Tot Yds' in defense_df.columns:
        def_total_yds = 'Tot Yds'
    else:
        raise KeyError("defense_df is missing a total-yards column ('Yds' or 'Tot Yds')")

    # Prefer dedicated passing/rushing yardage columns; fall back to total
    # yards (the previous behaviour) when the table does not provide them.
    def_pass_yds = 'Passing Yds' if 'Passing Yds' in defense_df.columns else def_total_yds
    def_rush_yds = 'Rushing Yds' if 'Rushing Yds' in defense_df.columns else def_total_yds

    off_games = offense_df['G']
    def_games = defense_df['G']

    # Prepare offensive dataframe with comprehensive per-game statistics
    offense = pd.DataFrame()
    offense['Team'] = offense_df['Tm']

    # Core offensive stats - these columns are used as-is (treated as per game)
    offense['Off_PPG'] = offense_df['PF']
    offense['Off_YPG'] = offense_df['Tot Yds']
    offense['Off_YPP'] = offense_df['Y/P']
    offense['Off_Score_Pct'] = offense_df['Sc%']

    # Passing efficiency
    offense['Off_Pass_YPG'] = offense_df['Passing Yds']
    offense['Off_Pass_TD_Rate'] = offense_df['Passing TD'] / off_games
    offense['Off_Comp_Pct'] = (offense_df['Passing Cmp'] / offense_df['Passing Att']) * 100
    offense['Off_Int_Rate'] = offense_df['Passing Int'] / off_games
    offense['Off_NY/A'] = offense_df['Passing NY/A']

    # Running efficiency
    offense['Off_Rush_YPG'] = offense_df['Rushing Yds']
    offense['Off_Rush_TD_Rate'] = offense_df['Rushing TD'] / off_games
    offense['Off_Rush_YPA'] = offense_df['Rushing Y/A']

    # Offensive efficiency
    offense['Off_1stD_Rate'] = offense_df['1stD'] / off_games
    offense['Off_TO_Rate'] = offense_df['TO%']

    # Prepare defensive dataframe
    defense = pd.DataFrame()
    defense['Team'] = defense_df['Tm']

    # Core defensive stats - totals divided by games played.
    # NOTE(review): the offense side treats PF and yardage as already per
    # game; confirm the defense table really holds season totals, otherwise
    # these divisions shrink the values by a factor of G.
    defense['Def_PPG'] = defense_df['PA'] / def_games
    defense['Def_YPG'] = defense_df[def_total_yds] / def_games
    defense['Def_YPP'] = defense_df['Y/P']
    defense['Def_Score_Pct'] = defense_df['Sc%']

    # Defense efficiency
    defense['Def_Pass_YPG'] = defense_df[def_pass_yds] / def_games
    defense['Def_TD_Rate'] = defense_df['TD'] / def_games
    defense['Def_Int_Rate'] = defense_df['Int'] / def_games
    defense['Def_NY/A'] = defense_df['NY/A']
    defense['Def_Rush_YPG'] = defense_df[def_rush_yds] / def_games
    defense['Def_Rush_YPA'] = defense_df['Y/A']
    defense['Def_1stD_Rate'] = defense_df['1stD'] / def_games
    defense['Def_TO_Rate'] = defense_df['TO%']
    defense['Def_EXP'] = defense_df['EXP']

    # Merge offensive and defensive stats (teams missing from either side drop out)
    team_stats = pd.merge(offense, defense, on='Team')

    return team_stats

def predict_game(team1, team2, team_stats, team_averages, scaler, model_scored, model_allowed, home_team=None):
    """Predict the final score of ``team1`` vs ``team2``.

    Each team's base score is half the ``model_scored`` prediction for its
    own scaled feature row and half a matchup term (own ``Off_PPG`` scaled by
    the opponent's ``Def_PPG`` relative to the league mean). The base is then
    multiplied by an efficiency factor, the home team gets a 5% bonus, and
    the gap between the two scores is widened by a strength-based multiplier.

    Args:
        team1, team2: Names matching the ``Team`` column of ``team_stats``.
        team_stats: Feature table with a ``Team`` column plus the ``Off_*`` /
            ``Def_*`` columns read below.
        team_averages: Accepted for interface compatibility; not used in the
            calculation.
        scaler: Object with ``transform`` applied to a ``(1, n_features)`` row.
        model_scored: Object whose ``predict`` returns the points estimate.
        model_allowed: Accepted for interface compatibility; not used in the
            calculation.
        home_team: ``team1``, ``team2`` or ``None`` for no home bonus.

    Returns:
        ``(team1_final, team2_final)`` predicted scores.

    Raises:
        IndexError: If either team is not present in ``team_stats``.
        ValueError: If ``home_team`` is given but is neither ``team1`` nor
            ``team2`` (previously such a value silently boosted ``team2``).
    """
    if home_team and home_team not in (team1, team2):
        raise ValueError(
            f"home_team must be {team1!r}, {team2!r} or None, got {home_team!r}"
        )

    def stats_for(team):
        # Explicit lookup so an unknown team is reported by name.
        rows = team_stats[team_stats['Team'] == team]
        if rows.empty:
            raise IndexError(f"Team {team!r} not found in team_stats")
        return rows.iloc[0]

    team1_stats = stats_for(team1)
    team2_stats = stats_for(team2)

    # Prepare and scale the feature rows for the regression model
    feature_columns = [col for col in team_stats.columns if col != 'Team']
    team1_scaled = scaler.transform(
        team1_stats[feature_columns].to_numpy(dtype=float).reshape(1, -1)
    )
    team2_scaled = scaler.transform(
        team2_stats[feature_columns].to_numpy(dtype=float).reshape(1, -1)
    )

    # Offensive and defensive strength indices, where 1 is league average.
    # Defensive strength is inverted: fewer points allowed -> larger index.
    league_avg_off = team_stats['Off_PPG'].mean()
    league_avg_def = team_stats['Def_PPG'].mean()

    team1_off_strength = team1_stats['Off_PPG'] / league_avg_off
    team2_off_strength = team2_stats['Off_PPG'] / league_avg_off
    team1_def_strength = league_avg_def / team1_stats['Def_PPG']
    team2_def_strength = league_avg_def / team2_stats['Def_PPG']

    # Base score: 50% model prediction, 50% matchup-based scoring
    team1_base = (
        model_scored.predict(team1_scaled)[0] * 0.5 +
        team1_stats['Off_PPG'] * team2_stats['Def_PPG'] / league_avg_def * 0.5
    )
    team2_base = (
        model_scored.predict(team2_scaled)[0] * 0.5 +
        team2_stats['Off_PPG'] * team1_stats['Def_PPG'] / league_avg_def * 0.5
    )

    # Efficiency multiplier: scoring-percentage gap, yards-per-play gap and
    # strength gap between this offense and the opposing defense.
    team1_efficiency_mult = (
        1 + (team1_stats['Off_Score_Pct'] - team2_stats['Def_Score_Pct']) / 200 +
        (team1_stats['Off_YPP'] - team2_stats['Def_YPP']) / 10 +
        (team1_off_strength - team2_def_strength) / 4
    )
    team2_efficiency_mult = (
        1 + (team2_stats['Off_Score_Pct'] - team1_stats['Def_Score_Pct']) / 200 +
        (team2_stats['Off_YPP'] - team1_stats['Def_YPP']) / 10 +
        (team2_off_strength - team1_def_strength) / 4
    )

    team1_score = team1_base * team1_efficiency_mult
    team2_score = team2_base * team2_efficiency_mult

    # Home field advantage: a 5% boost to the home side's score
    if home_team == team1:
        team1_score *= 1.05
    elif home_team == team2:
        team2_score *= 1.05

    # Widen the margin in proportion to the strength mismatch while keeping
    # the two scores centred on the same average.
    strength_diff = abs(team1_off_strength * team2_def_strength - team2_off_strength * team1_def_strength)
    margin_multiplier = 1 + strength_diff

    avg_score = (team1_score + team2_score) / 2
    score_diff = (team1_score - team2_score) * margin_multiplier

    team1_final = avg_score + score_diff / 2
    team2_final = avg_score - score_diff / 2

    return team1_final, team2_final

def create_prediction_model(team_stats):
    """Fit a feature scaler and two XGBoost regressors on the team table.

    Args:
        team_stats: DataFrame with a ``Team`` column plus numeric feature
            columns, including ``Off_PPG`` and ``Def_PPG``.

    Returns:
        ``(scaler, model_scored, model_allowed)`` where ``model_scored`` is
        trained on ``Off_PPG`` and ``model_allowed`` on ``Def_PPG``. The two
        models are independent estimator objects.
    """
    # Prepare features for scaling.
    # NOTE(review): every non-'Team' column is a feature, so the targets
    # 'Off_PPG' and 'Def_PPG' are themselves among the inputs -- confirm this
    # is intended.
    feature_columns = [col for col in team_stats.columns if col != 'Team']
    X = team_stats[feature_columns].values

    # Scale the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    def build_regressor():
        # A fresh estimator per target: ``fit`` returns the estimator itself,
        # so fitting one instance twice would leave both returned names
        # pointing at the same model, trained on the second target only.
        return XGBRegressor(
            n_estimators=150,
            max_depth=5,
            learning_rate=0.15,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42
        )

    # Train on points scored
    model_scored = build_regressor().fit(X_scaled, team_stats['Off_PPG'].values)

    # Train on points allowed
    model_allowed = build_regressor().fit(X_scaled, team_stats['Def_PPG'].values)

    return scaler, model_scored, model_allowed
def analyze_matchup(team1, team2, team_stats, offense_df, defense_df):
    """Analyze key matchup factors between the teams.

    Args:
        team1, team2: Names matching the ``Tm`` column of both tables.
        team_stats: Accepted for interface compatibility; not used here.
        offense_df: Table with ``Tm``, ``PF``, ``Tot Yds``, ``Passing Yds``,
            ``Rushing Yds``, ``Y/P``, ``Sc%`` and ``TO%``.
        defense_df: Table with ``Tm``, ``G``, ``PA``, ``Y/P``, ``TO%``,
            ``EXP`` and a total-yards column named ``Yds`` or ``Tot Yds``.

    Returns:
        ``{'offense': {team: {...}}, 'defense': {team: {...}}}`` with one
        entry per team.

    Raises:
        IndexError: If a team is missing from either table.
        KeyError: If a required column is missing.
    """
    # The total-yards column is 'Yds' in some exports and 'Tot Yds' in others.
    def_yards_col = 'Yds' if 'Yds' in defense_df.columns else 'Tot Yds'

    def row_for(df, team, label):
        # Explicit lookup so an unknown team is reported by name.
        rows = df[df['Tm'] == team]
        if rows.empty:
            raise IndexError(f"Team {team!r} not found in {label}")
        return rows.iloc[0]

    def offense_summary(row):
        # Offensive columns are used as-is (treated as per-game values).
        return {
            'points_per_game': row['PF'],
            'total_yards_per_game': row['Tot Yds'],
            'pass_yards_per_game': row['Passing Yds'],
            'rush_yards_per_game': row['Rushing Yds'],
            'yards_per_play': row['Y/P'],
            'scoring_pct': row['Sc%'],
            'turnover_pct': row['TO%']
        }

    def defense_summary(row):
        # Points and yards allowed are divided by games played.
        return {
            'points_allowed_per_game': row['PA'] / row['G'],
            'yards_allowed_per_game': row[def_yards_col] / row['G'],
            'yards_per_play_allowed': row['Y/P'],
            'takeaway_pct': row['TO%'],
            'expected_points': row['EXP']
        }

    teams = (team1, team2)
    analysis = {
        'offense': {
            team: offense_summary(row_for(offense_df, team, 'offense_df'))
            for team in teams
        },
        'defense': {
            team: defense_summary(row_for(defense_df, team, 'defense_df'))
            for team in teams
        }
    }

    return analysis


def generate_insights(team1, team2, analysis, prediction_result, home_team=None):
    """Generate detailed insights about the prediction.

    Args:
        team1, team2: Team names used as keys in ``analysis``.
        analysis: Dict with ``'offense'`` and ``'defense'`` sub-dicts keyed by
            team, holding the metrics read below (``points_per_game``,
            ``yards_per_play``, ``scoring_pct``, ``turnover_pct``,
            ``points_allowed_per_game``, ``takeaway_pct``,
            ``yards_per_play_allowed``).
        prediction_result: Dict with ``'winner'``, ``'margin'`` and ``'total'``.
        home_team: Optional name of the home team.

    Returns:
        A single newline-joined report string.

    When the two teams are exactly equal on a metric, neither is credited
    with an advantage for it.
    """
    off1, off2 = analysis['offense'][team1], analysis['offense'][team2]
    def1, def2 = analysis['defense'][team1], analysis['defense'][team2]

    def split_advantages(comparisons):
        # Sort each comparison's phrase into the list of the team that wins
        # it; exact ties are skipped so no team is called "better" on equal
        # numbers.
        team1_adv, team2_adv = [], []
        for phrase, value1, value2, higher_is_better, unit in comparisons:
            if value1 == value2:
                continue
            team1_ahead = value1 > value2 if higher_is_better else value1 < value2
            if team1_ahead:
                team1_adv.append(f"{phrase} ({value1:.1f}{unit} vs {value2:.1f}{unit})")
            else:
                team2_adv.append(f"{phrase} ({value2:.1f}{unit} vs {value1:.1f}{unit})")
        return team1_adv, team2_adv

    insights = []

    # Scoring Trends
    insights.append("\nScoring Trends:")
    for team, off, dfn in ((team1, off1, def1), (team2, off2, def2)):
        insights.append(
            f"{team} is averaging {off['points_per_game']:.1f} PPG (Offense) "
            f"and allowing {dfn['points_allowed_per_game']:.1f} PPG (Defense)"
        )

    # Offensive Comparison
    insights.append("\nOffensive Comparison:")
    t1_off_adv, t2_off_adv = split_advantages([
        ("better yards per play", off1['yards_per_play'], off2['yards_per_play'], True, ""),
        ("higher scoring percentage", off1['scoring_pct'], off2['scoring_pct'], True, "%"),
        ("lower turnover rate", off1['turnover_pct'], off2['turnover_pct'], False, "%"),
    ])
    if t1_off_adv:
        insights.append(f"{team1} has {', '.join(t1_off_adv)}")
    if t2_off_adv:
        insights.append(f"{team2} has {', '.join(t2_off_adv)}")

    # Defensive Comparison
    insights.append("\nDefensive Comparison:")
    t1_def_adv, t2_def_adv = split_advantages([
        ("allows fewer points", def1['points_allowed_per_game'], def2['points_allowed_per_game'], False, ""),
        ("higher takeaway rate", def1['takeaway_pct'], def2['takeaway_pct'], True, "%"),
    ])
    if t1_def_adv:
        insights.append(f"{team1} {', '.join(t1_def_adv)}")
    if t2_def_adv:
        insights.append(f"{team2} {', '.join(t2_def_adv)}")

    # Key Matchup Analysis: each offense's yards per play minus what the
    # opposing defense allows per play.
    insights.append("\nKey Matchup Analysis:")
    t1_off_vs_t2_def = off1['yards_per_play'] - def2['yards_per_play_allowed']
    t2_off_vs_t1_def = off2['yards_per_play'] - def1['yards_per_play_allowed']

    if t1_off_vs_t2_def == t2_off_vs_t1_def:
        insights.append("Both offense-vs-defense matchups appear evenly balanced")
    elif t1_off_vs_t2_def > t2_off_vs_t1_def:
        insights.append(f"{team1}'s offense vs {team2}'s defense appears to be the more favorable matchup")
    else:
        insights.append(f"{team2}'s offense vs {team1}'s defense appears to be the more favorable matchup")

    # Home Field Advantage
    if home_team:
        insights.append("\nHome Field Factor:")
        insights.append(f"{home_team} has home field advantage (historically worth ~2 points)")

    # Prediction Summary
    insights.append("\nPrediction Summary:")
    winner = prediction_result['winner']
    margin = prediction_result['margin']
    total = prediction_result['total']

    insights.append(f"Model predicts a {margin:.1f} point victory for {winner}")
    insights.append(f"Predicted total points: {total:.1f}")

    # Confidence Analysis: purely a function of the predicted margin
    if margin > 7:
        confidence = "High"
    elif margin > 3:
        confidence = "Medium"
    else:
        confidence = "Low"
    insights.append(f"Prediction Confidence: {confidence} (based on margin of victory)")

    return "\n".join(insights)

def predict_winner(offense_csv, defense_csv, team1, team2, home_team=None):
    """Run the full pipeline for one matchup and summarize the outcome.

    Reads both CSV files, builds the combined team table, fits the models,
    predicts both scores and produces the textual analysis.

    Args:
        offense_csv: Path to the offensive stats CSV.
        defense_csv: Path to the defensive stats CSV.
        team1, team2: Team names as they appear in the ``Tm`` column.
        home_team: Optional name of the home team.

    Returns:
        ``(result, insights)`` where ``result`` holds ``winner``,
        ``team1_score``, ``team2_score``, ``margin`` and ``total`` (scores
        rounded to one decimal) and ``insights`` is the report string.
    """
    # Load the raw tables
    offense_df = pd.read_csv(offense_csv)
    defense_df = pd.read_csv(defense_csv)

    # Feature table and fitted models
    team_stats = prepare_data(offense_df, defense_df)
    scaler, model_scored, model_allowed = create_prediction_model(team_stats)

    # Score prediction for both sides
    averages = get_team_averages(offense_df)
    score1, score2 = predict_game(
        team1, team2, team_stats, averages,
        scaler, model_scored, model_allowed, home_team
    )

    # Ties go to team2, since team1 must be strictly ahead to be the winner.
    winner = team1 if score1 > score2 else team2
    result = {
        'winner': winner,
        'team1_score': round(score1, 1),
        'team2_score': round(score2, 1),
        'margin': round(abs(score1 - score2), 1),
        'total': round(score1 + score2, 1)
    }

    # Narrative analysis built from the raw tables and the prediction
    matchup_analysis = analyze_matchup(team1, team2, team_stats, offense_df, defense_df)
    insights = generate_insights(team1, team2, matchup_analysis, result, home_team)

    return result, insights

# TODO: factor in strength of schedule and each team's performance against specific opponents.

if __name__ == "__main__":
    # Example matchup: the second team is also the home side.
    team1, team2 = "Philadelphia Eagles", "Dallas Cowboys"
    home_team = "Dallas Cowboys"

    result, insights = predict_winner('offense.csv', 'defense.csv', team1, team2, home_team)

    # Score summary, printed one line at a time.
    summary_lines = [
        "\nPredicted outcome:",
        f"{team1}: {result['team1_score']} points",
        f"{team2}: {result['team2_score']} points",
        f"Total: {result['total']} points",
        f"\nPredicted winner: {result['winner']}",
        f"Predicted margin: {result['margin']} points",
    ]
    for summary_line in summary_lines:
        print(summary_line)

    # Full narrative report
    print("\nDetailed Analysis:")
    print(insights)

KeyError: 'Yds'