In [1]:
pip install xgboost nfl_data_py

Note: you may need to restart the kernel to use updated packages.


In [3]:
# NFL Game Prediction using nfl_data_py
# Updated version of NFL prediction pipeline using modern, maintained data source

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Core ML libraries
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedStratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.calibration import CalibratedClassifierCV as CCV
from sklearn.pipeline import Pipeline

# Advanced ML libraries
try:
    import xgboost as xgb
    HAS_XGB = True
except ImportError:
    HAS_XGB = False
    print("XGBoost not available. Install with: pip install xgboost")

# NFL data library
try:
    import nfl_data_py as nfl
    HAS_NFL_DATA = True
except ImportError:
    HAS_NFL_DATA = False
    print("nfl_data_py not available. Install with: pip install nfl_data_py")

class NFLGamePredictor:
    def __init__(self):
        self.models = {}
        self.best_features = []
        self.scaler = StandardScaler()
        self.final_model = None
        
    def collect_data(self, start_year=2010, end_year=2024):
        """Collect comprehensive NFL data using nfl_data_py"""
        
        if not HAS_NFL_DATA:
            raise ImportError("nfl_data_py is required. Install with: pip install nfl_data_py")
        
        print(f"Collecting NFL data from {start_year} to {end_year}...")
        
        # Get play-by-play data for game-level statistics
        years = list(range(start_year, end_year + 1))
        pbp_data = nfl.import_pbp_data(years)
        
        # Get team information
        teams = nfl.import_team_desc()
        team_dict = teams.set_index('team_abbr')['team_name'].to_dict()
        
        # Get weekly data for season-long team statistics
        weekly_data = nfl.import_weekly_data(years)
        
        # Get schedule data
        schedule_data = nfl.import_schedules(years)
        
        print(f"Collected {len(pbp_data)} play-by-play records")
        print(f"Collected {len(weekly_data)} weekly team records") 
        print(f"Collected {len(schedule_data)} scheduled games")
        
        return pbp_data, weekly_data, schedule_data, team_dict
    
    def create_team_features(self, weekly_data, season, week):
        """Create team-level features for a specific season/week"""
        
        # Filter data up to the current week
        season_data = weekly_data[
            (weekly_data['season'] == season) & 
            (weekly_data['week'] < week)
        ]
        
        if season_data.empty:
            return {}
        
        # Calculate season averages for each team
        team_features = {}
        
        for team in season_data['recent_team'].unique():
            team_data = season_data[season_data['recent_team'] == team]
            
            if len(team_data) == 0:
                continue
                
            # Offensive features
            features = {
                'passing_yards_pg': team_data['passing_yards'].mean(),
                'rushing_yards_pg': team_data['rushing_yards'].mean(), 
                'total_yards_pg': team_data['passing_yards'].mean() + team_data['rushing_yards'].mean(),
                'points_pg': team_data['fantasy_points'].mean() if 'fantasy_points' in team_data.columns else 0,
                'completions_pg': team_data['completions'].mean(),
                'passing_tds_pg': team_data['passing_tds'].mean(),
                'interceptions_thrown_pg': team_data['interceptions'].mean(),
                'rushing_tds_pg': team_data['rushing_tds'].mean(),
                'fumbles_lost_pg': team_data['fumbles_lost'].mean() if 'fumbles_lost' in team_data.columns else 0,
                
                # Defensive features (opponent stats)
                'opp_passing_yards_pg': 0,  # Will be calculated separately
                'opp_rushing_yards_pg': 0,
                'opp_points_pg': 0,
                
                # Advanced metrics
                'turnover_ratio': (team_data['interceptions'].mean() - 
                                 team_data['interceptions'].mean()),  # Simplified
                'games_played': len(team_data)
            }
            
            team_features[team] = features
        
        return team_features
    
    def create_game_features(self, home_team, away_team, team_features, 
                           season, week, is_playoff=False, is_neutral=False):
        """Create features for a specific matchup"""
        
        if home_team not in team_features or away_team not in team_features:
            return None
        
        home_stats = team_features[home_team]
        away_stats = team_features[away_team]
        
        # Create matchup features
        features = {
            # Home team offensive stats
            'home_passing_ypg': home_stats['passing_yards_pg'],
            'home_rushing_ypg': home_stats['rushing_yards_pg'],
            'home_total_ypg': home_stats['total_yards_pg'],
            'home_points_pg': home_stats['points_pg'],
            'home_passing_tds_pg': home_stats['passing_tds_pg'],
            'home_turnovers_pg': home_stats.get('fumbles_lost_pg', 0) + home_stats['interceptions_thrown_pg'],
            
            # Away team offensive stats  
            'away_passing_ypg': away_stats['passing_yards_pg'],
            'away_rushing_ypg': away_stats['rushing_yards_pg'],
            'away_total_ypg': away_stats['total_yards_pg'],
            'away_points_pg': away_stats['points_pg'],
            'away_passing_tds_pg': away_stats['passing_tds_pg'],
            'away_turnovers_pg': away_stats.get('fumbles_lost_pg', 0) + away_stats['interceptions_thrown_pg'],
            
            # Matchup advantages
            'passing_advantage': home_stats['passing_yards_pg'] - away_stats['passing_yards_pg'],
            'rushing_advantage': home_stats['rushing_yards_pg'] - away_stats['rushing_yards_pg'],
            'scoring_advantage': home_stats['points_pg'] - away_stats['points_pg'],
            'turnover_advantage': away_stats.get('fumbles_lost_pg', 0) + away_stats['interceptions_thrown_pg'] - 
                                (home_stats.get('fumbles_lost_pg', 0) + home_stats['interceptions_thrown_pg']),
            
            # Game context
            'home_field_advantage': 0 if is_neutral else 2.5,
            'is_playoff': 1 if is_playoff else 0,
            'is_neutral': 1 if is_neutral else 0,
            'week': week,
            'season': season,
        }
        
        return features
    
    def build_dataset(self, pbp_data, weekly_data, schedule_data):
        """Build complete dataset from NFL data"""
        
        print("Building dataset from collected data...")
        
        game_records = []
        
        # Process each scheduled game
        for _, game in schedule_data.iterrows():
            season = game['season']
            week = game['week']
            home_team = game['home_team']
            away_team = game['away_team']
            
            # Skip if missing essential data
            if pd.isna(home_team) or pd.isna(away_team):
                continue
            
            # Get team features up to this point in season
            team_features = self.create_team_features(weekly_data, season, week)
            
            if not team_features:
                continue
            
            # Create game features
            game_features = self.create_game_features(
                home_team, away_team, team_features, 
                season, week,
                is_playoff=game.get('game_type', '') == 'REG',
                is_neutral=False  # Simplified for now
            )
            
            if game_features is None:
                continue
            
            # Determine result (home team win = 1, loss = 0)
            home_score = game.get('home_score', 0)
            away_score = game.get('away_score', 0)
            
            # Skip games without scores (future games)
            if pd.isna(home_score) or pd.isna(away_score):
                continue
            
            game_features['home_win'] = 1 if home_score > away_score else 0
            game_features['home_score'] = home_score
            game_features['away_score'] = away_score
            game_features['game_id'] = f"{season}_{week}_{home_team}_{away_team}"
            
            game_records.append(game_features)
        
        df = pd.DataFrame(game_records)
        print(f"Created dataset with {len(df)} games")
        
        return df
    
    def select_features(self, df, n_features=10):
        """Select best features using RFE"""
        
        # Prepare data
        feature_cols = [col for col in df.columns 
                       if col not in ['home_win', 'home_score', 'away_score', 'game_id']]
        
        X = df[feature_cols]
        y = df['home_win']
        
        # Remove any columns with all NaN or constant values
        X = X.loc[:, X.var() > 0]
        X = X.fillna(X.mean())
        
        print(f"Starting feature selection with {len(X.columns)} features...")
        
        # Use RFE with different numbers of features
        models = {}
        results = []
        
        for i in range(2, min(n_features + 1, len(X.columns) + 1)):
            rfe = RFE(estimator=LDA(), n_features_to_select=i)
            model = DecisionTreeClassifier(random_state=42)
            pipeline = Pipeline(steps=[('s', rfe), ('m', model)])
            
            cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)
            scores = cross_val_score(pipeline, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
            
            results.append(scores)
            models[str(i)] = pipeline
            
            print(f'{i} features: {scores.mean():.3f} (+/- {scores.std():.3f})')
        
        # Find best number of features
        best_idx = np.argmax([np.mean(result) for result in results])
        best_n_features = best_idx + 2
        
        print(f"Best number of features: {best_n_features}")
        
        # Get the best feature set
        rfe = RFE(estimator=LDA(), n_features_to_select=best_n_features)
        rfe.fit(X, y)
        
        selected_features = X.columns[rfe.support_].tolist()
        self.best_features = selected_features
        
        print("Selected features:")
        for feature in selected_features:
            print(f"  - {feature}")
        
        return selected_features
    
    def train_models(self, df):
        """Train and compare multiple models"""
        
        if not self.best_features:
            self.select_features(df)
        
        # Prepare data
        X = df[self.best_features].fillna(df[self.best_features].mean())
        y = df['home_win']
        
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
        
        # Scale features
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        
        # Define models to test
        models_to_test = {
            'Logistic Regression': LogisticRegression(random_state=42),
            'Decision Tree': DecisionTreeClassifier(random_state=42),
            'Random Forest': RandomForestClassifier(random_state=42)
        }
        
        if HAS_XGB:
            models_to_test['XGBoost'] = xgb.XGBClassifier(random_state=42, verbosity=0)
        
        # Train and evaluate models
        model_results = {}
        cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=42)
        
        print("\nTraining and evaluating models...")
        
        for name, model in models_to_test.items():
            # Use scaled data for logistic regression, raw for tree-based
            X_use = X_train_scaled if 'Logistic' in name else X_train
            scores = cross_val_score(model, X_use, y_train, cv=cv, scoring='accuracy', n_jobs=-1)
            
            model_results[name] = {
                'mean_score': scores.mean(),
                'std_score': scores.std(),
                'scores': scores
            }
            
            print(f"{name}: {scores.mean():.3f} (+/- {scores.std():.3f})")
        
        self.models = models_to_test
        return model_results
    
    def tune_best_model(self, df, model_name='Random Forest'):
        """Tune hyperparameters for the best performing model"""
        
        X = df[self.best_features].fillna(df[self.best_features].mean())
        y = df['home_win']
        
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
        
        if model_name == 'Random Forest':
            param_grid = {
                'n_estimators': [50, 100, 200],
                'max_depth': [3, 5, 7, None],
                'min_samples_split': [2, 5, 10],
                'criterion': ['gini', 'entropy']
            }
            model = RandomForestClassifier(random_state=42)
        
        elif model_name == 'Logistic Regression':
            param_grid = {
                'C': [0.01, 0.1, 1, 10, 100],
                'penalty': ['l1', 'l2'],
                'solver': ['liblinear']
            }
            model = LogisticRegression(random_state=42)
            X_train = self.scaler.fit_transform(X_train)
            X_test = self.scaler.transform(X_test)
        
        elif model_name == 'XGBoost' and HAS_XGB:
            param_grid = {
                'max_depth': [3, 5, 7],
                'learning_rate': [0.01, 0.1, 0.2],
                'n_estimators': [100, 200, 300]
            }
            model = xgb.XGBClassifier(random_state=42, verbosity=0)
        
        else:
            print(f"Tuning not implemented for {model_name}")
            return None
        
        print(f"\nTuning {model_name}...")
        
        # Grid search
        grid_search = GridSearchCV(
            model, param_grid, 
            cv=5, scoring='accuracy', 
            n_jobs=-1, verbose=1
        )
        
        grid_search.fit(X_train, y_train)
        
        print(f"Best parameters: {grid_search.best_params_}")
        print(f"Best cross-validation score: {grid_search.best_score_:.3f}")
        
        # Test on holdout set
        test_score = grid_search.score(X_test, y_test)
        print(f"Test set accuracy: {test_score:.3f}")
        
        return grid_search.best_estimator_
    
    def create_ensemble_model(self, df):
        """Create ensemble model combining multiple algorithms"""
        
        X = df[self.best_features].fillna(df[self.best_features].mean())
        y = df['home_win']
        
        # Individual models
        rf = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
        lr = LogisticRegression(C=1, random_state=42)
        
        estimators = [('rf', rf), ('lr', lr)]
        
        if HAS_XGB:
            xgb_model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=200, 
                                        random_state=42, verbosity=0)
            estimators.append(('xgb', xgb_model))
        
        # Voting classifier
        voting_clf = VotingClassifier(estimators=estimators, voting='soft')
        
        # Calibrated classifier for better probability estimates
        self.final_model = CCV(voting_clf, method='isotonic', cv=3)
        
        print("Training ensemble model...")
        self.final_model.fit(X, y)
        
        return self.final_model
    
    def predict_games(self, games_df, confidence_threshold=0.6):
        """Make predictions on new games"""
        
        if self.final_model is None:
            raise ValueError("Model not trained yet. Call create_ensemble_model() first.")
        
        X = games_df[self.best_features].fillna(games_df[self.best_features].mean())
        
        # Get probability predictions
        probabilities = self.final_model.predict_proba(X)[:, 1]
        predictions = self.final_model.predict(X)
        
        # Create results dataframe
        results = games_df.copy()
        results['home_win_prob'] = probabilities
        results['predicted_home_win'] = predictions
        
        # High-confidence bets
        results['high_confidence_bet'] = (
            (probabilities >= confidence_threshold) | 
            (probabilities <= (1 - confidence_threshold))
        )
        
        return results

# Example usage and testing
if __name__ == "__main__":
    
    # Initialize predictor
    predictor = NFLGamePredictor()
    
    # Check if nfl_data_py is available
    if not HAS_NFL_DATA:
        print("Please install nfl_data_py to use this predictor:")
        print("pip install nfl_data_py")
        exit()
    
    try:
        # Collect data (this may take a few minutes)
        print("This may take a few minutes to download NFL data...")
        pbp_data, weekly_data, schedule_data, team_dict = predictor.collect_data(2015, 2024)
        
        # Build dataset
        df = predictor.build_dataset(pbp_data, weekly_data, schedule_data)
        
        if df.empty:
            print("No data collected. Check your internet connection and try again.")
            exit()
        
        # Feature selection
        predictor.select_features(df, n_features=10)
        
        # Train models
        model_results = predictor.train_models(df)
        
        # Create ensemble
        final_model = predictor.create_ensemble_model(df)
        
        # Example prediction on test set
        train_data = df[df['season'] < 2023]
        test_data = df[df['season'] == 2023]
        
        if not test_data.empty:
            predictor.final_model.fit(
                train_data[predictor.best_features].fillna(train_data[predictor.best_features].mean()), 
                train_data['home_win']
            )
            
            predictions = predictor.predict_games(test_data)
            
            # Calculate accuracy
            accuracy = (predictions['predicted_home_win'] == predictions['home_win']).mean()
            print(f"\n2023 season prediction accuracy: {accuracy:.3f}")
            
            # High confidence bets
            high_conf = predictions[predictions['high_confidence_bet']]
            if not high_conf.empty:
                conf_accuracy = (high_conf['predicted_home_win'] == high_conf['home_win']).mean()
                print(f"High confidence bet accuracy: {conf_accuracy:.3f} ({len(high_conf)} games)")
        
        print("\nPredictor trained successfully!")
        print("You can now use predictor.predict_games() on new data.")
        
    except Exception as e:
        print(f"Error: {e}")
        print("Make sure you have a stable internet connection for data download.")

This may take a few minutes to download NFL data...
Collecting NFL data from 2015 to 2024...
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.
Downcasting floats.
Collected 483605 play-by-play records
Collected 54479 weekly team records
Collected 2743 scheduled games
Building dataset from collected data...
Created dataset with 2465 games
Starting feature selection with 19 features...
2 features: 0.522 (+/- 0.019)
3 features: 0.532 (+/- 0.021)
4 features: 0.515 (+/- 0.016)
5 features: 0.524 (+/- 0.024)
6 features: 0.525 (+/- 0.028)
7 features: 0.520 (+/- 0.021)
8 features: 0.529 (+/- 0.024)
9 features: 0.536 (+/- 0.013)
10 features: 0.539 (+/- 0.009)
Best number of features: 10
Selected features:
  - home_passing_ypg
  - home_points_pg
  - home_passing_tds_pg
  - home_turnovers_pg
  - away_passing_tds_pg
  - away_turnovers_pg
  - scoring_advantage
  - turnover_advantage
  - is_playoff
  - season

Training an

In [5]:
# Run the main training pipeline
predictor = NFLGamePredictor()

# Train the model (this will take a few minutes)
print("Training the model...")
pbp_data, weekly_data, schedule_data, team_dict = predictor.collect_data(2015, 2024)
df = predictor.build_dataset(pbp_data, weekly_data, schedule_data)

# Train and create ensemble model
predictor.select_features(df, n_features=10)
model_results = predictor.train_models(df)
final_model = predictor.create_ensemble_model(df)

print("Model trained successfully!")
print(f"Best features: {predictor.best_features}")

Training the model...
Collecting NFL data from 2015 to 2024...
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
2024 done.
Downcasting floats.
Downcasting floats.
Collected 483605 play-by-play records
Collected 54479 weekly team records
Collected 2743 scheduled games
Building dataset from collected data...
Created dataset with 2465 games
Starting feature selection with 19 features...
2 features: 0.522 (+/- 0.019)
3 features: 0.532 (+/- 0.021)
4 features: 0.515 (+/- 0.016)
5 features: 0.524 (+/- 0.024)
6 features: 0.525 (+/- 0.028)
7 features: 0.520 (+/- 0.021)
8 features: 0.529 (+/- 0.024)
9 features: 0.536 (+/- 0.013)
10 features: 0.539 (+/- 0.009)
Best number of features: 10
Selected features:
  - home_passing_ypg
  - home_points_pg
  - home_passing_tds_pg
  - home_turnovers_pg
  - away_passing_tds_pg
  - away_turnovers_pg
  - scoring_advantage
  - turnover_advantage
  - is_playoff
  - season

Training and evaluating models...
Logisti

In [7]:
# Make predictions for specific games
def predict_specific_game(predictor, home_team, away_team, season=2025, week=1):
    """
    Predict the outcome of a specific game
    
    Args:
        predictor: Trained NFLGamePredictor instance
        home_team: Home team abbreviation (e.g., 'NYJ')
        away_team: Away team abbreviation (e.g., 'PIT')
        season: Season year
        week: Week number
    """
    
    # Get 2024 team features to predict 2025 games
    team_features = predictor.create_team_features(weekly_data, 2024, 19)  # End of 2024 season
    
    if not team_features or home_team not in team_features or away_team not in team_features:
        print(f"Error: Could not find team data for {home_team} or {away_team}")
        return None
    
    # Create game features
    game_features = predictor.create_game_features(
        home_team, away_team, team_features, 
        season, week,
        is_playoff=False,
        is_neutral=False
    )
    
    if game_features is None:
        print("Error: Could not create game features")
        return None
    
    # Convert to DataFrame for prediction
    game_df = pd.DataFrame([game_features])
    
    # Make prediction
    prediction_result = predictor.predict_games(game_df)
    
    # Extract results
    home_win_prob = prediction_result['home_win_prob'].iloc[0]
    predicted_home_win = prediction_result['predicted_home_win'].iloc[0]
    high_confidence = prediction_result['high_confidence_bet'].iloc[0]
    
    # Display results
    winner = home_team if predicted_home_win == 1 else away_team
    confidence = home_win_prob if predicted_home_win == 1 else (1 - home_win_prob)
    
    print(f"\n🏈 GAME PREDICTION: {away_team} @ {home_team}")
    print(f"{'='*50}")
    print(f"🏆 Predicted Winner: {winner}")
    print(f"🎯 Confidence: {confidence:.1%}")
    print(f"📊 Home Win Probability: {home_win_prob:.1%}")
    print(f"📊 Away Win Probability: {(1-home_win_prob):.1%}")
    print(f"🔥 High Confidence Bet: {'Yes' if high_confidence else 'No'}")
    
    return prediction_result

# Example: Predict Upcoming Games
if 'predictor' in locals() and predictor.final_model is not None:
    steelers_jets_prediction = predict_specific_game(
        predictor, 
        home_team='LAC',  
        away_team='KC',  
        season=2025, 
        week=1
    )
else:
    print("Please run the training cell first to create the predictor model")


🏈 GAME PREDICTION: KC @ LAC
🏆 Predicted Winner: LAC
🎯 Confidence: 63.5%
📊 Home Win Probability: 63.5%
📊 Away Win Probability: 36.5%
🔥 High Confidence Bet: Yes


In [10]:
# MULTIPLE GAME PREDICTIONS - Predict entire game slates
import re
from datetime import datetime

def predict_multiple_games(predictor, games_text, season=2025, week=1, show_details=True):
    """
    Predict multiple games from formatted text
    
    Args:
        predictor: Trained NFLGamePredictor instance
        games_text: Formatted text with games (Away) TEAM vs. (Home) TEAM
        season: Season year
        week: Week number
        show_details: Whether to show detailed output for each game
    
    Returns:
        DataFrame with all predictions
    """
    
    if predictor.final_model is None:
        print("Model not trained yet. Please train the model first.")
        return None
    
    # Team abbreviation mapping for common variations
    team_mapping = {
        'TB Buccaneers': 'TB', 'TB': 'TB',
        'ATL Falcons': 'ATL', 'ATL': 'ATL',
        'CIN Bengals': 'CIN', 'CIN': 'CIN',
        'CLE Browns': 'CLE', 'CLE': 'CLE',
        'MIA Dolphins': 'MIA', 'MIA': 'MIA',
        'IND Colts': 'IND', 'IND': 'IND',
        'CAR Panthers': 'CAR', 'CAR': 'CAR',
        'JAX Jaguars': 'JAX', 'JAX': 'JAX',
        'LV Raiders': 'LV', 'LV': 'LV',
        'NE Patriots': 'NE', 'NE': 'NE',
        'ARI Cardinals': 'ARI', 'ARI': 'ARI',
        'NO Saints': 'NO', 'NO': 'NO',
        'PIT Steelers': 'PIT', 'PIT': 'PIT',
        'NYJ Jets': 'NYJ', 'NYJ': 'NYJ',
        'NYG Giants': 'NYG', 'NYG': 'NYG',
        'WAS Commanders': 'WAS', 'WAS': 'WAS',
        'TEN Titans': 'TEN', 'TEN': 'TEN',
        'DEN Broncos': 'DEN', 'DEN': 'DEN',
        'SF 49ers': 'SF', 'SF': 'SF',
        'SEA Seahawks': 'SEA', 'SEA': 'SEA',
        'DET Lions': 'DET', 'DET': 'DET',
        'GB Packers': 'GB', 'GB': 'GB',
        'HOU Texans': 'HOU', 'HOU': 'HOU',
        'LAR Rams': 'LA', 'LA': 'LA', 
        'BAL Ravens': 'BAL', 'BAL': 'BAL',
        'BUF Bills': 'BUF', 'BUF': 'BUF',
        'KC Chiefs': 'KC', 'KC': 'KC',
        'PHI Eagles': 'PHI', 'PHI': 'PHI',
        'DAL Cowboys': 'DAL', 'DAL': 'DAL',
        'MIN Vikings': 'MIN', 'MIN': 'MIN',
        'CHI Bears': 'CHI', 'CHI': 'CHI',
        'LAC Chargers': 'LAC', 'LAC': 'LAC'
    }
    
    # Game schedule with times (Eastern Time)
    game_schedule = {
        'DAL @ PHI': ('Thursday, September 4', '8:20 PM ET'),
        'KC @ LAC': ('Friday, September 5', '8:00 PM ET (São Paulo, Brazil)'),
        'TB @ ATL': ('Sunday, September 7', '1:00 PM ET'),
        'CIN @ CLE': ('Sunday, September 7', '1:00 PM ET'),
        'MIA @ IND': ('Sunday, September 7', '1:00 PM ET'),
        'CAR @ JAX': ('Sunday, September 7', '1:00 PM ET'),
        'LV @ NE': ('Sunday, September 7', '1:00 PM ET'),
        'ARI @ NO': ('Sunday, September 7', '1:00 PM ET'),
        'PIT @ NYJ': ('Sunday, September 7', '1:00 PM ET'),
        'NYG @ WAS': ('Sunday, September 7', '1:00 PM ET'),
        'TEN @ DEN': ('Sunday, September 7', '4:05 PM ET'),
        'SF @ SEA': ('Sunday, September 7', '4:05 PM ET'),
        'DET @ GB': ('Sunday, September 7', '4:25 PM ET'),
        'HOU @ LA': ('Sunday, September 7', '4:25 PM ET'),
        'BAL @ BUF': ('Sunday, September 7', '8:20 PM ET'),
        'MIN @ CHI': ('Monday, September 8', '8:15 PM ET')
    }
    
    # Parse games from text
    games = []
    game_pattern = r'\(Away\)\s+(.*?)\s+vs\.\s+\(Home\)\s+(.*)'
    
    matches = re.findall(game_pattern, games_text, re.IGNORECASE)
    
    if not matches:
        print("No games found in the provided text. Expected format:")
        print("(Away) TEAM vs. (Home) TEAM")
        return None
    
    print(f"MULTIPLE GAME PREDICTIONS - Week {week}, {season}")
    print("="*60)
    print(f"Found {len(matches)} games to predict")
    print()
    
    # Get team features once for all predictions
    try:
        team_features = predictor.create_team_features(weekly_data, 2024, 19)  # End of 2024 season
        
        if not team_features:
            print("Could not create team features")
            return None
        
        # Debug: Show available teams
        if show_details:
            print(f"Available teams in data: {sorted(list(team_features.keys()))}")
            print()
            
    except:
        print("Error loading team data. Make sure weekly_data is available.")
        return None
    
    all_predictions = []
    successful_predictions = 0
    
    # Track current time slot for headers
    current_day = None
    current_time_slot = None
    
    for i, (away_full, home_full) in enumerate(matches, 1):
        away_full = away_full.strip()
        home_full = home_full.strip()
        
        # Map to abbreviations
        away_team = team_mapping.get(away_full, away_full.split()[-1] if away_full.split() else away_full)
        home_team = team_mapping.get(home_full, home_full.split()[-1] if home_full.split() else home_full)
        
        # Clean up team names (remove extra spaces, etc.)
        away_team = away_team.strip().upper()
        home_team = home_team.strip().upper()
        
        if show_details:
            # Get game time info for header
            matchup_key = f"{away_team} @ {home_team}"
            if matchup_key in game_schedule:
                day, time = game_schedule[matchup_key]
                
                # Display time slot header if it's a new time slot
                if day != current_day or time.split()[0] != current_time_slot:
                    if current_day is not None:  # Add spacing between time slots
                        print()
                    print(f"{day.upper()}")
                    print(f"{time}")
                    print("-" * 50)
                    current_day = day
                    current_time_slot = time.split()[0]  # Get just the time part
            
            print(f"Game {i}: {away_team} @ {home_team} (Processing...)")
        
        try:
            # Check if teams exist in our data
            if away_team not in team_features or home_team not in team_features:
                if show_details:
                    print(f"   Missing team data for {away_team} or {home_team}")
                    available_teams = list(team_features.keys())[:10]  # Show first 10
                    print(f"   Available teams (sample): {available_teams}")
                continue
            
            # Create game features
            game_features = predictor.create_game_features(
                home_team, away_team, team_features,
                season, week, is_playoff=False, is_neutral=False
            )
            
            if game_features is None:
                if show_details:
                    print(f"   Could not create game features")
                continue
            
            # Make prediction
            game_df = pd.DataFrame([game_features])
            prediction_result = predictor.predict_games(game_df)
            
            # Extract results for this specific game
            home_win_prob = prediction_result['home_win_prob'].iloc[0]
            predicted_home_win = prediction_result['predicted_home_win'].iloc[0]
            high_confidence = prediction_result['high_confidence_bet'].iloc[0]
            
            # Determine winner for this specific game
            winner = home_team if predicted_home_win == 1 else away_team
            confidence = home_win_prob if predicted_home_win == 1 else (1 - home_win_prob)
            
            # Store prediction
            prediction_data = {
                'game_num': i,
                'away_team': away_team,
                'home_team': home_team,
                'matchup': f"{away_team} @ {home_team}",
                'predicted_winner': winner,
                'confidence': confidence,
                'home_win_prob': home_win_prob,
                'away_win_prob': 1 - home_win_prob,
                'high_confidence': high_confidence
            }
            
            all_predictions.append(prediction_data)
            successful_predictions += 1
            
            if show_details:
                # Get game time info
                matchup_key = f"{away_team} @ {home_team}"
                if matchup_key in game_schedule:
                    day, time = game_schedule[matchup_key]
                    print(f"   Game {i} Complete: {away_team} @ {home_team}")
                    print(f"   {day} at {time}")
                    print(f"   Winner: {winner} ({confidence:.1%})")
                    print(f"   Home: {home_win_prob:.1%} | Away: {(1-home_win_prob):.1%}")
                else:
                    print(f"   Game {i} Complete: {away_team} @ {home_team}")
                    print(f"   Winner: {winner} ({confidence:.1%})")
                    print(f"   Home: {home_win_prob:.1%} | Away: {(1-home_win_prob):.1%}")
                print()
        
        except Exception as e:
            if show_details:
                print(f"   Error: {e}")
                print()
            continue
    
    if not all_predictions:
        print("No successful predictions made")
        return None
    
    # Create summary DataFrame
    predictions_df = pd.DataFrame(all_predictions)
    
    # Summary statistics
    print(f"\nPREDICTION SUMMARY")
    print("="*40)
    print(f"Successful predictions: {successful_predictions}/{len(matches)}")
    
    # Show top confident predictions
    print(f"\nTOP CONFIDENT PREDICTIONS:")
    top_predictions = predictions_df.nlargest(3, 'confidence')
    for _, game in top_predictions.iterrows():
        print(f"   {game['matchup']} -> {game['predicted_winner']} ({game['confidence']:.1%})")
    
    return predictions_df

# Example: NFL Week 1 Complete Schedule (Chronological Order)
week1_games = """
(Away) DAL Cowboys vs. (Home) PHI Eagles
(Away) KC Chiefs vs. (Home) LAC Chargers
(Away) TB Buccaneers vs. (Home) ATL Falcons
(Away) CIN Bengals vs. (Home) CLE Browns
(Away) MIA Dolphins vs. (Home) IND Colts
(Away) CAR Panthers vs. (Home) JAX Jaguars
    "(Away) LV Raiders vs. (Home) NE Patriots
",
(Away) ARI Cardinals vs. (Home) NO Saints
(Away) PIT Steelers vs. (Home) NYJ Jets
(Away) NYG Giants vs. (Home) WAS Commanders
(Away) TEN Titans vs. (Home) DEN Broncos
(Away) SF 49ers vs. (Home) SEA Seahawks
(Away) DET Lions vs. (Home) GB Packers
(Away) HOU Texans vs. (Home) LAR Rams
(Away) BAL Ravens vs. (Home) BUF Bills
(Away) MIN Vikings vs. (Home) CHI Bears
"""

# Quick prediction function for copy-paste game lists
def quick_predict_week1_games():
    """Quick function to predict Week 1 NFL games"""
    
    # Check if we have a trained predictor
    if 'predictor' in locals() or 'predictor' in globals():
        try:
            # Access predictor from global scope
            global predictor
            if predictor.final_model is not None:
                print("PREDICTING NFL WEEK 1 GAMES")
                print("="*50)
                
                results = predict_multiple_games(
                    predictor, 
                    week1_games, 
                    season=2025, 
                    week=1, 
                    show_details=True  # Show details to debug missing games
                )
                
                return results
            else:
                print("No trained model found. Train the model first.")
                return None
        except NameError:
            print("No predictor found. Train the model first.")
            return None
    else:
        print("No predictor found. Train the model first.")
        return None

print("MULTIPLE GAME PREDICTION SYSTEM READY!")
print("\nAvailable Functions:")
print("  predict_multiple_games(predictor, games_text, season, week)")
print("  quick_predict_week1_games()")
print("\nUsage:")
print("  # Predict all Week 1 games:")
print("  results = quick_predict_week1_games()")
print("\n  # Custom game list:")
print("  my_games = '(Away) KC vs. (Home) BUF\\n(Away) DAL vs. (Home) PHI'")
print("  predict_multiple_games(predictor, my_games, 2025, 1)")

MULTIPLE GAME PREDICTION SYSTEM READY!

Available Functions:
  predict_multiple_games(predictor, games_text, season, week)
  quick_predict_week1_games()

Usage:
  # Predict all Week 1 games:
  results = quick_predict_week1_games()

  # Custom game list:
  my_games = '(Away) KC vs. (Home) BUF\n(Away) DAL vs. (Home) PHI'
  predict_multiple_games(predictor, my_games, 2025, 1)


In [12]:
# RUN WEEK 1 GAME PREDICTIONS
print("EXECUTING NFL WEEK 1 PREDICTIONS")
print("="*50)

# Check if model is trained and ready
if 'predictor' in locals() and hasattr(predictor, 'final_model') and predictor.final_model is not None:
    print("Model is trained and ready!")
    print("Predicting all Week 1 games...")
    print()
    
    # Run predictions for all Week 1 games with concise output
    week1_results = predict_multiple_games(predictor, week1_games, season=2025, week=1, show_details=False)
    
    if week1_results is not None:
        print(f"Found {len(week1_results)} successful predictions out of 16 expected games")
        print("\nPREDICTED WINNERS:")
        print("="*50)
        
        # Game schedule for times
        game_schedule = {
            'DAL @ PHI': ('Thursday, September 4', '8:20 PM ET'),
            'KC @ LAC': ('Friday, September 5', '8:00 PM ET'),
            'TB @ ATL': ('Sunday, September 7', '1:00 PM ET'),
            'CIN @ CLE': ('Sunday, September 7', '1:00 PM ET'),
            'MIA @ IND': ('Sunday, September 7', '1:00 PM ET'),
            'CAR @ JAX': ('Sunday, September 7', '1:00 PM ET'),
            'LV @ NE': ('Sunday, September 7', '1:00 PM ET'),
            'ARI @ NO': ('Sunday, September 7', '1:00 PM ET'),
            'PIT @ NYJ': ('Sunday, September 7', '1:00 PM ET'),
            'NYG @ WAS': ('Sunday, September 7', '1:00 PM ET'),
            'TEN @ DEN': ('Sunday, September 7', '4:05 PM ET'),
            'SF @ SEA': ('Sunday, September 7', '4:05 PM ET'),
            'DET @ GB': ('Sunday, September 7', '4:25 PM ET'),
            'HOU @ LA': ('Sunday, September 7', '4:25 PM ET'),
            'BAL @ BUF': ('Sunday, September 7', '8:20 PM ET'),
            'MIN @ CHI': ('Monday, September 8', '8:15 PM ET')
        }
        
        # Display results in chronological order (not sorted by confidence)
        time_order = [
            'DAL @ PHI', 'KC @ LAC', 'TB @ ATL', 'CIN @ CLE', 
            'MIA @ IND', 'CAR @ JAX', 'LV @ NE', 'ARI @ NO',
            'PIT @ NYJ', 'NYG @ WAS', 'TEN @ DEN', 'SF @ SEA',
            'DET @ GB', 'HOU @ LA', 'BAL @ BUF', 'MIN @ CHI'
        ]
        
        for matchup in time_order:
            game_row = week1_results[week1_results['matchup'] == matchup]
            if not game_row.empty:
                game = game_row.iloc[0]
                time_info = game_schedule.get(matchup, ('', ''))
                day, time = time_info if time_info else ('', '')
                print(f"{day} {time}: {matchup} -> {game['predicted_winner']} ({game['confidence']:.1%})")
        
        print(f"\nSUMMARY:")
        print("-" * 30)
        print(f"Games Predicted: {len(week1_results)}")
        print(f"Average Confidence: {week1_results['confidence'].mean():.1%}")
        
        # FINAL RESULTS - Clean format for screenshots
        print(f"\n\nFINAL WEEK 1 PREDICTIONS")
        print("=" * 50)
        print(f"{'MATCHUP':<20} {'WINNER':<12} {'CONFIDENCE':<12}")
        print("-" * 50)
        
        # Team name mapping for display
        team_names = {
            'DAL': 'Cowboys', 'PHI': 'Eagles', 'KC': 'Chiefs', 'LAC': 'Chargers',
            'TB': 'Buccaneers', 'ATL': 'Falcons', 'CIN': 'Bengals', 'CLE': 'Browns',
            'MIA': 'Dolphins', 'IND': 'Colts', 'CAR': 'Panthers', 'JAX': 'Jaguars',
            'LV': 'Raiders', 'NE': 'Patriots', 'ARI': 'Cardinals', 'NO': 'Saints',
            'PIT': 'Steelers', 'NYJ': 'Jets', 'NYG': 'Giants', 'WAS': 'Commanders',
            'TEN': 'Titans', 'DEN': 'Broncos', 'SF': '49ers', 'SEA': 'Seahawks',
            'DET': 'Lions', 'GB': 'Packers', 'HOU': 'Texans', 'LA': 'Rams',
            'BAL': 'Ravens', 'BUF': 'Bills', 'MIN': 'Vikings', 'CHI': 'Bears'
        }
        
        # Sort by game time order for easy reading
        time_order = [
            'DAL @ PHI', 'KC @ LAC', 'TB @ ATL', 'CIN @ CLE', 
            'MIA @ IND', 'CAR @ JAX', 'LV @ NE', 'ARI @ NO',
            'PIT @ NYJ', 'NYG @ WAS', 'TEN @ DEN', 'SF @ SEA',
            'DET @ GB', 'HOU @ LA', 'BAL @ BUF', 'MIN @ CHI'
        ]
        
        # Display results in chronological order
        for matchup in time_order:
            game_row = week1_results[week1_results['matchup'] == matchup]
            if not game_row.empty:
                game = game_row.iloc[0]
                winner_abbr = game['predicted_winner']
                winner_name = team_names.get(winner_abbr, winner_abbr)
                print(f"{game['matchup']:<20} {winner_name:<12} {game['confidence']:.1%}")
        
        print("-" * 50)
        print(f"Total Games: {len(week1_results)} | Avg Confidence: {week1_results['confidence'].mean():.1%}")
        
        # Save results to variable for further analysis
        print(f"\nResults saved to 'week1_results' variable")
        
    else:
        print("ERROR: Failed to generate predictions")
        
else:
    print("ERROR: Model not trained yet!")
    print("\nPlease run cell 3 to train the model first")

EXECUTING NFL WEEK 1 PREDICTIONS
Model is trained and ready!
Predicting all Week 1 games...

MULTIPLE GAME PREDICTIONS - Week 1, 2025
Found 16 games to predict


PREDICTION SUMMARY
Successful predictions: 16/16

TOP CONFIDENT PREDICTIONS:
   DAL @ PHI -> PHI (93.3%)
   PIT @ NYJ -> NYJ (74.8%)
   TEN @ DEN -> DEN (72.2%)
Found 16 successful predictions out of 16 expected games

PREDICTED WINNERS:
Thursday, September 4 8:20 PM ET: DAL @ PHI -> PHI (93.3%)
Friday, September 5 8:00 PM ET: KC @ LAC -> LAC (63.5%)
Sunday, September 7 1:00 PM ET: TB @ ATL -> TB (54.0%)
Sunday, September 7 1:00 PM ET: CIN @ CLE -> CIN (58.5%)
Sunday, September 7 1:00 PM ET: MIA @ IND -> IND (62.7%)
Sunday, September 7 1:00 PM ET: CAR @ JAX -> JAX (67.3%)
Sunday, September 7 1:00 PM ET: LV @ NE -> NE (51.5%)
Sunday, September 7 1:00 PM ET: ARI @ NO -> NO (64.1%)
Sunday, September 7 1:00 PM ET: PIT @ NYJ -> NYJ (74.8%)
Sunday, September 7 1:00 PM ET: NYG @ WAS -> WAS (66.1%)
Sunday, September 7 4:05 PM ET: TEN 

In [14]:
import joblib

# Save the trained model

joblib.dump(predictor.final_model, 'final_model.joblib')

# To load later:
# predictor.final_model = joblib.load('nfl_data/final_model.joblib')

['final_model.joblib']

In [18]:
# MULTIPLE GAME PREDICTIONS - Predict entire game slates
import re
from datetime import datetime

def predict_multiple_games(predictor, games_text, season=2025, week=2, show_details=True):
    """
    Predict multiple games from formatted text
    
    Args:
        predictor: Trained NFLGamePredictor instance
        games_text: Formatted text with games (Away) TEAM vs. (Home) TEAM
        season: Season year
        week: Week number
        show_details: Whether to show detailed output for each game
    
    Returns:
        DataFrame with all predictions
    """
    
    if predictor.final_model is None:
        print("Model not trained yet. Please train the model first.")
        return None
    
    # Team abbreviation mapping for common variations
    team_mapping = {
        'TB Buccaneers': 'TB', 'Tampa Bay Buccaneers': 'TB', 'TB': 'TB',
        'ATL Falcons': 'ATL', 'Atlanta Falcons': 'ATL', 'ATL': 'ATL',
        'CIN Bengals': 'CIN', 'Cincinnati Bengals': 'CIN', 'CIN': 'CIN',
        'CLE Browns': 'CLE', 'Cleveland Browns': 'CLE', 'CLE': 'CLE',
        'MIA Dolphins': 'MIA', 'Miami Dolphins': 'MIA', 'MIA': 'MIA',
        'IND Colts': 'IND', 'Indianapolis Colts': 'IND', 'IND': 'IND',
        'CAR Panthers': 'CAR', 'Carolina Panthers': 'CAR', 'CAR': 'CAR',
        'JAX Jaguars': 'JAX', 'Jacksonville Jaguars': 'JAX', 'JAX': 'JAX',
        'LV Raiders': 'LV', 'Las Vegas Raiders': 'LV', 'LV': 'LV',
        'NE Patriots': 'NE', 'New England Patriots': 'NE', 'NE': 'NE',
        'ARI Cardinals': 'ARI', 'Arizona Cardinals': 'ARI', 'ARI': 'ARI',
        'NO Saints': 'NO', 'New Orleans Saints': 'NO', 'NO': 'NO',
        'PIT Steelers': 'PIT', 'Pittsburgh Steelers': 'PIT', 'PIT': 'PIT',
        'NYJ Jets': 'NYJ', 'New York Jets': 'NYJ', 'NYJ': 'NYJ',
        'NYG Giants': 'NYG', 'New York Giants': 'NYG', 'NYG': 'NYG',
        'WAS Commanders': 'WAS', 'Washington Commanders': 'WAS', 'WAS': 'WAS',
        'TEN Titans': 'TEN', 'Tennessee Titans': 'TEN', 'TEN': 'TEN',
        'DEN Broncos': 'DEN', 'Denver Broncos': 'DEN', 'DEN': 'DEN',
        'SF 49ers': 'SF', 'San Francisco 49ers': 'SF', 'SF': 'SF',
        'SEA Seahawks': 'SEA', 'Seattle Seahawks': 'SEA', 'SEA': 'SEA',
        'DET Lions': 'DET', 'Detroit Lions': 'DET', 'DET': 'DET',
        'GB Packers': 'GB', 'Green Bay Packers': 'GB', 'GB': 'GB',
        'HOU Texans': 'HOU', 'Houston Texans': 'HOU', 'HOU': 'HOU',
        'LA Rams': 'LA', 'Los Angeles Rams': 'LA', 'LA': 'LA',
        'LAR Rams': 'LA', 'Los Angeles Rams': 'LA', 'LAR': 'LA',  # Handle LAR variation
        'BAL Ravens': 'BAL', 'Baltimore Ravens': 'BAL', 'BAL': 'BAL',
        'BUF Bills': 'BUF', 'Buffalo Bills': 'BUF', 'BUF': 'BUF',
        'KC Chiefs': 'KC', 'Kansas City Chiefs': 'KC', 'KC': 'KC',
        'PHI Eagles': 'PHI', 'Philadelphia Eagles': 'PHI', 'PHI': 'PHI',
        'DAL Cowboys': 'DAL', 'Dallas Cowboys': 'DAL', 'DAL': 'DAL',
        'MIN Vikings': 'MIN', 'Minnesota Vikings': 'MIN', 'MIN': 'MIN',
        'CHI Bears': 'CHI', 'Chicago Bears': 'CHI', 'CHI': 'CHI',
        'LAC Chargers': 'LAC', 'Los Angeles Chargers': 'LAC', 'LAC': 'LAC'
    }
    
    
    # Parse games from text
    games = []
    game_pattern = r'\(Away\)\s+(.*?)\s+vs\.\s+\(Home\)\s+(.*)'
    
    matches = re.findall(game_pattern, games_text, re.IGNORECASE)
    
    if not matches:
        print("No games found in the provided text. Expected format:")
        print("(Away) TEAM vs. (Home) TEAM")
        return None
    
    print(f"MULTIPLE GAME PREDICTIONS - Week {week}, {season}")
    print("="*60)
    print(f"Found {len(matches)} games to predict")
    print()
    
    # Get team features once for all predictions - Use 2025 Week 1 data if available
    try:
        team_features = predictor.create_team_features(weekly_data, 2025, week)
        
        # If no 2025 data available (early in season), fall back to end of 2024
        if not team_features:
            print("No 2025 data available yet, using 2024 season-end data...")
            team_features = predictor.create_team_features(weekly_data, 2024, 19)
            data_source = "2024 season-end"
        else:
            print(f"Using 2025 data through Week {week-1}...")
            data_source = f"2025 Week {week-1}"
        
        if not team_features:
            print("Could not create team features")
            return None
        
        # Debug: Show available teams and data source
        if show_details:
            print(f"Data Source: {data_source}")
            print(f"Available teams in data: {sorted(list(team_features.keys()))}")
            print()
            
    except Exception as e:
        print(f"Error loading team data: {e}")
        print("Make sure weekly_data is available.")
        return None
    
    all_predictions = []
    successful_predictions = 0
    
    # Track current time slot for headers
    current_day = None
    current_time_slot = None
    
    for i, (away_full, home_full) in enumerate(matches, 1):
        away_full = away_full.strip()
        home_full = home_full.strip()
        
        # Map to abbreviations
        away_team = team_mapping.get(away_full, away_full.split()[-1] if away_full.split() else away_full)
        home_team = team_mapping.get(home_full, home_full.split()[-1] if home_full.split() else home_full)
        
        # Clean up team names (remove extra spaces, etc.)
        away_team = away_team.strip().upper()
        home_team = home_team.strip().upper()
        
        if show_details:
            # Get game time info for header
            matchup_key = f"{away_team} @ {home_team}"
            if matchup_key in game_schedule:
                day, time = game_schedule[matchup_key]
                
                # Display time slot header if it's a new time slot
                if day != current_day or time.split()[0] != current_time_slot:
                    if current_day is not None:  # Add spacing between time slots
                        print()
                    print(f"{day.upper()}")
                    print(f"{time}")
                    print("-" * 50)
                    current_day = day
                    current_time_slot = time.split()[0]  # Get just the time part
            
            print(f"Game {i}: {away_team} @ {home_team} (Processing...)")
        
        try:
            # Check if teams exist in our data
            if away_team not in team_features or home_team not in team_features:
                if show_details:
                    print(f"   Missing team data for {away_team} or {home_team}")
                    available_teams = list(team_features.keys())[:10]  # Show first 10
                    print(f"   Available teams (sample): {available_teams}")
                continue
            
            # Create game features
            game_features = predictor.create_game_features(
                home_team, away_team, team_features,
                season, week, is_playoff=False, is_neutral=False
            )
            
            if game_features is None:
                if show_details:
                    print(f"   Could not create game features")
                continue
            
            # Make prediction
            game_df = pd.DataFrame([game_features])
            prediction_result = predictor.predict_games(game_df)
            
            # Extract results for this specific game
            home_win_prob = prediction_result['home_win_prob'].iloc[0]
            predicted_home_win = prediction_result['predicted_home_win'].iloc[0]
            high_confidence = prediction_result['high_confidence_bet'].iloc[0]
            
            # Determine winner for this specific game
            winner = home_team if predicted_home_win == 1 else away_team
            confidence = home_win_prob if predicted_home_win == 1 else (1 - home_win_prob)
            
            # Store prediction
            prediction_data = {
                'game_num': i,
                'away_team': away_team,
                'home_team': home_team,
                'matchup': f"{away_team} @ {home_team}",
                'predicted_winner': winner,
                'confidence': confidence,
                'home_win_prob': home_win_prob,
                'away_win_prob': 1 - home_win_prob,
                'high_confidence': high_confidence
            }
            
            all_predictions.append(prediction_data)
            successful_predictions += 1
            
            if show_details:
                # Get game time info
                matchup_key = f"{away_team} @ {home_team}"
                if matchup_key in game_schedule:
                    day, time = game_schedule[matchup_key]
                    print(f"   Game {i} Complete: {away_team} @ {home_team}")
                    print(f"   {day} at {time}")
                    print(f"   Winner: {winner} ({confidence:.1%})")
                    print(f"   Home: {home_win_prob:.1%} | Away: {(1-home_win_prob):.1%}")
                else:
                    print(f"   Game {i} Complete: {away_team} @ {home_team}")
                    print(f"   Winner: {winner} ({confidence:.1%})")
                    print(f"   Home: {home_win_prob:.1%} | Away: {(1-home_win_prob):.1%}")
                print()
        
        except Exception as e:
            if show_details:
                print(f"   Error: {e}")
                print()
            continue
    
    if not all_predictions:
        print("No successful predictions made")
        return None
    
    # Create summary DataFrame
    predictions_df = pd.DataFrame(all_predictions)
    
    # Summary statistics
    print(f"\nPREDICTION SUMMARY")
    print("="*40)
    print(f"Successful predictions: {successful_predictions}/{len(matches)}")
    print(f"Data source: {data_source}")
    
    # Show top confident predictions
    print(f"\nTOP CONFIDENT PREDICTIONS:")
    top_predictions = predictions_df.nlargest(3, 'confidence')
    for _, game in top_predictions.iterrows():
        print(f"   {game['matchup']} -> {game['predicted_winner']} ({game['confidence']:.1%})")
    
    return predictions_df

# Example: NFL Week 3 Complete Schedule (Actual 2025 Schedule)
week4_games = """
(Away) Seattle Seahawks vs. (Home) Arizona Cardinals
(Away) Minnesota Vikings vs. (Home) Pittsburgh Steelers
(Away) Washington Commanders vs. (Home) Atlanta Falcons
(Away) Los Angeles Chargers vs. (Home) New York Giants
(Away) Tennessee Titans vs. (Home) Houston Texans
(Away) Philadelphia Eagles vs. (Home) Tampa Bay Buccaneers
(Away) Carolina Panthers vs. (Home) New England Patriots
(Away) New Orleans Saints vs. (Home) Buffalo Bills
(Away) Cleveland Browns vs. (Home) Detroit Lions
(Away) Jacksonville Jaguars vs. (Home) San Francisco 49ers
(Away) Indianapolis Colts vs. (Home) Los Angeles Rams
(Away) Chicago Bears vs. (Home) Las Vegas Raiders
(Away) Baltimore Ravens vs. (Home) Kansas City Chiefs
(Away) Green Bay Packers vs. (Home) Dallas Cowboys
(Away) New York Jets vs. (Home) Miami Dolphins
(Away) Cincinnati Bengals vs. (Home) Denver Broncos
"""



# Quick prediction function for copy-paste game lists
def quick_predict_week2_games():
    """Quick function to predict Week 2 NFL games"""
    
    # Check if we have a trained predictor
    if 'predictor' in locals() or 'predictor' in globals():
        try:
            # Access predictor from global scope
            global predictor
            if predictor.final_model is not None:
                print("PREDICTING NFL WEEK 2 GAMES")
                print("="*50)
                
                results = predict_multiple_games(
                    predictor, 
                    week4_games, 
                    season=2025, 
                    week=3, 
                    show_details=True  # Show details to debug missing games
                )
                
                return results
            else:
                print("No trained model found. Train the model first.")
                return None
        except NameError:
            print("No predictor found. Train the model first.")
            return None
    else:
        print("No predictor found. Train the model first.")
        return None

def quick_predict_week1_games():
    """Quick function to predict Week 1 NFL games"""
    
    # Check if we have a trained predictor
    if 'predictor' in locals() or 'predictor' in globals():
        try:
            # Access predictor from global scope
            global predictor
            if predictor.final_model is not None:
                print("PREDICTING NFL WEEK 1 GAMES")
                print("="*50)
                
                results = predict_multiple_games(
                    predictor, 
                    season=2025, 
                    week=1, 
                    show_details=True  # Show details to debug missing games
                )
                
                return results
            else:
                print("No trained model found. Train the model first.")
                return None
        except NameError:
            print("No predictor found. Train the model first.")
            return None
    else:
        print("No predictor found. Train the model first.")
        return None

print("MULTIPLE GAME PREDICTION SYSTEM READY!")
print("\nAvailable Functions:")
print("  predict_multiple_games(predictor, games_text, season, week)")
print("  quick_predict_week3_games()  # NEW: Week 3 predictions")
print("\nUsage:")
print("  # Predict all Week 3 games:")
print("  results = quick_predict_week3_games()")
print("\n  # Custom game list:")
print("  my_games = '(Away) KC vs. (Home) BUF\\n(Away) DAL vs. (Home) PHI'")
print("  predict_multiple_games(predictor, my_games, 2025, 2)")

MULTIPLE GAME PREDICTION SYSTEM READY!

Available Functions:
  predict_multiple_games(predictor, games_text, season, week)
  quick_predict_week3_games()  # NEW: Week 3 predictions

Usage:
  # Predict all Week 3 games:
  results = quick_predict_week3_games()

  # Custom game list:
  my_games = '(Away) KC vs. (Home) BUF\n(Away) DAL vs. (Home) PHI'
  predict_multiple_games(predictor, my_games, 2025, 2)


In [20]:
# RUN WEEK 3 GAME PREDICTIONS
print("EXECUTING NFL WEEK 3 PREDICTIONS")
print("="*50)

# Check if model is trained and ready
if 'predictor' in locals() and hasattr(predictor, 'final_model') and predictor.final_model is not None:
    print("Model is trained and ready!")
    print("Predicting all Week 3 games...")
    print()

    # Run predictions for all Week 3 games with concise output
    week4_results = predict_multiple_games(predictor, week4_games, season=2025, week=4, show_details=False)

    if week4_results is not None:
        print(f"Found {len(week4_results)} successful predictions out of 16 expected games")
        print("\nPREDICTED WINNERS:")
        print("="*50)

        # Week 3 game schedule with times (Eastern Time) - September 2025
        game_schedule = {
            'SEA @ ARI': ('Thursday, September 25', '8:15 PM ET'),
            'MIN @ PIT': ('Sunday, September 28', '9:30 AM ET'),
            'WAS @ ATL': ('Sunday, September 28', '1:00 PM ET'),
            'NO @ BUF': ('Sunday, September 28', '1:00 PM ET'),
            'CLE @ DET': ('Sunday, September 28', '1:00 PM ET'),
            'TEN @ HOU': ('Sunday, September 28', '1:00 PM ET'),
            'CAR @ NE': ('Sunday, September 28', '1:00 PM ET'),
            'LAC @ NYG': ('Sunday, September 28', '1:00 PM ET'),
            'PHI @ TB': ('Sunday, September 28', '1:00 PM ET'),
            'IND @ LA': ('Sunday, September 28', '4:05 PM ET'),
            'JAX @ SF': ('Sunday, September 28', '4:05 PM ET'),
            'BAL @ KC': ('Sunday, September 28', '4:25 PM ET'),
            'CHI @ LV': ('Sunday, September 28', '4:25 PM ET'),
            'GB @ DAL': ('Sunday, September 28', '8:20 PM ET'),
            'NYJ @ MIA': ('Monday, September 29', '7:15 PM ET'),
            'CIN @ DEN': ('Monday, September 29', '8:15 PM ET')
        }

        
        # Display results in chronological order (not sorted by confidence)
        time_order = [
            'SEA @ ARI',
            'MIN @ PIT',
            'WAS @ ATL', 'NO @ BUF', 'CLE @ DET', 'TEN @ HOU',
            'CAR @ NE', 'LAC @ NYG', 'PHI @ TB',
            'IND @ LA', 'JAX @ SF',
            'BAL @ KC', 'CHI @ LV',
            'GB @ DAL',
            'NYJ @ MIA',
            'CIN @ DEN'
        ]
                
        for matchup in time_order:
            game_row = week4_results[week4_results['matchup'] == matchup]
            if not game_row.empty:
                game = game_row.iloc[0]
                time_info = game_schedule.get(matchup, ('', ''))
                day, time = time_info if time_info else ('', '')
                print(f"{day} {time}: {matchup} -> {game['predicted_winner']} ({game['confidence']:.1%})")
        
        print(f"\nSUMMARY:")
        print("-" * 30)
        print(f"Games Predicted: {len(week4_results)}")
        print(f"Average Confidence: {week4_results['confidence'].mean():.1%}")

        # FINAL RESULTS - Clean format for screenshots
        print(f"\n\nFINAL WEEK 4 PREDICTIONS")
        print("=" * 50)
        print(f"{'MATCHUP':<20} {'WINNER':<12} {'CONFIDENCE':<12}")
        print("-" * 50)
        
        # Team name mapping for display
        team_names = {
            'WAS': 'Commanders', 'GB': 'Packers', 'NYG': 'Giants', 'DAL': 'Cowboys',
            'SEA': 'Seahawks', 'PIT': 'Steelers', 'LA': 'Rams', 'TEN': 'Titans',
            'BUF': 'Bills', 'NYJ': 'Jets', 'NE': 'Patriots', 'MIA': 'Dolphins',
            'JAX': 'Jaguars', 'CIN': 'Bengals', 'SF': '49ers', 'NO': 'Saints',
            'CLE': 'Browns', 'BAL': 'Ravens', 'CHI': 'Bears', 'DET': 'Lions',
            'DEN': 'Broncos', 'IND': 'Colts', 'CAR': 'Panthers', 'ARI': 'Cardinals',
            'PHI': 'Eagles', 'KC': 'Chiefs', 'ATL': 'Falcons', 'MIN': 'Vikings',
            'TB': 'Buccaneers', 'HOU': 'Texans', 'LAC': 'Chargers', 'LV': 'Raiders'
        }
        
        # Display results in chronological order
        for matchup in time_order:
            game_row = week4_results[week4_results['matchup'] == matchup]
            if not game_row.empty:
                game = game_row.iloc[0]
                winner_abbr = game['predicted_winner']
                winner_name = team_names.get(winner_abbr, winner_abbr)
                print(f"{game['matchup']:<20} {winner_name:<12} {game['confidence']:.1%}")
        
        print("-" * 50)
        print(f"Total Games: {len(week4_results)} | Avg Confidence: {week4_results['confidence'].mean():.1%}")
        
        # Save results to variable for further analysis
        print(f"\nResults saved to 'week4_results' variable")

    else:
        print("ERROR: Failed to generate predictions")
        
else:
    print("ERROR: Model not trained yet!")
    print("\nPlease run Step 2 to train the model first")

EXECUTING NFL WEEK 3 PREDICTIONS
Model is trained and ready!
Predicting all Week 3 games...

MULTIPLE GAME PREDICTIONS - Week 4, 2025
Found 16 games to predict

No 2025 data available yet, using 2024 season-end data...

PREDICTION SUMMARY
Successful predictions: 16/16
Data source: 2024 season-end

TOP CONFIDENT PREDICTIONS:
   NO @ BUF -> BUF (73.3%)
   CLE @ DET -> DET (69.4%)
   LAC @ NYG -> NYG (63.4%)
Found 16 successful predictions out of 16 expected games

PREDICTED WINNERS:
Thursday, September 25 8:15 PM ET: SEA @ ARI -> ARI (55.8%)
Sunday, September 28 9:30 AM ET: MIN @ PIT -> MIN (57.1%)
Sunday, September 28 1:00 PM ET: WAS @ ATL -> WAS (54.7%)
Sunday, September 28 1:00 PM ET: NO @ BUF -> BUF (73.3%)
Sunday, September 28 1:00 PM ET: CLE @ DET -> DET (69.4%)
Sunday, September 28 1:00 PM ET: TEN @ HOU -> HOU (62.1%)
Sunday, September 28 1:00 PM ET: CAR @ NE -> NE (51.5%)
Sunday, September 28 1:00 PM ET: LAC @ NYG -> NYG (63.4%)
Sunday, September 28 1:00 PM ET: PHI @ TB -> PHI (5