In [12]:
# Phase 3: Weekly Lineup Optimization Setup
# Create this in a new notebook: 03_weekly_optimization.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.optimize import linprog
import warnings
warnings.filterwarnings('ignore')
import nfl_data_py as nfl

print("PHASE 3: WEEKLY LINEUP OPTIMIZATION")
print("=" * 50)

# Load the dataset
df = pd.read_pickle('../data/fantasy_data_2023.pkl')

# Create metadata
metadata = {
    'positions': sorted(df['position'].unique().tolist()),
    'weeks_covered': sorted(df['week'].unique().tolist()),
    'total_records': len(df),
    'unique_players': df['player_display_name'].nunique()
}

print(f"Dataset loaded: {df.shape[0]:,} records")
print(f"Positions: {metadata['positions']}")
print(f"Weeks: {len(metadata['weeks_covered'])} weeks")

# Your league lineup requirements (10 starters)
lineup_requirements = {
    'QB': 1,
    'RB': 2,
    'WR': 2,
    'TE': 1,
    'FLEX1': 1,  # RB/WR/TE
    'FLEX2': 1,  # RB/WR/TE
    'K': 1,
    'DEF': 1
}

print(f"\nYour league lineup requirements: {lineup_requirements}")
print(f"Total starters: {sum(lineup_requirements.values())}")

# Define flex eligibility
flex_eligible_positions = ['RB', 'WR', 'TE']
print(f"FLEX eligible positions: {flex_eligible_positions}")

# potentially start:
print(f"\nPossible lineup combinations:")
print(f"- Max RBs: 4 (2 RB + 2 FLEX)")
print(f"- Max WRs: 4 (2 WR + 2 FLEX)")  
print(f"- Max TEs: 3 (1 TE + 2 FLEX)")
print(f"- Always: 1 QB, 1 K, 1 DEF")


PHASE 3: WEEKLY LINEUP OPTIMIZATION
Dataset loaded: 6,670 records
Positions: ['DEF', 'K', 'QB', 'RB', 'TE', 'WR']
Weeks: 22 weeks

Your league lineup requirements: {'QB': 1, 'RB': 2, 'WR': 2, 'TE': 1, 'FLEX1': 1, 'FLEX2': 1, 'K': 1, 'DEF': 1}
Total starters: 10
FLEX eligible positions: ['RB', 'WR', 'TE']

Possible lineup combinations:
- Max RBs: 4 (2 RB + 2 FLEX)
- Max WRs: 4 (2 WR + 2 FLEX)
- Max TEs: 3 (1 TE + 2 FLEX)
- Always: 1 QB, 1 K, 1 DEF


# Weekly Performance Features

In [None]:


def create_weekly_features(df):
    """Create features for predicting weekly performance"""
    
    # Sort by player and week
    df_sorted = df.sort_values(['player_display_name', 'week'])
    
    # Rolling averages (last N games)
    for window in [3, 5]:
        df_sorted[f'rolling_avg_{window}w'] = df_sorted.groupby('player_display_name')['custom_fantasy_points'].transform(
            lambda x: x.rolling(window=window, min_periods=1).mean().shift(1)
        )
    
    # Recent trend (slope of last 3 games)
    def calculate_recent_trend(group):
        trends = []
        for i in range(len(group)):
            if i < 2:
                trends.append(0)
            else:
                recent_points = group['custom_fantasy_points'].iloc[max(0, i-2):i+1].values
                if len(recent_points) >= 2:
                    x = np.arange(len(recent_points))
                    slope = np.polyfit(x, recent_points, 1)[0]
                    trends.append(slope)
                else:
                    trends.append(0)
        return pd.Series(trends, index=group.index)
    
    df_sorted['recent_trend'] = df_sorted.groupby('player_display_name').apply(calculate_recent_trend).reset_index(drop=True)
    
    # Games since last big game (>20 points)
    df_sorted['games_since_boom'] = 0
    for player in df_sorted['player_display_name'].unique():
        player_mask = df_sorted['player_display_name'] == player
        player_data = df_sorted[player_mask].copy()
        
        games_since = 0
        for idx in player_data.index:
            df_sorted.loc[idx, 'games_since_boom'] = games_since
            if df_sorted.loc[idx, 'custom_fantasy_points'] > 20:
                games_since = 0
            else:
                games_since += 1
    
    # Season week (early/mid/late season effects)
    df_sorted['season_week'] = df_sorted['week']
    df_sorted['is_early_season'] = (df_sorted['week'] <= 6).astype(int)
    df_sorted['is_late_season'] = (df_sorted['week'] >= 15).astype(int)
    
    return df_sorted

def create_matchup_features(df):
    """Create opponent/matchup-based features"""
    
    # This would ideally use strength of schedule data
    # For now, we'll create simplified features
    
    # Opponent team (we'll encode this later)
    df['opponent'] = df['opponent_team'] if 'opponent_team' in df.columns else 'Unknown'
    
    # Home/away (if available in your data)
    # For now, we'll create a placeholder
    df['is_home'] = np.random.choice([0, 1], size=len(df))  # Placeholder
    
    return df

def create_target_variables(df):
    """Create different target variables for different prediction tasks"""
    
    # Main target: actual fantasy points
    df['target_points'] = df['custom_fantasy_points']
    
    # Binary targets for different thresholds
    df['hit_projection'] = (df['custom_fantasy_points'] > df['custom_fantasy_points'].mean()).astype(int)
    df['boom_game'] = (df['custom_fantasy_points'] > 20).astype(int)  # Arbitrary threshold
    df['bust_game'] = (df['custom_fantasy_points'] < 5).astype(int)   # Arbitrary threshold
    
    return df

def prepare_modeling_data(df):
    """Prepare data for weekly prediction models"""
    
    # Feature engineering
    print("Creating weekly features...")
    df_features = create_weekly_features(df)
    
    print("Creating matchup features...")
    df_features = create_matchup_features(df_features)
    
    print("Creating target variables...")
    df_features = create_target_variables(df_features)
    
    # Filter out first few weeks per player (need history for features)
    df_modeling = df_features.groupby('player_display_name').apply(
        lambda x: x.iloc[2:] if len(x) > 2 else x.iloc[0:0]
    ).reset_index(drop=True)
    
    print(f"Modeling dataset shape: {df_modeling.shape}")
    print(f"Features available for weeks {df_modeling['week'].min()}-{df_modeling['week'].max()}")
    
    return df_modeling

def build_weekly_prediction_models(df_modeling):
    """Build models to predict weekly performance"""
    
    # Feature columns for modeling
    feature_cols = [
        'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
        'season_week', 'is_early_season', 'is_late_season', 'is_home'
    ]
    
    # Remove any rows with missing features
    df_clean = df_modeling.dropna(subset=feature_cols + ['target_points'])
    
    if len(df_clean) == 0:
        print("No valid data for modeling")
        return None, None
    
    print(f"Clean modeling data: {len(df_clean)} records")
    
    # Build position-specific models
    models = {}
    model_metrics = {}
    
    for position in ['QB', 'RB', 'WR', 'TE']:
        pos_data = df_clean[df_clean['position'] == position]
        
        if len(pos_data) < 50:  # Need minimum samples
            print(f"Insufficient data for {position}: {len(pos_data)} samples")
            continue
        
        print(f"Building {position} model with {len(pos_data)} samples...")
        
        X = pos_data[feature_cols]
        y = pos_data['target_points']
        
        # Split data (use later weeks for testing)
        split_week = pos_data['week'].quantile(0.7)
        train_mask = pos_data['week'] <= split_week
        
        X_train, X_test = X[train_mask], X[~train_mask]
        y_train, y_test = y[train_mask], y[~train_mask]
        
        if len(X_test) == 0:
            print(f"No test data for {position}")
            continue
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        # Train model
        model = GradientBoostingRegressor(n_estimators=100, random_state=42)
        model.fit(X_train_scaled, y_train)
        
        # Predictions
        y_pred = model.predict(X_test_scaled)
        
        # Metrics
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        
        models[position] = {'model': model, 'scaler': scaler}
        model_metrics[position] = {'MAE': mae, 'RMSE': rmse, 'samples': len(pos_data)}
        
        print(f"{position} - MAE: {mae:.2f}, RMSE: {rmse:.2f}")
    
    return models, model_metrics

# Execute the weekly prediction pipeline
print("Preparing data for weekly prediction models...")
df_modeling = prepare_modeling_data(df)

print("\nBuilding weekly prediction models...")
weekly_models, metrics = build_weekly_prediction_models(df_modeling)

if weekly_models:
    print(f"\nWeekly prediction models built for {list(weekly_models.keys())}")
    print("\nModel performance:")
    for pos, metric in metrics.items():
        print(f"{pos}: MAE={metric['MAE']:.2f}, RMSE={metric['RMSE']:.2f} ({metric['samples']} samples)")
else:
    print("No weekly models could be built")

print(f"\nNext: Lineup optimization algorithms")

Preparing data for weekly prediction models...
Creating weekly features...
Creating matchup features...
Creating target variables...
Modeling dataset shape: (5481, 19)
Features available for weeks 3-22

Building weekly prediction models...
Clean modeling data: 5481 records
Building QB model with 535 samples...
QB - MAE: 6.15, RMSE: 7.83
Building RB model with 1146 samples...
RB - MAE: 5.18, RMSE: 6.92
Building WR model with 1910 samples...
WR - MAE: 5.34, RMSE: 7.28
Building TE model with 899 samples...
TE - MAE: 4.27, RMSE: 5.55

Weekly prediction models built for ['QB', 'RB', 'WR', 'TE']

Model performance:
QB: MAE=6.15, RMSE=7.83 (535 samples)
RB: MAE=5.18, RMSE=6.92 (1146 samples)
WR: MAE=5.34, RMSE=7.28 (1910 samples)
TE: MAE=4.27, RMSE=5.55 (899 samples)

Next: Lineup optimization algorithms


In [21]:
# Improved Lineup Optimization

def improved_lineup_optimization(predictions_df, lineup_requirements):
    """Improved lineup optimization with clearer FLEX handling"""
    
    if len(predictions_df) == 0:
        return None
    
    # Sort players by predicted points
    available = predictions_df.copy().sort_values('predicted_points', ascending=False)
    lineup = []
    
    # Track what we still need
    needs = {
        'QB': lineup_requirements['QB'],
        'RB': lineup_requirements['RB'], 
        'WR': lineup_requirements['WR'],
        'TE': lineup_requirements['TE'],
        'K': lineup_requirements['K'],
        'DEF': lineup_requirements['DEF'],
        'FLEX': 2  # 2 FLEX spots
    }
    
    print("Starting lineup optimization...")
    print(f"Available players: {len(available)}")
    
    # Step 1: Fill required positions (excluding FLEX-eligible positions for now)
    for position in ['QB', 'K', 'DEF']:
        needed = needs[position]
        pos_players = available[available['position'] == position].head(needed)
        
        for _, player in pos_players.iterrows():
            lineup.append({
                'player_name': player['player_name'],
                'position': player['position'],
                'predicted_points': player['predicted_points'],
                'lineup_slot': position
            })
            available = available[available['player_name'] != player['player_name']]
            needs[position] -= 1
        
        print(f"Filled {position}: {len(pos_players)} players")
    
    # Step 2: Fill minimum required RB, WR, TE
    for position in ['RB', 'WR', 'TE']:
        needed = needs[position]
        pos_players = available[available['position'] == position].head(needed)
        
        for _, player in pos_players.iterrows():
            lineup.append({
                'player_name': player['player_name'],
                'position': player['position'],
                'predicted_points': player['predicted_points'],
                'lineup_slot': position
            })
            available = available[available['player_name'] != player['player_name']]
            needs[position] -= 1
        
        print(f"Filled required {position}: {len(pos_players)} players")
    
    # Step 3: Fill FLEX spots with best remaining RB/WR/TE
    flex_eligible = available[available['position'].isin(['RB', 'WR', 'TE'])]
    flex_players = flex_eligible.head(needs['FLEX'])
    
    for i, (_, player) in enumerate(flex_players.iterrows()):
        lineup.append({
            'player_name': player['player_name'],
            'position': player['position'], 
            'predicted_points': player['predicted_points'],
            'lineup_slot': f'FLEX{i+1}'
        })
    
    print(f"Filled FLEX: {len(flex_players)} players")
    
    return lineup

def create_detailed_lineup_summary(lineup):
    """Create detailed lineup summary with clear position assignments"""
    
    if not lineup:
        return "No valid lineup found"
    
    lineup_df = pd.DataFrame(lineup)
    total_points = lineup_df['predicted_points'].sum()
    
    summary = f"\n🏈 OPTIMIZED LINEUP - Week {lineup_df.iloc[0].get('week', 'TBD')}\n"
    summary += f"💰 Projected Points: {total_points:.1f}\n"
    summary += "=" * 60 + "\n"
    
    # Order by lineup slot for clear display
    slot_order = ['QB', 'RB', 'WR', 'TE', 'FLEX1', 'FLEX2', 'K', 'DEF']
    
    for slot in slot_order:
        slot_players = lineup_df[lineup_df['lineup_slot'] == slot]
        if len(slot_players) > 0:
            for _, player in slot_players.iterrows():
                actual_pos = f"({player['position']})" if slot.startswith('FLEX') else ""
                summary += f"{slot:>5} {actual_pos:<4}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts\n"
    
    # Show bench players (if any)
    summary += "\n" + "=" * 60 + "\n"
    summary += f"💡 Lineup Strategy Analysis:\n"
    
    # Count positions in lineup
    pos_counts = lineup_df['position'].value_counts()
    summary += f"   RBs starting: {pos_counts.get('RB', 0)}\n"
    summary += f"   WRs starting: {pos_counts.get('WR', 0)}\n" 
    summary += f"   TEs starting: {pos_counts.get('TE', 0)}\n"
    
    return summary

def start_sit_recommendations(predictions_df, lineup):
    """Create start/sit recommendations"""
    
    if not lineup:
        return "No lineup to analyze"
    
    lineup_df = pd.DataFrame(lineup)
    starting_players = set(lineup_df['player_name'])
    
    # Find bench players
    bench_players = predictions_df[~predictions_df['player_name'].isin(starting_players)]
    
    recommendations = "\n🤔 START/SIT RECOMMENDATIONS\n"
    recommendations += "=" * 60 + "\n"
    
    if len(bench_players) > 0:
        recommendations += "💺 BENCH:\n"
        for _, player in bench_players.sort_values('predicted_points', ascending=False).iterrows():
            recommendations += f"   {player['position']}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts\n"
        
        # Check for potential position swaps
        recommendations += "\n💭 POTENTIAL SWAPS:\n"
        
        # Compare bench players to starting players of same position
        for _, bench_player in bench_players.iterrows():
            pos = bench_player['position']
            if pos in ['RB', 'WR', 'TE']:  # FLEX eligible
                # Find lowest scoring starter in same position or FLEX
                same_pos_starters = lineup_df[
                    (lineup_df['position'] == pos) | 
                    (lineup_df['lineup_slot'].str.startswith('FLEX'))
                ]
                
                if len(same_pos_starters) > 0:
                    lowest_starter = same_pos_starters.loc[same_pos_starters['predicted_points'].idxmin()]
                    
                    if bench_player['predicted_points'] > lowest_starter['predicted_points']:
                        point_diff = bench_player['predicted_points'] - lowest_starter['predicted_points']
                        recommendations += f"   Consider: {bench_player['player_name']} (+{point_diff:.1f}) over {lowest_starter['player_name']}\n"
    
    return recommendations

def weekly_lineup_optimizer(roster_players, week, weekly_models, df_modeling, lineup_requirements):
    """Complete weekly lineup optimization workflow"""
    
    print(f"WEEKLY LINEUP OPTIMIZER - WEEK {week}")
    print("=" * 60)
    
    # Step 1: Get predictions
    predictions = predict_weekly_performance(roster_players, week, weekly_models, df_modeling)
    
    if len(predictions) == 0:
        return "No predictions available for any roster players"
    
    print(f"✅ Generated predictions for {len(predictions)} players")
    
    # Step 2: Optimize lineup
    optimal_lineup = improved_lineup_optimization(predictions, lineup_requirements)
    
    if not optimal_lineup:
        return "Could not create optimal lineup"
    
    # Step 3: Create summaries
    lineup_summary = create_detailed_lineup_summary(optimal_lineup)
    start_sit = start_sit_recommendations(predictions, optimal_lineup)
    
    # Step 4: Show all predictions for reference
    all_predictions = "\n📊 ALL PLAYER PROJECTIONS\n"
    all_predictions += "=" * 60 + "\n"
    sorted_preds = predictions.sort_values('predicted_points', ascending=False)
    for _, player in sorted_preds.iterrows():
        all_predictions += f"{player['position']}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts\n"
    
    return lineup_summary + start_sit + all_predictions

# Test the improved system
example_roster = [
    'Brock Purdy', 'Raheem Blackshear', 'Saquon Barkley', 'Justin Jefferson', 
    'Jaxon Smith-Njigba', 'Tucker Kraft', 'Devante Adams', 'Rhamondre Stevenson',
    'Cameron Dicker', 'Buffalo', 'Tua Tagovailoa', 'Chuba Hubbard', 'Cooper Kupp',
    'Darnell Mooney','Jaylen Waddle'
]

result = weekly_lineup_optimizer(example_roster, 10, weekly_models, df_modeling, lineup_requirements)
print(result)

WEEKLY LINEUP OPTIMIZER - WEEK 10
✅ Generated predictions for 11 players
Starting lineup optimization...
Available players: 11
Filled QB: 1 players
Filled K: 0 players
Filled DEF: 0 players
Filled required RB: 2 players
Filled required WR: 2 players
Filled required TE: 0 players
Filled FLEX: 2 players

🏈 OPTIMIZED LINEUP - Week TBD
💰 Projected Points: 93.9
   QB     : Tua Tagovailoa         14.8 pts
   RB     : Rhamondre Stevenson    15.5 pts
   RB     : Saquon Barkley         13.8 pts
   WR     : Justin Jefferson       14.1 pts
   WR     : Jaylen Waddle          12.3 pts
FLEX1 (WR): Jaxon Smith-Njigba     11.7 pts
FLEX2 (WR): Cooper Kupp            11.7 pts

💡 Lineup Strategy Analysis:
   RBs starting: 2
   WRs starting: 4
   TEs starting: 0

🤔 START/SIT RECOMMENDATIONS
💺 BENCH:
   QB: Brock Purdy            14.7 pts
   RB: Chuba Hubbard           8.3 pts
   RB: Raheem Blackshear       5.7 pts
   WR: Darnell Mooney          5.1 pts

💭 POTENTIAL SWAPS:

📊 ALL PLAYER PROJECTIONS
RB: Rha

In [10]:
# Complete K and DEF Weekly Predictions

def add_k_def_to_weekly_features(df_features):
    """Add kicker and defense data to weekly features dataset"""
    
    print("Adding kickers and defenses to weekly features...")
    
    # We need to create K and DEF data from our original datasets
    # Load the kicker and defense data we created in Phase 1
    
    # For kickers - we need to recreate from play-by-play data
    # For defenses - we need to recreate from our defense dataset
    
    # Placeholder kicker data (you'd replace with actual kicker stats)
    kicker_data = []
    for week in range(1, 23):
        for kicker in ['Tyler Bass', 'Justin Tucker', 'Harrison Butker', 'Daniel Carlson', 'Younghoe Koo']:
            kicker_data.append({
                'player_display_name': kicker,
                'position': 'K',
                'week': week,
                'season': 2023,
                'custom_fantasy_points': np.random.normal(8, 4),  # Realistic kicker scoring
                'season_week': week,
                'is_early_season': 1 if week <= 6 else 0,
                'is_late_season': 1 if week >= 15 else 0,
                'is_home': np.random.choice([0, 1])
            })
    
    # Placeholder defense data  
    teams = ['DAL', 'SF', 'BUF', 'MIA', 'PHI', 'BAL', 'NYJ', 'CLE', 'DEN', 'PIT', 
             'KC', 'LAC', 'NE', 'LV', 'JAX', 'IND', 'HOU', 'TEN', 'CIN', 'MIN',
             'DET', 'GB', 'CHI', 'LAR', 'SEA', 'ARI', 'ATL', 'CAR', 'NO', 'TB',
             'WAS', 'NYG']
    
    defense_data = []
    for week in range(1, 23):
        for team in teams:
            defense_data.append({
                'player_display_name': team,
                'position': 'DEF',
                'week': week,
                'season': 2023,
                'custom_fantasy_points': np.random.normal(8, 6),  # Realistic defense scoring
                'season_week': week,
                'is_early_season': 1 if week <= 6 else 0,
                'is_late_season': 1 if week >= 15 else 0,
                'is_home': np.random.choice([0, 1])
            })
    
    # Convert to DataFrames
    kicker_df = pd.DataFrame(kicker_data)
    defense_df = pd.DataFrame(defense_data)
    
    # Add rolling averages for K and DEF
    for df_temp in [kicker_df, defense_df]:
        df_temp = df_temp.sort_values(['player_display_name', 'week'])
        for window in [3, 5]:
            df_temp[f'rolling_avg_{window}w'] = df_temp.groupby('player_display_name')['custom_fantasy_points'].transform(
                lambda x: x.rolling(window=window, min_periods=1).mean()
            )
        
        # Add other features
        df_temp['recent_trend'] = 0  # Simplified
        df_temp['games_since_boom'] = 0  # Simplified
    
    # Combine with existing features
    combined_df = pd.concat([df_features, kicker_df, defense_df], ignore_index=True)
    
    print(f"Added kickers: {len(kicker_df)} records")
    print(f"Added defenses: {len(defense_df)} records") 
    print(f"Total combined: {len(combined_df)} records")
    
    return combined_df

def build_k_def_prediction_models(df_complete):
    """Build prediction models for kickers and defenses"""
    
    print("Building K and DEF prediction models...")
    
    feature_cols = [
        'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
        'season_week', 'is_early_season', 'is_late_season', 'is_home'
    ]
    
    models = {}
    
    for position in ['K', 'DEF']:
        pos_data = df_complete[df_complete['position'] == position]
        pos_data = pos_data.dropna(subset=feature_cols + ['custom_fantasy_points'])
        
        if len(pos_data) < 50:
            print(f"Insufficient data for {position}: {len(pos_data)} samples")
            continue
        
        print(f"Building {position} model with {len(pos_data)} samples...")
        
        X = pos_data[feature_cols]
        y = pos_data['custom_fantasy_points']
        
        # Simple train/test split
        split_idx = int(len(pos_data) * 0.7)
        X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
        y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
        
        # Scale and train
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        model = GradientBoostingRegressor(n_estimators=50, random_state=42)
        model.fit(X_train_scaled, y_train)
        
        # Test
        y_pred = model.predict(X_test_scaled)
        mae = mean_absolute_error(y_test, y_pred)
        
        models[position] = {'model': model, 'scaler': scaler}
        print(f"{position} - MAE: {mae:.2f}")
    
    return models

def predict_k_def_performance(player_roster, week, all_models, df_complete):
    """Predict K and DEF performance alongside other positions"""
    
    feature_cols = [
        'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
        'season_week', 'is_early_season', 'is_late_season', 'is_home'
    ]
    
    predictions = []
    
    for player_name in player_roster:
        # Get player's recent data
        player_data = df_complete[
            (df_complete['player_display_name'] == player_name) & 
            (df_complete['week'] < week)
        ].sort_values('week')
        
        if len(player_data) == 0:
            continue
            
        position = player_data['position'].iloc[-1]
        
        if position not in all_models:
            continue
        
        # Create features for prediction week
        latest_data = player_data.iloc[-1].copy()
        latest_data['season_week'] = week
        latest_data['is_early_season'] = 1 if week <= 6 else 0
        latest_data['is_late_season'] = 1 if week >= 15 else 0
        latest_data['is_home'] = np.random.choice([0, 1])
        
        # Extract features
        features = latest_data[feature_cols].values.reshape(1, -1)
        
        if pd.isna(features).any():
            continue
        
        # Make prediction
        model_info = all_models[position]
        features_scaled = model_info['scaler'].transform(features)
        predicted_points = model_info['model'].predict(features_scaled)[0]
        
        predictions.append({
            'player_name': player_name,
            'position': position,
            'predicted_points': max(0, predicted_points),
            'week': week
        })
    
    return pd.DataFrame(predictions)

def complete_lineup_optimization_with_k_def():
    """Complete lineup optimization including K and DEF"""
    
    print("Creating complete weekly prediction system with K and DEF...")
    
    # Add K and DEF to our feature dataset
    df_complete = add_k_def_to_weekly_features(df_modeling)
    
    # Build K and DEF models
    k_def_models = build_k_def_prediction_models(df_complete)
    
    # Combine with existing models
    all_models = {**weekly_models, **k_def_models}
    
    print(f"All models available: {list(all_models.keys())}")
    
    # Test with complete roster including K and DEF
    complete_roster = [
        'Josh Allen', 'Christian McCaffrey', 'Saquon Barkley', 'Tyreek Hill', 
        'Ja\'Marr Chase', 'Travis Kelce', 'Amon-Ra St. Brown', 'Kenneth Walker',
        'Mike Evans', 'Tyler Bass', 'DAL', 'Gabe Davis', 'Tony Pollard'  # Tyler Bass (K), DAL (DEF)
    ]
    
    week = 10
    
    print(f"\n🏈 COMPLETE LINEUP OPTIMIZATION - WEEK {week}")
    print("=" * 60)
    
    # Get predictions for all positions
    all_predictions = predict_k_def_performance(complete_roster, week, all_models, df_complete)
    
    if len(all_predictions) > 0:
        print(f"✅ Generated predictions for {len(all_predictions)} players:")
        print(all_predictions.sort_values('predicted_points', ascending=False))
        
        # Optimize complete lineup
        optimal_lineup = improved_lineup_optimization(all_predictions, lineup_requirements)
        
        if optimal_lineup:
            summary = create_detailed_lineup_summary(optimal_lineup)
            print(summary)
            
            # Check if we have all required positions
            lineup_df = pd.DataFrame(optimal_lineup)
            pos_counts = lineup_df['position'].value_counts()
            
            missing_positions = []
            for pos, required in [('QB', 1), ('RB', 2), ('WR', 2), ('TE', 1), ('K', 1), ('DEF', 1)]:
                if pos not in pos_counts or pos_counts[pos] < required:
                    missing_positions.append(pos)
            
            if missing_positions:
                print(f"\n⚠️  Missing positions: {missing_positions}")
                print("Need to add these players to roster or adjust requirements")
            else:
                print(f"\n✅ Complete lineup with all positions filled!")
        
    else:
        print("❌ No predictions generated")
    
    return all_models, df_complete

# Execute the complete system
print("Completing K and DEF predictions...")
complete_models, complete_dataset = complete_lineup_optimization_with_k_def()

print(f"\n✅ COMPLETE SYSTEM READY")
print(f"Models available: {list(complete_models.keys())}")
print(f"Dataset size: {len(complete_dataset):,} records")
print(f"Ready for Phase 4: Backtesting!")

Completing K and DEF predictions...
Creating complete weekly prediction system with K and DEF...
Adding kickers and defenses to weekly features...
Added kickers: 110 records
Added defenses: 704 records
Total combined: 6295 records
Building K and DEF prediction models...
Building K model with 485 samples...
K - MAE: 3.92
Building DEF model with 506 samples...
DEF - MAE: 5.62
All models available: ['QB', 'RB', 'WR', 'TE', 'K', 'DEF']

🏈 COMPLETE LINEUP OPTIMIZATION - WEEK 10
✅ Generated predictions for 10 players:
           player_name position  predicted_points  week
0           Josh Allen       QB         21.289498    10
1  Christian McCaffrey       RB         21.066205    10
3          Tyreek Hill       WR         15.568534    10
6    Amon-Ra St. Brown       WR         14.706459    10
2       Saquon Barkley       RB         13.824908    10
7           Mike Evans       WR         12.223520    10
9         Tony Pollard       RB         11.904207    10
4        Ja'Marr Chase       WR   

In [14]:
# Use Real Kicker and Defense Data from Phase 1

def load_real_k_def_data():
    """Load the actual kicker and defense data we created in Phase 1"""
    
    print("Loading real kicker and defense data from Phase 1...")
    
    # We need to recreate the kicker and defense data using our Phase 1 functions
    # Let's load the full PBP data and recreate them
    
    print("Loading play-by-play data for kickers...")
    pbp_2023 = nfl.import_pbp_data([2023])
    
    print("Loading schedule data for defenses...")
    schedules_2023 = nfl.import_schedules([2023])
    
    # Recreate kicker stats using our Phase 1 function
    print("Recreating kicker stats...")
    
    def fg_distance_points(distance):
        if pd.isna(distance):
            return 0
        elif distance <= 19:
            return 3
        elif 20 <= distance <= 29:
            return 3
        elif 30 <= distance <= 39:
            return 3
        elif 40 <= distance <= 49:
            return 4
        elif 50 <= distance <= 59:
            return 5
        elif distance >= 60:
            return 6
        else:
            return 0

    def create_kicker_stats_correct(pbp_data):
        fg_data = pbp_data[pbp_data['field_goal_attempt'] == 1].copy()
        fg_data = fg_data[fg_data['kicker_player_name'].notna()]
        
        xp_data = pbp_data[pbp_data['extra_point_attempt'] == 1].copy()
        xp_data = xp_data[xp_data['kicker_player_name'].notna()]
        
        kicker_stats_list = []
        
        for kicker in fg_data['kicker_player_name'].unique():
            kicker_fg = fg_data[fg_data['kicker_player_name'] == kicker]
            kicker_xp = xp_data[xp_data['kicker_player_name'] == kicker]
            
            for week in pbp_data['week'].unique():
                week_fg = kicker_fg[kicker_fg['week'] == week]
                week_xp = kicker_xp[kicker_xp['week'] == week]
                
                if len(week_fg) > 0 or len(week_xp) > 0:
                    fg_made = len(week_fg[week_fg['field_goal_result'] == 'made'])
                    fg_missed = len(week_fg[week_fg['field_goal_result'].isin(['missed', 'blocked'])])
                    
                    fg_points = 0
                    for _, fg in week_fg[week_fg['field_goal_result'] == 'made'].iterrows():
                        fg_points += fg_distance_points(fg['kick_distance'])
                    
                    xp_made = len(week_xp[week_xp['extra_point_result'] == 'good'])
                    xp_missed = len(week_xp[week_xp['extra_point_result'] == 'failed'])
                    
                    season = pbp_data['season'].iloc[0] if 'season' in pbp_data.columns else 2023
                    
                    kicker_stats_list.append({
                        'season': season,
                        'week': week,
                        'player_display_name': kicker,
                        'position': 'K',
                        'fg_made': fg_made,
                        'fg_missed': fg_missed,
                        'fg_points': fg_points,
                        'xp_made': xp_made,
                        'xp_missed': xp_missed
                    })
        
        return pd.DataFrame(kicker_stats_list)

    def calculate_kicker_fantasy_points(kicker_df):
        df = kicker_df.copy()
        df['custom_fantasy_points'] = (
            df['fg_points'] +
            df['xp_made'] * 1 +
            df['xp_missed'] * -1 +
            df['fg_missed'] * -1
        )
        return df
    
    # Create kicker data
    kicker_stats = create_kicker_stats_correct(pbp_2023)
    kicker_data = calculate_kicker_fantasy_points(kicker_stats)
    
    print(f"Created kicker data: {len(kicker_data)} records")
    
    # Recreate defense stats using our Phase 1 function
    print("Recreating defense stats...")
    
    def points_allowed_score(points_allowed):
        if pd.isna(points_allowed):
            return 0
        elif points_allowed == 0:
            return 10
        elif 1 <= points_allowed <= 6:
            return 7
        elif 7 <= points_allowed <= 13:
            return 4
        elif 14 <= points_allowed <= 20:
            return 1
        elif 21 <= points_allowed <= 27:
            return -1
        elif 28 <= points_allowed <= 34:
            return -1
        elif points_allowed >= 35:
            return -4
        else:
            return 0

    def create_team_defense_stats(pbp_data, schedules_data):
        pbp_clean = pbp_data[pbp_data['defteam'].notna()].copy()
        
        defense_stats = pbp_clean.groupby(['season', 'week', 'defteam']).agg({
            'sack': 'sum',
            'interception': 'sum', 
            'fumble_forced': 'sum',
            'safety': 'sum',
            'punt_blocked': 'sum',
        }).reset_index()
        
        # Handle fumble recoveries
        fumble_recoveries = pbp_clean[pbp_clean['fumble_recovery_1_team'].notna()].groupby(['season', 'week', 'fumble_recovery_1_team']).size().reset_index(name='fumble_recovery')
        fumble_recoveries = fumble_recoveries.rename(columns={'fumble_recovery_1_team': 'defteam'})
        
        # Handle defensive TDs
        def_tds = pbp_clean[pbp_clean['td_team'].notna()].copy()
        def_tds['is_def_td'] = def_tds['td_team'] == def_tds['defteam']
        defensive_tds = def_tds[def_tds['is_def_td']].groupby(['season', 'week', 'defteam']).size().reset_index(name='defense_td')
        
        # Merge all defensive stats
        defense_stats = defense_stats.merge(fumble_recoveries, on=['season', 'week', 'defteam'], how='left')
        defense_stats = defense_stats.merge(defensive_tds, on=['season', 'week', 'defteam'], how='left')
        
        stat_columns = ['sack', 'interception', 'fumble_forced', 'safety', 'punt_blocked', 'fumble_recovery', 'defense_td']
        defense_stats[stat_columns] = defense_stats[stat_columns].fillna(0)
        
        # Add points allowed
        schedule_away = schedules_data[['season', 'week', 'away_team', 'home_score']].copy()
        schedule_away.columns = ['season', 'week', 'team', 'points_allowed']
        
        schedule_home = schedules_data[['season', 'week', 'home_team', 'away_score']].copy()
        schedule_home.columns = ['season', 'week', 'team', 'points_allowed']
        
        points_allowed = pd.concat([schedule_away, schedule_home], ignore_index=True)
        
        defense_stats = defense_stats.merge(
            points_allowed, 
            left_on=['season', 'week', 'defteam'], 
            right_on=['season', 'week', 'team'], 
            how='left'
        ).drop('team', axis=1)
        
        return defense_stats

    def calculate_defense_fantasy_points(defense_df):
        df = defense_df.copy()
        df['points_allowed_score'] = df['points_allowed'].apply(points_allowed_score)
        
        df['custom_fantasy_points'] = (
            df['defense_td'] * 6 +
            df['sack'] * 1 +
            df['interception'] * 2 +
            df['fumble_recovery'] * 2 +
            df['safety'] * 2 +
            df['fumble_forced'] * 1 +
            df['punt_blocked'] * 2 +
            df['points_allowed_score']
        )
        
        return df
    
    # Create defense data
    defense_stats = create_team_defense_stats(pbp_2023, schedules_2023)
    defense_data = calculate_defense_fantasy_points(defense_stats)
    
    # Rename defteam to player_display_name for consistency
    defense_data['player_display_name'] = defense_data['defteam']
    defense_data['position'] = 'DEF'
    
    print(f"Created defense data: {len(defense_data)} records")
    
    return kicker_data, defense_data

def add_real_k_def_features(df_features, kicker_data, defense_data):
    """Add real K and DEF data to weekly features with proper feature engineering"""
    
    print("Adding feature engineering to real K and DEF data...")
    
    # Process each dataset separately
    processed_datasets = []
    
    for dataset_name, dataset in [('Kicker', kicker_data), ('Defense', defense_data)]:
        print(f"Processing {dataset_name} data...")
        print(f"Original columns: {dataset.columns.tolist()}")
        
        # Make a copy and sort
        ds = dataset.copy()
        ds = ds.sort_values(['player_display_name', 'week'])
        
        # Rolling averages
        for window in [3, 5]:
            ds[f'rolling_avg_{window}w'] = ds.groupby('player_display_name')['custom_fantasy_points'].transform(
                lambda x: x.rolling(window=window, min_periods=1).mean()
            )
        
        # Recent trend (simplified)
        ds['recent_trend'] = ds.groupby('player_display_name')['custom_fantasy_points'].transform(
            lambda x: x.diff().fillna(0)
        )
        
        # Games since boom (>15 points for K/DEF)
        ds['games_since_boom'] = 0
        for player in ds['player_display_name'].unique():
            player_mask = ds['player_display_name'] == player
            player_data = ds[player_mask].copy()
            
            games_since = 0
            for idx in player_data.index:
                ds.loc[idx, 'games_since_boom'] = games_since
                if ds.loc[idx, 'custom_fantasy_points'] > 15:  # Boom threshold for K/DEF
                    games_since = 0
                else:
                    games_since += 1
        
        # Season features
        ds['season_week'] = ds['week']
        ds['is_early_season'] = (ds['week'] <= 6).astype(int)
        ds['is_late_season'] = (ds['week'] >= 15).astype(int)
        ds['is_home'] = np.random.choice([0, 1], size=len(ds))  # Placeholder
        
        processed_datasets.append(ds)
        print(f"{dataset_name} processed: {len(ds)} records")
    
    # Get the feature columns that exist in our main dataset
    main_feature_cols = df_features.columns.tolist()
    print(f"Main dataset columns: {main_feature_cols}")
    
    # Find common columns
    all_datasets = [df_features] + processed_datasets
    
    # Get intersection of all column sets
    common_cols = set(main_feature_cols)
    for ds in processed_datasets:
        common_cols = common_cols.intersection(set(ds.columns.tolist()))
    
    common_cols = list(common_cols)
    print(f"Common columns: {common_cols}")
    
    # If we don't have enough common columns, select essential ones
    essential_cols = ['player_display_name', 'position', 'week', 'season', 'custom_fantasy_points']
    feature_cols = ['rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
                   'season_week', 'is_early_season', 'is_late_season', 'is_home']
    
    target_cols = essential_cols + feature_cols
    
    # Check which columns exist in each dataset and create final datasets
    final_datasets = []
    
    for i, ds in enumerate([df_features] + processed_datasets):
        available_cols = [col for col in target_cols if col in ds.columns]
        missing_cols = [col for col in target_cols if col not in ds.columns]
        
        if missing_cols:
            print(f"Dataset {i}: Missing columns {missing_cols}")
            # Add missing columns with default values
            for col in missing_cols:
                if col in feature_cols:
                    ds[col] = 0  # Default value for features
                else:
                    ds[col] = None  # Default for essential cols
        
        final_datasets.append(ds[target_cols])
    
    # Combine all datasets
    combined_df = pd.concat(final_datasets, ignore_index=True)
    
    print(f"Final combined dataset: {len(combined_df)} records")
    print(f"Final columns: {combined_df.columns.tolist()}")
    
    return combined_df

# Load and integrate real K and DEF data
print("Loading real kicker and defense data from Phase 1...")
real_kicker_data, real_defense_data = load_real_k_def_data()

print("\nSample kicker data:")
print(real_kicker_data[['player_display_name', 'week', 'custom_fantasy_points']].head())

print("\nSample defense data:")
print(real_defense_data[['player_display_name', 'week', 'custom_fantasy_points']].head())

# Add to our modeling dataset
df_complete_real = add_real_k_def_features(df_modeling, real_kicker_data, real_defense_data)

print(f"\n✅ Real K and DEF data integrated!")
print(f"Complete dataset: {len(df_complete_real):,} records")
print(f"Positions: {sorted(df_complete_real['position'].unique())}")

# Update our models with real data
print("\nBuilding models with real K and DEF data...")
k_def_models_real = build_k_def_prediction_models(df_complete_real)

# Combine with existing models
all_models_real = {**weekly_models, **k_def_models_real}

print(f"\n✅ Complete model suite ready: {list(all_models_real.keys())}")

Loading real kicker and defense data from Phase 1...
Loading real kicker and defense data from Phase 1...
Loading play-by-play data for kickers...
2023 done.
Downcasting floats.
Loading schedule data for defenses...
Recreating kicker stats...
Created kicker data: 563 records
Recreating defense stats...
Created defense data: 570 records

Sample kicker data:
  player_display_name  week  custom_fantasy_points
0            M.Prater     1                     12
1            M.Prater     2                      8
2            M.Prater     3                     14
3            M.Prater     4                      4
4            M.Prater     5                      1

Sample defense data:
  player_display_name  week  custom_fantasy_points
0                 ARI     1                   23.0
1                 ATL     1                   15.0
2                 BAL     1                   14.0
3                 BUF     1                    7.0
4                 CAR     1                    6.0
Adding 

In [20]:
# Test Complete System with Real K/DEF

def test_complete_lineup_system():
    """Test the complete lineup optimization with real K and DEF data"""
    
    # Example roster with real player names from our dataset
    test_roster = [
        # QBs
        'Josh Allen', 'Lamar Jackson',
        # RBs  
        'Christian McCaffrey', 'Saquon Barkley', 'Tony Pollard', 'Kenneth Walker',
        # WRs
        'Tyreek Hill', 'Ja\'Marr Chase', 'Amon-Ra St. Brown', 'Mike Evans', 'Gabe Davis',
        # TEs
        'Travis Kelce', 'T.J. Hockenson', 
        # Kickers (real names from our data)
        'T.Bass', 'J.Tucker',
        # Defenses (real team names from our data)
        'BUF', 'DAL', 'SF'
    ]
    
    test_week = 12
    
    print(f"🏈 TESTING COMPLETE SYSTEM - WEEK {test_week}")
    print("=" * 60)
    print(f"Test roster: {len(test_roster)} players")
    print(f"Positions available: {sorted(df_complete_real['position'].unique())}")
    
    # Use our updated prediction function
    def predict_complete_roster(roster, week, models, dataset):
        """Predict performance for complete roster including K and DEF"""
        
        feature_cols = [
            'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
            'season_week', 'is_early_season', 'is_late_season', 'is_home'
        ]
        
        predictions = []
        
        for player_name in roster:
            # Get player's recent data
            player_data = dataset[
                (dataset['player_display_name'] == player_name) & 
                (dataset['week'] < week)
            ].sort_values('week')
            
            if len(player_data) == 0:
                print(f"  No data found for {player_name}")
                continue
                
            position = player_data['position'].iloc[-1]
            
            if position not in models:
                print(f"  No model for position {position}")
                continue
            
            # Create features for prediction week
            latest_data = player_data.iloc[-1].copy()
            latest_data['season_week'] = week
            latest_data['is_early_season'] = 1 if week <= 6 else 0
            latest_data['is_late_season'] = 1 if week >= 15 else 0
            latest_data['is_home'] = np.random.choice([0, 1])
            
            # Extract features
            features = latest_data[feature_cols].values.reshape(1, -1)
            
            if pd.isna(features).any():
                print(f"  Missing features for {player_name}")
                continue
            
            # Make prediction
            model_info = models[position]
            features_scaled = model_info['scaler'].transform(features)
            predicted_points = model_info['model'].predict(features_scaled)[0]
            
            predictions.append({
                'player_name': player_name,
                'position': position,
                'predicted_points': max(0, predicted_points),
                'week': week
            })
        
        return pd.DataFrame(predictions)
    
    # Get predictions
    predictions = predict_complete_roster(test_roster, test_week, all_models_real, df_complete_real)
    
    if len(predictions) > 0:
        print(f"\n✅ Generated predictions for {len(predictions)} players:")
        
        # Show predictions by position
        for pos in ['QB', 'RB', 'WR', 'TE', 'K', 'DEF']:
            pos_preds = predictions[predictions['position'] == pos]
            if len(pos_preds) > 0:
                print(f"\n{pos}:")
                for _, player in pos_preds.sort_values('predicted_points', ascending=False).iterrows():
                    print(f"  {player['player_name']}: {player['predicted_points']:.1f} pts")
        
        # Optimize lineup
        print(f"\n🎯 OPTIMIZING LINEUP...")
        optimal_lineup = improved_lineup_optimization(predictions, lineup_requirements)
        
        if optimal_lineup:
            summary = create_detailed_lineup_summary(optimal_lineup)
            print(summary)
            
            # Validate all positions filled
            lineup_df = pd.DataFrame(optimal_lineup)
            pos_counts = lineup_df['position'].value_counts()
            
            required_positions = {'QB': 1, 'RB': 2, 'WR': 2, 'TE': 1, 'K': 1, 'DEF': 1}
            missing = []
            
            for pos, needed in required_positions.items():
                actual = pos_counts.get(pos, 0)
                if actual < needed:
                    missing.append(f"{pos}: need {needed}, have {actual}")
            
            if missing:
                print(f"\n⚠️  Missing positions: {missing}")
            else:
                print(f"\n🎉 PERFECT! All positions filled with real players and data!")
                
                # Show some additional insights
                total_proj = lineup_df['predicted_points'].sum()
                print(f"\nLineup Analysis:")
                print(f"  Total projected: {total_proj:.1f} points")
                print(f"  Average per player: {total_proj/len(lineup_df):.1f} points")
                print(f"  Positions: {dict(pos_counts)}")
        
        else:
            print("Lineup optimization failed")
    
    else:
        print("No predictions generated")

# Run the complete system test
test_complete_lineup_system()

print("SYSTEM STATUS:")
print(f"Models: {list(all_models_real.keys())}")
print(f"Dataset: {len(df_complete_real):,} records")


🏈 TESTING COMPLETE SYSTEM - WEEK 12
Test roster: 18 players
Positions available: ['DEF', 'K', 'QB', 'RB', 'TE', 'WR']
  No data found for Kenneth Walker

✅ Generated predictions for 17 players:

QB:
  Lamar Jackson: 19.8 pts
  Josh Allen: 17.0 pts

RB:
  Christian McCaffrey: 20.2 pts
  Saquon Barkley: 13.6 pts
  Tony Pollard: 9.6 pts

WR:
  Tyreek Hill: 25.4 pts
  Amon-Ra St. Brown: 19.4 pts
  Ja'Marr Chase: 15.9 pts
  Mike Evans: 13.2 pts
  Gabe Davis: 10.0 pts

TE:
  Travis Kelce: 12.4 pts
  T.J. Hockenson: 9.8 pts

K:
  T.Bass: 12.6 pts
  J.Tucker: 9.7 pts

DEF:
  DAL: 20.2 pts
  BUF: 18.2 pts
  SF: 5.9 pts

🎯 OPTIMIZING LINEUP...
Starting lineup optimization...
Available players: 17
Filled QB: 1 players
Filled K: 1 players
Filled DEF: 1 players
Filled required RB: 2 players
Filled required WR: 2 players
Filled required TE: 1 players
Filled FLEX: 2 players

🏈 OPTIMIZED LINEUP - Week TBD
💰 Projected Points: 172.6
   QB     : Lamar Jackson          19.8 pts
   RB     : Christian McCaf

In [18]:
# Final System Test with Available Kickers

# Show available kickers in our dataset
available_kickers = df_complete_real[df_complete_real['position'] == 'K']['player_display_name'].unique()
print("Available kickers in dataset:")
for kicker in sorted(available_kickers)[:10]:  # Show first 10
    print(f"  {kicker}")

# Test with available kickers
final_test_roster = [
    # QBs
    'Josh Allen', 'Lamar Jackson',
    # RBs  
    'Christian McCaffrey', 'Saquon Barkley', 'Tony Pollard',
    # WRs
    'Tyreek Hill', 'Ja\'Marr Chase', 'Amon-Ra St. Brown', 'Mike Evans', 'Gabe Davis',
    # TEs
    'Travis Kelce', 'T.J. Hockenson', 
    # Kickers (use available ones)
    'H.Butker', 'J.Elliott',  # Available in our dataset
    # Defenses
    'BUF', 'DAL', 'SF'
]

print(f"\n🏈 FINAL COMPLETE SYSTEM TEST")
print("=" * 60)

# Quick prediction test with available kickers
def quick_final_test():
    predictions = predict_complete_roster(final_test_roster, 12, all_models_real, df_complete_real)
    
    if len(predictions) > 0:
        print(f"✅ Predictions for {len(predictions)} players:")
        
        # Check we have all positions
        pos_counts = predictions['position'].value_counts()
        print(f"Position coverage: {dict(pos_counts)}")
        
        # Quick optimization
        optimal_lineup = improved_lineup_optimization(predictions, lineup_requirements)
        
        if optimal_lineup:
            lineup_df = pd.DataFrame(optimal_lineup)
            total_points = lineup_df['predicted_points'].sum()
            pos_final = lineup_df['position'].value_counts()
            
            print(f"🎯 FINAL OPTIMIZED LINEUP:")
            print(f"  Total projected: {total_points:.1f} points")
            print(f"  Positions filled: {dict(pos_final)}")
            
            # Check completeness
            required = {'QB': 1, 'RB': 2, 'WR': 2, 'TE': 1, 'K': 1, 'DEF': 1}
            all_filled = all(pos_final.get(pos, 0) >= need for pos, need in required.items())
            
            if all_filled:
                print(f"🎉 PERFECT! Complete 10-starter lineup!")
                print(f"🚀 System ready for production use!")
                return True
            else:
                missing = [f"{pos}({need})" for pos, need in required.items() if pos_final.get(pos, 0) < need]
                print(f"⚠️  Still missing: {missing}")
                return False
    
    return False

# Test the final system
def predict_complete_roster(roster, week, models, dataset):
    """Final prediction function"""
    feature_cols = [
        'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
        'season_week', 'is_early_season', 'is_late_season', 'is_home'
    ]
    
    predictions = []
    
    for player_name in roster:
        player_data = dataset[
            (dataset['player_display_name'] == player_name) & 
            (dataset['week'] < week)
        ].sort_values('week')
        
        if len(player_data) == 0:
            continue
            
        position = player_data['position'].iloc[-1]
        
        if position not in models:
            continue
        
        latest_data = player_data.iloc[-1].copy()
        latest_data['season_week'] = week
        latest_data['is_early_season'] = 1 if week <= 6 else 0
        latest_data['is_late_season'] = 1 if week >= 15 else 0
        latest_data['is_home'] = np.random.choice([0, 1])
        
        features = latest_data[feature_cols].values.reshape(1, -1)
        
        if pd.isna(features).any():
            continue
        
        model_info = models[position]
        features_scaled = model_info['scaler'].transform(features)
        predicted_points = model_info['model'].predict(features_scaled)[0]
        
        predictions.append({
            'player_name': player_name,
            'position': position,
            'predicted_points': max(0, predicted_points),
            'week': week
        })
    
    return pd.DataFrame(predictions)

success = quick_final_test()

if success:
    print(f"\n🏆 FANTASY FOOTBALL ML SYSTEM COMPLETE!")
    print(f"=" * 60)
    print(f"✅ All 6 positions: QB, RB, WR, TE, K, DEF")
    print(f"✅ Custom scoring system implemented") 
    print(f"✅ Age-adjusted draft rankings")
    print(f"✅ Weekly prediction models (MAE 2-6 points)")
    print(f"✅ 10-starter lineup optimization")
    print(f"✅ 2-FLEX format support")
    print(f"✅ Real player/team data")
    print(f"📊 6,614 total records")
    print(f"🎯 Ready for 2024 backtesting and 2025 projections!")
    
    print(f"\n📝 COMMIT THIS SUCCESS:")
    print(f"git add .")
    print(f"git commit -m 'Complete fantasy ML system: all positions working'")
    print(f"git push origin main")
else:
    print(f"⚠️  Final system test needs adjustment")

Available kickers in dataset:
  A.Carlson
  A.Seibert
  B.Aubrey
  B.Grupe
  B.Maher
  B.McManus
  C.Boswell
  C.Dicker
  C.McLaughlin
  C.Ryland

🏈 FINAL COMPLETE SYSTEM TEST
✅ Predictions for 17 players:
Position coverage: {'WR': 5, 'RB': 3, 'DEF': 3, 'QB': 2, 'TE': 2, 'K': 2}
Starting lineup optimization...
Available players: 17
Filled QB: 1 players
Filled K: 1 players
Filled DEF: 1 players
Filled required RB: 2 players
Filled required WR: 2 players
Filled required TE: 1 players
Filled FLEX: 2 players
🎯 FINAL OPTIMIZED LINEUP:
  Total projected: 166.7 points
  Positions filled: {'WR': 4, 'RB': 2, 'QB': 1, 'K': 1, 'DEF': 1, 'TE': 1}
🎉 PERFECT! Complete 10-starter lineup!
🚀 System ready for production use!

🏆 FANTASY FOOTBALL ML SYSTEM COMPLETE!
✅ All 6 positions: QB, RB, WR, TE, K, DEF
✅ Custom scoring system implemented
✅ Age-adjusted draft rankings
✅ Weekly prediction models (MAE 2-6 points)
✅ 10-starter lineup optimization
✅ 2-FLEX format support
✅ Real player/team data
📊 6,614 to

In [22]:
# Full 15-Player Roster Optimization

def optimize_best_lineup_from_roster(full_roster, week, models, dataset, lineup_requirements):
    """
    Select the optimal 10 starters from a 15-player roster based on weekly projections
    
    Parameters:
    - full_roster: All 15 players on your roster
    - week: Week to optimize for
    - models: Prediction models
    - dataset: Complete dataset
    - lineup_requirements: Position requirements for starting lineup
    """
    
    print(f"WEEKLY LINEUP OPTIMIZATION - WEEK {week}")
    print("=" * 60)
    print(f"Analyzing full roster: {len(full_roster)} players")
    
    # Get predictions for entire roster
    all_predictions = predict_complete_roster(full_roster, week, models, dataset)
    
    if len(all_predictions) == 0:
        print("ERROR: No predictions available for any roster players")
        return None
    
    print(f"\nWeekly projections for all roster players:")
    predictions_sorted = all_predictions.sort_values('predicted_points', ascending=False)
    for _, player in predictions_sorted.iterrows():
        print(f"  {player['position']}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts")
    
    # Check roster composition
    roster_composition = all_predictions['position'].value_counts()
    print(f"\nRoster composition:")
    for pos, count in roster_composition.items():
        print(f"  {pos}: {count} players")
    
    # Validate we can make a legal lineup
    required_positions = ['QB', 'RB', 'WR', 'TE', 'K', 'DEF']
    validation_issues = []
    
    for pos in required_positions:
        available = roster_composition.get(pos, 0)
        required = lineup_requirements.get(pos, 0)
        if available < required:
            validation_issues.append(f"{pos}: need {required}, have {available}")
    
    # Check FLEX requirements (total RB/WR/TE needed)
    flex_positions = ['RB', 'WR', 'TE']
    total_flex_available = sum(roster_composition.get(pos, 0) for pos in flex_positions)
    total_flex_needed = sum(lineup_requirements.get(pos, 0) for pos in flex_positions) + 2  # +2 for FLEX spots
    
    if total_flex_available < total_flex_needed:
        validation_issues.append(f"RB/WR/TE: need {total_flex_needed} total, have {total_flex_available}")
    
    if validation_issues:
        print(f"\nROSTER ISSUES:")
        for issue in validation_issues:
            print(f"  {issue}")
        return None
    
    # Optimize the starting lineup from full roster
    print(f"\nOptimizing best 10 starters from 15-player roster...")
    optimal_lineup = improved_lineup_optimization(all_predictions, lineup_requirements)
    
    if not optimal_lineup:
        print("ERROR: Could not create optimal lineup")
        return None
    
    # Create comprehensive analysis
    lineup_df = pd.DataFrame(optimal_lineup)
    starting_players = set(lineup_df['player_name'])
    bench_players = [p for p in full_roster if p not in starting_players]
    
    # Get bench player predictions
    bench_predictions = all_predictions[~all_predictions['player_name'].isin(starting_players)]
    
    # Create detailed summary
    lineup_summary = create_detailed_lineup_summary(optimal_lineup)
    
    # Add bench and decision analysis
    bench_analysis = f"\nBENCH ANALYSIS:\n"
    bench_analysis += "=" * 60 + "\n"
    bench_analysis += f"Players not starting (5 bench spots):\n"
    
    if len(bench_predictions) > 0:
        for _, player in bench_predictions.sort_values('predicted_points', ascending=False).iterrows():
            bench_analysis += f"  {player['position']}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts\n"
    
    # Add decision rationale
    decision_analysis = f"\nDECISION RATIONALE:\n"
    decision_analysis += "=" * 60 + "\n"
    
    # Show positional decisions
    for position in ['QB', 'RB', 'WR', 'TE', 'K', 'DEF']:
        pos_players = all_predictions[all_predictions['position'] == position].sort_values('predicted_points', ascending=False)
        
        if len(pos_players) > 1:
            starters = [p for p in pos_players['player_name'] if p in starting_players]
            benched = [p for p in pos_players['player_name'] if p not in starting_players]
            
            if starters and benched:
                decision_analysis += f"\n{position} decisions:\n"
                for starter in starters:
                    starter_proj = pos_players[pos_players['player_name'] == starter]['predicted_points'].iloc[0]
                    decision_analysis += f"  STARTING: {starter} ({starter_proj:.1f} pts)\n"
                for bench in benched:
                    bench_proj = pos_players[pos_players['player_name'] == bench]['predicted_points'].iloc[0]
                    decision_analysis += f"  BENCH:    {bench} ({bench_proj:.1f} pts)\n"
    
    # Calculate total points and efficiency
    total_starting_points = lineup_df['predicted_points'].sum()
    total_bench_points = bench_predictions['predicted_points'].sum() if len(bench_predictions) > 0 else 0
    total_roster_points = total_starting_points + total_bench_points
    
    efficiency_analysis = f"\nROSTER EFFICIENCY:\n"
    efficiency_analysis += "=" * 60 + "\n"
    efficiency_analysis += f"Starting lineup total: {total_starting_points:.1f} pts\n"
    efficiency_analysis += f"Bench total:          {total_bench_points:.1f} pts\n"
    efficiency_analysis += f"Full roster total:    {total_roster_points:.1f} pts\n"
    efficiency_analysis += f"Starting efficiency:  {total_starting_points/total_roster_points*100:.1f}%\n"
    
    return {
        'optimal_lineup': optimal_lineup,
        'starting_players': list(starting_players),
        'bench_players': bench_players,
        'all_predictions': all_predictions,
        'lineup_summary': lineup_summary,
        'bench_analysis': bench_analysis,
        'decision_analysis': decision_analysis,
        'efficiency_analysis': efficiency_analysis,
        'total_projected_points': total_starting_points
    }

def example_full_roster_optimization():
    """Example of optimizing from full 15-player roster"""
    
    # Realistic 15-player fantasy roster
    my_full_roster = [
        # QBs (2)
        'Josh Allen', 'Lamar Jackson',
        # RBs (4)
        'Christian McCaffrey', 'Saquon Barkley', 'Tony Pollard', 'AJ Dillon',
        # WRs (5)  
        'Tyreek Hill', 'Ja\'Marr Chase', 'Amon-Ra St. Brown', 'Mike Evans', 'Gabe Davis',
        # TEs (2)
        'Travis Kelce', 'T.J. Hockenson',
        # K (1)
        'H.Butker',
        # DEF (1) 
        'DAL'
    ]
    
    print(f"FULL ROSTER OPTIMIZATION EXAMPLE")
    print("=" * 60)
    print(f"My 15-player roster:")
    for i, player in enumerate(my_full_roster, 1):
        print(f"{i:2d}. {player}")
    
    # Optimize the best lineup
    result = optimize_best_lineup_from_roster(
        full_roster=my_full_roster,
        week=12,
        models=all_models_real,
        dataset=df_complete_real,
        lineup_requirements=lineup_requirements
    )
    
    if result:
        print(result['lineup_summary'])
        print(result['bench_analysis'])
        print(result['decision_analysis'])
        print(result['efficiency_analysis'])
        
        print(f"\nSUMMARY:")
        print(f"- Starting 10: {len(result['starting_players'])} players")
        print(f"- Bench 5: {len(result['bench_players'])} players") 
        print(f"- Total projected: {result['total_projected_points']:.1f} points")
        
        return result
    else:
        print("Optimization failed")
        return None

def predict_complete_roster(roster, week, models, dataset):
    """Prediction function for complete roster"""
    feature_cols = [
        'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
        'season_week', 'is_early_season', 'is_late_season', 'is_home'
    ]
    
    predictions = []
    
    for player_name in roster:
        player_data = dataset[
            (dataset['player_display_name'] == player_name) & 
            (dataset['week'] < week)
        ].sort_values('week')
        
        if len(player_data) == 0:
            continue
            
        position = player_data['position'].iloc[-1]
        
        if position not in models:
            continue
        
        latest_data = player_data.iloc[-1].copy()
        latest_data['season_week'] = week
        latest_data['is_early_season'] = 1 if week <= 6 else 0
        latest_data['is_late_season'] = 1 if week >= 15 else 0
        latest_data['is_home'] = np.random.choice([0, 1])
        
        features = latest_data[feature_cols].values.reshape(1, -1)
        
        if pd.isna(features).any():
            continue
        
        model_info = models[position]
        features_scaled = model_info['scaler'].transform(features)
        predicted_points = model_info['model'].predict(features_scaled)[0]
        
        predictions.append({
            'player_name': player_name,
            'position': position,
            'predicted_points': max(0, predicted_points),
            'week': week
        })
    
    return pd.DataFrame(predictions)

# Run the full roster optimization example
example_result = example_full_roster_optimization()

print(f"\nFULL ROSTER OPTIMIZATION SYSTEM READY")
print(f"- Analyzes all 15 roster players")
print(f"- Selects optimal 10 starters based on projections")
print(f"- Provides detailed decision rationale")
print(f"- Shows bench analysis and roster efficiency")

FULL ROSTER OPTIMIZATION EXAMPLE
My 15-player roster:
 1. Josh Allen
 2. Lamar Jackson
 3. Christian McCaffrey
 4. Saquon Barkley
 5. Tony Pollard
 6. AJ Dillon
 7. Tyreek Hill
 8. Ja'Marr Chase
 9. Amon-Ra St. Brown
10. Mike Evans
11. Gabe Davis
12. Travis Kelce
13. T.J. Hockenson
14. H.Butker
15. DAL
WEEKLY LINEUP OPTIMIZATION - WEEK 12
Analyzing full roster: 15 players

Weekly projections for all roster players:
  WR: Tyreek Hill            25.4 pts
  RB: Christian McCaffrey    20.6 pts
  DEF: DAL                    20.2 pts
  QB: Lamar Jackson          19.9 pts
  WR: Amon-Ra St. Brown      19.4 pts
  QB: Josh Allen             16.9 pts
  WR: Ja'Marr Chase          15.9 pts
  WR: Mike Evans             13.2 pts
  RB: Saquon Barkley         13.1 pts
  TE: Travis Kelce           11.9 pts
  RB: AJ Dillon              11.4 pts
  TE: T.J. Hockenson         10.4 pts
  WR: Gabe Davis             10.0 pts
  RB: Tony Pollard            9.6 pts
  K: H.Butker                7.1 pts

Roster com

# exporting models for phase 4 

In [None]:
# Export Models and Data from Phase 3

import pickle
import os
from datetime import datetime

def export_all_models_and_data():
    """Export all trained models and processed data for use in Phase 4"""
    
    # Create models directory
    os.makedirs('models', exist_ok=True)
    
    print("Exporting models and data from Phase 3...")
    
    # 1. Export weekly prediction models
    print("1. Exporting weekly prediction models...")
    with open('models/weekly_prediction_models.pkl', 'wb') as f:
        pickle.dump(weekly_models, f)
    print("   Saved: weekly_prediction_models.pkl")
    
    # 2. Export complete models (including K and DEF)
    print("2. Exporting complete model suite...")
    with open('models/all_models_complete.pkl', 'wb') as f:
        pickle.dump(all_models_real, f)
    print("   Saved: all_models_complete.pkl")
    
    # 3. Export complete dataset with features
    print("3. Exporting complete dataset...")
    df_complete_real.to_pickle('models/complete_dataset_with_features.pkl')
    print("   Saved: complete_dataset_with_features.pkl")
    
    # 4. Export draft rankings and analysis
    if 'final_rankings' in globals():
        print("4. Exporting draft rankings...")
        final_rankings.to_pickle('models/draft_rankings_2023.pkl')
        print("   Saved: draft_rankings_2023.pkl")
    
    # 5. Export lineup requirements and metadata
    print("5. Exporting configuration...")
    config_data = {
        'lineup_requirements': lineup_requirements,
        'scoring_settings': {
            'passing_yards': 0.04, 'passing_tds': 4, 'interceptions': -2,
            'rushing_yards': 0.1, 'rushing_tds': 6, 'receiving_yards': 0.1,
            'receiving_tds': 6, 'receptions': 1, 'rushing_fumbles_lost': -2,
            'receiving_fumbles_lost': -2, 'special_teams_tds': 6
        },
        'defense_scoring': {
            'defense_td': 6, 'sacks': 1, 'interceptions': 2, 'fumble_recovery': 2,
            'safety': 2, 'forced_fumble': 1, 'blocked_kick': 2
        },
        'kicking_scoring': {
            'fg_0_19': 3, 'fg_20_29': 3, 'fg_30_39': 3,
            'fg_40_49': 4, 'fg_50_59': 5, 'fg_60_plus': 6,
            'pat_made': 1, 'pat_missed': -1, 'fg_missed': -1
        },
        'feature_columns': [
            'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
            'season_week', 'is_early_season', 'is_late_season', 'is_home'
        ],
        'export_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'model_performance': metrics if 'metrics' in globals() else None
    }
    
    with open('models/config_and_metadata.pkl', 'wb') as f:
        pickle.dump(config_data, f)
    print("   Saved: config_and_metadata.pkl")
    
    # 6. Export key functions
    print("6. Exporting key functions...")
    
    # Create a functions module
    functions_to_export = {
        'predict_complete_roster': predict_complete_roster,
        'improved_lineup_optimization': improved_lineup_optimization,
        'create_detailed_lineup_summary': create_detailed_lineup_summary,
        'optimize_best_lineup_from_roster': optimize_best_lineup_from_roster
    }
    
    with open('models/key_functions.pkl', 'wb') as f:
        pickle.dump(functions_to_export, f)
    print("   Saved: key_functions.pkl")
    
    # 7. Create import script for Phase 4
    print("7. Creating import script for Phase 4...")
    
    import_script = '''# Import script for Phase 4 - Load all models and data from Phase 3

import pickle
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler

def load_phase3_models_and_data():
    """Load all models and data exported from Phase 3"""
    
    print("Loading models and data from Phase 3...")
    
    # Load models
    with open('../models/all_models_complete.pkl', 'rb') as f:
        all_models = pickle.load(f)
    print(f"✓ Loaded models: {list(all_models.keys())}")
    
    # Load complete dataset
    df_complete = pd.read_pickle('../models/complete_dataset_with_features.pkl')
    print(f"✓ Loaded dataset: {df_complete.shape[0]:,} records")
    
    # Load configuration
    with open('../models/config_and_metadata.pkl', 'rb') as f:
        config = pickle.load(f)
    print(f"✓ Loaded configuration and metadata")
    
    # Load functions
    with open('../models/key_functions.pkl', 'rb') as f:
        functions = pickle.load(f)
    print(f"✓ Loaded key functions: {list(functions.keys())}")
    
    # Load draft rankings if available
    try:
        draft_rankings = pd.read_pickle('../models/draft_rankings_2023.pkl')
        print(f"✓ Loaded draft rankings: {len(draft_rankings)} players")
    except FileNotFoundError:
        draft_rankings = None
        print("! Draft rankings not found")
    
    return {
        'models': all_models,
        'dataset': df_complete,
        'config': config,
        'functions': functions,
        'draft_rankings': draft_rankings
    }

def setup_phase4_environment():
    """Set up the Phase 4 environment with all Phase 3 components"""
    
    # Load everything
    phase3_data = load_phase3_models_and_data()
    
    # Extract to global scope for easy use
    global all_models_real, df_complete_real, lineup_requirements, scoring_settings
    global predict_complete_roster, improved_lineup_optimization
    global create_detailed_lineup_summary, optimize_best_lineup_from_roster
    
    all_models_real = phase3_data['models']
    df_complete_real = phase3_data['dataset']
    lineup_requirements = phase3_data['config']['lineup_requirements']
    scoring_settings = phase3_data['config']['scoring_settings']
    
    # Load functions
    predict_complete_roster = phase3_data['functions']['predict_complete_roster']
    improved_lineup_optimization = phase3_data['functions']['improved_lineup_optimization']
    create_detailed_lineup_summary = phase3_data['functions']['create_detailed_lineup_summary']
    optimize_best_lineup_from_roster = phase3_data['functions']['optimize_best_lineup_from_roster']
    
    print("\\n✓ Phase 4 environment ready!")
    print(f"Available models: {list(all_models_real.keys())}")
    print(f"Dataset: {len(df_complete_real):,} records")
    print(f"Lineup requirements: {lineup_requirements}")
    
    return phase3_data

# Example usage in Phase 4:
# phase3_data = setup_phase4_environment()
'''
    
    with open('models/load_phase3_models.py', 'w') as f:
        f.write(import_script)
    print("   Saved: load_phase3_models.py")
    
    # 8. Show export summary
    print(f"\nEXPORT SUMMARY:")
    print(f"=" * 50)
    
    # Check file sizes
    models_dir = 'models'
    total_size = 0
    
    for filename in os.listdir(models_dir):
        filepath = os.path.join(models_dir, filename)
        if os.path.isfile(filepath):
            size_mb = os.path.getsize(filepath) / (1024 * 1024)
            total_size += size_mb
            print(f"  {filename:<35} {size_mb:>8.2f} MB")
    
    print(f"  {'TOTAL':<35} {total_size:>8.2f} MB")
    
    print(f"\nFiles exported to 'models/' directory:")
    print(f"✓ Models: all_models_complete.pkl")
    print(f"✓ Data: complete_dataset_with_features.pkl") 
    print(f"✓ Config: config_and_metadata.pkl")
    print(f"✓ Functions: key_functions.pkl")
    print(f"✓ Import script: load_phase3_models.py")
    
    print(f"\nTo use in Phase 4:")
    print(f"  exec(open('models/load_phase3_models.py').read())")
    print(f"  phase3_data = setup_phase4_environment()")

# Check that all required variables exist before exporting
required_vars = ['weekly_models', 'all_models_real', 'df_complete_real', 'lineup_requirements']
missing_vars = [var for var in required_vars if var not in globals()]

if missing_vars:
    print(f"ERROR: Missing required variables: {missing_vars}")
    print(f"Make sure you've run all the previous Phase 3 code first")
else:
    # Export everything
    export_all_models_and_data()
    

Exporting models and data from Phase 3...
1. Exporting weekly prediction models...
   Saved: weekly_prediction_models.pkl
2. Exporting complete model suite...
   Saved: all_models_complete.pkl
3. Exporting complete dataset...
   Saved: complete_dataset_with_features.pkl
5. Exporting configuration...
   Saved: config_and_metadata.pkl
6. Exporting key functions...
   Saved: key_functions.pkl
7. Creating import script for Phase 4...
   Saved: load_phase3_models.py

EXPORT SUMMARY:
  all_models_complete.pkl                 0.63 MB
  key_functions.pkl                       0.00 MB
  config_and_metadata.pkl                 0.00 MB
  load_phase3_models.py                   0.00 MB
  complete_dataset_with_features.pkl      0.62 MB
  weekly_prediction_models.pkl            0.50 MB
  TOTAL                                   1.75 MB

Files exported to 'models/' directory:
✓ Models: all_models_complete.pkl
✓ Data: complete_dataset_with_features.pkl
✓ Config: config_and_metadata.pkl
✓ Functions: key