In [4]:
# Phase 3: Weekly Lineup Optimization Setup
# Create this in a new notebook: 03_weekly_optimization.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.optimize import linprog
import warnings
warnings.filterwarnings('ignore')

print("PHASE 3: WEEKLY LINEUP OPTIMIZATION")
print("=" * 50)

# Load the dataset
df = pd.read_pickle('../data/fantasy_data_2023.pkl')

# Create metadata
metadata = {
    'positions': sorted(df['position'].unique().tolist()),
    'weeks_covered': sorted(df['week'].unique().tolist()),
    'total_records': len(df),
    'unique_players': df['player_display_name'].nunique()
}

print(f"Dataset loaded: {df.shape[0]:,} records")
print(f"Positions: {metadata['positions']}")
print(f"Weeks: {len(metadata['weeks_covered'])} weeks")

# Your league lineup requirements (10 starters)
lineup_requirements = {
    'QB': 1,
    'RB': 2,
    'WR': 2,
    'TE': 1,
    'FLEX1': 1,  # RB/WR/TE
    'FLEX2': 1,  # RB/WR/TE
    'K': 1,
    'DEF': 1
}

print(f"\nYour league lineup requirements: {lineup_requirements}")
print(f"Total starters: {sum(lineup_requirements.values())}")

# Define flex eligibility
flex_eligible_positions = ['RB', 'WR', 'TE']
print(f"FLEX eligible positions: {flex_eligible_positions}")

# potentially start:
print(f"\nPossible lineup combinations:")
print(f"- Max RBs: 4 (2 RB + 2 FLEX)")
print(f"- Max WRs: 4 (2 WR + 2 FLEX)")  
print(f"- Max TEs: 3 (1 TE + 2 FLEX)")
print(f"- Always: 1 QB, 1 K, 1 DEF")


PHASE 3: WEEKLY LINEUP OPTIMIZATION
Dataset loaded: 6,670 records
Positions: ['DEF', 'K', 'QB', 'RB', 'TE', 'WR']
Weeks: 22 weeks

Your league lineup requirements: {'QB': 1, 'RB': 2, 'WR': 2, 'TE': 1, 'FLEX1': 1, 'FLEX2': 1, 'K': 1, 'DEF': 1}
Total starters: 10
FLEX eligible positions: ['RB', 'WR', 'TE']

Possible lineup combinations:
- Max RBs: 4 (2 RB + 2 FLEX)
- Max WRs: 4 (2 WR + 2 FLEX)
- Max TEs: 3 (1 TE + 2 FLEX)
- Always: 1 QB, 1 K, 1 DEF


# Weekly Performance Features

In [None]:


def create_weekly_features(df):
    """Create features for predicting weekly performance"""
    
    # Sort by player and week
    df_sorted = df.sort_values(['player_display_name', 'week'])
    
    # Rolling averages (last N games)
    for window in [3, 5]:
        df_sorted[f'rolling_avg_{window}w'] = df_sorted.groupby('player_display_name')['custom_fantasy_points'].transform(
            lambda x: x.rolling(window=window, min_periods=1).mean().shift(1)
        )
    
    # Recent trend (slope of last 3 games)
    def calculate_recent_trend(group):
        trends = []
        for i in range(len(group)):
            if i < 2:
                trends.append(0)
            else:
                recent_points = group['custom_fantasy_points'].iloc[max(0, i-2):i+1].values
                if len(recent_points) >= 2:
                    x = np.arange(len(recent_points))
                    slope = np.polyfit(x, recent_points, 1)[0]
                    trends.append(slope)
                else:
                    trends.append(0)
        return pd.Series(trends, index=group.index)
    
    df_sorted['recent_trend'] = df_sorted.groupby('player_display_name').apply(calculate_recent_trend).reset_index(drop=True)
    
    # Games since last big game (>20 points)
    df_sorted['games_since_boom'] = 0
    for player in df_sorted['player_display_name'].unique():
        player_mask = df_sorted['player_display_name'] == player
        player_data = df_sorted[player_mask].copy()
        
        games_since = 0
        for idx in player_data.index:
            df_sorted.loc[idx, 'games_since_boom'] = games_since
            if df_sorted.loc[idx, 'custom_fantasy_points'] > 20:
                games_since = 0
            else:
                games_since += 1
    
    # Season week (early/mid/late season effects)
    df_sorted['season_week'] = df_sorted['week']
    df_sorted['is_early_season'] = (df_sorted['week'] <= 6).astype(int)
    df_sorted['is_late_season'] = (df_sorted['week'] >= 15).astype(int)
    
    return df_sorted

def create_matchup_features(df):
    """Create opponent/matchup-based features"""
    
    # This would ideally use strength of schedule data
    # For now, we'll create simplified features
    
    # Opponent team (we'll encode this later)
    df['opponent'] = df['opponent_team'] if 'opponent_team' in df.columns else 'Unknown'
    
    # Home/away (if available in your data)
    # For now, we'll create a placeholder
    df['is_home'] = np.random.choice([0, 1], size=len(df))  # Placeholder
    
    return df

def create_target_variables(df):
    """Create different target variables for different prediction tasks"""
    
    # Main target: actual fantasy points
    df['target_points'] = df['custom_fantasy_points']
    
    # Binary targets for different thresholds
    df['hit_projection'] = (df['custom_fantasy_points'] > df['custom_fantasy_points'].mean()).astype(int)
    df['boom_game'] = (df['custom_fantasy_points'] > 20).astype(int)  # Arbitrary threshold
    df['bust_game'] = (df['custom_fantasy_points'] < 5).astype(int)   # Arbitrary threshold
    
    return df

def prepare_modeling_data(df):
    """Prepare data for weekly prediction models"""
    
    # Feature engineering
    print("Creating weekly features...")
    df_features = create_weekly_features(df)
    
    print("Creating matchup features...")
    df_features = create_matchup_features(df_features)
    
    print("Creating target variables...")
    df_features = create_target_variables(df_features)
    
    # Filter out first few weeks per player (need history for features)
    df_modeling = df_features.groupby('player_display_name').apply(
        lambda x: x.iloc[2:] if len(x) > 2 else x.iloc[0:0]
    ).reset_index(drop=True)
    
    print(f"Modeling dataset shape: {df_modeling.shape}")
    print(f"Features available for weeks {df_modeling['week'].min()}-{df_modeling['week'].max()}")
    
    return df_modeling

def build_weekly_prediction_models(df_modeling):
    """Build models to predict weekly performance"""
    
    # Feature columns for modeling
    feature_cols = [
        'rolling_avg_3w', 'rolling_avg_5w', 'recent_trend', 'games_since_boom',
        'season_week', 'is_early_season', 'is_late_season', 'is_home'
    ]
    
    # Remove any rows with missing features
    df_clean = df_modeling.dropna(subset=feature_cols + ['target_points'])
    
    if len(df_clean) == 0:
        print("No valid data for modeling")
        return None, None
    
    print(f"Clean modeling data: {len(df_clean)} records")
    
    # Build position-specific models
    models = {}
    model_metrics = {}
    
    for position in ['QB', 'RB', 'WR', 'TE']:
        pos_data = df_clean[df_clean['position'] == position]
        
        if len(pos_data) < 50:  # Need minimum samples
            print(f"Insufficient data for {position}: {len(pos_data)} samples")
            continue
        
        print(f"Building {position} model with {len(pos_data)} samples...")
        
        X = pos_data[feature_cols]
        y = pos_data['target_points']
        
        # Split data (use later weeks for testing)
        split_week = pos_data['week'].quantile(0.7)
        train_mask = pos_data['week'] <= split_week
        
        X_train, X_test = X[train_mask], X[~train_mask]
        y_train, y_test = y[train_mask], y[~train_mask]
        
        if len(X_test) == 0:
            print(f"No test data for {position}")
            continue
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        # Train model
        model = GradientBoostingRegressor(n_estimators=100, random_state=42)
        model.fit(X_train_scaled, y_train)
        
        # Predictions
        y_pred = model.predict(X_test_scaled)
        
        # Metrics
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        
        models[position] = {'model': model, 'scaler': scaler}
        model_metrics[position] = {'MAE': mae, 'RMSE': rmse, 'samples': len(pos_data)}
        
        print(f"{position} - MAE: {mae:.2f}, RMSE: {rmse:.2f}")
    
    return models, model_metrics

# Execute the weekly prediction pipeline
print("Preparing data for weekly prediction models...")
df_modeling = prepare_modeling_data(df)

print("\nBuilding weekly prediction models...")
weekly_models, metrics = build_weekly_prediction_models(df_modeling)

if weekly_models:
    print(f"\nWeekly prediction models built for {list(weekly_models.keys())}")
    print("\nModel performance:")
    for pos, metric in metrics.items():
        print(f"{pos}: MAE={metric['MAE']:.2f}, RMSE={metric['RMSE']:.2f} ({metric['samples']} samples)")
else:
    print("No weekly models could be built")

print(f"\nNext: Lineup optimization algorithms")

Preparing data for weekly prediction models...
Creating weekly features...
Creating matchup features...
Creating target variables...
Modeling dataset shape: (5481, 19)
Features available for weeks 3-22

Building weekly prediction models...
Clean modeling data: 5481 records
Building QB model with 535 samples...
QB - MAE: 6.15, RMSE: 7.83
Building RB model with 1146 samples...
RB - MAE: 5.18, RMSE: 6.92
Building WR model with 1910 samples...
WR - MAE: 5.34, RMSE: 7.28
Building TE model with 899 samples...
TE - MAE: 4.27, RMSE: 5.55

Weekly prediction models built for ['QB', 'RB', 'WR', 'TE']

Model performance:
QB: MAE=6.15, RMSE=7.83 (535 samples)
RB: MAE=5.18, RMSE=6.92 (1146 samples)
WR: MAE=5.34, RMSE=7.28 (1910 samples)
TE: MAE=4.27, RMSE=5.55 (899 samples)

Next: Lineup optimization algorithms


In [8]:
# Improved Lineup Optimization

def improved_lineup_optimization(predictions_df, lineup_requirements):
    """Improved lineup optimization with clearer FLEX handling"""
    
    if len(predictions_df) == 0:
        return None
    
    # Sort players by predicted points
    available = predictions_df.copy().sort_values('predicted_points', ascending=False)
    lineup = []
    
    # Track what we still need
    needs = {
        'QB': lineup_requirements['QB'],
        'RB': lineup_requirements['RB'], 
        'WR': lineup_requirements['WR'],
        'TE': lineup_requirements['TE'],
        'K': lineup_requirements['K'],
        'DEF': lineup_requirements['DEF'],
        'FLEX': 2  # 2 FLEX spots
    }
    
    print("Starting lineup optimization...")
    print(f"Available players: {len(available)}")
    
    # Step 1: Fill required positions (excluding FLEX-eligible positions for now)
    for position in ['QB', 'K', 'DEF']:
        needed = needs[position]
        pos_players = available[available['position'] == position].head(needed)
        
        for _, player in pos_players.iterrows():
            lineup.append({
                'player_name': player['player_name'],
                'position': player['position'],
                'predicted_points': player['predicted_points'],
                'lineup_slot': position
            })
            available = available[available['player_name'] != player['player_name']]
            needs[position] -= 1
        
        print(f"Filled {position}: {len(pos_players)} players")
    
    # Step 2: Fill minimum required RB, WR, TE
    for position in ['RB', 'WR', 'TE']:
        needed = needs[position]
        pos_players = available[available['position'] == position].head(needed)
        
        for _, player in pos_players.iterrows():
            lineup.append({
                'player_name': player['player_name'],
                'position': player['position'],
                'predicted_points': player['predicted_points'],
                'lineup_slot': position
            })
            available = available[available['player_name'] != player['player_name']]
            needs[position] -= 1
        
        print(f"Filled required {position}: {len(pos_players)} players")
    
    # Step 3: Fill FLEX spots with best remaining RB/WR/TE
    flex_eligible = available[available['position'].isin(['RB', 'WR', 'TE'])]
    flex_players = flex_eligible.head(needs['FLEX'])
    
    for i, (_, player) in enumerate(flex_players.iterrows()):
        lineup.append({
            'player_name': player['player_name'],
            'position': player['position'], 
            'predicted_points': player['predicted_points'],
            'lineup_slot': f'FLEX{i+1}'
        })
    
    print(f"Filled FLEX: {len(flex_players)} players")
    
    return lineup

def create_detailed_lineup_summary(lineup):
    """Create detailed lineup summary with clear position assignments"""
    
    if not lineup:
        return "No valid lineup found"
    
    lineup_df = pd.DataFrame(lineup)
    total_points = lineup_df['predicted_points'].sum()
    
    summary = f"\n🏈 OPTIMIZED LINEUP - Week {lineup_df.iloc[0].get('week', 'TBD')}\n"
    summary += f"💰 Projected Points: {total_points:.1f}\n"
    summary += "=" * 60 + "\n"
    
    # Order by lineup slot for clear display
    slot_order = ['QB', 'RB', 'WR', 'TE', 'FLEX1', 'FLEX2', 'K', 'DEF']
    
    for slot in slot_order:
        slot_players = lineup_df[lineup_df['lineup_slot'] == slot]
        if len(slot_players) > 0:
            for _, player in slot_players.iterrows():
                actual_pos = f"({player['position']})" if slot.startswith('FLEX') else ""
                summary += f"{slot:>5} {actual_pos:<4}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts\n"
    
    # Show bench players (if any)
    summary += "\n" + "=" * 60 + "\n"
    summary += f"💡 Lineup Strategy Analysis:\n"
    
    # Count positions in lineup
    pos_counts = lineup_df['position'].value_counts()
    summary += f"   RBs starting: {pos_counts.get('RB', 0)}\n"
    summary += f"   WRs starting: {pos_counts.get('WR', 0)}\n" 
    summary += f"   TEs starting: {pos_counts.get('TE', 0)}\n"
    
    return summary

def start_sit_recommendations(predictions_df, lineup):
    """Create start/sit recommendations"""
    
    if not lineup:
        return "No lineup to analyze"
    
    lineup_df = pd.DataFrame(lineup)
    starting_players = set(lineup_df['player_name'])
    
    # Find bench players
    bench_players = predictions_df[~predictions_df['player_name'].isin(starting_players)]
    
    recommendations = "\n🤔 START/SIT RECOMMENDATIONS\n"
    recommendations += "=" * 60 + "\n"
    
    if len(bench_players) > 0:
        recommendations += "💺 BENCH:\n"
        for _, player in bench_players.sort_values('predicted_points', ascending=False).iterrows():
            recommendations += f"   {player['position']}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts\n"
        
        # Check for potential position swaps
        recommendations += "\n💭 POTENTIAL SWAPS:\n"
        
        # Compare bench players to starting players of same position
        for _, bench_player in bench_players.iterrows():
            pos = bench_player['position']
            if pos in ['RB', 'WR', 'TE']:  # FLEX eligible
                # Find lowest scoring starter in same position or FLEX
                same_pos_starters = lineup_df[
                    (lineup_df['position'] == pos) | 
                    (lineup_df['lineup_slot'].str.startswith('FLEX'))
                ]
                
                if len(same_pos_starters) > 0:
                    lowest_starter = same_pos_starters.loc[same_pos_starters['predicted_points'].idxmin()]
                    
                    if bench_player['predicted_points'] > lowest_starter['predicted_points']:
                        point_diff = bench_player['predicted_points'] - lowest_starter['predicted_points']
                        recommendations += f"   Consider: {bench_player['player_name']} (+{point_diff:.1f}) over {lowest_starter['player_name']}\n"
    
    return recommendations

def weekly_lineup_optimizer(roster_players, week, weekly_models, df_modeling, lineup_requirements):
    """Complete weekly lineup optimization workflow"""
    
    print(f"\n🏈 WEEKLY LINEUP OPTIMIZER - WEEK {week}")
    print("=" * 60)
    
    # Step 1: Get predictions
    predictions = predict_weekly_performance(roster_players, week, weekly_models, df_modeling)
    
    if len(predictions) == 0:
        return "No predictions available for any roster players"
    
    print(f"✅ Generated predictions for {len(predictions)} players")
    
    # Step 2: Optimize lineup
    optimal_lineup = improved_lineup_optimization(predictions, lineup_requirements)
    
    if not optimal_lineup:
        return "❌ Could not create optimal lineup"
    
    # Step 3: Create summaries
    lineup_summary = create_detailed_lineup_summary(optimal_lineup)
    start_sit = start_sit_recommendations(predictions, optimal_lineup)
    
    # Step 4: Show all predictions for reference
    all_predictions = "\n📊 ALL PLAYER PROJECTIONS\n"
    all_predictions += "=" * 60 + "\n"
    sorted_preds = predictions.sort_values('predicted_points', ascending=False)
    for _, player in sorted_preds.iterrows():
        all_predictions += f"{player['position']}: {player['player_name']:<20} {player['predicted_points']:>6.1f} pts\n"
    
    return lineup_summary + start_sit + all_predictions

# Test the improved system
example_roster = [
    'Josh Allen', 'Christian McCaffrey', 'Saquon Barkley', 'Tyreek Hill', 
    'Ja\'Marr Chase', 'Travis Kelce', 'Amon-Ra St. Brown', 'Kenneth Walker',
    'Mike Evans', 'Tyler Bass', 'Dallas', 'Gabe Davis', 'Tony Pollard'
]

result = weekly_lineup_optimizer(example_roster, 10, weekly_models, df_modeling, lineup_requirements)
print(result)


🏈 WEEKLY LINEUP OPTIMIZER - WEEK 10
✅ Generated predictions for 10 players
Starting lineup optimization...
Available players: 10
Filled QB: 1 players
Filled K: 0 players
Filled DEF: 0 players
Filled required RB: 2 players
Filled required WR: 2 players
Filled required TE: 1 players
Filled FLEX: 2 players

🏈 OPTIMIZED LINEUP - Week TBD
💰 Projected Points: 119.5
   QB     : Josh Allen             21.3 pts
   RB     : Christian McCaffrey    24.1 pts
   RB     : Saquon Barkley         13.6 pts
   WR     : Tyreek Hill            15.6 pts
   WR     : Amon-Ra St. Brown      14.7 pts
   TE     : Travis Kelce            6.1 pts
FLEX1 (WR): Mike Evans             12.2 pts
FLEX2 (RB): Tony Pollard           11.9 pts

💡 Lineup Strategy Analysis:
   RBs starting: 3
   WRs starting: 3
   TEs starting: 1

🤔 START/SIT RECOMMENDATIONS
💺 BENCH:
   WR: Ja'Marr Chase          10.6 pts
   WR: Gabe Davis              7.7 pts

💭 POTENTIAL SWAPS:

📊 ALL PLAYER PROJECTIONS
RB: Christian McCaffrey    24.1 pts
Q