In [25]:
# Phase 2: Draft Prediction Model Setup
# Create this in a new notebook: 02_draft_modeling.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')
import nfl_data_py as nfl

# Set up plotting
plt.style.use('default')
sns.set_palette("husl")
pd.set_option('display.max_columns', None)

import os 

if os.path.exists('../data/fantasy_data_2023.pkl'):
    df = pd.read_pickle('../data/fantasy_data_2023.pkl')
    metadata_path = '../data/dataset_metadata.json'
else:
    print('data.pkl not found')

# Load metadata if we found the data
if df is not None:
    print("Skipping corrupted metadata file, creating from dataset...")
    
    # Create metadata directly from the dataset
    metadata = {
        'positions': sorted(df['position'].unique().tolist()),
        'weeks_covered': sorted(df['week'].unique().tolist()),
        'total_records': len(df),
        'unique_players': df['player_display_name'].nunique()
    }
    
    # Recreate your custom scoring settings
    metadata['scoring_settings'] = {
        'passing_yards': 0.04,  # 1 point per 25 yards
        'passing_tds': 4,
        'interceptions': -2,
        'rushing_yards': 0.1,   # 1 point per 10 yards
        'rushing_tds': 6,
        'receiving_yards': 0.1, # 1 point per 10 yards
        'receiving_tds': 6,
        'receptions': 1,        # PPR
        'rushing_fumbles_lost': -2,
        'receiving_fumbles_lost': -2,
        'special_teams_tds': 6
    }
    
    metadata['defense_scoring'] = {
        'defense_td': 6,
        'sacks': 1,
        'interceptions': 2,
        'fumble_recovery': 2,
        'safety': 2,
        'forced_fumble': 1,
        'blocked_kick': 2,
        'points_allowed_tiers': {
            '0': 10, '1-6': 7, '7-13': 4, '14-20': 1,
            '21-27': -1, '28-34': -1, '35+': -4
        }
    }
    
    metadata['kicking_scoring'] = {
        'fg_0_19': 3, 'fg_20_29': 3, 'fg_30_39': 3,
        'fg_40_49': 4, 'fg_50_59': 5, 'fg_60_plus': 6,
        'pat_made': 1, 'pat_missed': -1, 'fg_missed': -1
    }
    
    print("Metadata recreated from dataset")
    
    # Show what we have
    print(f"Dataset loaded: {df.shape[0]:,} records")
    print(f"Positions: {metadata['positions']}")
    print(f"Weeks: {len(metadata['weeks_covered'])} weeks")
    print(f"Unique players/teams: {metadata['unique_players']}")
else:
    print("Cannot proceed without dataset")
    metadata = None

print(f"Dataset loaded: {df.shape[0]:,} records")
print(f"Positions: {metadata['positions']}")
print(f"Scoring system: Custom league scoring loaded")

# Basic data inspection
print(f"\nDataset Overview:")
print(f"Shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(f"Date range: Weeks {df['week'].min()}-{df['week'].max()}")

# Position breakdown
print(f"\nPosition Distribution:")
position_counts = df['position'].value_counts()
print(position_counts)

# Sample the data
print(f"\nFirst 5 records:")
print(df.head())

Skipping corrupted metadata file, creating from dataset...
Metadata recreated from dataset
Dataset loaded: 6,670 records
Positions: ['DEF', 'K', 'QB', 'RB', 'TE', 'WR']
Weeks: 22 weeks
Unique players/teams: 620
Dataset loaded: 6,670 records
Positions: ['DEF', 'K', 'QB', 'RB', 'TE', 'WR']
Scoring system: Custom league scoring loaded

Dataset Overview:
Shape: (6670, 6)
Columns: ['season', 'week', 'player_display_name', 'position', 'position_group', 'custom_fantasy_points']
Date range: Weeks 1-22

Position Distribution:
WR     2322
RB     1403
TE     1122
QB      690
DEF     570
K       563
Name: position, dtype: int64

First 5 records:
   season  week player_display_name position position_group  \
0    2023     1       Aaron Rodgers       QB             QB   
1    2023     4      Marcedes Lewis       TE             TE   
2    2023     7      Marcedes Lewis       TE             TE   
3    2023    11      Marcedes Lewis       TE             TE   
4    2023    14      Marcedes Lewis       T

# Feature Engineering Functions

In [26]:
# Feature Engineering Functions

def create_rolling_averages(df, player_col='player_display_name', weeks=[3, 5, 8]):
    """Create rolling average features for fantasy points"""
    df_sorted = df.sort_values([player_col, 'week'])
    
    for window in weeks:
        df_sorted[f'rolling_avg_{window}w'] = df_sorted.groupby(player_col)['custom_fantasy_points'].transform(
            lambda x: x.rolling(window=window, min_periods=1).mean()
        )
    
    return df_sorted

def calculate_consistency_metrics(df, player_col='player_display_name'):
    """Calculate player consistency metrics"""
    player_stats = df.groupby(player_col)['custom_fantasy_points'].agg([
        'mean', 'std', 'min', 'max', 'count'
    ]).reset_index()
    
    player_stats['coefficient_of_variation'] = player_stats['std'] / player_stats['mean']
    player_stats['boom_rate'] = df.groupby(player_col).apply(
        lambda x: (x['custom_fantasy_points'] > x['custom_fantasy_points'].mean() + x['custom_fantasy_points'].std()).mean()
    ).values
    player_stats['bust_rate'] = df.groupby(player_col).apply(
        lambda x: (x['custom_fantasy_points'] < x['custom_fantasy_points'].mean() - x['custom_fantasy_points'].std()).mean()
    ).values
    
    return player_stats

def create_season_trends(df, player_col='player_display_name'):
    """Create features for season-long trends"""
    df_sorted = df.sort_values([player_col, 'week'])
    
    # Week-over-week change
    df_sorted['points_change'] = df_sorted.groupby(player_col)['custom_fantasy_points'].diff()
    
    # Season trend (linear regression slope)
    def calculate_trend(group):
        if len(group) > 2:
            x = np.arange(len(group))
            y = group['custom_fantasy_points'].values
            slope = np.polyfit(x, y, 1)[0]
            return pd.Series([slope] * len(group))
        return pd.Series([0] * len(group))
    
    df_sorted['season_trend'] = df_sorted.groupby(player_col).apply(calculate_trend).reset_index(drop=True)
    
    return df_sorted

def calculate_positional_rankings(df):
    """Calculate weekly positional rankings"""
    df['position_rank'] = df.groupby(['week', 'position'])['custom_fantasy_points'].rank(ascending=False)
    df['position_percentile'] = df.groupby(['week', 'position'])['custom_fantasy_points'].rank(pct=True)
    
    return df

def create_targets_for_modeling(df, target_weeks=None):
    """Create target variables for season-long prediction"""
    if target_weeks is None:
        target_weeks = range(15, 18)  # Fantasy playoffs
    
    # Calculate total points in target weeks
    playoff_points = df[df['week'].isin(target_weeks)].groupby('player_display_name')['custom_fantasy_points'].sum().reset_index()
    playoff_points.columns = ['player_display_name', 'playoff_total_points']
    
    # Calculate season total (weeks 1-14 for training)
    regular_season = df[df['week'] <= 14].groupby('player_display_name')['custom_fantasy_points'].sum().reset_index()
    regular_season.columns = ['player_display_name', 'regular_season_total']
    
    # Merge targets
    targets = regular_season.merge(playoff_points, on='player_display_name', how='left')
    targets['playoff_total_points'] = targets['playoff_total_points'].fillna(0)
    
    return targets

# Apply feature engineering
print("Creating rolling averages...")
df_features = create_rolling_averages(df)

print("Calculating consistency metrics...")
consistency = calculate_consistency_metrics(df_features)

print("Creating season trends...")
df_features = create_season_trends(df_features)

print("Calculating positional rankings...")
df_features = calculate_positional_rankings(df_features)

print("Creating modeling targets...")
targets = create_targets_for_modeling(df_features)

print(f"Feature engineering complete. Dataset shape: {df_features.shape}")
print(f"Available features: {[col for col in df_features.columns if col not in df.columns]}")

Creating rolling averages...
Calculating consistency metrics...
Creating season trends...
Calculating positional rankings...
Creating modeling targets...
Feature engineering complete. Dataset shape: (6670, 13)
Available features: ['rolling_avg_3w', 'rolling_avg_5w', 'rolling_avg_8w', 'points_change', 'season_trend', 'position_rank', 'position_percentile']


# Draft Value Prediction Model

In [22]:
# Draft Value Prediction Model

def calculate_replacement_level(df, position_limits={'QB': 12, 'RB': 24, 'WR': 36, 'TE': 12, 'K': 12, 'DEF': 12}):
    """Calculate replacement level player values for each position"""
    replacement_values = {}
    
    for position in position_limits.keys():
        if position in df['position'].values:
            pos_players = df[df['position'] == position]
            
            # Get season totals for each player
            season_totals = pos_players.groupby('player_display_name')['custom_fantasy_points'].sum().reset_index()
            season_totals = season_totals.sort_values('custom_fantasy_points', ascending=False)
            
            # Replacement level is the value of the player at the position limit
            if len(season_totals) >= position_limits[position]:
                replacement_values[position] = season_totals.iloc[position_limits[position] - 1]['custom_fantasy_points']
            else:
                replacement_values[position] = season_totals['custom_fantasy_points'].min()
    
    return replacement_values

def create_draft_features(df_features, consistency, targets):
    """Create features for draft value prediction"""
    
    # Merge consistency metrics
    draft_data = targets.merge(consistency, left_on='player_display_name', right_on='player_display_name', how='left')
    
    # Add position information
    player_positions = df_features.groupby('player_display_name')['position'].first().reset_index()
    draft_data = draft_data.merge(player_positions, on='player_display_name', how='left')
    
    # Add early season performance (weeks 1-6)
    early_season = df_features[df_features['week'] <= 6].groupby('player_display_name').agg({
        'custom_fantasy_points': ['mean', 'sum'],
        'rolling_avg_3w': 'last',
        'rolling_avg_5w': 'last',
        'position_rank': 'mean',
        'season_trend': 'last'
    }).reset_index()
    
    early_season.columns = ['player_display_name', 'early_ppg', 'early_total', 'early_3w_avg', 'early_5w_avg', 'early_avg_rank', 'early_trend']
    draft_data = draft_data.merge(early_season, on='player_display_name', how='left')
    
    # Fill missing values
    numeric_cols = draft_data.select_dtypes(include=[np.number]).columns
    draft_data[numeric_cols] = draft_data[numeric_cols].fillna(0)
    
    return draft_data

def calculate_value_above_replacement(draft_data, replacement_values):
    """Calculate Value Above Replacement Player (VARP)"""
    draft_data['replacement_level'] = draft_data['position'].map(replacement_values)
    draft_data['varp_regular'] = draft_data['regular_season_total'] - draft_data['replacement_level']
    draft_data['varp_playoff'] = draft_data['playoff_total_points'] - (draft_data['replacement_level'] * 3/17)  # Adjust for 3 weeks vs 17
    
    return draft_data

def build_season_projection_model(draft_data):
    """Build model to project season-long performance"""
    
    # Filter to players with sufficient games
    model_data = draft_data[draft_data['count'] >= 8].copy()
    
    # Feature columns for modeling
    feature_cols = [
        'mean', 'std', 'coefficient_of_variation', 'boom_rate', 'bust_rate',
        'early_ppg', 'early_total', 'early_3w_avg', 'early_5w_avg', 
        'early_avg_rank', 'early_trend'
    ]
    
    # Remove any remaining missing values
    model_data = model_data.dropna(subset=feature_cols + ['regular_season_total'])
    
    if len(model_data) == 0:
        print("No valid data for modeling")
        return None, None, None
    
    X = model_data[feature_cols]
    y = model_data['regular_season_total']
    
    # Split by position for position-specific models
    models = {}
    scalers = {}
    metrics = {}
    
    for position in model_data['position'].unique():
        pos_data = model_data[model_data['position'] == position]
        
        if len(pos_data) < 10:  # Need minimum samples
            continue
            
        X_pos = pos_data[feature_cols]
        y_pos = pos_data['regular_season_total']
        
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X_pos, y_pos, test_size=0.3, random_state=42)
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        # Train model
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train_scaled, y_train)
        
        # Predictions
        y_pred = model.predict(X_test_scaled)
        
        # Metrics
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        r2 = r2_score(y_test, y_pred)
        
        models[position] = model
        scalers[position] = scaler
        metrics[position] = {'MAE': mae, 'RMSE': rmse, 'R2': r2}
        
        print(f"{position} Model - MAE: {mae:.2f}, RMSE: {rmse:.2f}, R2: {r2:.3f}")
    
    return models, scalers, metrics

def create_draft_rankings(draft_data, models, scalers):
    """Create draft rankings based on projected value"""
    
    feature_cols = [
        'mean', 'std', 'coefficient_of_variation', 'boom_rate', 'bust_rate',
        'early_ppg', 'early_total', 'early_3w_avg', 'early_5w_avg', 
        'early_avg_rank', 'early_trend'
    ]
    
    draft_rankings = draft_data.copy()
    draft_rankings['projected_points'] = 0
    
    # Generate projections for each position
    for position in models.keys():
        pos_mask = draft_rankings['position'] == position
        pos_data = draft_rankings[pos_mask]
        
        if len(pos_data) > 0 and not pos_data[feature_cols].isna().all().any():
            X_pos = pos_data[feature_cols].fillna(0)
            X_scaled = scalers[position].transform(X_pos)
            predictions = models[position].predict(X_scaled)
            
            draft_rankings.loc[pos_mask, 'projected_points'] = predictions
    
    # Calculate projected VARP
    draft_rankings['projected_varp'] = draft_rankings['projected_points'] - draft_rankings['replacement_level']
    
    # Overall rankings
    draft_rankings['overall_rank'] = draft_rankings['projected_varp'].rank(ascending=False)
    draft_rankings['position_rank'] = draft_rankings.groupby('position')['projected_varp'].rank(ascending=False)
    
    return draft_rankings

# Execute the draft modeling pipeline
print("Calculating replacement level values...")
replacement_values = calculate_replacement_level(df_features)
print("Replacement values by position:", replacement_values)

print("Creating draft features...")
draft_data = create_draft_features(df_features, consistency, targets)

print("Calculating value above replacement...")
draft_data = calculate_value_above_replacement(draft_data, replacement_values)

print("Building season projection models...")
models, scalers, metrics = build_season_projection_model(draft_data)

if models:
    print("Creating draft rankings...")
    draft_rankings = create_draft_rankings(draft_data, models, scalers)
    
    print(f"Draft rankings created for {len(draft_rankings)} players")
    print("\nTop 20 Overall Draft Rankings:")
    top_20 = draft_rankings.nsmallest(20, 'overall_rank')[
        ['player_display_name', 'position', 'projected_points', 'projected_varp', 'overall_rank', 'position_rank']
    ]
    print(top_20)
else:
    print("Model building failed")

Calculating replacement level values...
Replacement values by position: {'QB': 265.78, 'RB': 196.7, 'WR': 190.20000000000002, 'TE': 150.9, 'K': 143.0, 'DEF': 169.0}
Creating draft features...
Calculating value above replacement...
Building season projection models...
K Model - MAE: 6.90, RMSE: 9.04, R2: 0.733
WR Model - MAE: 13.44, RMSE: 18.22, R2: 0.926
RB Model - MAE: 12.76, RMSE: 16.32, R2: 0.933
DEF Model - MAE: 14.75, RMSE: 21.12, R2: 0.369
TE Model - MAE: 12.25, RMSE: 15.76, R2: 0.890
QB Model - MAE: 31.02, RMSE: 38.48, R2: 0.786
Creating draft rankings...
Draft rankings created for 580 players

Top 20 Overall Draft Rankings:
     player_display_name position  projected_points  projected_varp  \
555          Tyreek Hill       WR          288.4400         98.2400   
100  Christian McCaffrey       RB          272.4160         75.7160   
338         Keenan Allen       WR          259.5250         69.3250   
84           CeeDee Lamb       WR          249.9642         59.7642   
1    

In [23]:
# Injury and Multi-Season Adjustments

def create_injury_adjustment_framework():
    """Framework for adjusting projections based on known injuries/situations"""
    
    # Known major 2024 injuries/situations that affect 2025 projections
    injury_adjustments = {
        # Players with major 2024 injuries
        'Aaron Rodgers': {'injury_discount': 0.15, 'reason': '2024 Achilles recovery'},
        'Nick Chubb': {'injury_discount': 0.20, 'reason': '2024 knee injury'},
        'J.K. Dobbins': {'injury_discount': 0.15, 'reason': 'Injury history'},
        'Calvin Ridley': {'injury_discount': 0.10, 'reason': 'Team change, inconsistency'},
        
        # Age-related decline adjustments
        'Travis Kelce': {'age_discount': 0.08, 'reason': 'Age 35+ TE decline'},
        'Mike Evans': {'age_discount': 0.05, 'reason': 'Age-related decline'},
        'Adam Thielen': {'age_discount': 0.12, 'reason': 'Age 34+ WR decline'},
        
        # Positive situation changes
        'Calvin Ridley': {'situation_boost': 0.05, 'reason': 'New team opportunity'},
        'Saquon Barkley': {'situation_boost': 0.10, 'reason': 'Eagles upgrade'},
    }
    
    return injury_adjustments

def apply_injury_adjustments(draft_rankings, adjustments):
    """Apply injury/situation adjustments to projections"""
    
    adjusted_rankings = draft_rankings.copy()
    adjustment_log = []
    
    for player, adj_dict in adjustments.items():
        if player in adjusted_rankings['player_display_name'].values:
            player_idx = adjusted_rankings['player_display_name'] == player
            original_proj = adjusted_rankings.loc[player_idx, 'projected_points'].iloc[0]
            
            # Apply all adjustment factors
            total_adjustment = 1.0
            reasons = []
            
            if 'injury_discount' in adj_dict:
                total_adjustment *= (1 - adj_dict['injury_discount'])
                reasons.append(f"Injury discount: -{adj_dict['injury_discount']:.1%}")
                
            if 'age_discount' in adj_dict:
                total_adjustment *= (1 - adj_dict['age_discount'])
                reasons.append(f"Age discount: -{adj_dict['age_discount']:.1%}")
                
            if 'situation_boost' in adj_dict:
                total_adjustment *= (1 + adj_dict['situation_boost'])
                reasons.append(f"Situation boost: +{adj_dict['situation_boost']:.1%}")
            
            # Apply adjustment
            new_proj = original_proj * total_adjustment
            adjusted_rankings.loc[player_idx, 'projected_points'] = new_proj
            adjusted_rankings.loc[player_idx, 'projected_varp'] = new_proj - adjusted_rankings.loc[player_idx, 'replacement_level'].iloc[0]
            
            adjustment_log.append({
                'player': player,
                'original_projection': original_proj,
                'adjusted_projection': new_proj,
                'adjustment_factor': total_adjustment,
                'reasons': '; '.join(reasons)
            })
    
    # Recalculate rankings
    adjusted_rankings['overall_rank'] = adjusted_rankings['projected_varp'].rank(ascending=False)
    adjusted_rankings['position_rank'] = adjusted_rankings.groupby('position')['projected_varp'].rank(ascending=False)
    
    return adjusted_rankings, adjustment_log

def create_multi_season_weights():
    """Create weighting system for multi-season data"""
    
    # Weight recent seasons more heavily
    season_weights = {
        2024: 0.50,  # Most recent season
        2023: 0.35,  # Previous season  
        2022: 0.15   # Two seasons ago
    }
    
    return season_weights

def incorporate_recent_performance(player_name, recent_games_data=None):
    """Framework for incorporating 2024 performance data"""
    
    # This would be where you'd load 2024 data if available
    # For now, showing the framework
    
    if recent_games_data is not None:
        # Calculate 2024 performance metrics
        recent_avg = recent_games_data['fantasy_points'].mean()
        recent_trend = np.polyfit(range(len(recent_games_data)), recent_games_data['fantasy_points'], 1)[0]
        
        return {
            'recent_avg': recent_avg,
            'recent_trend': recent_trend,
            'games_played_2024': len(recent_games_data)
        }
    
    return None

def create_2025_draft_projections(draft_rankings):
    """Create final 2025 projections with all adjustments"""
    
    # Apply injury adjustments
    adjustments = create_injury_adjustment_framework()
    adjusted_rankings, adj_log = apply_injury_adjustments(draft_rankings, adjustments)
    
    print("Applied injury/situation adjustments:")
    for adj in adj_log:
        print(f"{adj['player']}: {adj['original_projection']:.1f} -> {adj['adjusted_projection']:.1f} ({adj['reasons']})")
    
    return adjusted_rankings, adj_log

def compare_rankings(original, adjusted):
    """Compare original vs adjusted rankings"""
    
    comparison = original[['player_display_name', 'position', 'overall_rank']].merge(
        adjusted[['player_display_name', 'overall_rank']], 
        on='player_display_name', 
        suffixes=('_original', '_adjusted')
    )
    
    comparison['rank_change'] = comparison['overall_rank_original'] - comparison['overall_rank_adjusted']
    comparison = comparison.sort_values('rank_change', ascending=False)
    
    return comparison

# Apply adjustments to create 2025 projections
print("Creating 2025-adjusted draft projections...")
adjusted_rankings, adjustment_log = create_2025_draft_projections(draft_rankings)

print(f"\nTop 20 Adjusted Rankings for 2025:")
top_20_adjusted = adjusted_rankings.nsmallest(20, 'overall_rank')[
    ['player_display_name', 'position', 'projected_points', 'projected_varp', 'overall_rank']
]
print(top_20_adjusted)

print(f"\nBiggest ranking changes:")
ranking_changes = compare_rankings(draft_rankings, adjusted_rankings)
print("Players moving up most:")
print(ranking_changes.head(5)[['player_display_name', 'position', 'rank_change']])
print("\nPlayers moving down most:")
print(ranking_changes.tail(5)[['player_display_name', 'position', 'rank_change']])

Creating 2025-adjusted draft projections...
Applied injury/situation adjustments:
Aaron Rodgers: 73.8 -> 62.7 (Injury discount: -15.0%)
Nick Chubb: 122.3 -> 97.9 (Injury discount: -20.0%)
J.K. Dobbins: 139.6 -> 118.7 (Injury discount: -15.0%)
Calvin Ridley: 173.5 -> 182.1 (Situation boost: +5.0%)
Travis Kelce: 186.7 -> 171.7 (Age discount: -8.0%)
Mike Evans: 205.9 -> 195.6 (Age discount: -5.0%)
Adam Thielen: 205.2 -> 180.6 (Age discount: -12.0%)
Saquon Barkley: 169.1 -> 186.0 (Situation boost: +10.0%)

Top 20 Adjusted Rankings for 2025:
     player_display_name position  projected_points  projected_varp  \
555          Tyreek Hill       WR        288.440000       98.240000   
100  Christian McCaffrey       RB        272.416000       75.716000   
338         Keenan Allen       WR        259.525000       69.325000   
84           CeeDee Lamb       WR        249.964200       59.764200   
1             A.J. Brown       WR        238.791400       48.591400   
127             DJ Moore       

In [30]:
# Simple Age Matching Approach

def simple_age_matching():
    """Simple approach to add ages to our rankings"""
    
    print("Step 1: Check roster data structure")
    print(f"Roster data shape: {roster_data.shape}")
    print(f"Roster columns: {roster_data.columns.tolist()}")
    
    # Look at some sample roster data
    print("\nSample roster data:")
    print(roster_data[['player_name', 'position', 'age', 'birth_date']].head())
    
    print(f"\nStep 2: Check our rankings data")
    print(f"Rankings shape: {draft_rankings.shape}")
    print(f"Rankings columns: {draft_rankings.columns.tolist()}")
    
    # Sample rankings data
    print("\nSample rankings data:")
    print(draft_rankings[['player_display_name', 'position']].head())
    
    print("\nStep 3: Try direct name matching")
    
    # Create a simple roster lookup
    roster_lookup = {}
    for _, row in roster_data.iterrows():
        name = row['player_name']
        if pd.notna(name) and pd.notna(row['age']):
            roster_lookup[name] = {
                'age': row['age'],
                'birth_date': row['birth_date']
            }
    
    print(f"Created roster lookup for {len(roster_lookup)} players")
    
    # Try matching with rankings
    rankings_with_age = draft_rankings.copy()
    rankings_with_age['age'] = None
    rankings_with_age['birth_date'] = None
    
    # Strategy 1: Direct matching with player_display_name
    matches_strategy1 = 0
    for idx, row in rankings_with_age.iterrows():
        player_name = row['player_display_name']
        if player_name in roster_lookup:
            rankings_with_age.loc[idx, 'age'] = roster_lookup[player_name]['age']
            rankings_with_age.loc[idx, 'birth_date'] = roster_lookup[player_name]['birth_date']
            matches_strategy1 += 1
    
    print(f"Strategy 1 (direct matching): {matches_strategy1} matches")
    
    # Strategy 2: Try with some common name variations
    matches_strategy2 = 0
    unmatched_mask = pd.isna(rankings_with_age['age'])
    unmatched_players = rankings_with_age[unmatched_mask]['player_display_name'].tolist()
    
    for idx, row in rankings_with_age[unmatched_mask].iterrows():
        player_name = row['player_display_name']
        
        # Try some common variations
        variations = [
            player_name,
            player_name.replace('.', ''),  # Remove periods
            player_name.replace("'", ''),  # Remove apostrophes
        ]
        
        # Try splitting and reversing (Last, First format)
        parts = player_name.split()
        if len(parts) >= 2:
            variations.append(f"{parts[-1]}, {' '.join(parts[:-1])}")
        
        found_match = False
        for variation in variations:
            if variation in roster_lookup:
                rankings_with_age.loc[idx, 'age'] = roster_lookup[variation]['age']
                rankings_with_age.loc[idx, 'birth_date'] = roster_lookup[variation]['birth_date']
                matches_strategy2 += 1
                found_match = True
                break
        
        if found_match:
            continue
    
    print(f"Strategy 2 (name variations): {matches_strategy2} additional matches")
    
    total_matches = matches_strategy1 + matches_strategy2
    match_rate = total_matches / len(rankings_with_age)
    
    print(f"\nTotal matches: {total_matches} out of {len(rankings_with_age)} ({match_rate:.1%})")
    
    # Show some successful matches
    matched_data = rankings_with_age[pd.notna(rankings_with_age['age'])]
    if len(matched_data) > 0:
        print(f"\nSample successful matches:")
        sample_matches = matched_data[['player_display_name', 'position', 'age']].head()
        print(sample_matches)
        
        # Show unmatched players (first few)
        unmatched_data = rankings_with_age[pd.isna(rankings_with_age['age'])]
        if len(unmatched_data) > 0:
            print(f"\nSample unmatched players:")
            sample_unmatched = unmatched_data[['player_display_name', 'position']].head()
            print(sample_unmatched)
    
    return rankings_with_age, match_rate

def apply_age_adjustments_simple(rankings_with_age):
    """Apply age adjustments only to players with age data"""
    
    # Age curves by position
    position_age_curves = {
        'RB': {'decline_start': 28, 'decline_rate': 0.08},
        'WR': {'decline_start': 30, 'decline_rate': 0.05},
        'TE': {'decline_start': 32, 'decline_rate': 0.06},
        'QB': {'decline_start': 35, 'decline_rate': 0.04},
        'K': {'decline_start': 36, 'decline_rate': 0.03},
        'DEF': {'decline_start': 30, 'decline_rate': 0.05}
    }
    
    adjusted_rankings = rankings_with_age.copy()
    adjusted_rankings['age_adjustment'] = 1.0
    adjusted_rankings['age_category'] = 'Unknown'
    
    # Only adjust players with age data
    has_age = pd.notna(adjusted_rankings['age'])
    
    for idx, row in adjusted_rankings[has_age].iterrows():
        position = row['position']
        age = row['age']
        
        if position not in position_age_curves:
            continue
            
        curve = position_age_curves[position]
        
        if age < curve['decline_start']:
            adjusted_rankings.loc[idx, 'age_category'] = 'Prime'
            adjusted_rankings.loc[idx, 'age_adjustment'] = 1.0
        else:
            # Apply decline
            years_past_decline = age - curve['decline_start']
            decline_factor = 1 - (years_past_decline * curve['decline_rate'])
            decline_factor = max(decline_factor, 0.7)  # Cap decline at 30%
            
            adjusted_rankings.loc[idx, 'age_adjustment'] = decline_factor
            adjusted_rankings.loc[idx, 'age_category'] = 'Declining'
    
    # Apply age adjustments to projections
    adjusted_rankings['age_adjusted_points'] = (
        adjusted_rankings['projected_points'] * adjusted_rankings['age_adjustment']
    )
    adjusted_rankings['age_adjusted_varp'] = (
        adjusted_rankings['age_adjusted_points'] - adjusted_rankings['replacement_level']
    )
    
    # Recalculate rankings
    adjusted_rankings['final_overall_rank'] = adjusted_rankings['age_adjusted_varp'].rank(ascending=False)
    
    return adjusted_rankings

# Execute the simple age matching
print("Attempting simple age matching...")
rankings_with_age, match_rate = simple_age_matching()

if match_rate > 0.3:  # If we matched at least 30% of players
    print(f"\nApplying age adjustments (match rate: {match_rate:.1%})...")
    final_rankings = apply_age_adjustments_simple(rankings_with_age)
    
    # Show results
    print(f"\nAge category distribution:")
    age_dist = final_rankings['age_category'].value_counts()
    print(age_dist)
    
    # Show players with age adjustments
    age_adjusted_players = final_rankings[pd.notna(final_rankings['age'])]
    if len(age_adjusted_players) > 0:
        print(f"\nTop 10 players with age data:")
        top_with_age = age_adjusted_players.nsmallest(10, 'final_overall_rank')[
            ['player_display_name', 'position', 'age', 'age_category', 'age_adjustment', 'final_overall_rank']
        ]
        print(top_with_age)
        
        # Show players most affected by age
        declining_players = final_rankings[final_rankings['age_category'] == 'Declining']
        if len(declining_players) > 0:
            print(f"\nPlayers with age-related declines:")
            print(declining_players[['player_display_name', 'position', 'age', 'age_adjustment']].head())
    
else:
    print(f"Match rate too low ({match_rate:.1%}), skipping age adjustments")
    final_rankings = rankings_with_age

Attempting simple age matching...
Step 1: Check roster data structure
Roster data shape: (3089, 37)
Roster columns: ['season', 'team', 'position', 'depth_chart_position', 'jersey_number', 'status', 'player_name', 'first_name', 'last_name', 'birth_date', 'height', 'weight', 'college', 'player_id', 'espn_id', 'sportradar_id', 'yahoo_id', 'rotowire_id', 'pff_id', 'pfr_id', 'fantasy_data_id', 'sleeper_id', 'years_exp', 'headshot_url', 'ngs_position', 'week', 'game_type', 'status_description_abbr', 'football_name', 'esb_id', 'gsis_it_id', 'smart_id', 'entry_year', 'rookie_year', 'draft_club', 'draft_number', 'age']

Sample roster data:
        player_name position   age birth_date
0  Bernard Williams       OL   NaN        NaT
1      Jason Peters       OL  41.0 1982-01-22
2     Aaron Rodgers       QB  39.0 1983-12-02
3       Matt Prater        K  39.0 1984-08-10
4    Marcedes Lewis       TE  39.0 1984-05-19

Step 2: Check our rankings data
Rankings shape: (580, 25)
Rankings columns: ['player