In [None]:
# Phase 2: Draft Prediction Model Setup
# Create this in a new notebook: 02_draft_modeling.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Display configuration for the notebook session.
# Show every DataFrame column when printing instead of truncating with "...".
pd.options.display.max_columns = None

# Plot appearance: matplotlib style first, then the seaborn colour palette
# (the relative order of these two calls is kept as originally written).
plt.style.use('default')
sns.set_palette("husl")

import os 

# Locations of the pickled dataset and its metadata file, relative to the
# notebook's working directory.
data_path = '../data/fantasy_data_2023.pkl'
metadata_path = '../data/dataset_metadata.json'

if os.path.exists(data_path):
    # NOTE: unpickling executes arbitrary code; only load pickle files that
    # were produced by a trusted source (here, an earlier project notebook).
    df = pd.read_pickle(data_path)
else:
    # Bind df explicitly so the later `df is not None` check takes its
    # "cannot proceed" branch instead of raising NameError.
    df = None
    print(f'{data_path} not found')

# Build the metadata dictionary for the loaded dataset.
# The metadata file on disk is not read here; the summary fields are derived
# from the DataFrame and the league scoring tables are written out inline.
if df is None:
    print("Cannot proceed without dataset")
    metadata = None
else:
    print("Skipping corrupted metadata file, creating from dataset...")

    metadata = {
        # Summary fields computed from the dataset itself
        'positions': sorted(df['position'].unique().tolist()),
        'weeks_covered': sorted(df['week'].unique().tolist()),
        'total_records': len(df),
        'unique_players': df['player_display_name'].nunique(),

        # Custom offensive scoring settings
        'scoring_settings': {
            'passing_yards': 0.04,  # 1 point per 25 yards
            'passing_tds': 4,
            'interceptions': -2,
            'rushing_yards': 0.1,   # 1 point per 10 yards
            'rushing_tds': 6,
            'receiving_yards': 0.1, # 1 point per 10 yards
            'receiving_tds': 6,
            'receptions': 1,        # PPR
            'rushing_fumbles_lost': -2,
            'receiving_fumbles_lost': -2,
            'special_teams_tds': 6,
        },

        # Team-defense scoring, including the points-allowed tier table
        'defense_scoring': {
            'defense_td': 6,
            'sacks': 1,
            'interceptions': 2,
            'fumble_recovery': 2,
            'safety': 2,
            'forced_fumble': 1,
            'blocked_kick': 2,
            'points_allowed_tiers': {
                '0': 10,
                '1-6': 7,
                '7-13': 4,
                '14-20': 1,
                '21-27': -1,
                '28-34': -1,
                '35+': -4,
            },
        },

        # Kicker scoring by field-goal distance bucket, plus PATs and misses
        'kicking_scoring': {
            'fg_0_19': 3,
            'fg_20_29': 3,
            'fg_30_39': 3,
            'fg_40_49': 4,
            'fg_50_59': 5,
            'fg_60_plus': 6,
            'pat_made': 1,
            'pat_missed': -1,
            'fg_missed': -1,
        },
    }

    print("Metadata recreated from dataset")

    # Report what was loaded
    print(f"Dataset loaded: {df.shape[0]:,} records")
    print(f"Positions: {metadata['positions']}")
    print(f"Weeks: {len(metadata['weeks_covered'])} weeks")
    print(f"Unique players/teams: {metadata['unique_players']}")

# Summary and basic inspection of the loaded dataset.
# Every statement below dereferences df or metadata, and both are None when
# the dataset could not be loaded, so the report is skipped in that case
# instead of crashing with AttributeError/TypeError.
if df is not None and metadata is not None:
    print(f"Dataset loaded: {df.shape[0]:,} records")
    print(f"Positions: {metadata['positions']}")
    print("Scoring system: Custom league scoring loaded")

    # Basic data inspection
    print("\nDataset Overview:")
    print(f"Shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")
    print(f"Date range: Weeks {df['week'].min()}-{df['week'].max()}")

    # Position breakdown: number of rows per position, most frequent first
    print("\nPosition Distribution:")
    position_counts = df['position'].value_counts()
    print(position_counts)

    # Sample the data
    print("\nFirst 5 records:")
    print(df.head())

🏈 PHASE 2: DRAFT PREDICTION MODEL
/Users/evanwise/Projects/Fantasy Football/notebooks
Skipping corrupted metadata file, creating from dataset...
Metadata recreated from dataset
Dataset loaded: 6,670 records
Positions: ['DEF', 'K', 'QB', 'RB', 'TE', 'WR']
Weeks: 22 weeks
Unique players/teams: 620
Dataset loaded: 6,670 records
Positions: ['DEF', 'K', 'QB', 'RB', 'TE', 'WR']
Scoring system: Custom league scoring loaded

Dataset Overview:
Shape: (6670, 6)
Columns: ['season', 'week', 'player_display_name', 'position', 'position_group', 'custom_fantasy_points']
Date range: Weeks 1-22

Position Distribution:
WR     2322
RB     1403
TE     1122
QB      690
DEF     570
K       563
Name: position, dtype: int64

First 5 records:
   season  week player_display_name position position_group  \
0    2023     1       Aaron Rodgers       QB             QB   
1    2023     4      Marcedes Lewis       TE             TE   
2    2023     7      Marcedes Lewis       TE             TE   
3    2023    11     