# Minor League Baseball (MiLB) Prospect Analysis

This notebook shows how to analyze minor league data to evaluate prospects.

## Why Analyze MiLB Data?

- **Identify breakout prospects** before they reach the majors
- **Track player development** across levels
- **Find undervalued trade targets**
- **Predict MLB success** based on minor league performance
- **Compare players** across different organizations and levels

In [None]:
# Setup
import sys
sys.path.append('..')  # Add parent directory to path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data import MiLBFetcher
import config

# Notebook-wide presentation settings: cap how many columns/rows pandas
# prints, and use seaborn's dark grid theme for every figure.
for option_name, option_value in (
    ('display.max_columns', 30),
    ('display.max_rows', 50),
):
    pd.set_option(option_name, option_value)

sns.set_style('darkgrid')

print("Setup complete!")

## 1. Initialize MiLB Data Fetcher

In [None]:
# Create the MiLB fetcher used by every later cell; it is handed the cache
# directory configured in config.CACHE_DIR.
milb_fetcher = MiLBFetcher(
    cache_dir=config.CACHE_DIR,
)

# Echo the cache location so the reader can confirm the configuration
print(f"Fetcher initialized. Cache directory: {config.CACHE_DIR}")

## 2. Fetch AAA Batting Data

Let's start by looking at Triple-A hitters (one level below MLB).

In [None]:
# Request the configured season's batting table for the "AAA" level
aaa_batting = milb_fetcher.get_batting_stats(
    season=config.CURRENT_SEASON,
    level="AAA",
)

# One row per batter is assumed here, so the row count is reported as the
# number of batters -- TODO confirm against MiLBFetcher.
print(f"Fetched data for {len(aaa_batting)} AAA batters")

# Final top-level expression of the cell: the notebook renders the preview
aaa_batting.head()

In [None]:
# List every column the fetcher returned so later cells can be adapted to
# whatever statistics are actually present.
column_names = aaa_batting.columns.tolist()
print("Available statistics:")
print(column_names)

## 3. Identify Top Prospects

Let's find the best-performing hitters in AAA with meaningful playing time.

In [None]:
# Filter to players with meaningful playing time.
# .copy() gives an independent frame so later cells can modify it without
# touching aaa_batting.
min_pa = 100  # Minimum plate appearances
qualified_batters = aaa_batting[aaa_batting['PA'] >= min_pa].copy()

print(f"Qualified batters (>= {min_pa} PA): {len(qualified_batters)}")

# Sort by wRC+ (weighted runs created plus - park/league adjusted)
if 'wRC+' in qualified_batters.columns:
    top_hitters = qualified_batters.nlargest(20, 'wRC+')

    # Display key stats, restricted to the columns that are actually present
    display_cols = ['Name', 'Team', 'Age', 'PA', 'AVG', 'OBP', 'SLG', 'HR', 'SB', 'wRC+', 'K%', 'BB%']
    available_cols = [col for col in display_cols if col in top_hitters.columns]

    print("\nTop 20 AAA Hitters by wRC+:")
    # A bare expression nested inside an `if` is not rendered by the notebook
    # (only a cell's final top-level expression is), so print the table
    # explicitly, in the same to_string(index=False) style as later cells.
    print(top_hitters[available_cols].to_string(index=False))

## 4. Age-Adjusted Performance

Younger players performing well in AAA are often better prospects.

In [None]:
# Visualize performance vs age
if 'wRC+' in qualified_batters.columns and 'Age' in qualified_batters.columns:
    plt.figure(figsize=(12, 7))

    # Scatter plot: one point per qualified batter
    plt.scatter(qualified_batters['Age'], qualified_batters['wRC+'], alpha=0.6, s=80)

    # Add trend line. Missing values in either column would corrupt the
    # least-squares fit, and a straight line needs at least two distinct ages,
    # so fit on complete rows only and skip the line when that is impossible.
    trend_points = qualified_batters[['Age', 'wRC+']].dropna()
    if trend_points['Age'].nunique() >= 2:
        z = np.polyfit(trend_points['Age'], trend_points['wRC+'], 1)
        p = np.poly1d(z)
        sorted_ages = trend_points['Age'].sort_values()  # sort once, reuse for x and y
        plt.plot(sorted_ages,
                 p(sorted_ages),
                 "r--", alpha=0.8, linewidth=2, label='Trend')

    # Reference lines
    plt.axhline(y=100, color='green', linestyle='--', alpha=0.5, label='League Average (100)')
    plt.axhline(y=120, color='orange', linestyle='--', alpha=0.5, label='Above Average (120)')

    plt.xlabel('Age', fontsize=12)
    plt.ylabel('wRC+ (100 = league average)', fontsize=12)
    plt.title('AAA Batting Performance by Age', fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Find young high performers: age 24 or younger with wRC+ of at least 115,
    # best hitters first
    young_stars = qualified_batters[
        (qualified_batters['Age'] <= 24) &
        (qualified_batters['wRC+'] >= 115)
    ].sort_values('wRC+', ascending=False)

    print(f"\nYoung stars (Age <= 24, wRC+ >= 115): {len(young_stars)}")
    if len(young_stars) > 0:
        print("\nTop young prospects:")
        display_cols = ['Name', 'Team', 'Age', 'PA', 'AVG', 'OBP', 'SLG', 'HR', 'wRC+']
        available_cols = [col for col in display_cols if col in young_stars.columns]
        print(young_stars[available_cols].head(10).to_string(index=False))

## 5. Compare Performance Across Levels

Let's compare aggregate batting stats at each level of the system, from rookie ball up to AAA.

In [None]:
# Fetch batting data for all levels
all_levels = milb_fetcher.get_all_levels_batting(season=config.CURRENT_SEASON)

print(f"Total players across all levels: {len(all_levels)}")

# Filter for qualified batters (a lower 50 PA bar than the AAA-only cells)
all_levels_qualified = all_levels[all_levels['PA'] >= 50].copy()

# Compare stats by level. The grouping needs the 'Level' column, so guard on
# that rather than on an unrelated statistic.
if 'Level' in all_levels_qualified.columns:
    # Counting stats are totalled; rate stats are a simple (unweighted) mean
    # across the qualified players at each level.
    stat_aggregations = {
        'PA': 'sum',
        'AVG': 'mean',
        'OBP': 'mean',
        'SLG': 'mean',
        'HR': 'sum',
        'K%': 'mean',
        'BB%': 'mean',
    }
    # Aggregate only columns that are present; asking .agg() for a missing
    # column raises KeyError.
    available_aggregations = {
        col: how
        for col, how in stat_aggregations.items()
        if col in all_levels_qualified.columns
    }
    level_stats = (
        all_levels_qualified
        .groupby('Level')
        .agg(available_aggregations)
        .round(3)
    )

    # Order by level, lowest to highest, keeping only levels present in the data
    level_order = ['Rk', 'A', 'A+', 'AA', 'AAA']
    level_stats = level_stats.reindex([l for l in level_order if l in level_stats.index])

    print("\nAverage Stats by Level:")
    print(level_stats)

## 6. Pitching Analysis

Now let's look at minor league pitchers.

In [None]:
# Fetch AAA pitching stats
aaa_pitching = milb_fetcher.get_pitching_stats(season=config.CURRENT_SEASON, level="AAA")

print(f"Fetched data for {len(aaa_pitching)} AAA pitchers")

# Filter for qualified pitchers (minimum innings).
# .copy() gives an independent frame so later cells can modify it without
# touching aaa_pitching.
min_ip = 20
qualified_pitchers = aaa_pitching[aaa_pitching['IP'] >= min_ip].copy()

print(f"Qualified pitchers (>= {min_ip} IP): {len(qualified_pitchers)}")

# Top pitchers by FIP (Fielding Independent Pitching)
if 'FIP' in qualified_pitchers.columns:
    top_pitchers = qualified_pitchers.nsmallest(20, 'FIP')  # Lower is better

    # Restrict the report to the columns that are actually present
    display_cols = ['Name', 'Team', 'Age', 'IP', 'ERA', 'FIP', 'WHIP', 'K/9', 'BB/9', 'K%', 'BB%']
    available_cols = [col for col in display_cols if col in top_pitchers.columns]

    print("\nTop 20 AAA Pitchers by FIP:")
    # A bare expression nested inside an `if` is not rendered by the notebook
    # (only a cell's final top-level expression is), so print the table
    # explicitly, in the same to_string(index=False) style as other cells.
    print(top_pitchers[available_cols].to_string(index=False))

## 7. Strikeout vs Walk Rate (Pitcher Command)

Elite pitchers have high strikeout rates and low walk rates.

In [None]:
# Plot strikeout rate against walk rate and list the pitchers who combine a
# high K% with a low BB%.
# NOTE(review): the 25 / 8 thresholds assume K% and BB% are stored on a 0-100
# scale; if the fetcher returns fractions (0.25) they must be rescaled -- confirm.
if 'K%' in qualified_pitchers.columns and 'BB%' in qualified_pitchers.columns:
    plt.figure(figsize=(12, 8))

    # Colour points by age only when that column exists, mirroring the
    # column-availability checks used everywhere else in this notebook.
    has_age = 'Age' in qualified_pitchers.columns
    scatter_kwargs = {'alpha': 0.6, 's': 100}
    if has_age:
        scatter_kwargs.update(c=qualified_pitchers['Age'], cmap='viridis')

    # Scatter plot
    scatter = plt.scatter(
        qualified_pitchers['BB%'],
        qualified_pitchers['K%'],
        **scatter_kwargs
    )

    if has_age:
        plt.colorbar(scatter, label='Age')

    # Reference lines
    plt.axvline(x=8, color='red', linestyle='--', alpha=0.5, label='8% BB Rate')
    plt.axhline(y=25, color='green', linestyle='--', alpha=0.5, label='25% K Rate')

    # Highlight elite zone (top-left: BB% from 0 to 8, K% from 25 to 50)
    plt.fill_between([0, 8], [25, 25], [50, 50], alpha=0.1, color='green', label='Elite Zone')

    plt.xlabel('Walk Rate (BB%)', fontsize=12)
    plt.ylabel('Strikeout Rate (K%)', fontsize=12)
    plt.title('AAA Pitcher Command: K% vs BB%', fontsize=14, fontweight='bold')
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Find elite command pitchers
    elite_command = qualified_pitchers[
        (qualified_pitchers['K%'] >= 25) &
        (qualified_pitchers['BB%'] <= 8)
    ]
    # Rank by FIP (lower first) when available; otherwise keep the original
    # row order instead of failing on the missing column.
    if 'FIP' in elite_command.columns:
        elite_command = elite_command.sort_values('FIP')

    print(f"\nElite command pitchers (K% >= 25%, BB% <= 8%): {len(elite_command)}")
    if len(elite_command) > 0:
        display_cols = ['Name', 'Team', 'Age', 'IP', 'ERA', 'FIP', 'K%', 'BB%']
        available_cols = [col for col in display_cols if col in elite_command.columns]
        print("\nTop prospects with elite command:")
        print(elite_command[available_cols].head(10).to_string(index=False))

## Next Steps for MiLB Analysis

You can extend this analysis by:

1. **Track individual prospects** - Monitor their progression through levels
2. **Build promotion models** - Predict when players will reach MLB
3. **Compare organizations** - Which teams develop players best?
4. **Historical analysis** - How did past prospects perform in MLB?
5. **Combine with Statcast** - When prospects reach MLB, compare their minor league stats to MLB performance

The combination of MiLB prospect tracking + MLB Statcast data gives you a complete player development pipeline!