In [1]:
# install packages and load libraries

import pandas as pd
from datetime import datetime, timedelta, date
from pybaseball import statcast
from pybaseball import playerid_lookup

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV

import matplotlib.pyplot as plt

import time

import os

from scipy.stats import nbinom

import numpy as np

import joblib

import unicodedata

from xgboost import XGBRegressor




In [2]:
# 1. Load existing model_df and team_k_stats
model_df = pd.read_csv(r"C:\Users\ianat\OneDrive\Documents\Gambling\MLB\Data\model_df.csv")
team_k_stats = pd.read_csv(r"C:\Users\ianat\OneDrive\Documents\Gambling\MLB\Data\team_k_stats.csv")

# 2. Find latest date
latest_date_str = model_df['game_date'].max()
latest_date = pd.to_datetime(latest_date_str) + timedelta(days=1)
today = datetime.today()

# 3. Scrape missing Statcast data
print(f"📈 Scraping Statcast from {latest_date.date()} to {today.date()}...")
new_data = statcast(start_dt=latest_date.strftime("%Y-%m-%d"), end_dt=today.strftime("%Y-%m-%d"))

# 4. Check if any new data
if new_data.empty:
    print("✅ No new games found. model_df is already up to date.")
else:
    print(f"✅ Found {len(new_data)} new rows. Processing...")

    # Assign batting_team manually
    new_data['batting_team'] = new_data.apply(
        lambda x: x['away_team'] if x['inning_topbot'] == 'Top' else x['home_team'], axis=1
    )

    # Determine if pitcher is home/away based on inning side
    new_data['is_home_game'] = new_data['inning_topbot'].apply(lambda x: 1 if x == 'Top' else 0)

    # Correct player_team based on is_home_game
    new_data['player_team'] = new_data.apply(
        lambda x: x['home_team'] if x['is_home_game'] == 1 else x['away_team'], axis=1
    )

    # 5. Filter only pitching events
    pitching_data = new_data[~new_data['pitch_type'].isna()]

    # 6. Build features
    pitcher_game = pitching_data.groupby(
        ['player_name', 'game_date', 'pitcher', 'home_team', 'away_team', 'player_team']
    ).agg(
        avg_velocity=('release_speed', 'mean'),
        pct_LHB_faced=('stand', lambda x: (x == 'L').mean()),
        p_throws=('p_throws', 'first'),
        total_strikeouts=('events', lambda x: (x == 'strikeout').sum()),
        is_home_game=('is_home_game', 'first')
    ).reset_index()

    # Static placeholders
    pitcher_game['pitcher_days_since_prev_game'] = 5
    pitcher_game['pitcher_days_until_next_game'] = 5
    pitcher_game['ks_last_5_games'] = 5  # placeholder until recalculation
    pitcher_game['k_pct_last_5_games'] = 0.22

    # Add opponent_team
    pitcher_game['opponent_team'] = pitcher_game.apply(
        lambda x: x['away_team'] if x['is_home_game'] == 1 else x['home_team'], axis=1
    )

    # Placeholder for k_pct_60d
    pitcher_game['k_pct_60d'] = 0.22

    # 7. Reorder columns to match model_df
    columns_to_keep = [
        'player_name', 'game_date', 'avg_velocity', 'pct_LHB_faced', 'p_throws',
        'pitcher_days_since_prev_game', 'pitcher_days_until_next_game',
        'home_team', 'away_team', 'player_team', 'total_strikeouts', 'is_home_game',
        'ks_last_5_games', 'k_pct_last_5_games', 'opponent_team', 'k_pct_60d'
    ]
    pitcher_game = pitcher_game[columns_to_keep]

    # 8. Append new games to model_df
    model_df['game_date'] = pd.to_datetime(model_df['game_date'])
    model_df = pd.concat([model_df, pitcher_game], ignore_index=True)

    # 9. Recalculate ks_last_5_games
    model_df = model_df.sort_values(['player_name', 'game_date'])
    model_df['ks_last_5_games'] = (
        model_df.groupby('player_name')['total_strikeouts']
        .rolling(window=5, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True)
    )

    # 10. Rebuild team_k_stats 
    print("🔄 Rebuilding team K% stats...")

    # Only plate appearances (where events exist) and .copy()
    pa_data = pitching_data[pitching_data['events'].notna()].copy()

    # Only 2025 season games
    pa_data['game_year'] = pd.to_datetime(pa_data['game_date']).dt.year
    pa_data = pa_data[pa_data['game_year'] == 2025]

    # Correct team K% calculation
    team_k_stats = pa_data.groupby(['batting_team', 'p_throws']).agg(
        total_pas=('batter', 'count'),
        total_strikeouts=('events', lambda x: (x == 'strikeout').sum())
    ).reset_index()

    team_k_stats['k_pct_60d'] = team_k_stats['total_strikeouts'] / team_k_stats['total_pas']

    # 11. Save updated files
    model_df.to_csv(r"C:\Users\ianat\OneDrive\Documents\Gambling\MLB\Data\model_df.csv", index=False)
    team_k_stats.to_csv(r"C:\Users\ianat\OneDrive\Documents\Gambling\MLB\Data\team_k_stats.csv", index=False)

    print("Updated model_df and team_k_stats saved successfully!")


📈 Scraping Statcast from 2025-04-08 to 2025-04-08...
This is a large query, it may take a moment to complete


100%|██████████| 1/1 [00:00<00:00,  8.33it/s]

✅ No new games found. model_df is already up to date.





In [54]:
# testing best parameters for the model, will implement below
# THIS CHUNK HAS ALREADY BEEN RUN - PRINT STATEMENT WILL SHOW THE BEST PARAMETERS FOR XGBOOST


# today = date.today()
# dynamic_seed = int(today.strftime("%Y%m%d"))

# # 1. Define model
# xgb = XGBRegressor(random_state=dynamic_seed)

# # 2. Define search space
# param_dist = {
#     'n_estimators': [100, 200, 300, 400, 500],
#     'learning_rate': [0.01, 0.03, 0.05, 0.1, 0.2],
#     'max_depth': [3, 4, 5, 6, 7],
#     'subsample': [0.7, 0.8, 0.9, 1.0],
#     'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
#     'min_child_weight': [1, 3, 5],
#     'gamma': [0, 0.1, 0.2]
# }

# # 3. Setup RandomizedSearchCV
# random_search = RandomizedSearchCV(
#     estimator=xgb,
#     param_distributions=param_dist,
#     n_iter=30,             # Try 30 random combos
#     scoring='neg_mean_squared_error',  # How to judge "better"
#     cv=3,                  # 3-fold cross validation
#     verbose=1,
#     random_state=dynamic_seed,
#     n_jobs=-1              # Use all cores
# )

# final_features = [
#     'avg_velocity',
#     'pct_LHB_faced',
#     'pitcher_days_since_prev_game',
#     'pitcher_days_until_next_game',
#     'ks_last_5_games',
#     'k_pct_60d',
#     'is_home_game'  # Added
# ]

# X = model_df[final_features]
# y = model_df['total_strikeouts']

# model_df = model_df.sort_values('game_date').reset_index(drop=True)
# model_df['recency_rank'] = model_df.index
# model_df['recency_score'] = model_df['recency_rank'] / model_df['recency_rank'].max()
# model_df['sample_weight'] = model_df['recency_score'] ** 2

# sample_weight = model_df['sample_weight']

# # 4. Fit
# random_search.fit(X, y, sample_weight=sample_weight)

# # 5. Best model
# best_xgb = random_search.best_estimator_

# print("Best Parameters Found:")
# print(random_search.best_params_)

best_params = {
    'subsample': 0.7,
    'n_estimators': 300,
    'min_child_weight': 1,
    'max_depth': 3,
    'learning_rate': 0.03,
    'gamma': 0.2,
    'colsample_bytree': 1.0
}

print("Best Parameters Found:")
print(best_params)


Best Parameters Found:
{'subsample': 0.7, 'n_estimators': 300, 'min_child_weight': 1, 'max_depth': 3, 'learning_rate': 0.03, 'gamma': 0.2, 'colsample_bytree': 1.0}


In [4]:
# 1. Load existing model_df and team_k_stats
model_df = pd.read_csv(r"C:\Users\ianat\OneDrive\Documents\Gambling\MLB\Data\model_df.csv")
team_k_stats = pd.read_csv(r"C:\Users\ianat\OneDrive\Documents\Gambling\MLB\Data\team_k_stats.csv")

# 2. Prepare final features
final_features = [
    'avg_velocity',
    'pct_LHB_faced',
    'pitcher_days_since_prev_game',
    'pitcher_days_until_next_game',
    'ks_last_5_games',
    'k_pct_60d',
    'is_home_game'  # Added
]

X = model_df[final_features]
y = model_df['total_strikeouts']

# 3. Create recency sample weights
model_df = model_df.sort_values('game_date').reset_index(drop=True)
model_df['recency_rank'] = model_df.index
model_df['recency_score'] = model_df['recency_rank'] / model_df['recency_rank'].max()
model_df['sample_weight'] = model_df['recency_score'] ** 2

sample_weight = model_df['sample_weight']

today = date.today()
dynamic_seed = int(today.strftime("%Y%m%d"))

# 4. Train XGBoost model
xgb_final_model = XGBRegressor(
    n_estimators=300,       
    learning_rate=0.03,     )
    max_depth=3,            
    subsample=0.7,          
    colsample_bytree=1.0,   
    random_state=dynamic_seed,
    verbosity=1,
    min_child_weight = 1,
    gamma = 0.2
)

xgb_final_model.fit(X, y, sample_weight=sample_weight)

print("Model retrained successfully!")

# 5. Save model
joblib.dump(xgb_final_model, r"C:\Users\ianat\OneDrive\Documents\Gambling\MLB\Data\xgb_final_model.pkl")
print("XGB Model saved successfully!")


Model retrained successfully!
XGB Model saved successfully!


In [31]:
# removing any accents in players names to make it easier to lookup
def normalize_name(name):
    return unicodedata.normalize('NFKD', name).encode('ASCII', 'ignore').decode('utf-8')

def predict_strikeouts(pitcher_name, opponent_team, is_home_game=1, alt_lines=None, n_sims=10000, manual_under_odds=None):
    # 1. Get the latest record for this pitcher
    # Apply normalization to both sides
    normalized_pitcher_name = normalize_name(pitcher_name)
    model_df['player_name_normalized'] = model_df['player_name'].apply(normalize_name)

    # Then match
    pitcher_rows = model_df[model_df['player_name_normalized'] == normalized_pitcher_name]
    if pitcher_rows.empty:
        raise ValueError(f"No data found for pitcher: {pitcher_name}. Check name spelling or model_df contents.")

    row = pitcher_rows.sort_values('game_date').iloc[-1]

    # 2. Get pitcher's throwing hand
    p_throws = row.get('p_throws', 'R')  # fallback to RHP if not found

    # 3. Lookup opponent K% vs pitcher handedness
    if opponent_team not in team_k_stats['batting_team'].values:
        print(f"Warning: Opponent team '{opponent_team}' not found in team_k_stats. Using default 0.22 k_pct.")
        k_pct_60d = 0.22
    else:
        opponent_k_row = team_k_stats[
            (team_k_stats['batting_team'] == opponent_team) & 
            (team_k_stats['p_throws'] == p_throws)
        ]
        k_pct_60d = opponent_k_row['k_pct_60d'].values[0] if not opponent_k_row.empty else 0.22

    # 4. Safely pull features with fallbacks
    input_features = {
        'avg_velocity': row['avg_velocity'] if pd.notna(row['avg_velocity']) else 93,
        'pct_LHB_faced': row['pct_LHB_faced'] if pd.notna(row['pct_LHB_faced']) else 0.35,
        'pitcher_days_since_prev_game': row['pitcher_days_since_prev_game'] if pd.notna(row['pitcher_days_since_prev_game']) else 5,
        'pitcher_days_until_next_game': row['pitcher_days_until_next_game'] if pd.notna(row['pitcher_days_until_next_game']) else 5,
        'ks_last_5_games': row['ks_last_5_games'] if pd.notna(row['ks_last_5_games']) else 5,
        'k_pct_60d': k_pct_60d,
        'is_home_game': is_home_game
    }

    # 5. Predict
    input_df = pd.DataFrame([input_features])
    prediction = xgb_final_model.predict(input_df)[0]


    # 6. Monte Carlo EV simulation (if alt lines provided)
    ladder_results = None
    under_result = None

    if alt_lines:
        # Calculate Negative Binomial parameters
        today = date.today()
        dynamic_seed = int(today.strftime("%Y%m%d"))

        rng = np.random.default_rng(seed=dynamic_seed)
        sim_ks = rng.poisson(lam=prediction, size=n_sims)

        # --- Process Over Ladder Bets ---
        results = []
        for line, odds in alt_lines:
            true_prob = np.mean(sim_ks > line)

            # Breakeven calculation
            decimal_odds = (odds / 100) + 1 if odds > 0 else (100 / abs(odds)) + 1
            breakeven_prob = 1 / decimal_odds

            # EV clean method
            ev_percentage = (true_prob - breakeven_prob) * 100

            results.append({
                'Line': line,
                'Odds': odds,
                'True_Prob': round(true_prob, 4),
                'Breakeven_%': round(breakeven_prob * 100, 2),
                'EV_%': round(ev_percentage, 2)
            })

        ladder_results = pd.DataFrame(results)

        # ADD BET/PASS column based on dynamic thresholds
        rung_thresholds = []
        recommendations = []

        for i, ev in enumerate(ladder_results['EV_%']):
            required_ev = 7 + i  # 7% + 1% per rung
            rung_thresholds.append(required_ev)
            if ev >= required_ev:
                recommendations.append('BET ✅')
            else:
                recommendations.append('PASS ❌')

        ladder_results['Required_EV_%'] = rung_thresholds
        ladder_results['Recommendation'] = recommendations

        # --- Process Under Bet on First Rung ---
        first_line, first_over_odds = alt_lines[0]

        # Manually entered Under odds (if given), otherwise assume mirror
        if manual_under_odds is not None:
            first_under_odds = manual_under_odds
        else:
            first_under_odds = -first_over_odds if first_over_odds > 0 else abs(first_over_odds)

        true_prob_under = np.mean(sim_ks <= first_line)

        decimal_under_odds = (first_under_odds / 100) + 1 if first_under_odds > 0 else (100 / abs(first_under_odds)) + 1
        breakeven_prob_under = 1 / decimal_under_odds

        ev_percentage_under = (true_prob_under - breakeven_prob_under) * 100

        # Threshold for Under EV (can tweak)
        under_required_ev = 7.0  # Example: Require +3% EV or better

        # Recommendation logic
        under_recommendation = 'BET ✅' if ev_percentage_under >= under_required_ev else 'PASS ❌'

        under_result = pd.DataFrame([{
            'Line': round(first_line, 1),
            'Odds': int(first_under_odds),
            'True_Prob': round(true_prob_under * 100, 2),
            'Breakeven_%': round(breakeven_prob_under * 100, 2),
            'EV_%': round(ev_percentage_under, 2),
            'Required_EV_%': under_required_ev,  # Show required threshold
            'Recommendation': under_recommendation  # ✅ BET or PASS
        }])

        

    return round(prediction, 2), input_features, ladder_results, under_result




In [33]:
# Example alt_lines you want to test
alt_lines = [
    (5.5, -152),
    (6.5, +138),   # Over 4.5 strikeouts at -120 odds
    (7.5, +280),  # Over 5.5 strikeouts at +190 odds
    (8.5, +560),
    #(6.5, +700)       # Over 6.5 strikeouts at +400 odds
]

# Manually specify real Under odds for first rung if you know them (optional)
manual_under_odds = +120 # example real Under odds

# Call the function
prediction, input_features, ladder_results, under_result = predict_strikeouts(
    pitcher_name="Shane Baz",   # Pitcher name (must match model_df exactly!)
    opponent_team="COL",             # Opponent team abbreviation (e.g., BOS for Red Sox)
    is_home_game=0,             # Home/Away flag (optional, currently unused)
    alt_lines=alt_lines,             # Ladder lines and odds
    n_sims=10000,                    # Number of Monte Carlo simulations
    manual_under_odds=manual_under_odds  # Optional manual Under odds
)

# Print results
print("Prediction:", prediction)
print("Ladder Results (Over bets):")
print(ladder_results)
print("Under Result:")
print(under_result)




Prediction: 6.3
Ladder Results (Over bets):
   Line  Odds  True_Prob  Breakeven_%  EV_%  Required_EV_% Recommendation
0   5.5  -152     0.5901        60.32 -1.31              7         PASS ❌
1   6.5   138     0.4330        42.02  1.28              8         PASS ❌
2   7.5   280     0.2941        26.32  3.09              9         PASS ❌
3   8.5   560     0.1849        15.15  3.34             10         PASS ❌
Under Result:
   Line  Odds  True_Prob  Breakeven_%  EV_%  Required_EV_% Recommendation
0   5.5   120      40.99        45.45 -4.46            7.0         PASS ❌


In [30]:
team_k_stats.head()

Unnamed: 0,batting_team,p_throws,total_pas,total_strikeouts,k_pct_60d
0,ATH,L,49,9,0.183673
1,ATH,R,338,63,0.186391
2,ATL,L,68,15,0.220588
3,ATL,R,217,63,0.290323
4,AZ,L,129,20,0.155039


In [None]:

# # 1. Scrape last 7 days of data
# today = datetime.today()
# start_date = today - timedelta(days=11)

# # Format dates
# start_dt = start_date.strftime('%Y-%m-%d')
# end_dt = today.strftime('%Y-%m-%d')

# print(f"📈 Scraping Statcast from {start_dt} to {end_dt}...")

# pitching_data = statcast(start_dt=start_dt, end_dt=end_dt)

# # 2. Filter only pitching events (where pitch_type is not NaN)
# pitching_data = pitching_data[~pitching_data['pitch_type'].isna()]

# # 3. Assign batting_team properly
# pitching_data['batting_team'] = pitching_data.apply(
#     lambda x: x['away_team'] if x['inning_topbot'] == 'Top' else x['home_team'],
#     axis=1
# )

# print(f"Pulled {len(pitching_data)} pitches for testing.")


Scraping Statcast from 2025-03-28 to 2025-04-08...
This is a large query, it may take a moment to complete


  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_cop

Pulled 40116 pitches for testing.


In [9]:
# 4. Filter only plate appearances (where events are not NaN)
# pa_data = pitching_data[pitching_data['events'].notna()].copy()

# # 5. Keep only 2025 season games
# pa_data['game_year'] = pd.to_datetime(pa_data['game_date']).dt.year
# pa_data = pa_data[pa_data['game_year'] == 2025]

# # 6. Build correct team_k_stats
# team_k_stats = pa_data.groupby(['batting_team', 'p_throws']).agg(
#     total_pas=('batter', 'count'),
#     total_strikeouts=('events', lambda x: (x == 'strikeout').sum())
# ).reset_index()

# # 7. Calculate K%
# team_k_stats['k_pct_60d'] = team_k_stats['total_strikeouts'] / team_k_stats['total_pas']

print(team_k_stats.head())


  batting_team p_throws  total_pas  total_strikeouts  k_pct_60d
0          ATH        L         49                 9   0.183673
1          ATH        R        338                63   0.186391
2          ATL        L         68                15   0.220588
3          ATL        R        217                63   0.290323
4           AZ        L        129                20   0.155039


In [13]:
# Step 1: Create opponent-level aggregates
opponent_df = model_df.groupby(['opponent_team', 'p_throws', 'is_home_game']).agg(
    avg_ks_vs_team=('total_strikeouts', 'mean'),
    games_played=('total_strikeouts', 'count'),
    avg_k_pct_60d=('k_pct_60d', 'mean')
).reset_index()

# Step 2: Estimate a league-average baseline
league_avg_ks = model_df['total_strikeouts'].mean()

# Step 3: Create the target adjustment variable
opponent_df['ks_adjustment'] = opponent_df['avg_ks_vs_team'] - league_avg_ks

# Now ready to train opponent adjustment model!

opponent_df.head(10)


Unnamed: 0,opponent_team,p_throws,is_home_game,avg_ks_vs_team,games_played,avg_k_pct_60d,ks_adjustment
0,ATH,L,0,3.5,2,0.183673,1.51801
1,ATH,L,1,0.4,5,0.183673,-1.58199
2,ATH,R,0,1.357143,14,0.186391,-0.624847
3,ATH,R,1,2.0,27,0.186391,0.01801
4,ATL,L,0,1.959502,321,0.220588,-0.022489
5,ATL,L,1,2.048128,374,0.220588,0.066138
6,ATL,R,0,2.130806,1055,0.290323,0.148815
7,ATL,R,1,2.007442,1075,0.290323,0.025452
8,AZ,L,0,1.7875,400,0.155039,-0.19449
9,AZ,L,1,1.821826,449,0.155039,-0.160164


In [10]:
# Merge k_pct_60d
model_df = model_df.drop(columns=['k_pct_60d'])

model_df = model_df.merge(
    team_k_stats[['batting_team', 'p_throws', 'k_pct_60d']],
    left_on=['opponent_team', 'p_throws'],
    right_on=['batting_team', 'p_throws'],
    how='left'
)

# Drop extra columns
model_df = model_df.drop(columns=['batting_team'])

# Fill missing k_pct_60d with a reasonable default
model_df['k_pct_60d'] = model_df['k_pct_60d'].fillna(0.22)


In [12]:
model_df.head(10)

Unnamed: 0,player_name,game_date,avg_velocity,pitch_type,pct_LHB_faced,p_throws,pitcher_days_since_prev_game,pitcher_days_until_next_game,home_team,away_team,player_team,total_strikeouts,is_home_game,ks_last_5_games,k_pct_last_5_games,opponent_team,recency_rank,recency_score,sample_weight,k_pct_60d
0,Yency Almonte,2021-04-01,94.6,{'FF': 1.0},0.333333,R,,2.0,COL,LAD,COL,0,1,0.0,0.380952,LAD,0,0.0,0.0,0.205
1,Diego Castillo,2021-04-01,95.833333,"{'SI': 0.6666666666666666, 'FF': 0.33333333333...",0.0,R,,1.0,MIA,TB,TB,2,0,2.0,0.148148,MIA,1,1.2e-05,1.38379e-10,0.271028
2,Alex Reyes,2021-04-01,92.3,"{'FF': 0.4, 'SL': 0.4, 'SI': 0.2}",0.4,R,,4.0,CIN,STL,STL,1,0,1.0,0.333333,CIN,2,2.4e-05,5.53516e-10,0.240157
3,Cam Bedrosian,2021-04-01,87.833333,"{'FF': 0.6666666666666666, 'SL': 0.33333333333...",0.0,R,,2.0,CIN,STL,CIN,1,1,1.0,0.173913,STL,3,3.5e-05,1.245411e-09,0.193798
4,Shane Bieber,2021-04-01,86.796296,"{'KC': 0.3333333333333333, 'FF': 0.33333333333...",0.555556,R,,6.0,DET,CLE,CLE,12,0,12.0,0.25,DET,4,4.7e-05,2.214064e-09,0.24359
5,Chad Green,2021-04-01,93.14,"{'FF': 0.6, 'SI': 0.2, 'CU': 0.2}",0.2,R,,2.0,NYY,TOR,NYY,0,1,0.0,0.130435,TOR,5,5.9e-05,3.459475e-09,0.191781
6,Adam Kolarek,2021-04-01,85.18,"{'SI': 0.6, 'SL': 0.2, 'FF': 0.2}",0.6,L,,3.0,OAK,HOU,OAK,0,1,0.0,0.16,HOU,6,7.1e-05,4.981644e-09,0.333333
7,Drew Pomeranz,2021-04-01,92.275,"{'FF': 0.75, 'KC': 0.25}",0.5,L,,4.0,SD,AZ,SD,3,1,3.0,0.277778,AZ,7,8.2e-05,6.780571e-09,0.155039
8,Jimmy Nelson,2021-04-01,88.4,"{'FF': 0.4, 'KC': 0.4, 'SL': 0.2}",0.2,R,,3.0,COL,LAD,LAD,1,0,1.0,0.277778,COL,8,9.4e-05,8.856256e-09,0.295276
9,Yusmeiro Petit,2021-04-01,85.733333,"{'FC': 0.6666666666666666, 'FF': 0.33333333333...",0.333333,R,,3.0,OAK,HOU,OAK,0,1,0.0,0.058824,HOU,9,0.000106,1.12087e-08,0.271875
