In [1]:
import pandas as pd
from datascience import *



In [28]:
import pandas as pd
import numpy as np
from datascience import Table

# ============================================================================
# FINAL FIXED KC ROYALS ANALYSIS
# ============================================================================

# Step 1: Load Data
print("Step 1: Loading datasets...")
# Using try-except or standard load
df_br = pd.read_excel('BR Baseball Data 2021-2025.xlsb', engine='pyxlsb')
df_savant = pd.read_csv('Savant Pitcher 2021-2025.csv')

# Step 2: Filter for KC Royals
print("Step 2: Filtering for KC Royals...")
kc_royals_df = df_br[df_br['Team'] == 'KCR'].copy()

# Step 3: Create a Clean Join Key
# This handles "Wacha, Michael" -> "michael wacha" and "Michael Wacha" -> "michael wacha"
print("Step 3: Standardizing names...")

def create_join_key(name):
    if pd.isna(name): 
        return ""
    name = str(name).replace('"', '').lower().strip()
    if ',' in name:
        parts = [p.strip() for p in name.split(',')]
        return f"{parts[1]} {parts[0]}"
    return name

kc_royals_df['Join_Key'] = kc_royals_df['Player'].apply(create_join_key)
df_savant['Join_Key'] = df_savant['last_name, first_name'].apply(create_join_key)

# Step 4: DEDUPLICATE SAVANT DATA
# If Wacha appears 5 times in Savant, this averages his stats into 1 row
print("Step 4: Aggregating Savant stats to prevent duplicate rows...")
df_savant_unique = df_savant.groupby('Join_Key').mean(numeric_only=True).reset_index()

# Step 5: Join
print("Step 5: Joining tables...")
combined_df = kc_royals_df.merge(
    df_savant_unique, 
    on='Join_Key', 
    how='left', 
    suffixes=('', '_savant_duplicate')
)

# Step 6: Final Cleanup
# Drop the helper key and any redundant columns created by the join
print("Step 6: Cleaning up columns and removing duplicate records...")
combined_df = combined_df.drop(columns=['Join_Key'])
# Remove any columns that ended up with the duplicate suffix
cols_to_keep = [c for c in combined_df.columns if not c.endswith('_savant_duplicate')]
combined_df = combined_df[cols_to_keep]

# Remove any truly duplicate rows
combined_df = combined_df.drop_duplicates()

# Step 7: Convert to datascience Table
print("Step 7: Converting to final Table object...")
kc_royals_table = Table.from_df(combined_df)

# Display Results
print("\n" + "="*80)
print(f"ANALYSIS COMPLETE: {kc_royals_table.num_rows} total rows.")
print("="*80)
kc_royals_table.show(10)

# Export the final Table to a CSV file
kc_royals_table.to_csv('kc_royals_combined_analysis.csv')
print("Successfully exported to 'kc_royals_combined_analysis.csv'")

Step 1: Loading datasets...
Step 2: Filtering for KC Royals...
Step 3: Standardizing names...
Step 4: Aggregating Savant stats to prevent duplicate rows...
Step 5: Joining tables...
Step 6: Cleaning up columns and removing duplicate records...
Step 7: Converting to final Table object...

ANALYSIS COMPLETE: 27 total rows.


Player,Age,Team,Lg,WAR,W,L,W-L%,ERA,G,GS,GF,CG,SHO,SV,IP,H,R,ER,HR,BB,IBB,SO,HBP,BK,WP,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/BB,Awards,Player-additional,player_id,year,player_age,p_game,p_formatted_ip,pa,ab,hit,single,double,triple,home_run,strikeout,walk,k_percent,bb_percent,batting_avg,slg_percent,on_base_percent,on_base_plus_slg,isolated_power,babip,p_earned_run,p_run,p_save,p_blown_save,p_out,p_win,p_loss,p_wild_pitch,p_balk,p_shutout,p_era,p_opp_batting_avg,p_opp_on_base_avg,p_total_stolen_base,p_quality_start,p_run_support,xba,xslg,woba,xwoba,xobp,xiso,avg_swing_speed,fast_swing_rate,blasts_contact,blasts_swing,squared_up_contact,squared_up_swing,avg_swing_length,swords,attack_angle,attack_direction,ideal_angle_rate,vertical_swing_path,exit_velocity_avg,launch_angle_avg,sweet_spot_percent,barrel_batted_rate,hard_hit_percent,avg_best_speed,avg_hyper_speed,whiff_percent,swing_percent
Michael Wacha,33,KCR,AL,2.8,10,13,0.435,3.86,31,31,0,0,0,0,172.2,166,74,74,15,45,0,126,4,0,2,716,106,3.66,1.222,8.7,0.8,2.3,6.6,2.8,,wachami01,608379.0,2023.0,31.0,27.2,144.76,599.4,550.4,135.2,87.0,27.4,3.2,17.6,124.0,39.0,20.86,6.52,0.245,0.4054,0.298,0.7034,0.1604,0.2844,60.2,62.0,0.0,0.0,435.4,10.2,6.4,3.4,0.2,0.2,3.76,,,3.8,10.2,81.0,0.2564,0.4448,0.305,0.327,0.3098,0.1886,71.3,21.7667,13.3333,10.4,32.4,25.2333,7.4,15.3333,12.4667,-5.36667,47.4333,32.7,88.12,15.6,36.64,8.6,36.2,76.8844,93.7894,22.58,50.18
Seth Lugo,35,KCR,AL,1.7,8,7,0.533,4.15,26,26,0,0,0,0,145.1,133,70,67,27,55,1,125,6,0,5,611,99,5.09,1.294,8.2,1.7,3.4,7.7,2.27,,lugose01,607625.0,2023.0,33.0,38.6,121.7,503.6,460.8,109.8,71.2,21.0,2.2,15.4,114.0,35.2,23.8,7.4,0.2384,0.396,0.2998,0.6958,0.1576,0.2868,47.6,50.2,0.8,1.4,365.8,7.8,5.6,2.8,0.0,0.0,3.564,,,3.4,10.2,62.6,0.2454,0.4222,0.302,0.3164,0.3074,0.1766,71.9333,24.7,17.2,13.7333,36.8333,29.3667,7.3,16.3333,10.8667,-2.03333,53.0,32.8667,89.52,11.08,33.98,9.04,39.94,79.01,94.272,23.88,45.32
Michael Lorenzen,33,KCR,AL,0.4,7,11,0.389,4.64,27,26,0,0,0,0,141.2,149,76,73,25,39,0,127,6,0,12,606,89,4.59,1.327,9.5,1.6,2.5,8.1,3.26,,lorenmi01,547179.0,2023.0,31.0,25.4,110.1,460.2,412.4,99.0,63.0,19.8,1.2,15.0,88.2,40.6,18.88,9.36,0.2384,0.3898,0.3154,0.7052,0.1514,0.2696,51.2,53.4,1.0,0.0,331.0,6.4,6.8,5.4,0.2,0.2,4.392,,,4.0,7.4,55.8,0.246,0.4092,0.31,0.3212,0.3226,0.1636,71.5667,22.6333,13.3333,10.4667,34.4,27.0,7.36667,12.0,11.1667,-2.8,50.2,32.5,88.88,12.72,32.82,7.0,39.72,78.0648,94.0456,23.98,47.56
Noah Cameron*,25,KCR,AL,3.8,9,7,0.563,2.99,24,24,0,0,0,0,138.1,109,46,46,18,43,1,114,3,0,3,556,137,4.18,1.099,7.1,1.2,2.8,7.4,2.65,ROY-4,camerno01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Bailey Falter*,28,KCR,AL,-0.7,0,2,0.0,11.25,4,2,1,0,0,0,12.0,20,15,15,2,7,0,11,0,0,1,61,38,5.22,2.25,15.0,1.5,5.3,8.3,1.57,,falteba01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Kris Bubic*,27,KCR,AL,3.1,8,7,0.533,2.55,20,20,0,0,0,0,116.1,98,38,33,6,39,1,116,3,0,1,476,161,2.89,1.178,7.6,0.5,3.0,9.0,2.97,AS,bubickr01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Stephen Kolek,28,KCR,AL,1.1,1,2,0.333,1.91,5,5,0,0,0,0,33.0,20,9,7,1,5,0,21,0,0,0,125,217,2.71,0.758,5.5,0.3,1.4,5.7,4.2,,kolekst01,663568.0,2024.5,27.5,30.5,79.2,336.0,306.0,78.5,60.0,11.0,1.0,6.5,58.0,21.5,17.6,6.2,0.269,0.3755,0.324,0.6995,0.1065,0.313,35.5,39.5,0.0,0.5,239.0,4.0,3.5,1.0,0.0,0.5,4.36,,,2.5,4.5,48.0,0.2705,0.4115,0.308,0.327,0.327,0.141,71.25,21.4,15.55,12.85,34.7,28.6,7.0,7.0,5.5,6.35,45.3,32.75,89.6,5.5,31.85,4.95,44.5,78.1987,94.8221,19.4,48.05
Ryan Bergert,25,KCR,AL,0.2,1,2,0.333,4.43,8,8,0,0,0,0,40.2,37,21,20,4,17,1,39,0,0,1,174,93,3.75,1.328,8.2,0.9,3.8,8.6,2.29,,bergery01,686701.0,2025.0,25.0,19.0,76.1,323.0,284.0,61.0,39.0,13.0,1.0,8.0,73.0,35.0,22.6,10.8,0.215,0.352,0.302,0.654,0.137,0.26,31.0,32.0,0.0,0.0,229.0,2.0,2.0,2.0,0.0,0.0,3.66,,,2.0,1.0,33.0,0.238,0.41,0.29,0.322,0.325,0.172,71.7,19.0,11.1,8.6,32.8,25.4,7.3,7.0,10.9,-1.5,55.8,31.8,90.0,20.0,40.7,7.0,36.4,80.9985,93.7903,23.9,46.9
Daniel Lynch IV*,28,KCR,AL,1.3,6,2,0.75,3.06,57,2,8,0,0,1,67.2,66,24,23,8,26,1,45,6,0,1,289,135,4.76,1.36,8.8,1.1,3.5,6.0,1.73,,lynchda02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Carlos Est√©vez,32,KCR,AL,2.1,4,5,0.444,2.45,67,0,57,0,0,42,66.0,48,22,18,5,22,6,54,4,1,2,269,168,3.67,1.061,6.5,0.7,3.0,7.4,2.45,AS,estevca01,608032.0,2023.0,30.0,62.0,60.26,253.4,226.8,52.6,35.4,9.6,1.2,6.4,59.2,21.8,23.34,8.5,0.2298,0.3674,0.301,0.6684,0.1376,0.2822,22.4,26.6,22.4,5.0,181.2,4.0,4.8,3.4,0.2,0.0,3.33,,,1.4,0.0,13.8,0.2332,0.4046,0.289,0.306,0.3054,0.1714,71.6,22.8,11.6333,9.06667,29.2667,22.7,7.1,4.66667,7.63333,2.96667,50.4333,30.5,89.98,18.76,35.84,8.48,40.64,79.889,94.4134,23.78,50.72


Successfully exported to 'kc_royals_combined_analysis.csv'
