In [5]:
import pandas as pd
from datascience import *


In [None]:
import pandas as pd
import numpy as np
from datascience import Table

# ============================================================================
# FINAL FIXED KC ROYALS ANALYSIS
# ============================================================================

# Step 1: Load Data
print("Step 1: Loading datasets...")
# Using try-except or standard load
df_br = pd.read_excel('BR Baseball Data 2021-2025.xlsb', sheet_name=5, engine='pyxlsb')
df_savant = pd.read_csv('Savant Batter 2021-2025.csv')

# Step 2: Filter for KC Royals
print("Step 2: Filtering for KC Royals...")
kc_royals_df = df_br[df_br['Team'] == 'KCR'].copy()

# Step 3: Create a Clean Join Key
# This handles "Wacha, Michael" -> "michael wacha" and "Michael Wacha" -> "michael wacha"
print("Step 3: Standardizing names...")

def create_join_key(name):
    if pd.isna(name): 
        return ""
    name = str(name).replace('"', '').lower().strip()
    if ',' in name:
        parts = [p.strip() for p in name.split(',')]
        return f"{parts[1]} {parts[0]}"
    return name

kc_royals_df['Join_Key'] = kc_royals_df['Player'].apply(create_join_key)
df_savant['Join_Key'] = df_savant['last_name, first_name'].apply(create_join_key)

# Step 4: DEDUPLICATE SAVANT DATA
# If Wacha appears 5 times in Savant, this averages his stats into 1 row
print("Step 4: Aggregating Savant stats to prevent duplicate rows...")
df_savant_unique = df_savant.groupby('Join_Key').mean(numeric_only=True).reset_index()

# Step 5: Join
print("Step 5: Joining tables...")
combined_df = kc_royals_df.merge(
    df_savant_unique, 
    on='Join_Key', 
    how='left', 
    suffixes=('', '_savant_duplicate')
)

# Step 6: Final Cleanup
# Drop the helper key and any redundant columns created by the join
print("Step 6: Cleaning up columns and removing duplicate records...")
combined_df = combined_df.drop(columns=['Join_Key'])
# Remove any columns that ended up with the duplicate suffix
cols_to_keep = [c for c in combined_df.columns if not c.endswith('_savant_duplicate')]
combined_df = combined_df[cols_to_keep]

# Remove any truly duplicate rows
combined_df = combined_df.drop_duplicates()

# Step 7: Convert to datascience Table
print("Step 7: Converting to final Table object...")
kc_royals_table = Table.from_df(combined_df)

# Display Results
print("\n" + "="*80)
print(f"ANALYSIS COMPLETE: {kc_royals_table.num_rows} total rows.")
print("="*80)
kc_royals_table.show(10)

# Export the final Table to a CSV file
kc_royals_table.to_csv('kc_royals_batters_analysis.csv')
print("Successfully exported to 'kc_royals_batters_analysis.csv'")

Step 1: Loading datasets...
Step 2: Filtering for KC Royals...
Step 3: Standardizing names...
Step 4: Aggregating Savant stats to prevent duplicate rows...
Step 5: Joining tables...
Step 6: Cleaning up columns and removing duplicate records...
Step 7: Converting to final Table object...

ANALYSIS COMPLETE: 32 total rows.


Player,Age,Team,Lg,WAR,G,PA,AB,R,H,2B,3B,HR,RBI,SB,CS,BB,SO,BA,OBP,SLG,OPS,OPS+,rOBA,Rbat+,TB,GIDP,HBP,SH,SF,IBB,Pos,Awards,Player-additional,Player-additional.1,player_id,year,player_age,ab,pa,hit,single,double,triple,home_run,strikeout,walk,k_percent,bb_percent,batting_avg,slg_percent,on_base_percent,on_base_plus_slg,isolated_power,babip,b_rbi,b_lob,b_total_bases,r_total_caught_stealing,r_total_stolen_base,xba,xslg,woba,xwoba,xobp,xiso,avg_swing_speed,fast_swing_rate,blasts_contact,blasts_swing,squared_up_contact,squared_up_swing,avg_swing_length,swords,attack_angle,attack_direction,ideal_angle_rate,vertical_swing_path,exit_velocity_avg,launch_angle_avg,sweet_spot_percent,barrel_batted_rate
Bobby Witt Jr.,25,KCR,AL,7.1,157,687,623,99,184,47,6,23,88,38,9,49,125,0.295,0.351,0.501,0.852,136,0.37,136,312,7,8,0,7,8,*6/D,ASMVP-4GGSS,wittbo02,wittbo02,677951.0,2023.5,23.5,622.75,680.5,180.5,108.0,37.75,8.5,26.25,121.75,44.0,18.0,6.4,0.28925,0.503,0.33825,0.84125,0.21375,0.3195,93.25,221.0,314.0,10.75,37.0,0.28375,0.51725,0.356,0.3645,0.3365,0.2335,74.7,49.1,19.7667,15.7,34.1667,27.1333,7.03333,15.6667,5.63333,-0.966667,48.4333,29.7333,91.6,15.7,34.5,11.75
Vinnie Pasquantino*,27,KCR,AL,2.4,160,682,621,72,164,33,1,32,113,1,0,49,107,0.264,0.323,0.475,0.798,120,0.343,121,295,14,7,0,5,5,*3D,,pasquvi01,pasquvi01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Maikel Garcia,25,KCR,AL,5.8,160,666,595,81,170,39,5,16,74,23,9,62,84,0.286,0.351,0.449,0.8,123,0.348,123,267,14,1,3,5,0,*564/8DH9,ASMVP-14GG,garcima01,garcima01,672580.0,2023.5,23.5,414.0,457.5,109.0,77.0,21.75,3.5,6.75,76.75,35.75,18.275,6.925,0.27675,0.37575,0.32575,0.7015,0.099,0.333,46.0,156.25,158.0,4.5,20.75,0.257,0.36525,0.30775,0.2985,0.309,0.10825,70.8333,11.0,18.7333,16.0333,42.4667,36.3667,7.2,9.66667,5.46667,4.46667,49.5,28.0,89.85,6.375,33.725,3.3
Salvador Perez,35,KCR,AL,0.4,155,641,597,54,141,35,0,30,100,0,0,28,125,0.236,0.284,0.446,0.729,100,0.311,97,266,14,12,0,1,5,2D3,,perezsa02,perezsa02,521692.0,2023.0,33.0,558.0,602.2,144.0,87.4,26.2,0.2,30.2,133.6,27.4,22.24,4.48,0.2578,0.4666,0.3028,0.7694,0.2088,0.286,96.2,230.8,261.2,0.0,0.2,0.262,0.5144,0.3262,0.3474,0.3082,0.2526,72.9333,31.0,17.0667,12.7,32.3,24.0,7.9,14.3333,13.0333,-5.63333,55.2333,34.5333,91.12,17.28,36.4,12.66
Jonathan India,28,KCR,AL,0.4,136,567,497,63,116,29,0,9,45,0,4,54,106,0.233,0.323,0.346,0.669,89,0.299,91,172,9,13,1,2,0,4D75/H,,indiajo01,indiajo01,663697.0,2023.0,26.0,480.4,559.0,119.6,78.0,26.0,1.2,14.4,115.0,57.6,20.6,10.08,0.2486,0.3964,0.3442,0.7406,0.1478,0.2966,54.8,156.4,191.2,3.0,8.4,0.2476,0.4058,0.3276,0.333,0.3454,0.158,71.6667,23.5667,12.7667,10.3333,32.1333,26.0,7.26667,20.0,9.16667,-3.6,53.8333,29.1667,87.46,14.68,35.82,7.12
Mike Yastrzemski*,34,KCR,AL,1.0,50,186,156,30,37,14,0,9,18,1,0,25,22,0.237,0.339,0.5,0.839,131,0.354,128,78,3,1,0,4,0,98/7HD,,yastrmi01,yastrmi01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Adam Frazier*,33,KCR,AL,1.1,56,197,184,21,52,10,0,4,23,1,1,9,36,0.283,0.32,0.402,0.722,101,0.314,101,74,0,2,0,2,0,47/DH59,,fraziad01,fraziad01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Kyle Isbel*,28,KCR,AL,1.6,135,409,368,42,94,16,4,4,33,4,5,23,74,0.255,0.301,0.353,0.654,83,0.284,80,130,7,2,13,3,0,*8/H,,isbelky01,isbelky01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Freddy Fermin,30,KCR,AL,1.1,67,208,192,17,49,7,0,3,12,1,2,13,37,0.255,0.309,0.339,0.648,82,0.284,80,65,2,2,1,0,1,2/HD4,,fermifr01,fermifr01,666023.0,2024.0,29.0,291.667,316.667,77.6667,58.3333,12.3333,0.333333,6.66667,60.3333,18.3333,19.3,5.76667,0.267667,0.388667,0.312333,0.701,0.121,0.314333,31.3333,123.667,110.667,0.666667,1.0,0.248,0.376,0.304333,0.294333,0.295,0.128,69.7333,7.26667,11.9333,9.7,34.3333,27.8667,7.36667,16.6667,10.1333,-1.43333,51.0667,36.0,89.0667,12.3333,31.0,5.76667
Randal Grichuk,33,KCR,AL,-0.4,43,105,97,10,20,3,0,2,5,0,0,7,22,0.206,0.267,0.299,0.566,59,0.251,53,29,1,1,0,0,0,9H7/D8,,grichra01,grichra01,545341.0,2023.0,31.0,395.4,425.2,101.2,60.8,23.0,1.8,15.6,88.8,23.4,20.44,5.74,0.2572,0.4472,0.3044,0.7516,0.19,0.2912,54.2,175.8,174.6,1.2,1.2,0.2506,0.4374,0.322,0.3204,0.3006,0.1864,74.0333,42.5333,19.6333,15.5,34.0,26.9,7.66667,9.33333,9.23333,-6.16667,62.1667,28.2,90.48,14.74,32.12,9.02


Successfully exported to 'kc_royals_combined_analysis.csv'
