In [6]:
import os
import pandas as pd
from fpdf import FPDF

# map each team code to its game‐folder
team_folder_paths = {
    'CHI_DOG': '/Users/ajfoeckler/Downloads/chicago_games',
    'MIL_MIL1': '/Users/ajfoeckler/Downloads/milwaukee_games',
    'KAN_COU': '/Users/ajfoeckler/Downloads/kane_hounds_games',
    'SIO_CIT1': '/Users/ajfoeckler/Downloads/sioux_city_games',
    'FAR_RED': '/Users/ajfoeckler/Downloads/fargo_games',
    'SIO_FAL':'/Users/ajfoeckler/Downloads/SF_games',
    'KAN_CIT3':'/Users/ajfoeckler/Downloads/KC_games',
    'LIN_SAL':'/Users/ajfoeckler/Downloads/lincoln_games',
    'WIN_GOL':'/Users/ajfoeckler/Downloads/winnipeg_games',
    'LAK_COU10':'/Users/ajfoeckler/Downloads/dockhounds_games',
    'GAR_SOU':'/Users/ajfoeckler/Downloads/gary_games',
    'SIO_CIT1':'/Users/ajfoeckler/Downloads/sioux_city_games',
    'CLE_RAI':'/Users/ajfoeckler/Downloads/cleburne_dockhounds_games',
    
    # add other teams here as needed…
}

# Define the folder paths
selected_team = 'CHI_DOG'
folder_path = team_folder_paths[selected_team]
save_folder_path = '/Users/ajfoeckler/Downloads/chicago_hitting'

# Create the save directory if it doesn't exist
os.makedirs(save_folder_path, exist_ok=True)

team_pitchers = pd.read_csv('team_pitchers_tagged.csv')

# === Manual batter override setup ===
manual_batter_original_team = {
    'Hernandez, Ryan': 'LAK_COU10'
}
manual_batter_overrides = {
    'Gray, Joe': 'MIL_MIL1'
}


teams_to_load = {selected_team} | set(manual_batter_original_team.values())
folders_to_load = [team_folder_paths[team] for team in teams_to_load]

# List all CSV files in the folder
csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]

# === CONFIGURATION ===
pitcher_side = 'Right'    # or 'Left'
pitcher_abbrev = 'RHP' if pitcher_side == 'Right' else 'LHP'

# Load all CSV files and concatenate them into a single DataFrame
data_frames = []
for folder in folders_to_load:
    for fn in os.listdir(folder):
        if not fn.endswith('.csv'): 
            continue
        df = pd.read_csv(os.path.join(folder, fn))
        df = df[df['PitcherThrows'] == pitcher_side]
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.dropna(subset=['Date'])
        df = df[df['Date'].dt.year == 2025]  # ✅ filter for 2025 only
        data_frames.append(df)

original_data = pd.concat(data_frames, ignore_index=True)

# Ensure 'Date' column is in datetime format
original_data = pd.concat(data_frames, ignore_index=True)
original_data['Date'] = pd.to_datetime(original_data['Date'], errors='coerce')
original_data = original_data.dropna(subset=['Date'])
original_data = original_data[original_data['Date'].dt.year == 2025]

# Normalize batter names
original_data['Batter'] = original_data['Batter'].replace({
    'Pruitt Jr, Reggie': 'Pruitt Jr, Reggie',
    'Law, Trey': 'Law, Trey',
    'DeVine, Drew': 'Devine, Drew',
}).str.strip()

# Create a dictionary mapping each batter to their team
batter_team_mapping = (
    original_data
    [['Batter','BatterTeam']]
    .drop_duplicates()
    .set_index('Batter')['BatterTeam']
    .to_dict()
)

# Prompt user to choose team for final load
unique_teams = original_data['BatterTeam'].unique()
print("Available teams:", unique_teams)
selected_team = input("Enter team code to generate report for: ").strip()

# === now load both the selected_team and any manual-original_team folders ===
manual_batter_original_team = {
    'Hernandez, Ryan': 'LAK_COU10'
}
teams_to_load = {selected_team} | set(manual_batter_original_team.values())

data_frames = []
for folder in folders_to_load:
    for fn in os.listdir(folder):
        if not fn.endswith('.csv'): 
            continue
        df = pd.read_csv(os.path.join(folder, fn))
        df = df[df['PitcherThrows'] == pitcher_side]
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.dropna(subset=['Date'])
        df = df[df['Date'].dt.year == 2025]  # ✅ filter for 2025 only
        data_frames.append(df)

original_data = pd.concat(data_frames, ignore_index=True)


# Combine full data now
full_data = pd.concat(data_frames, ignore_index=True)
full_data['Date'] = pd.to_datetime(full_data['Date'], errors='coerce')
full_data = full_data.dropna(subset=['Date'])
full_data['Batter'] = full_data['Batter'].str.strip()

# === Rebuild mapping from full_data and apply override ===
batter_team_mapping = (
    full_data[['Batter','BatterTeam']]
    .drop_duplicates()
    .set_index('Batter')['BatterTeam']
    .to_dict()
)
manual_batter_overrides = {
    'Gray, Joe': 'MIL_MIL1',
    
}
batter_team_mapping.update(manual_batter_overrides)
# ——————————————————————————————————————————————


# figure out which folders to pull
teams_to_load = { selected_team } | set(manual_batter_original_team.values())
folders_to_load = [ team_folder_paths[t] for t in teams_to_load ]

# now read _all_ of those folders’ CSVs
data_frames = []
for folder in folders_to_load:
    for fn in os.listdir(folder):
        if not fn.endswith('.csv'): 
            continue
        df = pd.read_csv(os.path.join(folder, fn))
        df = df[df['PitcherThrows'] == pitcher_side]
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.dropna(subset=['Date'])
        df = df[df['Date'].dt.year == 2025]  # ✅ filter for 2025 only
        data_frames.append(df)

original_data = pd.concat(data_frames, ignore_index=True)

# Ensure 'Date' column is in datetime format
original_data['Date'] = pd.to_datetime(original_data['Date'], errors='coerce')
original_data = original_data.dropna(subset=['Date'])

# Define the strike zone dimensions
strike_zone_top = 3.673333
strike_zone_bottom = 1.524166667
strike_zone_left = -0.83083333
strike_zone_right = 0.83083333

# Function to process the data and calculate required metrics
def process_data(data):
    data['AutoPitchType'] = data['AutoPitchType'].replace({
        'ChangeUp': 'Changeup', 'Splitter': 'Changeup'
    })

    data = data[data['TaggedHitType'] != 'undefined']
    data['Swing'] = data['PitchCall'].apply(lambda x: 1 if x in ['StrikeSwinging', 'FoulBallNotFieldable', 'InPlay'] else 0)
    data['Whiff'] = data['PitchCall'].apply(lambda x: 1 if x == 'StrikeSwinging' else 0)
    
    data['FlyBall'] = data['TaggedHitType'].apply(lambda x: 1 if x == 'FlyBall' else 0)
    data['GroundBall'] = data['TaggedHitType'].apply(lambda x: 1 if x == 'GroundBall' else 0)
    data['LineDrive'] = data['TaggedHitType'].apply(lambda x: 1 if x == 'LineDrive' else 0)
    data['Popup'] = data['TaggedHitType'].apply(lambda x: 1 if x == 'Popup' else 0)
    
    data['Barrel'] = ((data['ExitSpeed'] >= 95) & 
                      (data['Angle'] >= 5) & 
                      (data['Angle'] <= 37)).astype(int)
    
    data['InStrikeZone'] = ((data['PlateLocHeight'] <= strike_zone_top) &
                            (data['PlateLocHeight'] >= strike_zone_bottom) &
                            (data['PlateLocSide'] >= strike_zone_left) &
                            (data['PlateLocSide'] <= strike_zone_right)).astype(int)
    
    data['Chase'] = (data['Swing'] & (data['InStrikeZone'] == 0)).astype(int)
    data['BattedBallInPlay'] = data[['FlyBall', 'GroundBall', 'Popup', 'LineDrive']].sum(axis=1)
    
    grouping_columns = ['Batter', 'AutoPitchType']
    
    metrics = data.groupby(grouping_columns).agg({
        'FlyBall': 'sum',
        'GroundBall': 'sum',
        'Popup': 'sum',
        'LineDrive': 'sum',
        'Barrel': 'sum',
        'Swing': 'sum',
        'Whiff': 'sum',
        'Chase': 'sum',
        'InStrikeZone': 'sum',
        'BattedBallInPlay': 'sum',
        'PitchNo': 'count'
    }).reset_index()
    
    z_swing = data[data['InStrikeZone'] == 1].groupby(grouping_columns)['Swing'].sum().reset_index(name='Z_Swing')
    z_total = data[data['InStrikeZone'] == 1].groupby(grouping_columns)['PitchNo'].count().reset_index(name='Z_Total')
    o_swing = data[data['InStrikeZone'] == 0].groupby(grouping_columns)['Swing'].sum().reset_index(name='O_Swing')
    o_total = data[data['InStrikeZone'] == 0].groupby(grouping_columns)['PitchNo'].count().reset_index(name='O_Total')
    
    metrics = metrics.merge(z_swing, on=grouping_columns, how='left')
    metrics = metrics.merge(z_total, on=grouping_columns, how='left')
    metrics = metrics.merge(o_swing, on=grouping_columns, how='left')
    metrics = metrics.merge(o_total, on=grouping_columns, how='left')
    
    metrics['fly_ball_pct'] = (metrics['FlyBall'] / metrics['BattedBallInPlay'] * 100).round(2)
    metrics['ground_ball_pct'] = (metrics['GroundBall'] / metrics['BattedBallInPlay'] * 100).round(2)
    metrics['line_drive_pct'] = (metrics['LineDrive'] / metrics['BattedBallInPlay'] * 100).round(2)
    metrics['barrel_pct'] = (metrics['Barrel'] / metrics['BattedBallInPlay'] * 100).round(2)
    metrics['swing_pct'] = (metrics['Swing'] / metrics['PitchNo'] * 100).round(2)
    metrics['whiff_pct'] = (metrics['Whiff'] / metrics['Swing'] * 100).round(2)
    metrics['z_swing_pct'] = (metrics['Z_Swing'] / metrics['Z_Total'] * 100).round(2)
    metrics['o_swing_pct'] = (metrics['O_Swing'] / metrics['O_Total'] * 100).round(2)
    
    metrics['total_batted_balls'] = metrics['FlyBall'] + metrics['GroundBall'] + metrics['LineDrive'] + metrics['Popup']
    
    percentage_columns = [
        'Batter', 'AutoPitchType',
        'PitchNo',
        'total_batted_balls',
        'fly_ball_pct', 'ground_ball_pct', 'line_drive_pct',
        'barrel_pct', 'swing_pct', 'whiff_pct',
        'z_swing_pct', 'o_swing_pct'
    ]
    metrics = metrics[percentage_columns]
    
    return metrics

class PDF(FPDF):
    def __init__(self, team, vs_hand, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.team = team
        self.vs_hand = vs_hand

    def header(self):
        self.set_font('Arial', 'B', 12)
        # now shows, e.g., "Team: KAN_COU vs. RHP"
        self.cell(0, 10, f'Team: {self.team} vs. {self.vs_hand}', 0, 1, 'C')
    
    def chapter_title(self, title):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(10)

    def table(self, dataframe, title):
        self.set_font('Arial', 'B', 8)
        self.cell(0, 10, title, 0, 1, 'C')
        self.ln(2)
        
        page_width = self.w - 2 * self.l_margin
        col_widths = [page_width / len(dataframe.columns)] * len(dataframe.columns)
        row_height = self.font_size * 1.2
        
        for col_name, col_width in zip(dataframe.columns, col_widths):
            self.cell(col_width, row_height, str(col_name), border=1)
        self.ln(row_height)
        
        self.set_font('Arial', '', 8)
        for row in dataframe.itertuples(index=False):
            for item, col_width in zip(row, col_widths):
                self.multi_cell(col_width, row_height, str(item), border=1, ln=3, max_line_height=self.font_size)
            self.ln(row_height)

def generate_pdf_report(dataframe, team, output_path, batter_team_mapping, vs_hand):
    pdf = PDF(team, vs_hand)
    batters_in_team = [batter for batter, batter_team in batter_team_mapping.items() if batter_team == team]
    team_data = dataframe[dataframe['Batter'].isin(batters_in_team)]

    excluded_batters = ["Cespedes, Yoelqui", "Bell, Brantley", "Soto, Jonathan", "Goosenberg, Shawn", "Martin, Robby", "Scolan, Matthew", "Moris, Max", "Adolfo, Micker", "Amaya, Carlos", "Chalus Jr., Eric", "Diaz, Juan", "Marte, Jefry", "Mezquita, Jhordany", "Ostberg, Erik", "Santiago, Glenn", "Penzetta, Nick", "Bonifacio, Jorge", "Jackson, Darryl", "Parker, Cade", "Voss, Joe", "Allgeyer, Jacob", "Machado, Carlos", "Noriega, Andres", "Broadway, Taylor", "Henley, Blair", "Howell, Korry", "McDowell, Theo", "Scott, Kristian", "Schmack, Kyle", "Contreras, Jose", "Williams, Donivan", "Cribbs, Galli", "Dunhurst, Hayden", "Phillips, Brett", "Rijo, Nilo", "Upshaw, Caleb", "Valdez, C.J.", "Jones, Thomas", "McDaniel, Chase", "Sermo, Jose", "Novak, Nick", "Pettigrew, Zion", "Ward, Je'Von", "Sparks, Lamar", "Giambrone, Trent", "Denning, Connor", "Aldrete, Carter", "Alexander, Hill", "Berglund, Michael", "Bigford, Trey", "Boswell, Bret", "Brocato, Anthony", "Brothers, Blaze", "Cantleberry, Jake", "Dillard, Thomas", "Duzenack, Camden", "Groshans, Jaxx", "Holland, Korey", "Mechals, Kade", "O'Grady, Brian", "Palmeiro, Preston", "Velez, Antonio", "Washington, Mark", "Davis, Jaylin", "Givin, Matt", "Huckstorf, Kyle", "Layer, Abdiel", "DeVine, Drew", "Dragum, Jack", "Henson, Spencer", "Stroh, Parker", "Merkel, Tyler", "Alexander, Evan", "Davis, Colten", "Dykhoff, Jake", "Green, Thomas",  "vooletich, zac", "Barfield, Jacob", "Busch, Will", "Corona, Emilio", "Gomez, Dario", "Green, Jake", "Jackson, Jaren", "Lingua, Daniel", "Martinez, Osvaldo", "Meyer, Jake", "Montano, Daniel", "Nogowski, John", "Perez, Daniel",  "Womack, Alsander", "vooletich, zach" "Gonzalez, Marcos", "Arroyo, Edwin", "Croes, Dayson", "Emery, Robert", "Cajigas, Julio", "McCurdy, Carson", "Sierra, Miguelangel", "Enriquez, Roby", "McMurray, Jake", "Schwabe, Cadyn", "Simington, Miles", "Reeves, Dalton", "Maiben, Derek", "Dorighi, Brennen", "Childs, Dwight", "Drury, Austin", "Perez, Henderson", "Ramon, Amos", "Wetherbee, Jared", "Zinn, Delvin", "Bockelie, Jacob", "Decker, Will", "Fields, Brandon", "Byrd, Justin", "Farmer, Justin", "Gulino, Michael", "Hurd, Aaron", "Perez, Yanio", "Perez, Mikey", "Smith, Chad", "Calarco, Anthony", "Campagna, Joe", "Grant-Parks, Blake", "Maberry, David", "Mcarthur, General", "Parks, Pavin", "Aguilar, Bryan", "Amaral, Danny", "Armaral, Danny", "Milam, Kevin", "Ortiz, Channey", "Hjelle, Jake", "Phillips, Dakota", "Siket, Jordan", "Christopher, Shamoy", "Eickhoff, Logan", "Foster, Kendall", "Meiners, Tate", "Seay, Mason", "Brusa, Gio", "Hall, Adam", "Holgate, Ryan", "Turner, Braxton", "Escala, Willie", "Rodriguez, Brett", "Smith, Armani", "Cushing, Jared", "Del Valle, Francisco", "Panzetta, Nick", "Barranca, Antonio", "Pike, Chad", "Marrero, Wendell", "Costes, Marty", "Crook, Narciso", "Harris, Chase", "Abbatine, Anthony", "Diaz, Gio", "Levari, Kenneth", "Quintana, Guillermo", "Rincon, Carlos", "Valera, Jackson", "Law, Trey", "Taylor, John", "Cannon, Cameron", "Davis, Jonah", "Randolph, Cornelius", "Reid, Simon", "Smith, Harrison", "Reyes, Bryan", "Doersching, Griffin", "Morales, Roy", "Alcantara, Ismael", "Dexter, Sam", "Valdez, CJ", "Livorsi, Ben", "Meza, Eric", "Quiggle, Kona", "Ward, Drew", "Mount, Drew", "Sarringar, Spencer", "Spence, Liam", "Ulrich, Wyatt",  "Cedrola, Lorenzo", "Gomez, Moises", "O'Conner, Justin", "Avelino, Abiatal", "Cannon, Cam", "Maxwell, Carson", "Waite, Jonathan", "Galan, Yosy", "Amaral, Daniel", "Bradley, Tucker", "Fry, Jared", "Anderson, Nick", "Awtry, Marshall", "Awtry , Marshall", "Baeza, Alex", "Conners, Dakota", "Epp, Alex", "Hewitt, Max", "Mattis, Gary", "Ortega, Jake", "Pita, Matt", "Roskam, Luke", "Takacs, Aaron", "Williams, Logan", "Zurbrugg, Zane"]
    team_data = pd.concat([team_data, dataframe[dataframe['Batter'].isin(["Clark, Tripp", "Williams, Jaylyn"])]])
    team_data = team_data[~team_data['Batter'].isin(excluded_batters)]

    rename_columns = {
        'fly_ball_pct': 'FlyB%',
        'ground_ball_pct': 'GB%',
        'line_drive_pct': 'LD%',
        'barrel_pct': 'Barrel%',
        'swing_pct': 'Swing%',
        'whiff_pct': 'Whiff%',
        'z_swing_pct': 'Z_Swing%',
        'o_swing_pct': 'Chase%',
        'PitchNo': '# of Pitches',
        'total_batted_balls': 'Total Batted Balls'
    }
    
    team_data = team_data.rename(columns=rename_columns)
    team_data = team_data.merge(original_data[['Batter', 'BatterSide']].drop_duplicates(), on='Batter', how='left')
    team_data = team_data.drop_duplicates(subset=['Batter', 'AutoPitchType'])

    pitch_order = ['Four-Seam', 'Sinker', 'Curveball', 'Slider', 'Changeup', 'Cutter']

    batters = team_data['Batter'].unique()
    for batter in batters:
        batter_data = team_data[team_data['Batter'] == batter]
        batter_side = batter_data['BatterSide'].iloc[0]
        pdf.add_page()
        pdf.chapter_title(f'Batter: {batter} (Side: {batter_side})')

        batter_data['PitchOrder'] = pd.Categorical(batter_data['AutoPitchType'], categories=pitch_order, ordered=True)
        pitch_data_sorted = batter_data.sort_values(by='PitchOrder')

        for pitch_type in pitch_order:
            pitch_data = pitch_data_sorted[pitch_data_sorted['AutoPitchType'] == pitch_type]
            if not pitch_data.empty:
                swing_tendencies = pitch_data[['# of Pitches', 'Swing%', 'Z_Swing%', 'Whiff%', 'Chase%']]
                pdf.table(swing_tendencies, title=f'Swing Tendencies for {pitch_type}')
                
                batted_balls_profile = pitch_data[['Total Batted Balls', 'FlyB%', 'GB%', 'LD%', 'Barrel%']]
                pdf.table(batted_balls_profile, title=f'Batted Balls Profile for {pitch_type}')
                
                pdf.ln(1.5)

    pdf.output(output_path)

metrics_df = process_data(original_data)
output_pdf = os.path.join(save_folder_path,
                          f'{selected_team}_hitting!_{pitcher_abbrev}.pdf')
generate_pdf_report(metrics_df,
                    selected_team,
                    output_pdf,
                    batter_team_mapping,
                    pitcher_abbrev)
# Create a unique PA identifier assuming one game per file
original_data['PA_ID'] = (
    original_data['Inning'].astype(str) + '_' +
    original_data['PAofInning'].astype(str) + '_' +
    original_data['Batter']
)

# Filter for team batters only
team_batters = [b for b, t in batter_team_mapping.items() if t == selected_team]
team_data = original_data[original_data['Batter'].isin(team_batters)]

# Calculate total pitches and unique PAs
pitches_per_pa_df = (
    team_data
    .groupby('Batter')
    .agg(
        Total_Pitches=('PA_ID', 'count'),
        Total_PAs=('PA_ID', 'nunique')
    )
    .assign(Pitches_per_PA=lambda df: (df['Total_Pitches'] / df['Total_PAs']).round(2))
    .sort_values('Pitches_per_PA', ascending=False)
)

print("\n=== Pitches per Plate Appearance (Season Total) ===")
print(pitches_per_pa_df[['Pitches_per_PA']])


print(f"PDF report created: {output_pdf}")

Available teams: ['LAK_COU10' 'CHI_DOG' 'FAR_RED' 'KAN_COU' 'GAR_SOU' 'KAN_CIT3' 'MIL_MIL1'
 'SIO_FAL' 'CLE_RAI' 'LIN_SAL' 'WIN_GOL' 'SIO_CIT1' 'LAK_COU11'
 'LAK_COU13']
Enter team code to generate report for: CHI_DOG

=== Pitches per Plate Appearance (Season Total) ===
                   Pitches_per_PA
Batter                           
Teter, Jacob                26.65
Stroup, Dusty               24.98
Hopkins, TJ                 24.04
Pruitt, Reggie              23.76
Sisco, Chance               22.52
Kusiak, Henry               20.16
Bell, Brantley              17.73
Turbo, Johnni                9.98
Rodriguez, Howard            9.87
Moris, Max                   9.72
Maiben, Jacob                9.52
Schmack, Kyle                8.88
Soto, Jonathan               6.00
Pettigrew, Zion              5.52
Novak, Nick                  5.29
Reyes, Bryan                 5.25
Penzetta, Nick               4.33
Scolan, Matthew              4.17
PDF report created: /Users/ajfoeckler/Downloads/c

  self.set_font('Arial', 'B', 12)
  self.cell(0, 10, f'Team: {self.team} vs. {self.vs_hand}', 0, 1, 'C')
  self.set_font('Arial', 'B', 12)
  self.cell(0, 10, title, 0, 1, 'L')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  batter_data['PitchOrder'] = pd.Categorical(batter_data['AutoPitchType'], categories=pitch_order, ordered=True)
  self.set_font('Arial', 'B', 8)
  self.cell(0, 10, title, 0, 1, 'C')
  self.set_font('Arial', '', 8)
  self.multi_cell(col_width, row_height, str(item), border=1, ln=3, max_line_height=self.font_size)
  self.set_font('Arial', 'B', 8)
  self.cell(0, 10, title, 0, 1, 'C')
  self.set_font('Arial', '', 8)
  self.multi_cell(col_width, row_height, str(item), border=1, ln=3, max_line_height=self.font_size)
  self.set_font('Arial', 'B', 8)
  self.cell