# This file fetches data and creates each players individual stats dataset
###     batting & pitching

* stored in 'batters/' and 'pitchers/' 
* filenames = player ID 

In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pprint
import re
from dateutil import parser
import time
import os

### 'Fetch b Game Log' Function
* This function reads the html content for any batting ID for any year using the baseball-reference site

In [15]:
def fetch_b_game_log(player_id, year):
    # Construct the URL for the batter's game log for the given year
    url = f'https://www.baseball-reference.com/players/gl.fcgi?id={player_id}&t=b&year={year}'
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code != 200:
        print(f" BAD - Failed to fetch data for batter {player_id} in {year}")
        return None
    
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    # Find the table containing the game logs
    table = soup.find('table', {'id': 'players_standard_batting'})
    
    # Check if the table is found
    if table is None:
        print(f"No data found for batter {player_id} in {year} - OK")
        return None
    
    # Read the table into a pandas DataFrame
    df = pd.read_html(str(table))[0]
    
    # Remove rows where 'Rk' is not a number (header rows that repeat in the table)
    df = df[pd.to_numeric(df['Rk'], errors='coerce').notnull()]
    
    # Add the year to the 'Date' column if the year is not already present
    df['Date'] = df['Date'].apply(lambda x: f"{x}, {year}" if '(' not in x else x)
    
    # Extract the value from parentheses (if present) and assign it to a new column 'dbl'
    df['dbl'] = df['Date'].str.extract(r'\((\d+)\)').astype(float)
    
    # Add the year to the 'Date' column for doubleheader dates
    df.loc[df['dbl'].notnull(), 'Date'] = df['Date'] + ', ' + str(year)
    
    # Format 'Date' to 'game_date' in YYYY-MM-DD format
    df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
    
    return df


### 'Fetch p Game Log' Function
* This function reads the html content for any pitching ID for any year using the baseball-reference site

In [16]:
def fetch_p_game_log(player_id, year):
    # Construct the URL for the pitcher's game log for the given year
    url = f'https://www.baseball-reference.com/players/gl.fcgi?id={player_id}&t=p&year={year}'
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code != 200:
        print(f"\n\n\n\n\n\n\n BAD - Failed to fetch data for pitcher {player_id} in {year}\n\n\n\n\n\n\n\n\n\n\n")
        return None
    
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')
    # Find the table containing the game logs
    table = soup.find('table', {'id': 'pitching_gamelogs'})
    
    # Check if the table is found
    if table is None:
        print(f"No data found for pitcher {player_id} in {year} - OK")
        return None
    
    # Read the table into a pandas DataFrame
    df = pd.read_html(str(table))[0]
    
    # Remove rows where 'Rk' is not a number (header rows that repeat in the table)
    df = df[pd.to_numeric(df['Rk'], errors='coerce').notnull()]
    
    # Add the year to the 'Date' column if the year is not already present
    df['Date'] = df['Date'].apply(lambda x: f"{x}, {year}" if '(' not in x else x)
    
    # Extract the value from parentheses (if present) and assign it to a new column 'dbl'
    df['dbl'] = df['Date'].str.extract(r'\((\d+)\)').astype(float)
    
    # Add the year to the 'Date' column for doubleheader dates
    df.loc[df['dbl'].notnull(), 'Date'] = df['Date'] + ', ' + str(year)
    
    # Format 'Date' to 'game_date' in YYYY-MM-DD format
    df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
    
    return df


### 'Clean_date' Function
* This function cleans the dates in the fetched data

In [17]:
# Function to clean and parse dates
def clean_date(date_str, year):
    try:
        # Replace invisible characters like U+00A0 with a space
        date_str = date_str.replace('\xa0', ' ')
        # Remove any null characters and non-printable characters
        date_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', date_str)
        # Remove unwanted characters and extra text like "(1)" or "susp"
        date_str = re.sub(r'\(.*?\)', '', date_str)  # Remove text inside parentheses
        date_str = ''.join(char for char in date_str if char.isalnum() or char.isspace() or char == ',')
        # Remove specific unwanted words like "susp"
        date_str = date_str.replace('susp', '').strip()
        # Parse the cleaned string to a date object
        parsed_date = parser.parse(date_str)
        # Force the year to be 2021
        parsed_date = parsed_date.replace(year=year)
        # Format the date to 'YYYY-MM-DD'
        #print(parsed_date)
        return parsed_date.strftime('%Y-%m-%d')
    except Exception as e:
        # Print the error for debugging purposes
        print(f"Error parsing date '{date_str}': {e}")
        # Handle any parsing errors by returning None
        return None

## Fetch BATTING Data
* and concatenate into one dataset (per player)
* store in 'batters/...'

In [None]:
idlist = pd.read_csv('batter_ids.csv')
batter_ids = idlist.key_bbref

game_pks = pd.read_csv('game_pks.csv')

# Define the mapping from abbreviated team names to full team names
team_id_mapping = {
    'WSN': 120,
    'MIA': 146,
    'TBR': 139,
    'ATL' : 144,
    'TEX' : 140,
    'HOU' : 117,
    'SD' : 135,
    'SDP' : 135,
    'PHI' : 143,
    'BAL' : 110,
    'SEA' : 136,
    'NYM' : 121,
    'ARI' : 109,
    'LAA' : 108,
    'OAK' : 133,
    'TOR' : 141,
    'CLE' : 114,
    'STL' : 138,
    'MIN' : 142,
    'DET' : 116,
    'NYY' : 147,
    'SFG' : 137,
    'KCR' : 118,
    'CWS' : 145,
    'CHW' : 145,
    'COL' : 115,
    'BOS' : 111,
    'LAD' : 119,
    'CHC' : 112,
    'MIL' : 158,
    'CIN' : 113,
    'PIT' : 134
}

# Define the years you want to process
years = [2021, 2022, 2023, 2024, 2025]

# Loop through each batter ID
for id in batter_ids:
    # Initialize an empty dataframe for the player
    player_df = pd.DataFrame()

    # Loop through each year
    for year in years:
        # Fetch data for the player and year
        df = fetch_b_game_log(id, year)

        time.sleep(0.4)

        # Check if the fetched dataframe is None or empty
        if df is None or df.empty:
            continue  # Skip this year if no data available

        # Apply the function to the date_column and create a new column
        df['game_date'] = df['Date'].apply(lambda date : clean_date(date, year))
        df['Date'] = df['game_date']

        # Ensure the 'Date' column in df and 'game_date' column in game_pks are in datetime format
        df['Date'] = pd.to_datetime(df['Date'])
        game_pks['game_date'] = pd.to_datetime(game_pks['game_date'])

        # Map the team abbreviations to full team names
        df['team_id'] = df['Team'].map(team_id_mapping)
        df['opp_id'] = df['Opp'].map(team_id_mapping)

        # Initialize a new column in df for game_id
        df['game_id'] = None

        # Iterate over the rows in df to find the corresponding game_id in game_pks
        for index, row in df.iterrows():

            #print(f"Processing row {index}: Date={row['Date']}, Team={row['team_id']}, Opponent={row['opp_id']}")
            

            # Filter the game_pks for the matching date and teams
            game_day_matches = game_pks[
                (game_pks['game_date'] == row['Date']) &
                (
                    ((game_pks['home_id'] == row['team_id']) & (game_pks['away_id'] == row['opp_id'])) |
                    ((game_pks['home_id'] == row['opp_id']) & (game_pks['away_id'] == row['team_id']))
                )
            ]

            
            #print(f"Matches found: {len(game_day_matches)}")
            

            # Check the 'dbl' column to assign the correct game_id
            if not game_day_matches.empty:

                #print(f"Match details: {game_day_matches}")

                if row['dbl'] == 1:
                    # For the first game of a double-header
                    game_id = game_day_matches.iloc[0]['game_id']
                elif row['dbl'] == 2:
                    # For the second game of a double-header
                    if len(game_day_matches) > 1:
                        game_id = game_day_matches.iloc[1]['game_id']
                    else:
                        game_id = game_day_matches.iloc[0]['game_id']
                else:
                    # For days without double-headers or unmarked double-headers, take the first game
                    game_id = game_day_matches.iloc[0]['game_id']
                df.at[index, 'game_id'] = game_id
                #print(f"Assigned game_id: {game_id}")
            else:
                print("BAD - NO GAME MATCHES FOUND (batter).")

        # Append the data for the year to player_df
        player_df = pd.concat([player_df, df])

    # Save the player's data to a CSV file
    player_df.to_csv(f'batters/{id}_batting.csv', index=False)

No data found for batter soderty01 in 2021 - OK
No data found for batter soderty01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')


No data found for batter soderty01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter sogarer01 in 2022 - OK
No data found for batter sogarer01 in 2023 - OK
No data found for batter sogarer01 in 2024 - OK
No data found for batter sogarer01 in 2025 - OK
No data found for batter sogarni01 in 2021 - OK
No data found for batter sogarni01 in 2022 - OK
No data found for batter sogarni01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter sogarni01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter solakni01 in 2024 - OK
No data found for batter solakni01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter solando01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter solerjo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter sosaed01 in 2025 - OK
No data found for batter sosale01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter sosale01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter sotoju01 in 2025 - OK
No data found for batter sotoli01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter sotoli01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter souzast01 in 2023 - OK
No data found for batter souzast01 in 2024 - OK
No data found for batter souzast01 in 2025 - OK
No data found for batter spangco01 in 2021 - OK


  df = pd.read_html(str(table))[0]


No data found for batter spangco01 in 2023 - OK
No data found for batter spangco01 in 2024 - OK
No data found for batter spangco01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter springe01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stallja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stammcr01 in 2023 - OK
No data found for batter stammcr01 in 2024 - OK
No data found for batter stammcr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stantmi03 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stassma01 in 2023 - OK
No data found for batter stassma01 in 2024 - OK
No data found for batter stassma01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter steelju01 in 2025 - OK
No data found for batter steersp01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter steersp01 in 2025 - OK
No data found for batter stefami01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stefami01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stephty01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter stevean01 in 2022 - OK


  df = pd.read_html(str(table))[0]


No data found for batter stevean01 in 2024 - OK
No data found for batter stevean01 in 2025 - OK
No data found for batter steveca01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter steveca01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stewadj01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter stewako01 in 2022 - OK
No data found for batter stewako01 in 2023 - OK
No data found for batter stewako01 in 2024 - OK
No data found for batter stewako01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter stockro01 in 2022 - OK
No data found for batter stockro01 in 2023 - OK
No data found for batter stockro01 in 2024 - OK
No data found for batter stockro01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter stoketr01 in 2022 - OK
No data found for batter stoketr01 in 2023 - OK
No data found for batter stoketr01 in 2024 - OK
No data found for batter stoketr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter storytr01 in 2025 - OK
No data found for batter stottbr01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stottbr01 in 2025 - OK
No data found for batter stoweky01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stoweky01 in 2025 - OK
No data found for batter gordode01 in 2021 - OK


  df = pd.read_html(str(table))[0]


No data found for batter gordode01 in 2023 - OK
No data found for batter gordode01 in 2024 - OK
No data found for batter gordode01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]


No data found for batter strasst01 in 2023 - OK
No data found for batter strasst01 in 2024 - OK
No data found for batter strasst01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stratch01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter strawmy01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter strichu01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter strichu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stridsp01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stripro01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stromma01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter stubbga01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suareeu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suarejo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suarera01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter suerowa01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suerowa01 in 2025 - OK
No data found for batter sullibr01 in 2021 - OK
No data found for batter sullibr01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter sullibr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suterbr01 in 2025 - OK
No data found for batter suwinja01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suwinja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suzukku01 in 2023 - OK
No data found for batter suzukku01 in 2024 - OK
No data found for batter suzukku01 in 2025 - OK
No data found for batter suzukse01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter suzukse01 in 2025 - OK
No data found for batter swaggtr01 in 2021 - OK


  df = pd.read_html(str(table))[0]


No data found for batter swaggtr01 in 2023 - OK
No data found for batter swaggtr01 in 2024 - OK
No data found for batter swaggtr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter swansda01 in 2025 - OK
No data found for batter sweentr01 in 2021 - OK
No data found for batter sweentr01 in 2022 - OK
No data found for batter sweentr01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter sweentr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter szaputh01 in 2023 - OK
No data found for batter szaputh01 in 2024 - OK
No data found for batter szaputh01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter taillja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tapiara01 in 2024 - OK
No data found for batter tapiara01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter tatisfe02 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tatisfe02 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter tauchmi01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tauchmi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter taverle01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tayloch03 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter taylomi02 in 2025 - OK
No data found for batter taylosa04 in 2021 - OK
No data found for batter taylosa04 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter taylosa04 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tayloty01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter tejedan01 in 2022 - OK
No data found for batter tejedan01 in 2023 - OK
No data found for batter tejedan01 in 2024 - OK
No data found for batter tejedan01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tellero01 in 2025 - OK
No data found for batter tenajo01 in 2021 - OK
No data found for batter tenajo01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tenajo01 in 2025 - OK
No data found for batter teodobr01 in 2021 - OK
No data found for batter teodobr01 in 2022 - OK
No data found for batter teodobr01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter teodobr01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter terrycu01 in 2022 - OK
No data found for batter terrycu01 in 2023 - OK
No data found for batter terrycu01 in 2024 - OK
No data found for batter terrycu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thaisma01 in 2025 - OK
No data found for batter thomaal01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thomaal01 in 2025 - OK
No data found for batter thomaco01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thomaco01 in 2024 - OK
No data found for batter thomaco01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thomadi01 in 2023 - OK
No data found for batter thomadi01 in 2024 - OK
No data found for batter thomadi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thomala02 in 2025 - OK
No data found for batter thompbu01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thompbu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thompke02 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thomptr01 in 2024 - OK
No data found for batter thomptr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter thompza01 in 2023 - OK
No data found for batter thompza01 in 2024 - OK
No data found for batter thompza01 in 2025 - OK
No data found for batter toglimi01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter toglimi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tomka01 in 2023 - OK
No data found for batter tomka01 in 2024 - OK
No data found for batter tomka01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter tomlijo01 in 2022 - OK
No data found for batter tomlijo01 in 2023 - OK
No data found for batter tomlijo01 in 2024 - OK
No data found for batter tomlijo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter topaju01 in 2025 - OK
No data found for batter torkesp01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter torkesp01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter toroab01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter torrelu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter torregl01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter torrero01 in 2022 - OK
No data found for batter torrero01 in 2023 - OK
No data found for batter torrero01 in 2024 - OK
No data found for batter torrero01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter toussto01 in 2025 - OK
No data found for batter tovarez01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tovarez01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter tovarwi01 in 2022 - OK
No data found for batter tovarwi01 in 2023 - OK
No data found for batter tovarwi01 in 2024 - OK
No data found for batter tovarwi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter trammta01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter trejoal01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter trevijo01 in 2025 - OK
No data found for batter triolja01 in 2021 - OK
No data found for batter triolja01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter triolja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter trompch01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter tropeni01 in 2022 - OK
No data found for batter tropeni01 in 2023 - OK
No data found for batter tropeni01 in 2024 - OK
No data found for batter tropeni01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter troutmi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tsutsyo01 in 2023 - OK
No data found for batter tsutsyo01 in 2024 - OK
No data found for batter tsutsyo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tuckeco01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter tuckeky01 in 2025 - OK
No data found for batter turanbr02 in 2021 - OK
No data found for batter turanbr02 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter turanbr02 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter turneju01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter turnetr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter ucetaed01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter underdu01 in 2024 - OK
No data found for batter underdu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter uptonju01 in 2023 - OK
No data found for batter uptonju01 in 2024 - OK
No data found for batter uptonju01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter urenajo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter uriasju01 in 2024 - OK
No data found for batter uriasju01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter uriaslu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter uriasra01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter urquijo01 in 2024 - OK
No data found for batter urquijo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter urshegi01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter valaipa01 in 2022 - OK
No data found for batter valaipa01 in 2023 - OK
No data found for batter valaipa01 in 2024 - OK
No data found for batter valaipa01 in 2025 - OK
No data found for batter valdeen01 in 2021 - OK
No data found for batter valdeen01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter valdeen01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter valdefr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')


No data found for batter valerbr01 in 2022 - OK
No data found for batter valerbr01 in 2023 - OK
No data found for batter valerbr01 in 2024 - OK
No data found for batter valerbr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vanmejo01 in 2023 - OK
No data found for batter vanmejo01 in 2024 - OK
No data found for batter vanmejo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vargail01 in 2025 - OK
No data found for batter vargami01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vargami01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter varshda01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vaughan01 in 2025 - OK
No data found for batter vavrate01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vavrate01 in 2024 - OK
No data found for batter vavrate01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vazquch01 in 2025 - OK
No data found for batter vazqulu01 in 2021 - OK
No data found for batter vazqulu01 in 2022 - OK
No data found for batter vazqulu01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter vazqulu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter velasvi01 in 2024 - OK
No data found for batter velasvi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter velazan01 in 2024 - OK
No data found for batter velazan01 in 2025 - OK
No data found for batter velazne01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter velazne01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter verdual01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vesiaal01 in 2025 - OK
No data found for batter vientma01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vientma01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vierlma01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter viladry01 in 2022 - OK
No data found for batter viladry01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter viladry01 in 2025 - OK
No data found for batter villada01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter villada01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter villajo01 in 2023 - OK
No data found for batter villajo01 in 2024 - OK
No data found for batter villajo01 in 2025 - OK
No data found for batter vilorme01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vilorme01 in 2024 - OK
No data found for batter vilorme01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vogelda01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vogtst01 in 2023 - OK
No data found for batter vogtst01 in 2024 - OK
No data found for batter vogtst01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter voitlu01 in 2024 - OK
No data found for batter voitlu01 in 2025 - OK
No data found for batter volpean01 in 2021 - OK
No data found for batter volpean01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter volpean01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vosleja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vothau01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter vottojo01 in 2024 - OK
No data found for batter vottojo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wachami01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter waddebr01 in 2022 - OK
No data found for batter waddebr01 in 2023 - OK
No data found for batter waddebr01 in 2024 - OK
No data found for batter waddebr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wadela01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wadety01 in 2025 - OK
No data found for batter wagamer01 in 2021 - OK
No data found for batter wagamer01 in 2022 - OK
No data found for batter wagamer01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wagamer01 in 2025 - OK
No data found for batter wagnewi01 in 2021 - OK
No data found for batter wagnewi01 in 2022 - OK
No data found for batter wagnewi01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wagnewi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wainwad01 in 2024 - OK
No data found for batter wainwad01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter walkech02 in 2025 - OK
No data found for batter walkejo02 in 2021 - OK
No data found for batter walkejo02 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter walkejo02 in 2025 - OK
No data found for batter walkest01 in 2021 - OK


  df = pd.read_html(str(table))[0]


No data found for batter walkest01 in 2023 - OK
No data found for batter walkest01 in 2024 - OK
No data found for batter walkest01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter walketa01 in 2025 - OK
No data found for batter wallfo01 in 2021 - OK
No data found for batter wallfo01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wallfo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wallach01 in 2024 - OK
No data found for batter wallach01 in 2025 - OK
No data found for batter wallnma01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wallnma01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wallsta01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter walshja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter waltodo01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter waltodo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wardta01 in 2025 - OK
No data found for batter waterdr01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter waterdr01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter watsoto01 in 2022 - OK
No data found for batter watsoto01 in 2023 - OK
No data found for batter watsoto01 in 2024 - OK
No data found for batter watsoto01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter weathry01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter weavelu01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter webbja01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter webbja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter webblo01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter webbty01 in 2022 - OK
No data found for batter webbty01 in 2023 - OK
No data found for batter webbty01 in 2024 - OK
No data found for batter webbty01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter welkeco01 in 2022 - OK
No data found for batter welkeco01 in 2023 - OK
No data found for batter welkeco01 in 2024 - OK
No data found for batter welkeco01 in 2025 - OK
No data found for batter wellsau01 in 2021 - OK
No data found for batter wellsau01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wellsau01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wendljo01 in 2025 - OK
No data found for batter wendzda01 in 2021 - OK
No data found for batter wendzda01 in 2022 - OK
No data found for batter wendzda01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wendzda01 in 2025 - OK
No data found for batter westbja02 in 2021 - OK
No data found for batter westbja02 in 2022 - OK
No data found for batter westbja02 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter westbja02 in 2025 - OK
No data found for batter westbjo01 in 2021 - OK
No data found for batter westbjo01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter westbjo01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wheelza01 in 2025 - OK
No data found for batter whitcsh01 in 2021 - OK
No data found for batter whitcsh01 in 2022 - OK
No data found for batter whitcsh01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter whitcsh01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter whiteel04 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter whiteev01 in 2022 - OK
No data found for batter whiteev01 in 2023 - OK
No data found for batter whiteev01 in 2024 - OK
No data found for batter whiteev01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter whitemi03 in 2025 - OK
No data found for batter whiteaa01 in 2021 - OK


  df = pd.read_html(str(table))[0]


No data found for batter whiteaa01 in 2023 - OK
No data found for batter whiteaa01 in 2024 - OK
No data found for batter whiteaa01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter whitlga01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter widenta01 in 2023 - OK
No data found for batter widenta01 in 2024 - OK
No data found for batter widenta01 in 2025 - OK
No data found for batter wiemejo01 in 2021 - OK
No data found for batter wiemejo01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wiemejo01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wilkest01 in 2022 - OK
No data found for batter wilkest01 in 2023 - OK
No data found for batter wilkest01 in 2024 - OK
No data found for batter wilkest01 in 2025 - OK
No data found for batter willial04 in 2021 - OK
No data found for batter willial04 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter willial04 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter williju02 in 2022 - OK
No data found for batter williju02 in 2023 - OK
No data found for batter williju02 in 2024 - OK
No data found for batter williju02 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter willilu01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter willima10 in 2022 - OK
No data found for batter willima10 in 2023 - OK
No data found for batter willima10 in 2024 - OK
No data found for batter willima10 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter willini01 in 2022 - OK
No data found for batter willini01 in 2023 - OK
No data found for batter willini01 in 2024 - OK
No data found for batter willini01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter willitr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wilsobr02 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wilsoco01 in 2022 - OK
No data found for batter wilsoco01 in 2023 - OK
No data found for batter wilsoco01 in 2024 - OK
No data found for batter wilsoco01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wilsoja04 in 2022 - OK
No data found for batter wilsoja04 in 2023 - OK
No data found for batter wilsoja04 in 2024 - OK
No data found for batter wilsoja04 in 2025 - OK
No data found for batter wilsoja05 in 2021 - OK
No data found for batter wilsoja05 in 2022 - OK
No data found for batter wilsoja05 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wilsoja05 in 2025 - OK
No data found for batter wilsoma02 in 2021 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wilsoma02 in 2023 - OK
No data found for batter wilsoma02 in 2024 - OK
No data found for batter wilsoma02 in 2025 - OK
No data found for batter wilsowe01 in 2021 - OK
No data found for batter wilsowe01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wilsowe01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter winkeje01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter winklda01 in 2022 - OK
No data found for batter winklda01 in 2023 - OK
No data found for batter winklda01 in 2024 - OK
No data found for batter winklda01 in 2025 - OK
No data found for batter winnma01 in 2021 - OK
No data found for batter winnma01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter winnma01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wisdopa01 in 2025 - OK
No data found for batter wiselbr01 in 2021 - OK
No data found for batter wiselbr01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wiselbr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wislema01 in 2023 - OK
No data found for batter wislema01 in 2024 - OK
No data found for batter wislema01 in 2025 - OK
No data found for batter wittbo02 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wittbo02 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wolteto01 in 2023 - OK
No data found for batter wolteto01 in 2024 - OK
No data found for batter wolteto01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wongco01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter wongke01 in 2022 - OK
No data found for batter wongke01 in 2023 - OK
No data found for batter wongke01 in 2024 - OK
No data found for batter wongke01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wongko01 in 2024 - OK
No data found for batter wongko01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter woodal02 in 2025 - OK
No data found for batter woodja03 in 2021 - OK
No data found for batter woodja03 in 2022 - OK
No data found for batter woodja03 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter woodja03 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter woodfja01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter woodrbr01 in 2024 - OK
No data found for batter woodrbr01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wrighky01 in 2024 - OK
No data found for batter wrighky01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter wynnsau01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]


No data found for batter yajurmi01 in 2023 - OK
No data found for batter yajurmi01 in 2024 - OK
No data found for batter yajurmi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')


No data found for batter yamamjo01 in 2022 - OK
No data found for batter yamamjo01 in 2023 - OK
No data found for batter yamamjo01 in 2024 - OK
No data found for batter yamamjo01 in 2025 - OK
No data found for batter yanger01 in 2021 - OK
No data found for batter yanger01 in 2022 - OK
No data found for batter yanger01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter yanger01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter yarbrry01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter yastrmi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter yelicch01 in 2025 - OK
No data found for batter yepezju01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter yepezju01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter ynoahu01 in 2023 - OK
No data found for batter ynoahu01 in 2024 - OK
No data found for batter ynoahu01 in 2025 - OK
No data found for batter yorkeni01 in 2021 - OK
No data found for batter yorkeni01 in 2022 - OK
No data found for batter yorkeni01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter yorkeni01 in 2025 - OK
No data found for batter yoshima02 in 2021 - OK
No data found for batter yoshima02 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter yoshima02 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter youngal01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter youngan02 in 2022 - OK
No data found for batter youngan02 in 2023 - OK
No data found for batter youngan02 in 2024 - OK
No data found for batter youngan02 in 2025 - OK
No data found for batter youngja03 in 2021 - OK
No data found for batter youngja03 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter youngja03 in 2025 - OK
No data found for batter youngja02 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter youngja02 in 2024 - OK
No data found for batter youngja02 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter zavalse01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter zimmebr01 in 2023 - OK
No data found for batter zimmebr01 in 2024 - OK
No data found for batter zimmebr01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter zimmery01 in 2022 - OK
No data found for batter zimmery01 in 2023 - OK
No data found for batter zimmery01 in 2024 - OK
No data found for batter zimmery01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter zimmebr02 in 2024 - OK
No data found for batter zimmebr02 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter zimmejo02 in 2022 - OK
No data found for batter zimmejo02 in 2023 - OK
No data found for batter zimmejo02 in 2024 - OK
No data found for batter zimmejo02 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for batter zuberty01 in 2022 - OK
No data found for batter zuberty01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for batter zuberty01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for batter zuninmi01 in 2024 - OK
No data found for batter zuninmi01 in 2025 - OK


## Fetch PITCHING Data
* and concatenate into one dataset (per player)
* store in 'pitchers/...'

In [None]:
idlist = pd.read_csv('pitcher_ids.csv')
pitcher_ids = idlist.key_bbref

game_pks = pd.read_csv('game_pks.csv')

# Define the mapping from abbreviated team names to full team names
team_id_mapping = {
    'WSN': 120,
    'MIA': 146,
    'TBR': 139,
    'ATL' : 144,
    'TEX' : 140,
    'HOU' : 117,
    'SD' : 135,
    'SDP' : 135,
    'PHI' : 143,
    'BAL' : 110,
    'SEA' : 136,
    'NYM' : 121,
    'ARI' : 109,
    'LAA' : 108,
    'OAK' : 133,
    'TOR' : 141,
    'CLE' : 114,
    'STL' : 138,
    'MIN' : 142,
    'DET' : 116,
    'NYY' : 147,
    'SFG' : 137,
    'KCR' : 118,
    'CWS' : 145,
    'CHW' : 145,
    'COL' : 115,
    'BOS' : 111,
    'LAD' : 119,
    'CHC' : 112,
    'MIL' : 158,
    'CIN' : 113,
    'PIT' : 134
}

# Define the years you want to process
years = [2021, 2022, 2023, 2024, 2025]

# Loop through each batter ID
for id in pitcher_ids:
    # Initialize an empty dataframe for the player
    player_df = pd.DataFrame()

    # Loop through each year
    for year in years:
        # Fetch data for the player and year
        df = fetch_p_game_log(id, year)

        time.sleep(1)

        # Check if the fetched dataframe is None or empty
        if df is None or df.empty:
            continue  # Skip this year if no data available

        # Apply the function to the date_column and create a new column
        df['game_date'] = df['Date'].apply(lambda date : clean_date(date, year))
        df['Date'] = df['game_date']

        # Ensure the 'Date' column in df and 'game_date' column in game_pks are in datetime format
        df['Date'] = pd.to_datetime(df['Date'])
        game_pks['game_date'] = pd.to_datetime(game_pks['game_date'])

        # Map the team abbreviations to full team names
        df['team_id'] = df['Tm'].map(team_id_mapping)
        df['opp_id'] = df['Opp'].map(team_id_mapping)

        # Initialize a new column in df for game_id
        df['game_id'] = None

        # Iterate over the rows in df to find the corresponding game_id in game_pks
        for index, row in df.iterrows():

            #print(f"Processing row {index}: Date={row['Date']}, Team={row['team_id']}, Opponent={row['opp_id']}")
            

            # Filter the game_pks for the matching date and teams
            game_day_matches = game_pks[
                (game_pks['game_date'] == row['Date']) &
                (
                    ((game_pks['home_id'] == row['team_id']) & (game_pks['away_id'] == row['opp_id'])) |
                    ((game_pks['home_id'] == row['opp_id']) & (game_pks['away_id'] == row['team_id']))
                )
            ]

            
            #print(f"Matches found: {len(game_day_matches)}")
            

            # Check the 'dbl' column to assign the correct game_id
            if not game_day_matches.empty:

                #print(f"Match details: {game_day_matches}")

                if row['dbl'] == 1:
                    # For the first game of a double-header
                    game_id = game_day_matches.iloc[0]['game_id']
                elif row['dbl'] == 2:
                    # For the second game of a double-header
                    if len(game_day_matches) > 1:
                        game_id = game_day_matches.iloc[1]['game_id']
                    else:
                        game_id = game_day_matches.iloc[0]['game_id']
                else:
                    # For days without double-headers or unmarked double-headers, take the first game
                    game_id = game_day_matches.iloc[0]['game_id']
                df.at[index, 'game_id'] = game_id
                #print(f"Assigned game_id: {game_id}")
            else:
                print("BAD - NO GAME MATCHES FOUND (pitcher).")

        # Append the data for the year to player_df
        player_df = pd.concat([player_df, df])

    # Save the player's data to a CSV file
    player_df.to_csv(f'pitchers/{id}_pitching.csv', index=False)

  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher wrighky01 in 2024 - OK
No data found for pitcher wrighky01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher wrighmi01 in 2022 - OK
No data found for pitcher wrighmi01 in 2023 - OK
No data found for pitcher wrighmi01 in 2024 - OK
No data found for pitcher wrighmi01 in 2025 - OK
No data found for pitcher wroblju01 in 2021 - OK
No data found for pitcher wroblju01 in 2022 - OK
No data found for pitcher wroblju01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher wroblju01 in 2025 - OK
No data found for pitcher wynnera01 in 2021 - OK
No data found for pitcher wynnera01 in 2022 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher wynnera01 in 2024 - OK
No data found for pitcher wynnera01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher wynnsau01 in 2023 - OK
No data found for pitcher wynnsau01 in 2024 - OK
No data found for pitcher wynnsau01 in 2025 - OK
No data found for pitcher yacabji01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher yacabji01 in 2024 - OK
No data found for pitcher yacabji01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]


No data found for pitcher yajurmi01 in 2023 - OK
No data found for pitcher yajurmi01 in 2024 - OK
No data found for pitcher yajurmi01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')


No data found for pitcher yamamjo01 in 2022 - OK
No data found for pitcher yamamjo01 in 2023 - OK
No data found for pitcher yamamjo01 in 2024 - OK
No data found for pitcher yamamjo01 in 2025 - OK
No data found for pitcher yamamyo01 in 2021 - OK
No data found for pitcher yamamyo01 in 2022 - OK
No data found for pitcher yamamyo01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher yamamyo01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher yanghy01 in 2022 - OK
No data found for pitcher yanghy01 in 2023 - OK
No data found for pitcher yanghy01 in 2024 - OK
No data found for pitcher yanghy01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher yarbrry01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher yardler01 in 2022 - OK
No data found for pitcher yardler01 in 2023 - OK
No data found for pitcher yardler01 in 2024 - OK
No data found for pitcher yardler01 in 2025 - OK
No data found for pitcher yastrmi01 in 2021 - OK
No data found for pitcher yastrmi01 in 2022 - OK
No data found for pitcher yastrmi01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher yastrmi01 in 2025 - OK
No data found for pitcher yateski01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher yateski01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher ynoahu01 in 2023 - OK
No data found for pitcher ynoahu01 in 2024 - OK
No data found for pitcher ynoahu01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher youngal01 in 2025 - OK
No data found for pitcher youngda02 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher youngda02 in 2025 - OK
No data found for pitcher zabalan01 in 2021 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zabalan01 in 2023 - OK
No data found for pitcher zabalan01 in 2024 - OK
No data found for pitcher zabalan01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zamorda01 in 2022 - OK
No data found for pitcher zamorda01 in 2023 - OK
No data found for pitcher zamorda01 in 2024 - OK
No data found for pitcher zamorda01 in 2025 - OK
No data found for pitcher zastrro01 in 2021 - OK


  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher zastrro01 in 2025 - OK
No data found for pitcher zavalse01 in 2021 - OK
No data found for pitcher zavalse01 in 2022 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zavalse01 in 2024 - OK
No data found for pitcher zavalse01 in 2025 - OK
No data found for pitcher zeferry01 in 2021 - OK
No data found for pitcher zeferry01 in 2022 - OK
No data found for pitcher zeferry01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zeferry01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df['game_date'] = pd.to_datetime(df['Date'], errors='coerce').dt.strftime('%Y-%m-%d')
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher zerpaan01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher zeuchtj01 in 2023 - OK
No data found for pitcher zeuchtj01 in 2024 - OK
No data found for pitcher zeuchtj01 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zimmeky01 in 2022 - OK
No data found for pitcher zimmeky01 in 2023 - OK
No data found for pitcher zimmeky01 in 2024 - OK
No data found for pitcher zimmeky01 in 2025 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher zimmebr02 in 2024 - OK
No data found for pitcher zimmebr02 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zimmejo02 in 2022 - OK
No data found for pitcher zimmejo02 in 2023 - OK
No data found for pitcher zimmejo02 in 2024 - OK
No data found for pitcher zimmejo02 in 2025 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zuberty01 in 2022 - OK
No data found for pitcher zuberty01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zuberty01 in 2025 - OK
No data found for pitcher zulueyo01 in 2021 - OK
No data found for pitcher zulueyo01 in 2022 - OK
No data found for pitcher zulueyo01 in 2023 - OK


  df = pd.read_html(str(table))[0]


No data found for pitcher zulueyo01 in 2025 - OK
No data found for pitcher zuniggu01 in 2021 - OK
No data found for pitcher zuniggu01 in 2022 - OK


  df = pd.read_html(str(table))[0]
  df = pd.read_html(str(table))[0]


No data found for pitcher zuniggu01 in 2025 - OK


# Check for empty files

In [20]:
idlist = pd.read_csv('pitcher_ids.csv')
pitcher_ids = idlist.key_bbref

game_pks = pd.read_csv('game_pks.csv')

bad_p_ids = []

# Loop through each pitcher ID
for id in pitcher_ids:
    # Skip empty or NaN IDs
    if not id or pd.isna(id):
        continue

    # Define the file path
    player_file_path = f'pitchers/{id}_pitching.csv'
    
    # Check if the file exists and is not empty
    if os.path.exists(player_file_path) and os.path.getsize(player_file_path) > 0:
        try:
            # Attempt to read the first few rows to ensure the file has content
            player_df = pd.read_csv(player_file_path, nrows=5)
            if player_df.empty:
                print(f"File for ID {id} is empty or improperly formatted.")
                player_df = pd.DataFrame()  # Ensure player_df is an empty DataFrame
                continue
            else:
                # Now read the entire file since it's confirmed to have content
                player_df = pd.read_csv(player_file_path)
        except pd.errors.EmptyDataError:
            print(f"File for ID {id} is empty.")
            player_df = pd.DataFrame()
            bad_p_ids.append(id)
            continue
        except pd.errors.ParserError:
            print(f"File for ID {id} is improperly formatted.")
            player_df = pd.DataFrame()
            bad_p_ids.append(id)
            continue

print(bad_p_ids)

[]


# Check if the individual data exists

In [21]:
data = fetch_p_game_log('dolisra01', 2021)


  df = pd.read_html(str(table))[0]


# Handle (Fetch) the missing data

In [22]:

pitcher_ids = bad_p_ids

game_pks = pd.read_csv('game_pks.csv')

# Define the mapping from abbreviated team names to full team names
team_id_mapping = {
    'WSN': 120,
    'MIA': 146,
    'TBR': 139,
    'ATL' : 144,
    'TEX' : 140,
    'HOU' : 117,
    'SD' : 135,
    'SDP' : 135,
    'PHI' : 143,
    'BAL' : 110,
    'SEA' : 136,
    'NYM' : 121,
    'ARI' : 109,
    'LAA' : 108,
    'OAK' : 133,
    'TOR' : 141,
    'CLE' : 114,
    'STL' : 138,
    'MIN' : 142,
    'DET' : 116,
    'NYY' : 147,
    'SFG' : 137,
    'KCR' : 118,
    'CWS' : 145,
    'CHW' : 145,
    'COL' : 115,
    'BOS' : 111,
    'LAD' : 119,
    'CHC' : 112,
    'MIL' : 158,
    'CIN' : 113,
    'PIT' : 134
}

# Define the years you want to process
years = [2021, 2022, 2023, 2024, 2025]

# Loop through each batter ID
for id in pitcher_ids:
    # Initialize an empty dataframe for the player
    player_df = pd.DataFrame()

    # Loop through each year
    for year in years:
        # Fetch data for the player and year
        df = fetch_p_game_log(id, year)

        time.sleep(1)

        # Check if the fetched dataframe is None or empty
        if df is None or df.empty:
            continue  # Skip this year if no data available

        # Apply the function to the date_column and create a new column
        df['game_date'] = df['Date'].apply(lambda date : clean_date(date, year))
        df['Date'] = df['game_date']

        # Ensure the 'Date' column in df and 'game_date' column in game_pks are in datetime format
        df['Date'] = pd.to_datetime(df['Date'])
        game_pks['game_date'] = pd.to_datetime(game_pks['game_date'])

        # Map the team abbreviations to full team names
        df['team_id'] = df['Tm'].map(team_id_mapping)
        df['opp_id'] = df['Opp'].map(team_id_mapping)

        # Initialize a new column in df for game_id
        df['game_id'] = None

        # Iterate over the rows in df to find the corresponding game_id in game_pks
        for index, row in df.iterrows():

            #print(f"Processing row {index}: Date={row['Date']}, Team={row['team_id']}, Opponent={row['opp_id']}")
            

            # Filter the game_pks for the matching date and teams
            game_day_matches = game_pks[
                (game_pks['game_date'] == row['Date']) &
                (
                    ((game_pks['home_id'] == row['team_id']) & (game_pks['away_id'] == row['opp_id'])) |
                    ((game_pks['home_id'] == row['opp_id']) & (game_pks['away_id'] == row['team_id']))
                )
            ]

            
            #print(f"Matches found: {len(game_day_matches)}")
            

            # Check the 'dbl' column to assign the correct game_id
            if not game_day_matches.empty:

                #print(f"Match details: {game_day_matches}")

                if row['dbl'] == 1:
                    # For the first game of a double-header
                    game_id = game_day_matches.iloc[0]['game_id']
                elif row['dbl'] == 2:
                    # For the second game of a double-header
                    if len(game_day_matches) > 1:
                        game_id = game_day_matches.iloc[1]['game_id']
                    else:
                        game_id = game_day_matches.iloc[0]['game_id']
                else:
                    # For days without double-headers or unmarked double-headers, take the first game
                    game_id = game_day_matches.iloc[0]['game_id']
                df.at[index, 'game_id'] = game_id
                #print(f"Assigned game_id: {game_id}")
            else:
                print("BAD - NO GAME MATCHES FOUND (pitcher).")

        # Append the data for the year to player_df
        player_df = pd.concat([player_df, df])

    # Save the player's data to a CSV file
    player_df.to_csv(f'pitchers/{id}_pitching.csv', index=False)