In [13]:
import os
import pandas as pd

In [16]:
def get_player_stats(bbref_id, player_type, game_id):
    """
    Look up one player's stats row for a given game.

    The stats are read from ``batters/{bbref_id}_stats_{player_type}.csv`` when
    ``player_type`` is ``'batting'`` and from ``pitchers/...`` for any other value.

    Args:
        bbref_id: Player identifier used to build the stats file name.
        player_type: ``'batting'`` selects the ``batters`` directory; anything else
            selects ``pitchers``.
        game_id: Value matched against the file's ``game_id`` column.

    Returns:
        The first row (a ``pandas.Series``) whose ``game_id`` equals ``game_id``.
        If no row matches, the last row of the file is returned instead
        (presumably the most recent game -- the file is not sorted here, so this
        relies on the file's own ordering). Returns ``None`` when the stats file
        does not exist or contains no data rows.
    """
    stats_dir = 'batters' if player_type == 'batting' else 'pitchers'
    stats_file = os.path.join(stats_dir, f'{bbref_id}_stats_{player_type}.csv')

    if not os.path.exists(stats_file):
        print(f"Stats file for {bbref_id} not found ({player_type}).")
        return None

    try:
        stats_df = pd.read_csv(stats_file)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it like a file with no rows.
        stats_df = pd.DataFrame()

    if stats_df.empty:
        # Without this guard the fallback `iloc[-1]` below raises IndexError.
        print(f"Stats file for {bbref_id} is empty ({player_type}).")
        return None

    game_stats = stats_df[stats_df['game_id'] == game_id]

    if not game_stats.empty:
        return game_stats.iloc[0]
    # No row for this game: fall back to the last row in the file.
    return stats_df.iloc[-1]

def _has_player_id(bbref_id):
    """Return True when ``bbref_id`` is a usable (non-missing, non-empty) player ID.

    Blank cells in a gamelog CSV are loaded by ``pd.read_csv`` as NaN, and NaN is
    truthy, so a plain truthiness test would let them through.
    """
    return not pd.isna(bbref_id) and bool(bbref_id)


def process_game(game_id):
    """
    Add player stat columns to a game's gamelog row and save the result as a new CSV.

    Reads the first row of ``gamelogs/game_{game_id}.csv``. For every batter slot
    (``{team}_Batter1`` .. ``{team}_Batter9``) and pitcher slot (``{team}_SP``,
    ``{team}_P_2`` .. ``{team}_P_10``) of the ``Away`` and ``Home`` teams, it looks up
    the player's stats with ``get_player_stats`` and copies the selected columns into
    the row under ``{team}_{slot}_{column}`` keys. A column absent from the stats row
    is stored as an empty string. The row is written to
    ``gamelogs/gamestats_{game_id}.csv``.

    Args:
        game_id: Identifier used in the gamelog file name and passed on to the
            stats lookup.

    Returns:
        None. Prints a message and returns early when the gamelog file is missing
        or has no data rows.
    """
    # Read the gamelog file
    game_file = f'gamelogs/game_{game_id}.csv'
    if not os.path.exists(game_file):
        print(f"Gamelog file for game {game_id} not found.")
        return

    try:
        game_df = pd.read_csv(game_file)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it like a file with no rows.
        game_df = pd.DataFrame()

    if game_df.empty:
        # Without this guard `iloc[0]` below raises IndexError.
        print(f"Gamelog file for game {game_id} is empty.")
        return

    game_data = game_df.iloc[0].to_dict()

    # Define relevant columns for batters and pitchers
    batter_columns = ['AVG_20', 'OBP_20', 'SLG_20', 'OPS_20', 'SB_20', 'CS_20', 'XB_20', 'TB_20', 'SO_20',
                      'AVG_5', 'OBP_5', 'SLG_5', 'OPS_5', 'SB_5', 'CS_5', 'XB_5', 'TB_5', 'SO_5']
    pitcher_columns = ['IP_real_20', 'ERA', 'H_20', 'BF_20', 'HR_20', 'R_20', 'ER_20', 'BB_20', 'SO_20', 'XB_against_20',
                       'TB_against_20', 'ERA_20', 'WHIP_20', 'IP_real_5', 'H_5', 'BF_5', 'HR_5', 'R_5', 'ER_5', 'BB_5',
                       'SO_5', 'XB_against_5', 'TB_against_5', 'ERA_5', 'WHIP_5']

    # Fetch stats for each batter
    for i in range(1, 10):
        for team in ['Away', 'Home']:
            bbref_id = game_data.get(f'{team}_Batter{i}_bbrefID')
            if _has_player_id(bbref_id):
                stats = get_player_stats(bbref_id, 'batting', game_id)
                if stats is not None:
                    for col in batter_columns:
                        game_data[f'{team}_Batter{i}_{col}'] = stats.get(col, '')
            else:
                print(f'missing bbrefID for game {game_id}')

    # Fetch stats for each pitcher
    for team in ['Away', 'Home']:
        for i in range(1, 11):
            # Slot 1 is stored under the 'SP' key; slots 2-10 under 'P_2' .. 'P_10'.
            role = 'SP' if i == 1 else f'P_{i}'
            bbref_id = game_data.get(f'{team}_{role}_bbrefID')
            # Unlike batters, a missing pitcher ID is skipped without a message.
            if _has_player_id(bbref_id):
                stats = get_player_stats(bbref_id, 'pitching', game_id)
                if stats is not None:
                    for col in pitcher_columns:
                        game_data[f'{team}_{role}_{col}'] = stats.get(col, '')

    # Create a DataFrame from the updated game data
    updated_game_df = pd.DataFrame([game_data])

    # Save the updated game data to a new CSV file
    output_file = f'gamelogs/gamestats_{game_id}.csv'
    updated_game_df.to_csv(output_file, index=False)
    print(f"Processed and saved game stats for game {game_id} to {output_file}")

def process_recent_games(num_recent_games):
    """
    Run ``process_game`` on the game IDs in the last rows of ``game_pks.csv``.

    Args:
        num_recent_games: Number of rows to take from the end of ``game_pks.csv``.

    Returns:
        None. Prints a message and does nothing else when ``game_pks.csv`` is absent.
    """
    pks_path = 'game_pks.csv'

    if os.path.exists(pks_path):
        pks_table = pd.read_csv(pks_path)
        # Keep only the trailing rows, then pull their IDs out as a plain list.
        trailing_rows = pks_table.tail(num_recent_games)
        for pk in trailing_rows['game_id'].tolist():
            process_game(pk)
    else:
        print(f"{pks_path} not found.")

In [17]:
# Number of most recent games to process: process_recent_games takes this many
# rows from the end of game_pks.csv and runs process_game on each game_id.
num_recent_games = 50
process_recent_games(num_recent_games)

Stats file for puellce01 not found (batting).
Stats file for unknown not found (pitching).
Processed and saved game stats for game 641581 to gamelogs/gamestats_641581.csv
Stats file for novaiv01 not found (pitching).
Stats file for goodyni01 not found (pitching).
Processed and saved game stats for game 641589 to gamelogs/gamestats_641589.csv
Stats file for martijo08 not found (batting).
Stats file for blevije01 not found (pitching).
Processed and saved game stats for game 642069 to gamelogs/gamestats_642069.csv
Stats file for unknown not found (pitching).
Processed and saved game stats for game 642021 to gamelogs/gamestats_642021.csv
Stats file for unknown not found (batting).
Stats file for unknown not found (pitching).
Stats file for carlesh01 not found (pitching).
Processed and saved game stats for game 642173 to gamelogs/gamestats_642173.csv
Stats file for unknown not found (pitching).
Processed and saved game stats for game 641941 to gamelogs/gamestats_641941.csv
Stats file for bi