In [29]:
import os
import re
import pandas as pd
from pybaseball import statcast_batter, statcast_pitcher, playerid_lookup, pitching_stats_range, batting_stats_range, schedule_and_record, team_game_logs, pybaseball
from datetime import timedelta, datetime
import statsapi
from statsapi import boxscore_data
import pprint
import openpyxl
import mlbstatsapi

In [27]:
gamepk = 745415
boxscore = boxscore_data(gamePk=gamepk)


In [11]:
boxscore.keys()

dict_keys(['gameId', 'teamInfo', 'playerInfo', 'away', 'home', 'awayBatters', 'homeBatters', 'awayBattingTotals', 'homeBattingTotals', 'awayBattingNotes', 'homeBattingNotes', 'awayPitchers', 'homePitchers', 'awayPitchingTotals', 'homePitchingTotals', 'gameBoxInfo'])

In [25]:
pprint.pprint(boxscore)

{'away': {'batters': [683011,
                      665742,
                      592450,
                      657077,
                      519317,
                      519203,
                      650402,
                      669224,
                      542932,
                      607074],
          'battingOrder': [683011,
                           665742,
                           592450,
                           657077,
                           519317,
                           519203,
                           650402,
                           669224,
                           542932],
          'bench': [665828, 663757, 663330, 624431],
          'bullpen': [641482,
                      657571,
                      661563,
                      672782,
                      624647,
                      605280,
                      592454,
                      642701,
                      657376,
                      573186,
                      543859,


In [28]:
# Function to extract player details from a team
def extract_team_player_details(team_data, team_prefix, player_count):
    player_info = team_data.get('players', {})
    batting_order = team_data.get('battingOrder', [])
    batting_position_map = {player_id: position for position, player_id in enumerate(batting_order, start=1)}
    
    team_data = {}
    for player_id in player_info:
        player_data = player_info[player_id]
        player_id = player_data['person']['id']
        full_name = player_data['person']['fullName']
        batting_position = batting_position_map.get(player_id)
        
        # Skip players with no batting position
        if batting_position is None:
            continue
        
        column_name = f'{team_prefix}_Batter{player_count}_'
        team_data[f'{column_name}Name'] = full_name
        team_data[f'{column_name}ID'] = player_id
        player_count += 1
    
    return team_data, player_count

# Extract data for both teams
away_player_count = 1
home_player_count = 1
away_team_data, away_player_count = extract_team_player_details(boxscore.get('away', {}), 'Away', away_player_count)
home_team_data, home_player_count = extract_team_player_details(boxscore.get('home', {}), 'Home', home_player_count)

# Add the pitching data for both teams
away_pitchers = boxscore.get('awayPitchers', [])[1:]  # Skip the first element
home_pitchers = boxscore.get('homePitchers', [])[1:]  # Skip the first element

for i, pitcher_data in enumerate(away_pitchers, start=1):
    pitcher_name = pitcher_data['name']
    pitcher_id = pitcher_data['personId']
    column_name_name = f'Away_{("SP" if i == 1 else f"P_{i}")}_Name'
    column_name_id = f'Away_{("SP" if i == 1 else f"P_{i}")}_ID'
    away_team_data[column_name_name] = pitcher_name
    away_team_data[column_name_id] = pitcher_id

for i, pitcher_data in enumerate(home_pitchers, start=1):
    pitcher_name = pitcher_data['name']
    pitcher_id = pitcher_data['personId']
    column_name_name = f'Home_{("SP" if i == 1 else f"P_{i}")}_Name'
    column_name_id = f'Home_{("SP" if i == 1 else f"P_{i}")}_ID'
    home_team_data[column_name_name] = pitcher_name
    home_team_data[column_name_id] = pitcher_id

# Ensure all positions are filled with empty strings for missing pitchers
for i in range(len(away_pitchers) + 1, 10):
    column_name_name = f'Away_P_{i}_Name'
    column_name_id = f'Away_P_{i}_ID'
    away_team_data[column_name_name] = ''
    away_team_data[column_name_id] = ''

for i in range(len(home_pitchers) + 1, 10):
    column_name_name = f'Home_P_{i}_Name'
    column_name_id = f'Home_P_{i}_ID'
    home_team_data[column_name_name] = ''
    home_team_data[column_name_id] = ''

# Combine the extracted data into a single dictionary
combined_data = {**away_team_data, **home_team_data}

# Add the 'gamepk' column
combined_data['gamepk'] = gamepk

# Read the game_pks.csv file
game_pks_df = pd.read_csv('game_pks.csv')

# Find the gamepk in the game_pks_df and get the corresponding game_date
game_date = game_pks_df.loc[game_pks_df['game_id'] == gamepk, 'game_date'].values[0]

# Add the 'game_date' column
combined_data['game_date'] = game_date

# Adding runs columns
runs_home = int(boxscore['homeBattingTotals']['r'])
runs_away = int(boxscore['awayBattingTotals']['r'])
combined_data['runs_home'] = runs_home
combined_data['runs_away'] = runs_away
combined_data['runs_total'] = runs_home + runs_away


# Create a DataFrame from the combined data
df = pd.DataFrame([combined_data])

# Reorder columns
column_order = ['game_date', 'gamepk', 'runs_away', 'runs_home', 'runs_total']
for i in range(1, 10):
    column_order += [f'Away_Batter{i}_Name', f'Away_Batter{i}_ID']
for i in range(1, 10):
    column_order += [f'Home_Batter{i}_Name', f'Home_Batter{i}_ID']
for i in range(1, len(away_pitchers) + 1):
    column_order += [f'Away_SP_Name' if i == 1 else f'Away_P_{i}_Name', f'Away_SP_ID' if i == 1 else f'Away_P_{i}_ID']
for i in range(1, len(home_pitchers) + 1):
    column_order += [f'Home_SP_Name' if i == 1 else f'Home_P_{i}_Name', f'Home_SP_ID' if i == 1 else f'Home_P_{i}_ID']

df = df[column_order]

# Display the DataFrame
print(df)
df.to_csv(f'gamelogs/game_{gamepk}.csv')

    game_date  gamepk  runs_away  runs_home  runs_total Away_Batter1_Name  \
0  2024-05-24  745415          7          0           7     Anthony Rizzo   

   Away_Batter1_ID Away_Batter2_Name  Away_Batter2_ID Away_Batter3_Name  ...  \
0           519203      Alex Verdugo           657077       Aaron Judge  ...   

   Home_Batter7_Name Home_Batter7_ID  Home_Batter8_Name Home_Batter8_ID  \
0   Jake Cronenworth          630105       Ha-Seong Kim          673490   

   Home_Batter9_Name Home_Batter9_ID  Away_SP_Name Away_SP_ID  Home_SP_Name  \
0        Luis Arraez          650333         Rodón     607074       Darvish   

  Home_SP_ID  
0     506433  

[1 rows x 45 columns]
