In [1]:
import os
import re
import pandas as pd
from pybaseball import statcast_batter, statcast_pitcher, playerid_lookup, pitching_stats_range, batting_stats_range, schedule_and_record, team_game_logs, pybaseball
from datetime import timedelta, datetime
import statsapi
import pprint

In [4]:
today = datetime.now()
end_date = today.strftime('%Y-%m-%d')

In [26]:
def get_all_game_pks_since_2021(start_year=2023):
    game_pks = []
    
    # Iterate through each year starting from 2021 to the current year
    for year in range(2024, today.year + 1):
        # Get the schedule for the year
        schedule = statsapi.schedule(start_date=f'{year}-05-01', end_date=end_date)
        
        # Extract gamePk IDs from the schedule
        for game in schedule:
            game_pks.append(game['game_id'])
    
    return game_pks

In [27]:
def get_player_roles_since_2021():
    game_pks = get_all_game_pks_since_2021()
    player_roles = {}
    
    for gamePk in game_pks:
        boxscore = statsapi.boxscore_data(gamePk)
        
        for team_key in ['home', 'away']:
            if team_key in boxscore and 'players' in boxscore[team_key]:
                team_players = boxscore[team_key]['players']
                
                for player_id, player_info in team_players.items():
                    if '_' in player_id:
                        player_id_parts = player_id.split('_')
                        if len(player_id_parts) > 1:
                            player_id = int(player_id_parts[1])
                            # Determine if the player is a batter or pitcher
                            if player_info['position']['abbreviation'] in ['P']:
                                role = 'Pitching'
                            else:
                                role = 'Batting'

                            player_roles[player_id] = role
    
    return player_roles


In [28]:
def create_player_csv(player_id, role, games):
    player_data = {
        'Game Date': [game['gameDate'] for game in games],
        'Game ID': [game['gamePk'] for game in games],
        'Stats': [game['stats'] for game in games]
    }
    
    df = pd.DataFrame(player_data)
    file_name = f"{player_id}_{role.lower()}.csv"
    df.to_csv(file_name, index=False)

In [29]:
# Example usage
player_roles = get_player_roles_since_2021()


In [30]:
print(player_roles, len(player_roles))

{} 0


In [None]:
for player_id, role in player_roles.items():
    games = statsapi.player_game_log(player_id)
    create_player_csv(player_id, role, games)