In [5]:
import requests
import pandas as pd
from datetime import datetime
from pybaseball import playerid_reverse_lookup

# Load game_pks.csv to get team names and IDs.
# Columns read from it elsewhere in this file: game_id, game_date,
# home_id, away_id, home_name, away_name.
game_pks_df = pd.read_csv('game_pks.csv')

# Mapping of team 3-digit IDs (the home_id/away_id values in game_pks.csv)
# to oddshark 5-digit IDs. Team ids that are missing from this map are
# reported by create_game_dataframe and stored as 'unknown'.
# NOTE(review): 121 and 114 both map to 27014 below, while every other value
# is unique -- one of the two is presumably a typo; verify against the
# oddshark source before relying on these two entries.
team_to_oddshark_id = {
    120: 27017, 146: 27022, 139: 27003, 144: 27009, 140: 27002, 117: 27023,
    135: 26996, 143: 26995, 110: 27008, 136: 27011, 121: 27014, 109: 27007,
    108: 26998, 133: 27016, 141: 27010, 114: 27014, 138: 27019, 142: 27005,
    116: 26999, 147: 27001, 137: 26997, 118: 27006, 145: 27018, 115: 27004,
    111: 27021, 119: 27015, 112: 27020, 158: 27012, 113: 27000, 134: 27013
}

# Module-level accumulator of (gamepk, message) tuples; the extraction
# functions append to it instead of raising, and it is printed at the end.
errors = []

def get_game_data(gamepk, timeout=30):
    """Fetch the live-feed JSON for one game from the MLB stats API.

    Args:
        gamepk: Game identifier interpolated into the feed URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole batch run.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = f"https://statsapi.mlb.com/api/v1.1/game/{gamepk}/feed/live"
    response = requests.get(url, timeout=timeout)
    # The HTTP status is deliberately not checked here: callers rely on
    # missing keys in the decoded body to detect unusable responses.
    return response.json()

def get_bbref_id(mlbam_id):
    """Translate an MLBAM player id into its ``key_bbref`` identifier.

    Args:
        mlbam_id: The player's MLBAM id.

    Returns:
        The ``key_bbref`` value of the first lookup row whose ``key_mlbam``
        equals ``mlbam_id``, or the string ``'unknown'`` when the lookup
        yields no matching row.
    """
    try:
        players = playerid_reverse_lookup([mlbam_id], key_type='mlbam')
        is_match = players['key_mlbam'] == mlbam_id
        matching_ids = players.loc[is_match, 'key_bbref'].values
        # Indexing an empty result raises IndexError, handled below.
        return matching_ids[0]
    except IndexError:
        return 'unknown'

def extract_starting_lineup(game_data, team_side, gamepk=None):
    """Return the nine starting batters for one side, ordered by batting slot.

    A player counts as a starter when the boxscore entry has a
    ``battingOrder`` that is an exact multiple of 100; the slot number is
    that value divided by 100 (e.g. ``300`` -> slot 3).

    Args:
        game_data: Decoded live-feed JSON (see ``get_game_data``).
        team_side: Key under ``liveData.boxscore.teams``; ``'home'`` or
            ``'away'`` in this file.
        gamepk: Optional game id used to tag entries appended to the
            module-level ``errors`` list. When omitted, the feed's own
            top-level ``gamePk`` value is used (``'unknown'`` if absent),
            so the function no longer depends on a global loop variable.

    Returns:
        A list of exactly nine dicts with keys ``name``, ``mlbam_id`` and
        ``bbref_id``. Slots with no starter found are filled with empty
        strings.

    Raises:
        KeyError: If the boxscore path for ``team_side`` is missing.
    """
    if gamepk is None:
        gamepk = game_data.get('gamePk', 'unknown')

    players = game_data['liveData']['boxscore']['teams'][team_side]['players']
    empty_slot = {'name': '', 'mlbam_id': '', 'bbref_id': ''}
    lineup = {}

    for player_id, player_info in players.items():
        try:
            if 'battingOrder' not in player_info:
                continue
            batting_order = int(player_info['battingOrder'])
            if batting_order % 100 != 0:
                # Not an exact multiple of 100: not a starter, skip.
                continue
            mlbam_id = player_info['person']['id']
            lineup[batting_order // 100] = {
                'name': player_info['person']['fullName'],
                'mlbam_id': mlbam_id,
                'bbref_id': get_bbref_id(mlbam_id),
            }
        except Exception as e:
            # Best effort: one malformed player entry must not lose the rest.
            errors.append((gamepk, f"Error processing player {player_id} in {team_side} lineup: {str(e)}"))

    # A batting order has nine slots (1-9); the previous range(1, 9)
    # silently dropped the ninth batter.
    return [lineup.get(slot, dict(empty_slot)) for slot in range(1, 10)]

def get_pitchers(game_data, team_side, gamepk=None):
    """List the pitchers recorded for one side, in boxscore order.

    Args:
        game_data: Decoded live-feed JSON (see ``get_game_data``).
        team_side: Key under ``liveData.boxscore.teams``; ``'home'`` or
            ``'away'`` in this file.
        gamepk: Optional game id used to tag entries appended to the
            module-level ``errors`` list. When omitted, the feed's own
            top-level ``gamePk`` value is used (``'unknown'`` if absent),
            so the function no longer depends on a global loop variable.

    Returns:
        A list of dicts with keys ``name``, ``mlbam_id``, ``bbref_id`` and
        ``order``, where ``order`` is the 1-based position of the pitcher in
        the team's ``pitchers`` list. Pitchers whose entry cannot be
        processed are skipped and logged to ``errors``.

    Raises:
        KeyError: If the boxscore path or the ``pitchers`` list is missing.
    """
    if gamepk is None:
        gamepk = game_data.get('gamePk', 'unknown')

    team = game_data['liveData']['boxscore']['teams'][team_side]
    pitchers = []
    for order, pitcher_id in enumerate(team['pitchers'], start=1):
        try:
            # Player entries are keyed as 'ID<numeric id>' in the boxscore.
            pitcher = team['players'][f'ID{pitcher_id}']
            mlbam_id = pitcher['person']['id']
            pitchers.append({
                'name': pitcher['person']['fullName'],
                'mlbam_id': mlbam_id,
                'bbref_id': get_bbref_id(mlbam_id),
                'order': order,
            })
        except Exception as e:
            # Best effort: keep the remaining pitchers when one entry is bad.
            errors.append((gamepk, f"Error processing pitcher {pitcher_id} in {team_side} team: {str(e)}"))
    return pitchers

def _add_player_columns(record, prefix, player):
    """Write one player's name, MLBAM id and bbref id into record under prefix."""
    record[f'{prefix}_Name'] = player['name']
    record[f'{prefix}_ID'] = player['mlbam_id']
    record[f'{prefix}_bbrefID'] = player['bbref_id']


def create_game_dataframe(gamepk):
    """Build a one-row DataFrame describing a single game.

    The row combines the score from the live feed, team metadata from
    ``game_pks_df``, the oddshark team ids, the starting batters of both
    sides and every pitcher listed in the boxscore.

    Args:
        gamepk: Game identifier; must match a ``game_id`` in ``game_pks_df``.

    Returns:
        A DataFrame with exactly one row, or an empty DataFrame when the
        feed cannot be fetched, the lineups or pitchers cannot be extracted,
        or the game id has no row in ``game_pks_df``. Every failure is
        appended to the module-level ``errors`` list rather than raised, so
        one bad game does not abort a batch run.
    """
    try:
        game_data = get_game_data(gamepk)
    except (requests.RequestException, ValueError) as e:
        # Network failures and undecodable bodies are logged, not raised.
        errors.append((gamepk, f"Error fetching game data: {str(e)}"))
        return pd.DataFrame()

    try:
        home_lineup = extract_starting_lineup(game_data, 'home')
        away_lineup = extract_starting_lineup(game_data, 'away')
    except Exception as e:
        errors.append((gamepk, f"Error extracting lineups: {str(e)}"))
        return pd.DataFrame()

    try:
        home_pitchers = get_pitchers(game_data, 'home')
        away_pitchers = get_pitchers(game_data, 'away')
    except Exception as e:
        errors.append((gamepk, f"Error extracting pitchers: {str(e)}"))
        return pd.DataFrame()

    # Get additional game information. Guard the lookup: .iloc[0] on an
    # empty selection raises IndexError when the id is not in the CSV.
    matching_rows = game_pks_df[game_pks_df['game_id'] == gamepk]
    if matching_rows.empty:
        errors.append((gamepk, "Game not found in game_pks.csv"))
        return pd.DataFrame()
    game_info = matching_rows.iloc[0]

    try:
        game_date = game_info['game_date']
    except KeyError:
        game_date = 'unknown'
        errors.append((gamepk, "Error getting game date"))

    try:
        runs_home = game_data['liveData']['linescore']['teams']['home']['runs']
    except KeyError:
        runs_home = 0
        errors.append((gamepk, "Error getting home runs"))

    try:
        runs_away = game_data['liveData']['linescore']['teams']['away']['runs']
    except KeyError:
        runs_away = 0
        errors.append((gamepk, "Error getting away runs"))

    runs_total = runs_home + runs_away

    # Get team information from game_pks.csv
    home_id = game_info['home_id']
    away_id = game_info['away_id']
    home_name = game_info['home_name']
    away_name = game_info['away_name']

    # Check for invalid team IDs; the row is still produced, with the
    # oddshark id recorded as 'unknown'.
    if home_id not in team_to_oddshark_id or away_id not in team_to_oddshark_id:
        print(f"Invalid team IDs - {home_name} {home_id} vs {away_name} {away_id}")

    home_oddshark_id = team_to_oddshark_id.get(home_id, 'unknown')
    away_oddshark_id = team_to_oddshark_id.get(away_id, 'unknown')

    # Insertion order below defines the CSV column order; keep it stable.
    game_record = {
        'game_id': gamepk, 'game_date': game_date, 'runs_home': runs_home, 'runs_away': runs_away, 'runs_total': runs_total,
        'home_id': home_id, 'home_name': home_name, 'away_id': away_id, 'away_name': away_name,
        'home_oddshark_id': home_oddshark_id, 'away_oddshark_id': away_oddshark_id
    }

    for i, player in enumerate(away_lineup, start=1):
        _add_player_columns(game_record, f'Away_Batter{i}', player)

    for i, player in enumerate(home_lineup, start=1):
        _add_player_columns(game_record, f'Home_Batter{i}', player)

    # Add starting pitchers: the first pitcher listed for each side.
    if home_pitchers:
        _add_player_columns(game_record, 'Home_SP', home_pitchers[0])

    if away_pitchers:
        _add_player_columns(game_record, 'Away_SP', away_pitchers[0])

    # Add remaining pitchers, numbered from 2 in order of listing.
    for i, pitcher in enumerate(home_pitchers[1:], start=2):
        _add_player_columns(game_record, f'Home_P_{i}', pitcher)

    for i, pitcher in enumerate(away_pitchers[1:], start=2):
        _add_player_columns(game_record, f'Away_P_{i}', pitcher)

    return pd.DataFrame([game_record])

def save_game_to_csv(gamepk):
    """Build the record for one game and write it to ``gamelogs/``.

    Args:
        gamepk: Game identifier passed to ``create_game_dataframe`` and used
            in the output file name ``gamelogs/game_<gamepk>.csv``.

    Returns:
        None. When no record could be built, nothing is written and a
        message is printed instead.
    """
    # Local import keeps this block self-contained; the file does not
    # import os at module level.
    import os

    game_df = create_game_dataframe(gamepk)
    if game_df.empty:
        print(f"No data exported for game {gamepk}")
        return

    # to_csv does not create missing parent directories, so make sure the
    # output folder exists before the first write.
    os.makedirs('gamelogs', exist_ok=True)
    file_name = f'gamelogs/game_{gamepk}.csv'
    game_df.to_csv(file_name, index=False)

# Game ids to process, taken from the game_id column of game_pks.csv.
all_games = pd.read_csv('game_pks.csv').game_id
# Example usage: only the last 50 rows are exported.
gamepks = all_games.tail(50)
count = 0
for count, gamepk in enumerate(gamepks, start=1):
    save_game_to_csv(gamepk)
    # Progress marker after every tenth game.
    if count % 10 == 0:
        print(count)

# Print collected errors
if not errors:
    print("No errors encountered.")
else:
    print("Errors encountered:")
    print(*errors, sep="\n")


Invalid team IDs - National League All-Stars 160 vs American League All-Stars 159
1830
1840
1850
1860
1870
1880
1890
1900
1910
1920
1930
1940
1950
1960
1970
1980
1990
2000
2010
2020
2030
2040
2050
2060
2070
2080
2090
2100
2110
2120
2130
2140
2150
2160
2170
2180
2190
2200
2210
2220
2230
2240
2250
2260
2270
2280
2290
2300
2310
2320
2330
2340
2350
2360
2370
2380
2390
2400
2410
2420
2430
2440
2450
2460
2470
2480
2490
2500
2510
2520
2530
2540
2550
2560
2570
2580
2590
2600
2610
2620
2630
2640
2650
2660
2670
2680
2690
2700
2710
2720
2730
2740
2750
2760
2770
2780
2790
2800
2810
2820
2830
2840
2850
2860
2870
2880
2890
2900
2910
2920
2930
2940
2950
2960
2970
2980
2990
3000
3010
3020
3030
3040
3050
3060
3070
3080
3090
3100
3110
3120
3130
3140
3150
3160
3170
3180
3190
3200
3210
3220
3230
3240
3250
3260
3270
3280
3290
3300
3310
3320
3330
3340
3350
3360
3370
3380
3390
3400
3410
3420
3430
3440
3450
3460
3470
3480
3490
3500
3510
3520
3530
3540
3550
3560
3570
3580
3590
3600
3610
3620
3630
3640
3650
366