# Retrieve Player IDs
### This script pulls the name, mlbID, and bref ID for all players from a specified start date, to a specified end date.
* The batter dataset is stored as 'batter_ids.csv'
* The pitcher dataset is stored as 'pitcher_ids.csv'

In [83]:
import pandas as pd
from statsapi import player_stats
from pybaseball import playerid_lookup, batting_stats_range, pitching_stats_range, playerid_reverse_lookup
from datetime import datetime
import csv
import pprint

In [84]:
start_date = '2021-01-01'
start_season = 2021

today = datetime.now()
end_date = today.strftime('%Y-%m-%d')

season = today.year

In [86]:
batting = batting_stats_range(start_date, end_date)
pitching = pitching_stats_range(start_date, end_date)

In [87]:
batter_ids = batting[['Name', 'Tm', 'mlbID']]
# Add an empty column 'key_bbref' initialized with None
batter_ids['key_bbref'] = None

# Loop through each player ID and fetch their bbref ID
for idx, mlb_id in batter_ids.iterrows():

    try:
        # Fetch the data using the player ID
        batterdata = playerid_reverse_lookup([mlb_id['mlbID']], key_type='mlbam')
        if not batterdata.empty:
            bbref = batterdata.at[0, 'key_bbref']  # Extract the bbref ID from the returned DataFrame
            batter_ids.at[idx, 'key_bbref'] = bbref  # Assign the bbref ID to the respective row
        else:
                print(f"No BBref ID found for mlbID: {mlb_id['mlbID']}, {mlb_id['Name']} (Batter)")
    except Exception as e:
        print(f"Error processing mlbID: {mlb_id['mlbID']}, Error: {e}")


# Ensure mlbID is converted to integers
pitching['mlbID'] = pitching['mlbID'].astype(int)

# Add an empty column 'key_bbref' initialized with None
pitcher_ids = pitching[['Name', 'Tm', 'mlbID']].copy()
pitcher_ids['key_bbref'] = None

# Loop through each player ID and fetch their bbref ID
for idx, row in pitcher_ids.iterrows():
    mlb_id = row['mlbID']
    
    try:
        # Fetch the data using the player ID
        pitcherdata = playerid_reverse_lookup([mlb_id], key_type='mlbam')
        
        if not pitcherdata.empty:
            bbref = pitcherdata.iloc[0]['key_bbref']  # Extract the bbref ID from the returned DataFrame
            pitcher_ids.at[idx, 'key_bbref'] = bbref  # Assign the bbref ID to the respective row
        else:
            print(f"No BBref ID found for mlbID: {mlb_id}, {row['Name']} (Pitcher)")
    except Exception as e:
        print(f"Error processing mlbID: {mlb_id}, Error: {e}")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  batter_ids['key_bbref'] = None


No BBref ID found for mlbID: 669208, Ryan Bliss (Batter)
No BBref ID found for mlbID: 676113, Zach DeLoach (Batter)
No BBref ID found for mlbID: 671286, Johnathan Rodr\xc3\xadguez (Batter)
No BBref ID found for mlbID: 676679, Luis V\xc3\xa1zquez (Batter)
No BBref ID found for mlbID: 642520, Gerson Garabito (Pitcher)
No BBref ID found for mlbID: 687863, Porter Hodge (Pitcher)
No BBref ID found for mlbID: 672552, Jack O\'Loughlin (Pitcher)


In [88]:
batter_ids.to_csv('batter_ids.csv')
pitcher_ids.to_csv('pitcher_ids.csv')