In [1]:
import pandas as pd
import numpy as np
import os
import requests
from unidecode import unidecode

pd.set_option("display.max_columns", None)

In [2]:
# removes unnamed col
def removeUnnamedCol(df):
    # returns columns that do not start with Unnamed
    return df.loc[:, ~df.columns.str.startswith("Unnamed")]

In [3]:
# takes the player's ids and uses the player dic to create new cols with the player's name
def addPlayerNames(sav, ids_dict):
    sav["batter_name"] = sav["batter"].map(ids_dict)
    sav["pitcher_name"] = sav["pitcher"].map(ids_dict)
    return sav

In [4]:
# create a list of players that are not in the id dictionary
def findNanPlayers(sav):
    missing_pitchers = set(sav.loc[sav["pitcher_name"].isna(), "pitcher"])
    missing_batters = set(sav.loc[sav["batter_name"].isna(), "batter"])
    return list(missing_batters | missing_pitchers)

In [None]:
def getPlayerName_list(pidlist, ids_lookup_df):
    for pid in pidlist:
        url = f"https://statsapi.mlb.com/api/v1/people/{pid}"

        try:
            pagejson = requests.get(url).json()
            playername = pagejson.get("people")[0].get("fullName")
            print(f"Found name {playername} for ID {pid}")
            new_ids_df = pd.DataFrame({"MLBID": pid, "PLAYERNAME": playername}, ignore_index=True)
            ids_lookup_df = pd.concat([ids_lookup_df, new_ids_df])
        except:
            print(f"Found nothing for {pid}")
            pass
    
    ids_lookup_df = removeUnnamedCol(ids_lookup_df)
    ids_lookup_df.to_csv(f"data/ids.csv", index=False)

In [6]:
ids_df = pd.read_csv("data/ids.csv", index_col=False)
ids_df = removeUnnamedCol(ids_df)

# create a dictionary of ids and players
id_lookup_dict = dict(zip(ids_df.MLBID, ids_df.PLAYERNAME))

sav25 = pd.read_csv("data/sav25.csv", index_col=False)
sav25 = addPlayerNames(sav25, id_lookup_dict)
nan_list = findNanPlayers(sav25)
getPlayerName_list(nan_list, ids_df)

Found name Jeff Hoffman for ID 656546
Found nothing for 656546
Found name Aaron Judge for ID 592450
Found nothing for 592450


[656546, 592450]