In [76]:
import statsapi
import pandas as pd
import numpy as np

<h3>Schedule API</h3>

In [94]:
# Request the schedule for the date range 01/01/2023 through 12/31/2024
sched = statsapi.schedule(
    start_date='01/01/2023',
    end_date='12/31/2024',
)

# Tabulate the returned records and save them to CSV without the row index
sched_df = pd.DataFrame(sched)
sched_df.to_csv('./test_output/schedule.csv', index=False)

In [14]:
# Rebuild the schedule frame from the raw API records, then show which
# columns the schedule endpoint provides
sched_df = pd.DataFrame(data=sched)
sched_df.columns

Index(['game_id', 'game_datetime', 'game_date', 'game_type', 'status',
       'away_name', 'home_name', 'away_id', 'home_id', 'doubleheader',
       'game_num', 'home_probable_pitcher', 'away_probable_pitcher',
       'home_pitcher_note', 'away_pitcher_note', 'away_score', 'home_score',
       'current_inning', 'inning_state', 'venue_id', 'venue_name',
       'national_broadcasts', 'series_status', 'winning_team', 'losing_team',
       'winning_pitcher', 'losing_pitcher', 'save_pitcher', 'summary',
       'losing_Team'],
      dtype='object')

In [108]:
# Number of distinct home team ids in the schedule; dropna=False keeps a
# missing id counted as its own value, matching len(Series.unique())
sched_df['home_id'].nunique(dropna=False)

32

<h3>Team API</h3>

In [104]:
def parse_json_field(field_dict):
    """Extract the ``id`` and ``name`` entries from a nested API dictionary.

    Parameters
    ----------
    field_dict : dict or missing value
        A dictionary expected to look like ``{'id': ..., 'name': ...}``. Any
        value that is not a dictionary (``None``, ``NaN``, a string, a list,
        ...) is treated as missing.

    Returns
    -------
    pd.Series
        Two-element Series ``[id, name]``. An entry is ``None`` when its key
        is absent or when no dictionary was provided.
    """
    # Anything that is not a dictionary carries no id/name to extract. Checking
    # the type (rather than pd.isna) also covers list-like values, for which
    # pd.isna returns an array that cannot be used as an `if` condition, and
    # strings, which would otherwise fail on the key lookup.
    if not isinstance(field_dict, dict):
        return pd.Series([None, None])

    # dict.get falls back to None for absent keys; the local is named
    # `field_id` so the built-in `id` is not shadowed.
    field_id = field_dict.get('id')
    name = field_dict.get('name')
    return pd.Series([field_id, name])

# Fetch the raw team records from the API and tabulate them
teams = statsapi.get('teams', params={'ver': 'v1'})['teams']
teams_df = pd.DataFrame(teams)

# Columns whose cells hold nested dictionaries shaped like {'id': ..., 'name': ...};
# each one is flattened into a pair of <field>_id / <field>_name columns
dict_fields = ['venue', 'league', 'division', 'sport', 'springLeague', 'springVenue']
for field in dict_fields:
    prefix = field.lower()
    flattened_cols = [f'{prefix}_id', f'{prefix}_name']
    teams_df[flattened_cols] = teams_df[field].apply(parse_json_field)

    # The nested column is redundant once its id and name have been pulled out
    teams_df = teams_df.drop(columns=field)

# Keep only MLB teams (sport_id 1 is Major League Baseball)
mlb_teams_df = teams_df.loc[teams_df['sport_id'] == 1]

# NOTE(review): the unfiltered teams_df (all sports) is what gets written here,
# not mlb_teams_df -- confirm this is intended before relying on teams.csv
teams_df.to_csv('./test_output/teams.csv', index=False)

Unnamed: 0,allStarStatus,id,name,link,season,teamCode,fileCode,abbreviation,teamName,locationName,...,league_id,league_name,division_id,division_name,sport_id,sport_name,springleague_id,springleague_name,springvenue_id,springvenue_name
32,N,114,Cleveland Guardians,/api/v1/teams/114,2024,cle,cle,CLE,Guardians,Cleveland,...,103.0,American League,202.0,American League Central,1,Major League Baseball,114.0,Cactus League,3834.0,
176,N,5374,ACL Guardians,/api/v1/teams/5374,2024,acg,t5374,A-GUA,ACL Guardians,Goodyear,...,121.0,Arizona Complex League,570.0,Arizona Complex League Central,16,Rookie,,,,
259,N,5506,DSL Guardians Red,/api/v1/teams/5506,2024,dgd,t5506,D-GUR,DSL Guardians Red,United States,...,130.0,Dominican Summer League,247.0,Dominican Summer League North,16,Rookie,,,,
498,N,616,DSL Guardians Blue,/api/v1/teams/616,2024,dgu,t616,D-GUB,DSL Guardians Blue,United States,...,130.0,Dominican Summer League,247.0,Dominican Summer League North,16,Rookie,,,,
