In [1]:
# STEP 1 — Explore /teams endpoint

import requests

url = "https://statsapi.mlb.com/api/v1/teams"

# Upper bound, in seconds, on how long to wait for the server. requests has no
# default timeout, so without this a stalled connection blocks the cell forever.
REQUEST_TIMEOUT_SECONDS = 30

# Make the request
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

print("HTTP status:", response.status_code)  # 200 = OK

# Fail here on a 4xx/5xx reply so later cells never try to parse an error body.
response.raise_for_status()


HTTP status: 200


In [2]:
# STEP 2 — Decode the JSON body and look at its top-level structure

# Parse the response body as JSON; the result is bound to `data` for later cells
data = response.json()

# Iterating a dict yields its keys, so list(data) is the list of top-level keys
print("Top-level type:", type(data))
print("Top-level keys:", list(data))


Top-level type: <class 'dict'>
Top-level keys: ['copyright', 'teams']


In [4]:
# STEP 3 — Look inside the "teams" list

# Everything of interest sits under the "teams" key of the parsed payload
teams = data["teams"]

print("Type of teams:", type(teams))
print("Number of teams:", len(teams))

# Take the first record as a sample and list only its keys (values come later)
first_team = teams[0]
print("First team type:", type(first_team))
print("Keys in first team:", [*first_team])


Type of teams: <class 'list'>
Number of teams: 790
First team type: <class 'dict'>
Keys in first team: ['allStarStatus', 'id', 'name', 'link', 'season', 'venue', 'teamCode', 'fileCode', 'abbreviation', 'teamName', 'locationName', 'firstYearOfPlay', 'league', 'sport', 'shortName', 'parentOrgName', 'parentOrgId', 'franchiseName', 'clubName', 'active']


In [11]:
# STEP 4 — Filter to MLB teams and inspect results

# sport.id value that marks a team as Major League Baseball
MLB_SPORT_ID = 1

# Keep only the teams whose nested "sport" dict has id == MLB_SPORT_ID.
# `or {}` covers a record with no "sport" key AND one whose "sport" value is
# null; .get("sport", {}) alone would return None in the second case and the
# chained .get("id") would raise AttributeError.
mlb_teams = [
    t for t in teams
    if (t.get("sport") or {}).get("id") == MLB_SPORT_ID
]

# Summary: how many teams survived the filter, and the first few names
print("Number of MLB teams:", len(mlb_teams))
print("First 5 MLB teams:", [team["name"] for team in mlb_teams[:5]])


# The count is printed once more ahead of the per-team detail lines
print("Number of MLB teams:", len(mlb_teams))

# Peek at the first few teams
for team in mlb_teams[:5]:
    print(f"ID: {team['id']} | Name: {team['name']} | Abbreviation: {team['abbreviation']}")


Number of MLB teams: 30
First 5 MLB teams: ['Los Angeles Angels', 'Arizona Diamondbacks', 'Baltimore Orioles', 'Boston Red Sox', 'Chicago Cubs']
Number of MLB teams: 30
ID: 108 | Name: Los Angeles Angels | Abbreviation: LAA
ID: 109 | Name: Arizona Diamondbacks | Abbreviation: AZ
ID: 110 | Name: Baltimore Orioles | Abbreviation: BAL
ID: 111 | Name: Boston Red Sox | Abbreviation: BOS
ID: 112 | Name: Chicago Cubs | Abbreviation: CHC


In [13]:
# STEP 5 (prep) — Dump every key and value for a couple of MLB teams

from pprint import pprint  # multi-line rendering that keeps nested dicts readable

# Only the first two records are shown; banners are numbered from 1
preview_teams = mlb_teams[:2]
for i, team in enumerate(preview_teams, 1):
    print(f"\n--- MLB Team {i} ---")
    pprint(team)   # full dictionary, nested dicts included



--- MLB Team 1 ---
{'abbreviation': 'LAA',
 'active': True,
 'allStarStatus': 'N',
 'clubName': 'Angels',
 'division': {'id': 200,
              'link': '/api/v1/divisions/200',
              'name': 'American League West'},
 'fileCode': 'ana',
 'firstYearOfPlay': '1961',
 'franchiseName': 'Los Angeles',
 'id': 108,
 'league': {'id': 103, 'link': '/api/v1/league/103', 'name': 'American League'},
 'link': '/api/v1/teams/108',
 'locationName': 'Anaheim',
 'name': 'Los Angeles Angels',
 'season': 2025,
 'shortName': 'LA Angels',
 'sport': {'id': 1,
           'link': '/api/v1/sports/1',
           'name': 'Major League Baseball'},
 'springLeague': {'abbreviation': 'CL',
                  'id': 114,
                  'link': '/api/v1/league/114',
                  'name': 'Cactus League'},
 'springVenue': {'id': 2500, 'link': '/api/v1/venues/2500'},
 'teamCode': 'ana',
 'teamName': 'Angels',
 'venue': {'id': 1, 'link': '/api/v1/venues/1', 'name': 'Angel Stadium'}}

--- MLB Team 2 ---
{'ab

In [14]:
# STEP 6 — Extract useful fields (with division + league)

def _nested_name(record, key):
    """Return the "name" entry of the dict stored under `key` in `record`.

    Returns None when `key` is missing, when its value is null/empty, or when
    the nested dict has no "name". A plain .get(key, {}) only covers the
    missing-key case and would raise AttributeError on a null value.
    """
    return (record.get(key) or {}).get("name")


# One flat dict per MLB team. Key order is kept as-is because it determines
# the order of fields in the printed rows below.
clean_teams = [
    {
        "id": team["id"],                            # unique team ID
        "name": team["name"],                        # full team name
        "abbreviation": team["abbreviation"],        # short code (e.g., NYY)
        "division": _nested_name(team, "division"),  # nested dict -> name
        "league": _nested_name(team, "league"),      # nested dict -> name
        "location": team["locationName"],            # city/location
        "first_year": team["firstYearOfPlay"],       # kept exactly as the API returns it
    }
    for team in mlb_teams
]

# Peek at the first few results
for ct in clean_teams[:5]:
    print(ct)


{'id': 108, 'name': 'Los Angeles Angels', 'abbreviation': 'LAA', 'division': 'American League West', 'league': 'American League', 'location': 'Anaheim', 'first_year': '1961'}
{'id': 109, 'name': 'Arizona Diamondbacks', 'abbreviation': 'AZ', 'division': 'National League West', 'league': 'National League', 'location': 'Phoenix', 'first_year': '1996'}
{'id': 110, 'name': 'Baltimore Orioles', 'abbreviation': 'BAL', 'division': 'American League East', 'league': 'American League', 'location': 'Baltimore', 'first_year': '1901'}
{'id': 111, 'name': 'Boston Red Sox', 'abbreviation': 'BOS', 'division': 'American League East', 'league': 'American League', 'location': 'Boston', 'first_year': '1901'}
{'id': 112, 'name': 'Chicago Cubs', 'abbreviation': 'CHC', 'division': 'National League Central', 'league': 'National League', 'location': 'Chicago', 'first_year': '1874'}


In [15]:
# STEP 7 — Convert clean_teams into a DataFrame for a SILVER level clean

import pandas as pd

# One row per dict in clean_teams
df_teams = pd.DataFrame(data=clean_teams)

# Report (rows, columns), then leave the leading rows as the cell's displayed value
row_count, column_count = df_teams.shape
print("Shape:", (row_count, column_count))
df_teams.head(n=5)


Shape: (30, 7)


Unnamed: 0,id,name,abbreviation,division,league,location,first_year
0,108,Los Angeles Angels,LAA,American League West,American League,Anaheim,1961
1,109,Arizona Diamondbacks,AZ,National League West,National League,Phoenix,1996
2,110,Baltimore Orioles,BAL,American League East,American League,Baltimore,1901
3,111,Boston Red Sox,BOS,American League East,American League,Boston,1901
4,112,Chicago Cubs,CHC,National League Central,National League,Chicago,1874
