In [1]:
import statsapi   # import first

# Get the teams data
teams_data = statsapi.get("teams")

print("Type:", type(teams_data))          # dict
print("Top-level keys:", list(teams_data.keys()))


Type: <class 'dict'>
Top-level keys: ['copyright', 'teams']


In [2]:
# Extract the "teams" list from the dict
teams_list = teams_data["teams"]

print("Type of teams_list:", type(teams_list))   # list
print("Number of teams:", len(teams_list))

# Peek at first team keys
first_team = teams_list[0]
print("First team type:", type(first_team))
print("Keys in first team:", list(first_team.keys()))


Type of teams_list: <class 'list'>
Number of teams: 790
First team type: <class 'dict'>
Keys in first team: ['allStarStatus', 'id', 'name', 'link', 'season', 'venue', 'teamCode', 'fileCode', 'abbreviation', 'teamName', 'locationName', 'firstYearOfPlay', 'league', 'sport', 'shortName', 'parentOrgName', 'parentOrgId', 'franchiseName', 'clubName', 'active']


In [3]:
mlb_teams = []  # start with an empty list

for t in teams_list:                       # loop through every team
    sport_dict = t.get("sport", {})        # safely get the nested "sport" dict
    sport_id = sport_dict.get("id")        # then get the "id" inside sport
    
    if sport_id == 1:                      # keep only Major League Baseball (id=1)
        mlb_teams.append(t)                # add this team to the new list


print("Number of MLB teams:", len(mlb_teams))
print("First 5 MLB teams:", [team["name"] for team in mlb_teams[:5]])

for team in mlb_teams[:5]:
    print(f"ID: {team['id']} | Name: {team['name']} | Abbreviation: {team['abbreviation']}")


Number of MLB teams: 30
First 5 MLB teams: ['Los Angeles Angels', 'Arizona Diamondbacks', 'Baltimore Orioles', 'Boston Red Sox', 'Chicago Cubs']
ID: 108 | Name: Los Angeles Angels | Abbreviation: LAA
ID: 109 | Name: Arizona Diamondbacks | Abbreviation: AZ
ID: 110 | Name: Baltimore Orioles | Abbreviation: BAL
ID: 111 | Name: Boston Red Sox | Abbreviation: BOS
ID: 112 | Name: Chicago Cubs | Abbreviation: CHC


In [4]:
from pprint import pprint

for i, team in enumerate(mlb_teams[:2], start=1):
    print(f"\n--- MLB Team {i} ---")
    pprint(team)



--- MLB Team 1 ---
{'abbreviation': 'LAA',
 'active': True,
 'allStarStatus': 'N',
 'clubName': 'Angels',
 'division': {'id': 200,
              'link': '/api/v1/divisions/200',
              'name': 'American League West'},
 'fileCode': 'ana',
 'firstYearOfPlay': '1961',
 'franchiseName': 'Los Angeles',
 'id': 108,
 'league': {'id': 103, 'link': '/api/v1/league/103', 'name': 'American League'},
 'link': '/api/v1/teams/108',
 'locationName': 'Anaheim',
 'name': 'Los Angeles Angels',
 'season': 2025,
 'shortName': 'LA Angels',
 'sport': {'id': 1,
           'link': '/api/v1/sports/1',
           'name': 'Major League Baseball'},
 'springLeague': {'abbreviation': 'CL',
                  'id': 114,
                  'link': '/api/v1/league/114',
                  'name': 'Cactus League'},
 'springVenue': {'id': 2500, 'link': '/api/v1/venues/2500'},
 'teamCode': 'ana',
 'teamName': 'Angels',
 'venue': {'id': 1, 'link': '/api/v1/venues/1', 'name': 'Angel Stadium'}}

--- MLB Team 2 ---
{'ab

In [5]:
clean_teams = []

for team in mlb_teams:
    clean_team = {
        "id": team["id"],
        "name": team["name"],
        "abbreviation": team["abbreviation"],
        "division": team.get("division", {}).get("name"),
        "league": team.get("league", {}).get("name"),
        "location": team["locationName"],
        "first_year": team["firstYearOfPlay"]
    }
    clean_teams.append(clean_team)

for ct in clean_teams[:5]:
    print(ct)


{'id': 108, 'name': 'Los Angeles Angels', 'abbreviation': 'LAA', 'division': 'American League West', 'league': 'American League', 'location': 'Anaheim', 'first_year': '1961'}
{'id': 109, 'name': 'Arizona Diamondbacks', 'abbreviation': 'AZ', 'division': 'National League West', 'league': 'National League', 'location': 'Phoenix', 'first_year': '1996'}
{'id': 110, 'name': 'Baltimore Orioles', 'abbreviation': 'BAL', 'division': 'American League East', 'league': 'American League', 'location': 'Baltimore', 'first_year': '1901'}
{'id': 111, 'name': 'Boston Red Sox', 'abbreviation': 'BOS', 'division': 'American League East', 'league': 'American League', 'location': 'Boston', 'first_year': '1901'}
{'id': 112, 'name': 'Chicago Cubs', 'abbreviation': 'CHC', 'division': 'National League Central', 'league': 'National League', 'location': 'Chicago', 'first_year': '1874'}


In [6]:
import pandas as pd

df_teams = pd.DataFrame(clean_teams)

print("Shape:", df_teams.shape)
df_teams.head()


Shape: (30, 7)


Unnamed: 0,id,name,abbreviation,division,league,location,first_year
0,108,Los Angeles Angels,LAA,American League West,American League,Anaheim,1961
1,109,Arizona Diamondbacks,AZ,National League West,National League,Phoenix,1996
2,110,Baltimore Orioles,BAL,American League East,American League,Baltimore,1901
3,111,Boston Red Sox,BOS,American League East,American League,Boston,1901
4,112,Chicago Cubs,CHC,National League Central,National League,Chicago,1874


In [7]:
import os
print("CWD:", os.getcwd())


CWD: C:\users\sneal\Dev\mlb-data-pipeline\MLB Silver Tables from Endpoints


In [8]:
import sqlite3
from datetime import datetime

# Add metadata timestamps
df_teams["created_at"] = datetime.now()
df_teams["last_updated"] = datetime.now()

# Rename columns for consistency
df_teams = df_teams.rename(columns={
    "id": "team_id",
    "name": "team_name",
    "abbreviation": "team_abbr",
    "division": "division_name",
    "league": "league_name",
    "location": "location_name"
})

# Write to SQLite
db_path = r"C:\Users\sneal\Dev\mlb-data-pipeline\mlb_data.db"
conn = sqlite3.connect(db_path)

df_teams.to_sql("teams_silver", conn, if_exists="replace", index=False)

conn.close()

print(f"✅ teams_silver written: {len(df_teams)} rows")
print(df_teams.head(3))


✅ teams_silver written: 30 rows
   team_id             team_name team_abbr         division_name  \
0      108    Los Angeles Angels       LAA  American League West   
1      109  Arizona Diamondbacks        AZ  National League West   
2      110     Baltimore Orioles       BAL  American League East   

       league_name location_name first_year                 created_at  \
0  American League       Anaheim       1961 2025-09-09 10:55:59.265766   
1  National League       Phoenix       1996 2025-09-09 10:55:59.265766   
2  American League     Baltimore       1901 2025-09-09 10:55:59.265766   

                last_updated  
0 2025-09-09 10:55:59.268304  
1 2025-09-09 10:55:59.268304  
2 2025-09-09 10:55:59.268304  
