# LA Dodgers schedule
> This notebook downloads and processes the team's current schedule from [Major League Baseball](https://www.mlb.com/dodgers/schedule) and outputs the data to CSV, JSON and Parquet formats for later analysis and visualization.

---

#### Import Python tools and Jupyter config

In [32]:
import requests
import pandas as pd
import jupyter_black
from pandas import json_normalize
from datetime import datetime, timedelta

In [33]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

---

## Fetch

#### Function for date ranges in the request

In [35]:
def generate_date_ranges(start_date, end_date, delta_days=45):
    """Generate start and end dates for each API request within the allowed range."""
    current_date = start_date
    while current_date < end_date:
        yield (
            current_date,
            min(end_date, current_date + timedelta(days=delta_days - 1)),
        )
        current_date += timedelta(days=delta_days)

#### Setup for API requests

In [36]:
headers = {
    "sec-ch-ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": "https://www.mlb.com/",
    "sec-ch-ua-mobile": "?0",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "sec-ch-ua-platform": '"macOS"',
}

In [37]:
base_url = "https://statsapi.mlb.com/api/v1/schedule"

In [38]:
# Define the season's date range
season_start = datetime(2024, 4, 1)
season_end = datetime(2024, 9, 30)

In [39]:
# Collect data for all date ranges
all_games = []
for start, end in generate_date_ranges(season_start, season_end):
    params = {
        "lang": "en",
        "sportIds": "1",
        "hydrate": "team(venue(timezone)),venue(timezone),game(seriesStatus,seriesSummary,tickets,promotions,sponsorships,content(summary,media(epg))),seriesStatus,seriesSummary,broadcasts(all),linescore,tickets,event(tickets,game,sport,league,status,xref),radioBroadcasts",
        "season": "2024",
        "startDate": start.strftime("%Y-%m-%d"),
        "endDate": end.strftime("%Y-%m-%d"),
        "teamId": "119",  # LA Dodgers teamId
        "timeZone": "America/Los_Angeles",
        "eventTypes": "primary",
        "scheduleTypes": "games,events,xref",
    }
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        games = response.json().get("dates", [])
        all_games.extend(games)
    else:
        print(f"Failed to fetch data for range {start} to {end}")

In [40]:
# Optionally convert to DataFrame
games_df = pd.json_normalize(
    all_games, record_path=["games"], meta=["date"], errors="ignore"
)

In [41]:
import pandas as pd


# Sample JSON Data (simulating the response you might get from the API)
data = {
    "date": "2024-04-01",
    "games": [
        {
            "gamePk": 746170,
            "link": "/api/v1.1/game/746170/feed/live",
            "gameType": "R",
            "season": "2024",
            "gameDate": "2024-04-01T19:10:00-07:00",
            "status": {
                "abstractGameState": "Final",
                "codedGameState": "F",
                "detailedState": "Final",
                "statusCode": "F",
                "startTimeTBD": False,
                "abstractGameCode": "F"
            },
            "teams": {
                "away": {
                    "score": 3,
                    "team": {
                        "id": 137,
                        "name": "San Francisco Giants",
                        "link": "/api/v1/teams/137"
                    },
                    "isWinner": False,
                    "splitSquad": False,
                    "seriesNumber": 2
                },
                "home": {
                    "score": 8,
                    "team": {
                        "id": 119,
                        "name": "Los Angeles Dodgers",
                        "link": "/api/v1/teams/119"
                    },
                    "isWinner": True,
                    "splitSquad": False,
                    "seriesNumber": 3
                }
            },
            "venue": {
                "id": 22,
                "name": "Dodger Stadium",
                "link": "/api/v1/venues/22"
            }
        }
    ]
}

# Using json_normalize to convert JSON to DataFrame
games_data = json_normalize(
    data['games'],
    sep='_'
)

# Selecting and renaming the necessary columns
games_df = games_data[[
    'gameDate',
    'venue_name',
    'teams_home_team_name',
    'teams_away_team_name',
    'teams_home_score',
    'teams_away_score',
    'teams_home_isWinner'
]].copy()

games_df.columns = [
    'Game Date',
    'Venue',
    'Home Team',
    'Away Team',
    'Home Score',
    'Away Score',
    'Home Win'
]

# Convert 'Home Win' from boolean to string for clarity
games_df['Home Win'] = games_df['Home Win'].apply(lambda x: 'Win' if x else 'Lose')

# Convert gameDate to just the date part and local timezone adjustment if needed
games_df['Game Date'] = pd.to_datetime(games_df['Game Date']).dt.tz_localize('UTC').dt.tz_convert('America/Los_Angeles').dt.strftime('%Y-%m-%d %H:%M:%S')

print(games_df)


TypeError: Already tz-aware, use tz_convert to convert.

In [None]:
print(games_df.head())  # Print the first few rows to check

# Save to CSV
games_df.to_csv("dodgers_2024_schedule.csv", index=False)

---

## Export

#### Function to save dataframes with different formats and file extensions

In [23]:
def save_dataframe(df, path_without_extension, formats):
    """
    Save DataFrames in multiple formats.
    """
    for file_format in formats:
        if file_format == "csv":
            df.to_csv(f"{path_without_extension}.{file_format}", index=False)
        elif file_format == "json":
            df.to_json(
                f"{path_without_extension}.{file_format}", indent=4, orient="records"
            )
        elif file_format == "parquet":
            df.to_parquet(f"{path_without_extension}.{file_format}", index=False)
        else:
            print(f"Unsupported format: {file_format}")

In [24]:
formats = ["csv", "json", "parquet"]
save_dataframe(
    players_full_df,
    f"../data/batting/dodgers_player_batting_1958_present",
    formats,
)
save_dataframe(
    team_full_df, f"../data/batting/dodgers_team_batting_1958_present", formats
)
save_dataframe(
    team_ranks_full_df,
    f"../data/batting/dodgers_team_batting_ranks_1958_present",
    formats,
)