# LA Dodgers schedule
> This notebook downloads and processes the team's current schedule from [Major League Baseball](https://www.mlb.com/dodgers/schedule) and outputs the data to CSV, JSON and Parquet formats for later analysis and visualization.

---

#### Import Python tools and Jupyter config

In [1]:
import requests
import pandas as pd
import jupyter_black
from pandas import json_normalize
from datetime import datetime, timedelta

In [2]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

---

## Fetch

#### Function for date ranges in the request

In [3]:
def generate_date_ranges(start_date, end_date, delta_days=45):
    """Generate start and end dates for each API request within the allowed range."""
    current_date = start_date
    while current_date < end_date:
        yield (
            current_date,
            min(end_date, current_date + timedelta(days=delta_days - 1)),
        )
        current_date += timedelta(days=delta_days)

#### Setup for API requests

In [4]:
headers = {
    "sec-ch-ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": "https://www.mlb.com/",
    "sec-ch-ua-mobile": "?0",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "sec-ch-ua-platform": '"macOS"',
}

In [5]:
base_url = "https://statsapi.mlb.com/api/v1/schedule"

In [6]:
# Define the season's date range
season_start = datetime(2024, 4, 1)
season_end = datetime(2024, 9, 30)

In [None]:
# Collect data for all date ranges
all_games = []
for start, end in generate_date_ranges(season_start, season_end):
    params = {
        "lang": "en",
        "sportIds": "1",
        "hydrate": "team(venue(timezone)),venue(timezone),game(seriesStatus,seriesSummary,tickets,promotions,sponsorships,content(summary,media(epg))),seriesStatus,seriesSummary,broadcasts(all),linescore,tickets,event(tickets,game,sport,league,status,xref),radioBroadcasts",
        "season": "2024",
        "startDate": start.strftime("%Y-%m-%d"),
        "endDate": end.strftime("%Y-%m-%d"),
        "teamId": "119",  # LA Dodgers teamId
        "timeZone": "America/Los_Angeles",
        "eventTypes": "primary",
        "scheduleTypes": "games,events,xref",
    }
    response = requests.get(base_url, headers=headers, params=params)
    if response.status_code == 200:
        games = response.json().get("dates", [])
        all_games.extend(games)
    else:
        print(f"Failed to fetch data for range {start} to {end}")

In [None]:
for game in games[:1]:
    games_data = json_normalize(game["games"], sep="_")

In [26]:
games_df = games_data[
    [
        "gameDate",
        "venue_name",
        "teams_home_team_name",
        "teams_away_team_name",
        "teams_home_score",
        "teams_away_score",
        "teams_home_isWinner",
    ]
].copy()

KeyError: "['teams_home_score', 'teams_away_score', 'teams_home_isWinner'] not in index"

In [None]:
    games_df = games_data[
        [
            "gameDate",
            "venue_name",
            "teams_home_team_name",
            "teams_away_team_name",
            "teams_home_score",
            "teams_away_score",
            "teams_home_isWinner",
        ]
    ].copy()

    games_df.columns = [
        "Game_Date",
        "Venue",
        "Home_Team",
        "Away_Team",
        "Home_Score",
        "Away_Score",
        "Home_Win",
    ]

    # Convert 'Home Win' from boolean to string for clarity
    games_df["Home_Win"] = games_df["Home_Win"].apply(lambda x: "Win" if x else "Lose")

    # Convert gameDate to just the date part and local timezone adjustment if needed
    games_df["Game_Date"] = (
        pd.to_datetime(games_df["Game_Date"])
        .dt.tz_convert("America/Los_Angeles")
        .dt.strftime("%Y-%m-%d %H:%M:%S")
    )

KeyError: "['teams_home_score', 'teams_away_score', 'teams_home_isWinner'] not in index"

In [21]:
games_df

Unnamed: 0,Game_Date,Venue,Home_Team,Away_Team,Home_Score,Away_Score,Home_Win
0,2024-04-01 19:10:00,Dodger Stadium,Los Angeles Dodgers,San Francisco Giants,8,3,Win


---

## Export

#### Function to save dataframes with different formats and file extensions