# LA Dodgers schedule
> This notebook downloads and processes the team's current schedule from [Major League Baseball](https://www.mlb.com/dodgers/schedule) and outputs the data to CSV, JSON and Parquet formats for later analysis and visualization.

---

#### Import Python tools and Jupyter config

In [2]:
import requests
import pandas as pd
import jupyter_black
from pandas import json_normalize
from datetime import datetime, timedelta

In [3]:
# Turn on Black auto-formatting for code cells in this notebook
jupyter_black.load()

# Loosen pandas display limits so wide, nested API data is shown in full
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)  # None = never truncate cell text

---

## Fetch

#### Function for date ranges in the request

In [4]:
def generate_date_ranges(start_date, end_date, delta_days=45):
    """Yield consecutive ``(range_start, range_end)`` windows covering a date span.

    Each window covers at most ``delta_days`` calendar days, counting both
    endpoints. The next window starts the day after the previous one ends, and
    the final window is clipped to ``end_date``. Nothing is yielded when
    ``end_date`` is earlier than ``start_date``.

    Args:
        start_date: First day to cover (``date`` or ``datetime``).
        end_date: Last day to cover, inclusive.
        delta_days: Maximum number of days per window; must be at least 1.

    Yields:
        ``(range_start, range_end)`` tuples, both ends inclusive.

    Raises:
        ValueError: If ``delta_days`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if delta_days < 1:
        # A zero or negative step would never move past end_date.
        raise ValueError("delta_days must be at least 1")

    step = timedelta(days=delta_days)
    current_date = start_date
    # Inclusive bound: a window that starts exactly on end_date (including the
    # start_date == end_date case) must still be emitted, otherwise the last
    # day of the span is silently skipped.
    while current_date <= end_date:
        yield (
            current_date,
            min(end_date, current_date + timedelta(days=delta_days - 1)),
        )
        current_date += step

#### Setup for API requests

In [5]:
# Request headers that present the client as Chrome 123 on macOS,
# with mlb.com as the referring site.
headers = {
    "sec-ch-ua": (
        '"Google Chrome";v="123", '
        '"Not:A-Brand";v="8", '
        '"Chromium";v="123"'
    ),
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": "https://www.mlb.com/",
    "sec-ch-ua-mobile": "?0",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/123.0.0.0 Safari/537.36"
    ),
    "sec-ch-ua-platform": '"macOS"',
}

In [6]:
# Schedule endpoint of the MLB Stats API; the fetch loop below sends its GET requests here
base_url = "https://statsapi.mlb.com/api/v1/schedule"

In [7]:
# Overall window to request: April 1 through September 30, 2024 (both inclusive)
# NOTE(review): any games dated before April 1 fall outside this window — confirm that is intended.
season_start = datetime(year=2024, month=4, day=1)
season_end = datetime(year=2024, month=9, day=30)

In [8]:
# Collect the per-date schedule entries for every date range in the season.
# Each request is best-effort: a failed range is reported and skipped so the
# remaining ranges are still fetched.
all_games = []
for start, end in generate_date_ranges(season_start, season_end):
    params = {
        "lang": "en",
        "sportIds": "1",
        "hydrate": "team(venue(timezone)),venue(timezone),game(seriesStatus,seriesSummary,tickets,promotions,sponsorships,content(summary,media(epg))),seriesStatus,seriesSummary,broadcasts(all),linescore,tickets,event(tickets,game,sport,league,status,xref),radioBroadcasts",
        # Derived from the configured window so the two cannot drift apart
        "season": str(season_start.year),
        "startDate": start.strftime("%Y-%m-%d"),
        "endDate": end.strftime("%Y-%m-%d"),
        "teamId": "119",  # LA Dodgers teamId
        "timeZone": "America/Los_Angeles",
        "eventTypes": "primary",
        "scheduleTypes": "games,events,xref",
    }
    try:
        # Without a timeout a stalled connection would hang the notebook indefinitely
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
    except requests.RequestException as error:
        # Connection errors and timeouts are treated like a bad status: report and move on
        print(f"Failed to fetch data for range {start} to {end}: {error}")
        continue
    if response.status_code == 200:
        # The payload keeps one entry per calendar date under "dates"
        games = response.json().get("dates", [])
        all_games.extend(games)
    else:
        print(f"Failed to fetch data for range {start} to {end}")

In [15]:
# `games` is the loop variable left over from the fetch cell above, so this shows
# only the "dates" entries from the last date range that was fetched successfully
# (the full collection lives in `all_games`).
games

[{'date': '2024-09-28',
  'totalItems': 1,
  'totalEvents': 0,
  'totalGames': 1,
  'totalGamesInProgress': 0,
  'games': [{'gamePk': 746499,
    'gameGuid': 'f49b8e0a-4e4b-4288-be2d-d15d03a30ac6',
    'link': '/api/v1.1/game/746499/feed/live',
    'gameType': 'R',
    'season': '2024',
    'gameDate': '2024-09-28T17:10:00-07:00',
    'officialDate': '2024-09-28',
    'status': {'abstractGameState': 'Preview',
     'codedGameState': 'S',
     'detailedState': 'Scheduled',
     'statusCode': 'S',
     'startTimeTBD': False,
     'abstractGameCode': 'P'},
    'teams': {'away': {'leagueRecord': {'wins': 29,
       'losses': 15,
       'pct': '.659'},
      'team': {'springLeague': {'id': 114,
        'name': 'Cactus League',
        'link': '/api/v1/league/114',
        'abbreviation': 'CL'},
       'allStarStatus': 'N',
       'id': 119,
       'name': 'Los Angeles Dodgers',
       'link': '/api/v1/teams/119',
       'season': 2024,
       'venue': {'id': 22,
        'name': 'Dodger Stad