In [None]:
import requests
import pandas as pd
import datetime as dt
from google.colab import userdata
import time

## Extract, Process, and Transform the Data

In [None]:
# Read the API key from Colab's `userdata` store (key name 'MLB_API') so the
# key itself never appears in the notebook source.
MLB_API = userdata.get('MLB_API')

### Find Dodgers Team ID

In [None]:
# Request the list of MLB teams; the response is used to look up the Dodgers'
# TeamID and Key.
url = f"https://api.sportsdata.io/v3/mlb/scores/json/teams?key={MLB_API}"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail here, with the HTTP status, on a bad key or exhausted quota instead of
# letting an error payload reach the cells that iterate over `data`.
response.raise_for_status()
data = response.json()

In [None]:
# Lazily format one summary line per team, then print them in order so the
# Dodgers' TeamID and Key can be read off the listing.
team_summaries = (
    f"Team: {team['Name']} | ID: {team['TeamID']} | Key: {team['Key']}"
    for team in data
)
for summary in team_summaries:
    print(summary)

Team: Dodgers | ID: 1 | Key: LAD
Team: Reds | ID: 2 | Key: CIN
Team: Blue Jays | ID: 3 | Key: TOR
Team: Pirates | ID: 4 | Key: PIT
Team: Royals | ID: 5 | Key: KC
Team: Cubs | ID: 9 | Key: CHC
Team: Guardians | ID: 10 | Key: CLE
Team: Rays | ID: 11 | Key: TB
Team: Phillies | ID: 12 | Key: PHI
Team: Mariners | ID: 13 | Key: SEA
Team: Diamondbacks | ID: 14 | Key: ARI
Team: Giants | ID: 15 | Key: SF
Team: White Sox | ID: 16 | Key: CHW
Team: Tigers | ID: 17 | Key: DET
Team: Mets | ID: 18 | Key: NYM
Team: Orioles | ID: 19 | Key: BAL
Team: Twins | ID: 20 | Key: MIN
Team: Angels | ID: 21 | Key: LAA
Team: Marlins | ID: 22 | Key: MIA
Team: Rockies | ID: 23 | Key: COL
Team: Athletics | ID: 24 | Key: ATH
Team: Red Sox | ID: 25 | Key: BOS
Team: Braves | ID: 26 | Key: ATL
Team: Rangers | ID: 28 | Key: TEX
Team: Yankees | ID: 29 | Key: NYY
Team: Astros | ID: 30 | Key: HOU
Team: Cardinals | ID: 31 | Key: STL
Team: Brewers | ID: 32 | Key: MIL
Team: Padres | ID: 33 | Key: SD
Team: Nationals | ID: 35 | K

In [None]:
# Dodgers identifiers as they appear in the team listing ("Dodgers | ID: 1 | Key: LAD").
Dodgers_Team_ID = 1       # numeric TeamID
Dodgers_Team_Key = "LAD"  # team Key (abbreviation)

### Dodgers Team Roster

In [None]:
class DodgersRoster:
    """Fetch a team roster from sportsdata.io and shape it into a DataFrame.

    The roster is requested from the ``PlayersBasic/{team_key}`` endpoint and
    then cleaned by :meth:`modify_roster` (age derived from birth date, height
    rendered as feet/inches, inactive players dropped).
    """

    # Column order of the DataFrame returned by modify_roster().
    ROSTER_COLUMNS = [
        "Status", "Player ID", "First Name", "Last Name", "Age",
        "Birth Country", "Height", "Weight", "Jersey Number", "Position",
        "Bat Hand", "Throw Hand",
    ]

    def __init__(self, api_key, team_key="LAD"):
        """Store the request settings.

        Args:
            api_key: sportsdata.io API key used for the roster request.
            team_key: Team abbreviation placed in the request URL. Defaults to
                "LAD", the value that was previously hard-coded.
        """
        # Use the key passed by the caller; the constructor previously ignored
        # its argument and silently read the notebook-level MLB_API instead.
        self.api_key = api_key
        self.team_key = team_key

    def get_roster(self):
        """Download the roster and return the cleaned DataFrame.

        Returns:
            pandas.DataFrame produced by :meth:`modify_roster`.

        Raises:
            requests.HTTPError: if the API responds with an error status.
            KeyError: if a player record lacks one of the expected fields.
        """
        roster_url = (
            "https://api.sportsdata.io/v3/mlb/scores/json/PlayersBasic/"
            f"{self.team_key}?key={self.api_key}"
        )
        roster_response = requests.get(roster_url, timeout=30)
        # Surface bad-key / quota errors here rather than as a confusing
        # failure while iterating over an error payload below.
        roster_response.raise_for_status()
        roster_data = roster_response.json()

        dodgers_players = [
            {
                "Status": player["Status"],
                "Player ID": player["PlayerID"],
                "First Name": player["FirstName"],
                "Last Name": player["LastName"],
                "Birth Date": player["BirthDate"],
                "Birth Country": player["BirthCountry"],
                "Height": player["Height"],
                "Weight": player["Weight"],
                "Jersey": player["Jersey"],
                "Position": player["Position"],
                "Bat Hand": player["BatHand"],
                "Throw Hand": player["ThrowHand"],
            }
            for player in roster_data
        ]

        return self.modify_roster(dodgers_players)

    @staticmethod
    def _age_on(birth_date, today):
        """Return completed years between birth_date and today, or pd.NA."""
        if pd.isnull(birth_date):
            return pd.NA
        # Subtract one year when this year's birthday has not happened yet.
        birthday_pending = (today.month, today.day) < (birth_date.month, birth_date.day)
        return today.year - birth_date.year - birthday_pending

    @staticmethod
    def _format_height(inches):
        """Render a height in inches as feet'inches" (e.g. 77 -> 6'5"), or pd.NA."""
        if pd.isnull(inches):
            return pd.NA
        # Cast to int first: one missing height makes the whole column float,
        # which would otherwise render as 6.0'5.0".
        feet, remainder = divmod(int(inches), 12)
        return f"{feet}'{remainder}\""

    def modify_roster(self, dodgers_players):
        """Clean the raw roster records.

        Args:
            dodgers_players: list of dicts with the keys built in get_roster().

        Returns:
            pandas.DataFrame with the columns in ROSTER_COLUMNS, restricted to
            rows whose Status is "Active". The original row index is kept.
        """
        dodgers_players = pd.DataFrame(dodgers_players)

        dodgers_players["Birth Date"] = pd.to_datetime(dodgers_players["Birth Date"])
        today = dt.date.today()

        # Generate age of the players (nullable integer so missing birth dates survive)
        dodgers_players["Age"] = dodgers_players["Birth Date"].apply(
            lambda birth_date: self._age_on(birth_date, today)
        ).astype("Int64")

        # Convert Jersey number from float to nullable integer
        dodgers_players["Jersey"] = dodgers_players["Jersey"].astype("Int64")
        dodgers_players = dodgers_players.rename(columns={"Jersey": "Jersey Number"})

        # Convert the players height from inches to feet and inches
        dodgers_players["Height"] = dodgers_players["Height"].apply(self._format_height)

        # Drop players from the dataset who are not active
        dodgers_players = dodgers_players[dodgers_players["Status"] == "Active"]

        # Reorder columns
        return dodgers_players[self.ROSTER_COLUMNS]


In [None]:
# Build the roster client and fetch the cleaned roster DataFrame
# (get_roster() performs the HTTP request and returns modify_roster()'s result).
dodgers_roster = DodgersRoster(api_key=MLB_API)
dodgers_players = dodgers_roster.get_roster()

In [None]:
# Preview the first rows of the cleaned roster.
dodgers_players.head()

Unnamed: 0,Status,Player ID,First Name,Last Name,Age,Birth Country,Height,Weight,Jersey Number,Position,Bat Hand,Throw Hand
1,Active,10000077,Freddie,Freeman,35,USA,"6'5""",220,5,1B,L,R
2,Active,10000176,Mookie,Betts,32,USA,"5'9""",180,50,SS,R,R
3,Active,10000618,Clayton,Kershaw,36,USA,"6'4""",225,22,SP,L,L
4,Active,10000637,Enrique,Hernández,33,Puerto Rico,"5'11""",195,8,3B,R,R
5,Active,10000685,Miguel,Rojas,36,Venezuela,"6'0""",188,72,SS,R,R


### Dodgers Games Played in 2024

In [None]:
class DodgersGames:
    """Fetch the 2024 game list from sportsdata.io and keep one team's games.

    NOTE(review): games are matched on the CurrentPitchingTeamID /
    CurrentHittingTeamID fields. Confirm these identify the participating
    teams for completed games; HomeTeamID / AwayTeamID may be what is intended.
    """

    # Forecast columns converted to nullable integers in modify_games().
    WEATHER_COLUMNS = ["Low Temp", "High Temp", "Wind Chill", "Wind Speed", "Wind Direction"]

    def __init__(self, api_key, team_id=None):
        """Store the request settings.

        Args:
            api_key: sportsdata.io API key used for the games request.
            team_id: TeamID to filter on. When omitted, the notebook-level
                ``Dodgers_Team_ID`` is used, as before.
        """
        self.api_key = api_key
        self.team_id = team_id

    def get_games(self):
        """Download the 2024 games and return the filtered, cleaned DataFrame.

        Returns:
            pandas.DataFrame produced by :meth:`modify_games`.

        Raises:
            requests.HTTPError: if the API responds with an error status.
            KeyError: if a game record lacks one of the expected fields.
        """
        last_season_url = f"https://api.sportsdata.io/v3/mlb/scores/json/Games/2024?key={self.api_key}"
        last_season_response = requests.get(last_season_url, timeout=30)
        # Surface bad-key / quota errors here instead of failing later on an
        # error payload.
        last_season_response.raise_for_status()
        last_season_data = last_season_response.json()

        dodgers_last_season = [
            {
                "Game ID": game_stats["GameID"],
                "Game Date": game_stats["GameEndDateTime"],
                "Pitching Team ID": game_stats["CurrentPitchingTeamID"],
                "Hitting Team ID": game_stats["CurrentHittingTeamID"],
                "Low Temp": game_stats["ForecastTempLow"],
                "High Temp": game_stats["ForecastTempHigh"],
                "Wind Chill": game_stats["ForecastWindChill"],
                "Wind Speed": game_stats["ForecastWindSpeed"],
                "Wind Direction": game_stats["ForecastWindDirection"],
            }
            for game_stats in last_season_data
        ]

        return self.modify_games(dodgers_last_season)

    def modify_games(self, dodgers_last_season):
        """Filter the raw game records to the configured team and tidy them.

        Args:
            dodgers_last_season: list of dicts with the keys built in get_games().

        Returns:
            pandas.DataFrame with a fresh 0..n-1 index, "Game Date" formatted as
            MM-DD-YYYY strings, and the weather columns as nullable integers
            (missing forecasts become <NA>).
        """
        games = pd.DataFrame(dodgers_last_season)

        # Fall back to the notebook-level constant when no team was given.
        # Reading a global needs no `global` statement.
        team_id = self.team_id if self.team_id is not None else Dodgers_Team_ID

        # Keep games involving the team. `.copy()` gives an independent frame,
        # so the column assignments below do not write into a slice of `games`
        # (which triggers SettingWithCopyWarning).
        involves_team = (
            (games["Pitching Team ID"] == team_id) |
            (games["Hitting Team ID"] == team_id)
        )
        games = games[involves_team].copy()

        # Format the Game Date
        games["Game Date"] = pd.to_datetime(games["Game Date"]).dt.strftime("%m-%d-%Y")

        # Convert weather data columns to nullable integers; plain `int` raises
        # as soon as one forecast value is missing.
        games[self.WEATHER_COLUMNS] = games[self.WEATHER_COLUMNS].astype("Int64")

        # Reset index
        return games.reset_index(drop=True)


In [None]:
# Build the games client and fetch the 2024 games filtered to the Dodgers
# (get_games() performs the HTTP request and returns modify_games()'s result).
games_played = DodgersGames(api_key=MLB_API)
dodgers_last_season = games_played.get_games()

In [None]:
# Preview the first rows of the filtered games table.
dodgers_last_season.head()

Unnamed: 0,Game ID,Game Date,Pitching Team ID,Hitting Team ID,Low Temp,High Temp,Wind Chill,Wind Speed,Wind Direction
0,73694,03-21-2024,1.0,1.0,67,71,71,19,196
1,70824,04-05-2024,1.0,1.0,47,47,41,9,423
2,70853,04-07-2024,1.0,1.0,59,59,53,14,206
3,70895,04-10-2024,1.0,1.0,67,69,67,8,265
4,70921,04-13-2024,1.0,1.0,72,73,72,11,191


### Box Scores

In [None]:
class DodgersBoxScores:
    """Download final box scores per game and flatten them to player rows."""

    # Pause between requests, in seconds.
    REQUEST_DELAY_SECONDS = 1

    # Output column -> field name inside each "PlayerGames" record.
    # NOTE(review): "Innings Pitched" is taken from InningsPitchedFull only;
    # confirm whether partial innings are reported in a separate field that
    # should be added for ERA.
    FIELD_MAP = {
        "Game ID": "GameID",
        "Player ID": "PlayerID",
        "Name": "Name",
        "At Bats": "AtBats",
        "Hits": "Hits",
        "Home Runs": "HomeRuns",
        "RBI": "RunsBattedIn",
        "Walks": "Walks",
        "Strikeouts": "Strikeouts",
        "Doubles": "Doubles",
        "Triples": "Triples",
        "Earned Runs": "PitchingEarnedRuns",
        "Innings Pitched": "InningsPitchedFull",
        "Pitching Strikeouts": "PitchingStrikeouts",
        "Pitching Walks": "PitchingWalks",
    }

    def __init__(self, api_key):
        """Store the sportsdata.io API key used for box-score requests."""
        self.api_key = api_key

    def get_box_scores(self, game_ids=None):
        """Fetch the box score of every game and return the flattened table.

        Args:
            game_ids: iterable of GameID values to fetch. When omitted, the
                "Game ID" column of the notebook-level ``dodgers_last_season``
                DataFrame is used, as before.

        Returns:
            pandas.DataFrame produced by :meth:`modify_box_scores`.
        """
        if game_ids is None:
            # Read the ID column directly; iterating whole rows is unnecessary.
            game_ids = dodgers_last_season["Game ID"]

        all_box_scores = []

        for game_id in game_ids:
            box_scores_url = f"https://api.sportsdata.io/v3/mlb/stats/json/BoxScoreFinal/{game_id}?key={self.api_key}"
            box_scores_response = requests.get(box_scores_url, timeout=30)

            if box_scores_response.status_code == 200:
                all_box_scores.append(box_scores_response.json())
            else:
                # Still best-effort, but say which games are missing instead
                # of dropping them silently.
                print(f"Skipping game {game_id}: HTTP {box_scores_response.status_code}")

            time.sleep(self.REQUEST_DELAY_SECONDS)

        return self.modify_box_scores(all_box_scores)

    def modify_box_scores(self, all_box_scores):
        """Flatten raw box-score payloads into one row per player per game.

        Args:
            all_box_scores: list of decoded box-score payloads. Payloads that
                are not dicts, or whose "PlayerGames" entry is missing or
                empty, contribute no rows.

        Returns:
            pandas.DataFrame with the columns in FIELD_MAP (in that order).
            Fields absent from a player record become None. The columns are
            present even when there are no rows, so downstream merges on
            "Player ID" keep working.
        """
        team_box_scores = []

        for box_scores_data in all_box_scores:
            if not isinstance(box_scores_data, dict):
                continue
            # `or []` also covers an explicit null "PlayerGames" value.
            for player_game in box_scores_data.get("PlayerGames") or []:
                team_box_scores.append(
                    {column: player_game.get(field) for column, field in self.FIELD_MAP.items()}
                )

        return pd.DataFrame(team_box_scores, columns=list(self.FIELD_MAP))


In [None]:
# Keep the client and its result under separate names; previously `box_scores`
# was bound first to the DodgersBoxScores object and then to the DataFrame.
box_score_client = DodgersBoxScores(api_key=MLB_API)
box_scores = box_score_client.get_box_scores()

# Inner-join on "Player ID" against the roster so only rows for players present
# in `dodgers_players` remain.
dodgers_box_scores = box_scores.merge(dodgers_players[["Player ID"]], on="Player ID", how="inner")

In [None]:
# Preview the first rows of the roster-filtered box scores.
dodgers_box_scores.head()

Unnamed: 0,Game ID,Player ID,Name,At Bats,Hits,Home Runs,RBI,Walks,Strikeouts,Doubles,Triples,Earned Runs,Innings Pitched,Pitching Strikeouts,Pitching Walks
0,73694,10000077,Freddie Freeman,4.5,1.1,0.0,0.0,2.3,2.3,0.0,0.0,0.0,0.0,0.0,0.0
1,73694,10000176,Mookie Betts,5.6,4.5,1.1,6.8,1.1,0.0,1.1,0.0,0.0,0.0,0.0,0.0
2,73694,10001955,Max Muncy,6.8,2.3,0.0,0.0,0.0,3.4,1.1,0.0,0.0,0.0,0.0,0.0
3,73694,10006072,Teoscar Hernández,3.4,0.0,0.0,0.0,2.3,2.3,0.0,0.0,0.0,0.0,0.0,0.0
4,73694,10007125,Will Smith,6.8,4.5,0.0,2.3,0.0,1.1,1.1,0.0,0.0,0.0,0.0,0.0


## Calculate Player Stats

In [None]:
class PlayerStatsCalculator:
    """Aggregate per-game box-score rows into per-player totals and rate stats."""

    # Counting stats summed per player in group_players().
    COUNTING_STATS = [
        "At Bats", "Hits", "Home Runs", "RBI", "Walks", "Strikeouts",
        "Doubles", "Triples", "Earned Runs", "Innings Pitched",
        "Pitching Strikeouts", "Pitching Walks",
    ]

    def __init__(self, df):
        """Store the box-score frame and compute per-player totals.

        Args:
            df: DataFrame with a "Player ID" column and the COUNTING_STATS columns.
        """
        self.df = df
        self.dodgers_player_stats = self.group_players()

    def group_players(self):
        """Return one row per "Player ID" with every counting stat summed."""
        return (
            self.df.groupby("Player ID")
            .agg({stat: "sum" for stat in self.COUNTING_STATS})
            .reset_index()
        )

    @staticmethod
    def _safe_ratio(numerator, denominator, decimals):
        """Divide two Series, mapping NaN (0/0) and +/-inf (x/0) to 0, then round."""
        ratio = numerator / denominator
        return ratio.fillna(0).replace([float('inf'), float('-inf')], 0).round(decimals)

    def calculate_stats(self, rate_decimals=3, era_decimals=2):
        """Add Batting Average, Slugging Percentage, On-Base Percentage, OPS and ERA.

        Players with zero at bats (or zero innings pitched) get 0 for the
        corresponding rates instead of NaN/inf.

        Args:
            rate_decimals: decimals kept for AVG, SLG, OBP and OPS. Defaults to
                3, matching the precision shown in the notebook's output;
                rounding to 1 decimal collapsed e.g. .237 to 0.2.
            era_decimals: decimals kept for ERA. Defaults to 2.

        Returns:
            The per-player DataFrame (also kept as ``self.dodgers_player_stats``)
            with the five stat columns added or overwritten.
        """
        stats = self.dodgers_player_stats

        stats["Batting Average"] = self._safe_ratio(
            stats["Hits"], stats["At Bats"], rate_decimals
        )

        # Calculate Slugging Percentage: total bases / at bats, where
        # singles = hits - doubles - triples - home runs.
        singles = stats["Hits"] - stats["Doubles"] - stats["Triples"] - stats["Home Runs"]
        total_bases = (
            singles +
            (2 * stats["Doubles"]) +
            (3 * stats["Triples"]) +
            (4 * stats["Home Runs"])
        )
        stats["Slugging Percentage"] = self._safe_ratio(
            total_bases, stats["At Bats"], rate_decimals
        )

        # Calculate On-Base Percentage from the available columns only:
        # (hits + walks) / (at bats + walks).
        stats["On-Base Percentage"] = self._safe_ratio(
            stats["Hits"] + stats["Walks"],
            stats["At Bats"] + stats["Walks"],
            rate_decimals,
        )

        # Calculate OPS from the rounded components; round again so the sum
        # carries no floating-point tail.
        stats["OPS"] = (
            stats["On-Base Percentage"] + stats["Slugging Percentage"]
        ).round(rate_decimals)

        # Calculate ERA for pitchers: earned runs per nine innings.
        stats["ERA"] = self._safe_ratio(
            stats["Earned Runs"] * 9, stats["Innings Pitched"], era_decimals
        )

        # Returning the final stats
        return stats


In [None]:
# Sum the box-score rows per player (done in the constructor), then add the
# derived rate columns.
calculator = PlayerStatsCalculator(dodgers_box_scores)
dodgers_players_stats = calculator.calculate_stats()

In [None]:
# NOTE(review): this repeats the calculate_stats() call from the previous cell;
# it recomputes and overwrites the same columns, so the cell looks redundant.
dodgers_players_stats = calculator.calculate_stats()

In [None]:
# Preview the first rows of the per-player stats table.
dodgers_players_stats.head()

Unnamed: 0,Player ID,At Bats,Hits,Home Runs,RBI,Walks,Strikeouts,Doubles,Triples,Earned Runs,Innings Pitched,Pitching Strikeouts,Pitching Walks,Batting Average,Slugging Percentage,On-Base Percentage,OPS,ERA
0,10000077,264.9,62.7,2.9,16.8,37.4,57.5,11.8,0.0,0.0,0.0,0.0,0.0,0.237,0.314,0.331,0.645,0.0
1,10000176,231.4,67.4,7.3,33.2,17.5,34.8,14.6,1.0,0.0,0.0,0.0,0.0,0.291,0.458,0.341,0.799,0.0
2,10000618,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.3,10.4,10.3,4.0,0.0,0.0,0.0,0.0,6.32
3,10000637,156.7,28.8,3.5,14.9,7.5,31.0,3.9,0.0,4.8,3.8,0.0,0.0,0.184,0.276,0.221,0.497,11.37
4,10000685,135.1,30.7,3.5,11.7,16.0,18.7,6.2,0.0,0.0,0.6,0.0,0.0,0.227,0.351,0.309,0.66,0.0
