# Import Packages

In [2]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd

# Extracting Fangraphs data (Player Detail - Batting/Pitching)

In [25]:
def fetch_player_stats_batpit(
    first_name: str,
    last_name: str,
    playerid: str,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """
    Fetch a player's season-by-season stats from Fangraphs and return them as a DataFrame.

    The player's stats page embeds its data as JSON inside a
    ``<script id="__NEXT_DATA__">`` tag. This function downloads the page,
    decodes that JSON, and flattens the list stored under
    ``props -> pageProps -> dataStats -> data``.

    Args:
        first_name: Player's first name in lowercase (e.g. "munetaka")
        last_name:  Player's last name in lowercase (e.g. "murakami")
        playerid:   Player identifier on Fangraphs (e.g. "sa3063258")
        timeout:    Seconds to wait for the server before giving up.
                    Without a timeout, ``requests`` may block indefinitely.

    Returns:
        pandas.DataFrame: One row per record found in ``dataStats['data']``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout:   If the server does not respond within ``timeout``.
        ValueError:         If the page has no usable ``__NEXT_DATA__`` block
                            or the block is not valid JSON.
        KeyError:           If the JSON lacks the expected ``dataStats['data']`` path.
    """
    # 1. Build the URL and fetch the page (bounded wait so the kernel cannot hang)
    url = f'https://www.fangraphs.com/players/{first_name}-{last_name}/{playerid}/stats'
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()

    # 2. Parse the HTML and extract the __NEXT_DATA__ JSON block
    soup = BeautifulSoup(res.text, 'html.parser')
    script = soup.find('script', id='__NEXT_DATA__')
    # find() returns None when the tag is absent, and .string is None for an
    # empty tag; fail with a message that names the page instead of an
    # AttributeError/TypeError from the lines below.
    if script is None or not script.string:
        raise ValueError(f'No __NEXT_DATA__ JSON block found in page: {url}')
    data = json.loads(script.string)

    # 3. Retrieve the list of per-season records from the JSON
    records = data['props']['pageProps']['dataStats']['data']

    # 4. Normalize the records into a pandas DataFrame
    return pd.json_normalize(records)

In [26]:
# Target player: lowercase name parts plus the Fangraphs player ID
first_name, last_name, playerid = "munetaka", "murakami", "sa3063258"

# Download the batting/pitching records, then discard the "Season" and "Team" columns
df_player_batpit = (
    fetch_player_stats_batpit(first_name, last_name, playerid)
    .drop(columns=['Season', 'Team'])
)

# Preview the first five rows
df_player_batpit.head()

Unnamed: 0,ateam,AbbName,AbbLevel,Age,G,AB,PA,H,1B,2B,...,Hard%,CStr%,C+SwStr%,wBsR,type,aseason,leagueUrl,teamId,sortSeason,sortType
0,Swallows,YAK,NPB,18,6,12,14,1,0,0,...,0.142857,0.118644,0.338983,-0.00419,-2000,2018,all,8,2018,-2000
1,Swallows,YAK,NPB,19,143,511,593,118,62,20,...,0.30303,0.157649,0.304399,-0.747317,-2000,2019,all,8,2019,-2000
2,Swallows,YAK,NPB,20,120,424,515,130,70,30,...,0.33657,0.162631,0.283691,-0.062412,-2000,2020,all,8,2020,-2000
3,Swallows,YAK,NPB,21,143,500,615,139,73,27,...,0.369565,0.14826,0.26059,-0.419538,-2000,2021,all,8,2021,-2000
4,Swallows,YAK,NPB,22,141,487,612,155,77,21,...,0.37883,0.151127,0.27972,-0.292108,-2000,2022,all,8,2022,-2000


# Extracting Fangraphs data (Player Detail - Fielding)

In [22]:
def fetch_player_stats_fielding(
    first_name: str,
    last_name: str,
    playerid: str,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """
    Fetch a player's fielding records from Fangraphs and return them as a DataFrame.

    The player's stats page embeds its data as JSON inside a
    ``<script id="__NEXT_DATA__">`` tag. This function downloads the page,
    decodes that JSON, and flattens the list stored under
    ``props -> pageProps -> dataStats -> fielding``.

    Args:
        first_name: Player's first name in lowercase (e.g. "munetaka")
        last_name:  Player's last name in lowercase (e.g. "murakami")
        playerid:   Player identifier on Fangraphs (e.g. "sa3063258")
        timeout:    Seconds to wait for the server before giving up.
                    Without a timeout, ``requests`` may block indefinitely.

    Returns:
        pandas.DataFrame: One row per record found in ``dataStats['fielding']``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout:   If the server does not respond within ``timeout``.
        ValueError:         If the page has no usable ``__NEXT_DATA__`` block
                            or the block is not valid JSON.
        KeyError:           If the JSON lacks the expected ``dataStats['fielding']`` path.
    """
    # 1. Build the URL and fetch the page (bounded wait so the kernel cannot hang)
    url = f'https://www.fangraphs.com/players/{first_name}-{last_name}/{playerid}/stats'
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()

    # 2. Parse the HTML and extract the __NEXT_DATA__ JSON block
    soup = BeautifulSoup(res.text, 'html.parser')
    script = soup.find('script', id='__NEXT_DATA__')
    # find() returns None when the tag is absent, and .string is None for an
    # empty tag; fail with a message that names the page instead of an
    # AttributeError/TypeError from the lines below.
    if script is None or not script.string:
        raise ValueError(f'No __NEXT_DATA__ JSON block found in page: {url}')
    data = json.loads(script.string)

    # 3. Retrieve the list of fielding records from the JSON
    records = data['props']['pageProps']['dataStats']['fielding']

    # 4. Normalize the records into a pandas DataFrame
    return pd.json_normalize(records)

In [24]:
# Target player: lowercase name parts plus the Fangraphs player ID
first_name, last_name, playerid = "munetaka", "murakami", "sa3063258"

# Download the fielding records, then discard the "Season" and "Team" columns
df_player_fielding = (
    fetch_player_stats_fielding(first_name, last_name, playerid)
    .drop(columns=['Season', 'Team'])
)

# Preview the first five rows
df_player_fielding.head()

Unnamed: 0,ateam,AbbName,AbbLevel,Pos,apos,G,GS,Inn,PO,A,E,DP,SB,CS,PB,posnum,Type,aseason,teamId
0,Swallows (NPB),YAK,NPB,1B,1B,124,117,1024.0,950,60,5,92,0,0,0,3,-2000,2019,8
1,Swallows (NPB),YAK,NPB,1B,1B,94,71,655.333313,639,50,6,49,0,0,0,3,-2000,2020,8
2,Swallows (NPB),YAK,NPB,1B,1B,19,7,76.333336,75,8,0,7,0,0,0,3,-2000,2021,8
3,Swallows (NPB),YAK,NPB,1B,1B,3,0,5.0,4,1,0,0,0,0,0,3,-2000,2023,8
4,Swallows (NPB),YAK,NPB,1B,1B,15,1,23.333332,24,1,0,4,0,0,0,3,-2000,2024,8
