In [1]:
# Goal:
# Which players steal the most bases?
# When do stolen bases happen?
# Scan for players that will help win this category.
# Compare matchup teams.

In [1]:
import configparser

# Build the settings parser for this notebook. Interpolation is disabled so
# that values containing '%' are handed back exactly as written in the file.
config = configparser.ConfigParser(interpolation=None)

# Deliberately left as the cell's final expression: read() returns the list of
# files it managed to parse, and the notebook echoes that list as cell output.
config.read(filenames="config.ini")

['config.ini']

In [2]:
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO
from datetime import datetime, timedelta, date
from fantasy_baseball.functions import read_csv, write_csv

# Calendar date at the moment this cell runs.
# NOTE(review): not referenced by any of the cells visible here.
TODAY = date.today()

# Location prefix for data files, taken from the [BASEBALL] section of the
# parsed config. It is later joined to a file name by plain string
# concatenation, so the configured value presumably needs to end with a path
# separator -- confirm against config.ini.
_baseball_settings = config['BASEBALL']
BB_DATA_LOCATION = _baseball_settings['BB_DATA_LOCATION']
# HTTP headers sent with every ESPN request made by this notebook.
ESPN_HEADERS = {
    'Connection': 'keep-alive',
    'Accept': 'application/json',
    # Desktop-browser User-Agent, split across lines only for readability;
    # the pieces concatenate into a single header value.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/133.0.0.0 Safari/537.36'
    ),
    # Fantasy-specific headers, currently disabled:
    # 'x-fantasy-filter': '{"filterActive":{"value":true}}',
    # 'x-fantasy-platform': 'kona-PROD-e812308c9c808f6b21dfc8c9d51d49c6ccb9f2ee',
    # 'x-fantasy-source': 'kona'
}
# Seasons to request, newest first: 2024 down to 2017 inclusive.
YEARS = [*range(2024, 2016, -1)]

# Offsets placed in the URL's `start` segment: 0, 50, ..., 450.
PAGE_START = [*range(0, 500, 50)]

# Values substituted into the URL's `sort` segment; the leaderboard is
# requested once per value for every season.
FILTER_TYPE = [
    'gamesPlayed',
    'atBats',
    'runs',
    'hits',
    'homeRuns',
    'RBIs',
    'stolenBases',
    'caughtStealing',
]

# Readable names for the positional (integer) keys of each scraped record.
# Positions 0-15 come from the parsed table; 16 and 17 are the season and the
# sort key that the scraping loop attaches to every record.
KEY_DCT = dict(enumerate([
    'RANK',
    'PLAYER',
    'YRS',
    'G',
    'AB',
    'R',
    'H',
    '2B',
    '3B',
    'HR',
    'RBI',
    'BB',
    'SO',
    'SB',
    'CS',
    'BA',
    'YEAR',
    'FILTER',
]))

In [3]:
# Seconds to wait for ESPN before giving up on one request. Without a timeout
# requests.get() can block forever and stall the whole scrape.
REQUEST_TIMEOUT_SECONDS = 30
# Pause before every request, to stay polite to the server.
REQUEST_DELAY_SECONDS = 0.5
# Player rows expected on a full page; matches the step used to build
# PAGE_START. A page with fewer player rows is treated as the last one.
PAGE_SIZE = 50
# Value found in column 1 of the header rows inside the scraped table.
HEADER_MARKER = 'PLAYER'


def _fetch_leader_rows(year, sort_key, start):
    """Download one leaderboard page and return its rows as dicts.

    Args:
        year: Season placed in the URL's ``year`` segment.
        sort_key: Stat name placed in the URL's ``sort`` segment.
        start: Offset placed in the URL's ``start`` segment.

    Returns:
        ``None`` when the page has no ``<table class="tablehead">``.
        Otherwise a list of dicts keyed by column position, with the first
        parsed row dropped and keys 16 / 17 set to ``year`` / ``sort_key``.
        Header rows (column 1 == 'PLAYER') are still present in the list.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    url = f'https://www.espn.com/mlb/history/leaders/_/breakdown/season/year/{year}/sort/{sort_key}/start/{start}'
    response = requests.get(url, headers=ESPN_HEADERS, timeout=REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', attrs={'class': 'tablehead'})
    if table is None:
        return None

    # [1:] skips the first parsed row, exactly as the original loop did.
    rows = pd.read_html(StringIO(str(table)))[0].to_dict('records')[1:]
    for row in rows:
        row[16] = year
        row[17] = sort_key
    return rows


# Raw rows (header rows included) for every season / sort key / page.
lst = []
for year in YEARS:
    # Named `sort_key` rather than `filter` so the builtin filter() is not
    # shadowed in the notebook namespace for later cells.
    for sort_key in FILTER_TYPE:
        for start in PAGE_START:
            time.sleep(REQUEST_DELAY_SECONDS)
            try:
                page_rows = _fetch_leader_rows(year, sort_key, start)
            except requests.RequestException as exc:
                # Best effort: one failed page should not discard everything
                # collected so far. Report it and move on to the next page.
                print(f'Request failed: ... ({year}) ... ({sort_key}) ... ({start}) ... {exc}')
                continue

            if page_rows is None:
                # Keep going to the next offset, but do not claim that data
                # was collected for this page.
                print(f'Table not found on the webpage: ... ({year}) ... ({sort_key}) ... ({start})')
                continue

            if page_rows:
                lst.extend(page_rows)
                print(f'Data collected for: ... ({year}, {sort_key}, {start})')

            # Count real player rows only; header rows would otherwise
            # inflate the count and hide a short (final) page.
            player_count = sum(1 for row in page_rows if row[1] != HEADER_MARKER)
            if player_count < PAGE_SIZE:
                # Empty or short page: nothing further to fetch for this
                # season / sort key, so skip the remaining offsets.
                break

# Drop the header rows and translate positional keys to column names.
clean_lst = [
    {KEY_DCT[position]: value for position, value in row.items()}
    for row in lst
    if row[1] != HEADER_MARKER
]

Data collected for: ... (2024, gamesPlayed, 0)
Data collected for: ... (2024, gamesPlayed, 50)
Data collected for: ... (2024, gamesPlayed, 100)
Data collected for: ... (2024, gamesPlayed, 150)
Data collected for: ... (2024, gamesPlayed, 200)
Data collected for: ... (2024, gamesPlayed, 250)
Data collected for: ... (2024, gamesPlayed, 300)
Data collected for: ... (2024, gamesPlayed, 350)
Data collected for: ... (2024, gamesPlayed, 400)
Data collected for: ... (2024, gamesPlayed, 450)
Data collected for: ... (2024, atBats, 0)
Data collected for: ... (2024, atBats, 50)
Data collected for: ... (2024, atBats, 100)
Data collected for: ... (2024, atBats, 150)
Data collected for: ... (2024, atBats, 200)
Data collected for: ... (2024, atBats, 250)
Data collected for: ... (2024, atBats, 300)
Data collected for: ... (2024, atBats, 350)
Data collected for: ... (2024, atBats, 400)
Data collected for: ... (2024, atBats, 450)
Data collected for: ... (2024, runs, 0)
Data collected for: ... (2024, runs,

In [4]:
# Save the cleaned leaderboard records as a CSV under the configured data
# location. The location is used as a plain string prefix for the file name.
leaders_csv_path = f'{BB_DATA_LOCATION}espn_player_stat_leaders.csv'
write_csv(file_path=leaders_csv_path, data=clean_lst)