Constant Parameters

In [1]:
# First season of the season range scanned by the player-list cell
# (only matters when ONLY_UPDATE_CURRENT_SEASON is False).
START_YEAR = 2005
# sportId values sent to the stats endpoint; each id is queried separately.
# NOTE(review): presumably 1 is MLB and 11-16 are minor-league levels — confirm against the API.
SPORT_IDS = [1,11,12,13,14,15,16]

Year Parameters to update

In [2]:
# Last season (inclusive) of both the player-list season range and the draft-year range.
CURRENT_SEASON = 2024
# When True, the player-list cell skips every season except CURRENT_SEASON.
ONLY_UPDATE_CURRENT_SEASON = True

Get List of Players to add to database

In [3]:
import requests
import sqlite3

In [4]:
# Open the SQLite database file; the path is relative to the kernel's working directory.
db = sqlite3.connect("BaseballStats.db")
# Shared cursor; later cells rebind `cursor` after calling db.rollback().
cursor = db.cursor()

In [None]:
# Collect (mlbId, position) pairs from the season stats endpoint and insert the
# ones that are not yet in the Player table.

# Discard any transaction left open by an earlier, interrupted run of this cell.
db.rollback()
cursor = db.cursor()

for year in range(START_YEAR, CURRENT_SEASON + 1):
    # In "current season only" mode every other season is skipped.
    if ONLY_UPDATE_CURRENT_SEASON and year != CURRENT_SEASON:
        continue

    for sportId in SPORT_IDS:
        playersToInsert = []
        # Pairs already queued for this sportId, so a player that appears twice
        # in one response is not inserted twice.
        queuedPlayers = set()

        for position in ["hitting","pitching"]:
            print(f"Getting players for Year={year} SportId={sportId} Position={position}")
            try:
                # A timeout keeps a stalled connection from hanging the cell forever.
                response = requests.get(f"https://bdfed.stitch.mlbinfra.com/bdfed/stats/player?stitch_env=prod&season={year}&sportId={sportId}&stats=season&group={position}&gameType=R&limit=5000&offset=0&sortStat=homeRuns&order=desc", timeout=60)
            except requests.RequestException as e:
                print(f"Request failed ({e}) for Year={year} and sportId={sportId} and position={position}")
                continue

            if response.status_code != 200:
                print(f"Code {response.status_code} for Year={year} and sportId={sportId} and position={position}")
                continue

            try:
                jsonPlayers = response.json().get("stats") or []
            except ValueError as e:
                # Body was not valid JSON; treat it like a failed request.
                print(f"Invalid JSON ({e}) for Year={year} and sportId={sportId} and position={position}")
                continue

            for player in jsonPlayers:
                playerId = player.get("playerId")
                if playerId is None:
                    # Best-effort: rows without an id are skipped, as before.
                    continue

                key = (playerId, position)
                if key in queuedPlayers:
                    continue

                # Parameterized lookup instead of string-built SQL. Database errors
                # now propagate instead of being silently swallowed.
                alreadyStored = cursor.execute(
                    "SELECT 1 FROM Player WHERE mlbId=? AND position=? LIMIT 1", key
                ).fetchone() is not None
                if alreadyStored:
                    continue

                queuedPlayers.add(key)
                playersToInsert.append(key)

        if playersToInsert:
            # The connection context manager commits on success and rolls back on error.
            with db:
                cursor.executemany("INSERT INTO Player('mlbId','position') VALUES(?,?)", playersToInsert)
        cursor = db.cursor()

Get Player Data for all players

In [8]:
from tqdm import tqdm
import threading

In [None]:
# Discard any transaction left open by an earlier, interrupted cell.
db.rollback()
cursor = db.cursor()

# Number of worker threads used to download player details.
NUM_THREADS = 16
# One independent result list per worker. A comprehension is required here:
# `[[]] * NUM_THREADS` would create NUM_THREADS references to the SAME list,
# so every worker's rows would show up in every slot.
threadOutputs = [[] for _ in range(NUM_THREADS)]
# Per-worker count of processed players, read by the progress display.
threadCompleteCounts = [0] * NUM_THREADS

# Players whose biographical columns have not been filled in yet.
unsetPlayers = cursor.execute("SELECT mlbId, position FROM Player WHERE birthYear IS NULL").fetchall()


In [None]:
def ReadPlayer(mlbId, position, threadIdx):
    """Download one player's profile and queue it for the database update.

    On success a tuple
    ``(mlbId, position, useFirstName, useLastName, bats, throws,
    birthYear, birthMonth, birthDate)`` is appended to
    ``threadOutputs[threadIdx]``. On any failure (network error, non-200
    status, missing field) a message is printed and nothing is appended.

    Args:
        mlbId: Player id interpolated into the people endpoint URL.
        position: Position label stored alongside the id; not sent to the API.
        threadIdx: Index of the calling worker's slot in ``threadOutputs``.

    Returns:
        None.
    """
    REQUEST_TIMEOUT_SECONDS = 30

    try:
        # The request is inside the try so a connection error or timeout skips
        # only this player instead of killing the worker thread.
        response = requests.get(
            f"https://statsapi.mlb.com/api/v1/people/{mlbId}?hydrate=currentTeam,team,stats(type=[yearByYear](team(league)),leagueListId=mlb_milb)&site=en",
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        print(f"Exception {e} for {mlbId}")
        return

    if response.status_code != 200:
        print(f"Status code {response.status_code} for {mlbId} {position}")
        return

    try:
        person = response.json()["people"][0]
        useFirstName = person["useName"]
        useLastName = person["useLastName"]
        bats = person["batSide"]["code"]
        throws = person["pitchHand"]["code"]
        # birthDate is split on "-" into exactly three parts; any other shape
        # raises and is reported by the handler below.
        birthYear, birthMonth, birthDate = person["birthDate"].split("-")
        threadOutputs[threadIdx].append((mlbId, position, useFirstName, useLastName, bats, throws, birthYear, birthMonth, birthDate))
    except Exception as e:
        # Deliberately broad: one malformed profile must not stop the worker.
        print(f"Exception {e} for {mlbId}")
        return
    
def ReadPlayers(threadIdx):
    """Worker entry point: process this thread's contiguous share of ``unsetPlayers``.

    The list is divided into ``NUM_THREADS`` slices using integer arithmetic,
    and worker ``threadIdx`` handles slice number ``threadIdx``. After each
    player is handled, the worker's entry in ``threadCompleteCounts`` is
    incremented.

    Args:
        threadIdx: Zero-based index of this worker.
    """
    sliceStart = threadIdx * len(unsetPlayers) // NUM_THREADS
    sliceStop = (threadIdx + 1) * len(unsetPlayers) // NUM_THREADS

    for playerId, playerPosition in unsetPlayers[sliceStart:sliceStop]:
        ReadPlayer(playerId, playerPosition, threadIdx)
        # Counted whether or not ReadPlayer managed to fetch the player.
        threadCompleteCounts[threadIdx] += 1

In [None]:
# Start one worker per slice of unsetPlayers.
threads = []
for i in range(NUM_THREADS):
    thread = threading.Thread(target=ReadPlayers, args=[i])
    threads.append(thread)
    thread.start()

progressBar = tqdm(total=len(unsetPlayers))

def UpdateTimer():
    """Redraw the progress bar from the per-thread completion counters."""
    progressBar.n = sum(threadCompleteCounts)
    progressBar.last_print_n = progressBar.n
    progressBar.refresh()

# Poll from the main thread instead of chaining threading.Timer callbacks:
# nothing keeps running if the cell is interrupted, and the bar always shows
# the final count before the cell finishes.
try:
    for thread in threads:
        while thread.is_alive():
            # Wake up every 5 seconds to refresh the display.
            thread.join(timeout=5.0)
            UpdateTimer()
finally:
    UpdateTimer()
    progressBar.close()

In [None]:
# Write the downloaded player details back to the Player table.

# Discard any transaction left open by an earlier, interrupted cell.
db.rollback()
cursor = db.cursor()

# Reorder each queued tuple into the parameter order of the UPDATE statement.
playerUpdates = [
    (birthYear, birthMonth, birthDate, bats, throws, useFirstName, useLastName, mlbId, position)
    for threadOutput in threadOutputs
    for mlbId, position, useFirstName, useLastName, bats, throws, birthYear, birthMonth, birthDate in threadOutput
]

# The connection context manager commits on success and rolls back if any
# statement raises, so a failure cannot leave a transaction open.
with db:
    cursor.executemany("UPDATE Player SET birthYear=?, birthMonth=?, birthDate=?, bats=?, throws=?, useFirstName=?,  useLastName=? WHERE mlbId=? AND position=?", playerUpdates)

Fetch Draft Position

In [None]:
# Download draft results year by year and record each drafted player's pick
# number and signing year in the Player table.

# Discard any transaction left open by an earlier, interrupted cell.
db.rollback()
cursor = db.cursor()

# Gather every (draftPick, year, mlbId) first so the write transaction is not
# held open while waiting on HTTP requests.
draftUpdates = []
for year in tqdm(range(2001, CURRENT_SEASON + 1), desc="Drafts"):
    try:
        response = requests.get(f"https://statsapi.mlb.com/api/v1/draft/{year}", timeout=30)
    except requests.RequestException as e:
        print(f"Exception {e} for draft {year}")
        continue

    if response.status_code != 200:
        print(f"Status code {response.status_code} for draft {year}")
        continue

    try:
        # Named draftJson so the stdlib module name `json` is not shadowed.
        draftJson = response.json()
        draftRounds = draftJson["drafts"]["rounds"]
    except (ValueError, KeyError, TypeError) as e:
        print(f"Exception {e} for draft {year}")
        continue

    for draftRound in draftRounds:
        for pick in draftRound.get("picks", []):
            try:
                draftPick = pick["pickNumber"]
                mlbId = pick["person"]["id"]
            except (KeyError, TypeError):
                # Best-effort: picks without a pick number or person id are skipped.
                continue
            draftUpdates.append((draftPick, year, mlbId))

# The signing date is hard-coded to month 7, day 1 of the draft year by the SQL.
# List order is preserved, so a later draft year overwrites an earlier one for
# the same player. The context manager commits on success, rolls back on error.
with db:
    cursor.executemany("UPDATE Player SET draftPick=?, signingYear=?, signingMonth='7', signingDate='1' WHERE mlbId=?", draftUpdates)