In [4]:
# Lower bound (inclusive) on Player_CareerStatus.careerStartYear used by the
# Model_Players query in a later cell.
START_YEAR = 2005
# NOTE(review): not referenced by any cell visible here — presumably the most
# recent season covered by the data; confirm against the rest of the notebook.
CURRENT_YEAR = 2024

In [2]:
import sqlite3
from tqdm import tqdm

In [3]:
# Connection to the SQLite database file, resolved relative to the working
# directory. Later cells read and write through this `db` object.
db = sqlite3.connect("BaseballStats.db")

In [26]:
# Rebuild the Model_Players table from Player_CareerStatus and related tables.
# The whole rebuild (DELETE + INSERTs) runs in one transaction so that a failure
# part-way through leaves the previous table contents intact.

# Month value stored when the prospect window is known only to the year.
# NOTE(review): 13 is one past December, presumably meaning "through the end of
# that year" — confirm against the consumers of Model_Players.
_END_OF_YEAR_MONTH = 13

# Players whose estimated signing age is at or above this are discarded.
_MAX_SIGNING_AGE = 27


def _build_model_row(row):
    """Turn one query result row into the value tuple for Model_Players.

    Args:
        row: An 11-tuple in the SELECT column order used below: mlbId,
            position, agedOut, serviceEndYear, rookie year, rookie month,
            service-lapse year, birthYear, birthMonth, birthDate, signingYear.

    Returns:
        ``(mlbId, isHitter, isPitcher, lastProspectYear, lastProspectMonth,
        lastMLBSeason, signingAge)``, or ``None`` when the player's signing age
        is at or above ``_MAX_SIGNING_AGE`` and the player should be skipped.

    Raises:
        Exception: If no last MLB season or last prospect date can be derived,
            or if the signing year is missing.
    """
    (mlb_id, position, aged_out, service_end_year, rookie_year, rookie_month,
     service_lapse_year, birth_year, birth_month, birth_date, signing_year) = row

    # Any position other than "hitting" is treated as pitching.
    is_hitter = 1 if position == "hitting" else 0
    is_pitcher = 1 - is_hitter

    has_aged_out = aged_out is not None and aged_out != 0

    # Last MLB season: aged-out year, then service-lapse year, then service end.
    if has_aged_out:
        last_mlb_season = aged_out
    elif service_lapse_year is not None:
        last_mlb_season = service_lapse_year
    elif service_end_year is not None:
        last_mlb_season = service_end_year
    else:
        raise Exception(f"No valid last MLB season for id={mlb_id}")

    # Last prospect year/month: aged-out year, then the rookie-eligibility
    # year and month, then the service-lapse year.
    if has_aged_out:
        last_prospect_year = aged_out
        last_prospect_month = _END_OF_YEAR_MONTH
    elif rookie_year is not None and rookie_month is not None:
        last_prospect_year = rookie_year
        last_prospect_month = rookie_month
    elif service_lapse_year is not None:
        last_prospect_year = service_lapse_year
        last_prospect_month = _END_OF_YEAR_MONTH
    else:
        raise Exception(f"No valid last prospect season for id={mlb_id}")

    # The query does not filter out NULL signing years; fail with a clear
    # message instead of an opaque TypeError from the arithmetic below.
    if signing_year is None:
        raise Exception(f"No signing year for id={mlb_id}")

    # Age at signing.
    # Use 07/01/SigningYear for signing date.  Should try to get better data for this
    signing_date = signing_year + 0.5
    signing_age = signing_date - birth_year - (birth_month - 1) / 12 - (birth_date - 1) / 365
    if signing_age >= _MAX_SIGNING_AGE:  # Player will immediately be ineligible, so discard
        return None

    return (mlb_id, is_hitter, is_pitcher, last_prospect_year,
            last_prospect_month, last_mlb_season, signing_age)


db.rollback()  # Discard any transaction left open by an earlier failed run.
cursor = db.cursor()
cursor.execute("BEGIN TRANSACTION")
try:
    cursor.execute("DELETE FROM Model_Players")

    # NOTE(review): DISTINCT applies to the whole result row, not just mlbId.
    # If Player_RookieEligibility or Player_ServiceLapse can hold several rows
    # per player, this still yields (and inserts) several rows for that player.
    #
    # START_YEAR is bound as text so the comparison behaves exactly like the
    # quoted literal it replaces, without interpolating into the SQL string.
    data = cursor.execute('''
                          SELECT DISTINCT pcs.mlbId, pcs.position, pcs.agedOut, pcs.serviceEndYear, pre.year, pre.month, psl.year, p.birthYear, p.birthMonth, p.birthDate, p.signingYear
                          FROM Player_CareerStatus AS pcs
                          LEFT JOIN Player_RookieEligibility AS pre ON pcs.mlbId = pre.mlbId
                          LEFT JOIN Player_ServiceLapse AS psl ON pcs.mlbId = psl.mlbId
                          INNER JOIN Player AS p ON pcs.mlbId = p.mlbId
                          WHERE pcs.isPrimaryPosition='1'
                          AND pcs.careerStartYear>=?
                          AND (
                              pcs.serviceEndYear IS NOT NULL
                              OR pcs.agedOut>'0'
                              OR psl.year IS NOT NULL
                          )
                          AND pcs.ignorePlayer IS NULL
                          AND p.birthYear IS NOT NULL
                          AND p.birthMonth IS NOT NULL
                          AND p.birthDate IS NOT NULL
                          ''', (str(START_YEAR),)).fetchall()

    for row in tqdm(data):
        model_row = _build_model_row(row)
        if model_row is None:
            continue
        # NOTE(review): positional insert — assumes Model_Players has exactly
        # these seven columns in this order.
        cursor.execute("INSERT INTO Model_Players VALUES(?,?,?,?,?,?,?)", model_row)

    db.commit()
except BaseException:
    # Undo the DELETE and any partial inserts so the table is left as it was.
    db.rollback()
    raise

100%|██████████| 23715/23715 [00:00<00:00, 613420.16it/s]
