In [1]:
import sqlite3

In [2]:
# First and last season processed (the last one is included).
START_YEAR = 2005
CURRENT_SEASON = 2024

SPORT_IDS = [1, 11, 12, 13, 14, 15, 16]

# Month buckets: March is folded into April, October into September.
MONTHS = [4, 5, 6, 7, 8, 9]

In [3]:
# Open the SQLite database; the path is relative to the notebook's working directory.
db = sqlite3.connect("BaseballStats.db")
# Shared cursor used by the queries in the cells below.
cursor = db.cursor()

Get all League Factors

In [4]:
# Nested lookup: LeagueFactors[leagueId][year] -> {"RunFactor": ..., "HRFactor": ...}
LeagueFactors = {}
lfData = cursor.execute("SELECT LeagueId, Year, RunFactor, HRFactor FROM League_Factors").fetchall()
for leagueId, season, runFactor, homeRunFactor in lfData:
    # setdefault creates the per-league dict the first time a league is seen
    seasonsForLeague = LeagueFactors.setdefault(leagueId, {})
    seasonsForLeague[season] = {"RunFactor" : runFactor, "HRFactor" : homeRunFactor}

Get all Park Factors, each multiplied by its league's factor for the same year

In [5]:
# Nested lookup: ParkFactors[teamId][year] -> {"RunFactor": ..., "HRFactor": ...},
# where each park factor is multiplied by the matching league factor for that year.
ParkFactors = {}
pfData = cursor.execute("SELECT TeamId, LeagueId, Year, RunFactor, HRFactor FROM Park_Factors").fetchall()
for teamId, leagueId, season, parkRunFactor, parkHRFactor in pfData:
    seasonsForTeam = ParkFactors.setdefault(teamId, {})
    leagueFactor = LeagueFactors[leagueId][season]
    seasonsForTeam[season] = {
        "RunFactor" : parkRunFactor * leagueFactor["RunFactor"],
        "HRFactor" : parkHRFactor * leagueFactor["HRFactor"],
    }

Generate Month Statistics

In [6]:
from tqdm import tqdm

In [7]:
# Column list shared by every game-log query. The first 13 columns are the counting
# stats, in the same order they are written to Player_Pitcher_MonthStats; the last
# column (HomeTeamId) is the key used to look up the park factor.
_GAME_LOG_COLUMNS = 'battersFaced,outs,go,ao,r,er,h,k,bb,hbp,"2B","3B",HR,HomeTeamId'
_NUM_COUNTING_STATS = 13
# Factors applied when ParkFactors has no entry for a park/year.
_NEUTRAL_PARK = {"RunFactor" : 1, "HRFactor" : 1}


def _monthFilter(month, level):
    """Return (SQL comparison operator, month value) selecting the game logs that
    belong to this month bucket, or None when the bucket is skipped for this level."""
    if month == 4:
        return "<=", 4  # everything up to and including April
    if level == 16:
        if month == 8:
            return ">=", 8  # Rookie ball has few games after August, roll september into august
        if month > 8:
            return None  # already counted in the August bucket
    if month == 9:
        return ">=", 9  # September and everything after it
    return "=", month


def _aggregateGameLogs(gameLogs, year):
    """Collapse one bucket of game-log rows into a single stat line.

    gameLogs: non-empty list of rows selected with _GAME_LOG_COLUMNS.
    year: season used to look up ParkFactors.
    Returns a tuple: the 13 summed counting stats (in column order) followed by the
    batters-faced-weighted average run factor and HR factor (both 1 when no batters
    were faced).
    """
    totals = [sum(column) for column in zip(*(row[:_NUM_COUNTING_STATS] for row in gameLogs))]
    totalBF = totals[0]

    weightedRunFactor = 0
    weightedHRFactor = 0
    for row in gameLogs:
        battersFaced, homeTeamId = row[0], row[-1]
        try:
            parkFactor = ParkFactors[homeTeamId][year]
        except KeyError: # Not enough data on this park, treat it as neutral
            parkFactor = _NEUTRAL_PARK
        weightedRunFactor += battersFaced * parkFactor["RunFactor"]
        weightedHRFactor += battersFaced * parkFactor["HRFactor"]

    if totalBF > 0:
        return (*totals, weightedRunFactor / totalBF, weightedHRFactor / totalBF)
    return (*totals, 1, 1)


def _setProgress(progressBar, fraction):
    """Move the bar to the given fraction (0..1) of PROGRESS_TOTAL and redraw it."""
    progressBar.n = int(PROGRESS_TOTAL * fraction)
    progressBar.last_print_n = progressBar.n
    progressBar.refresh()


db.rollback() # discard any transaction left open by an earlier, interrupted run
cursor = db.cursor()
with db: # commits on success, rolls back if the statement raises
    cursor.execute("DELETE FROM Player_Pitcher_MonthStats")

PROGRESS_TOTAL = 1000
seasons = range(START_YEAR, CURRENT_SEASON + 1)

# The context manager closes the bar even if a query fails part-way through.
with tqdm(total=PROGRESS_TOTAL) as progressBar:
    for i, year in enumerate(seasons):
        playerLevels = cursor.execute("SELECT DISTINCT mlbId, Level FROM Player_Pitcher_GameLog WHERE Year=?", (year,)).fetchall()
        dbData = []
        for j, (mlbId, level) in enumerate(playerLevels):
            for month in MONTHS:
                monthFilter = _monthFilter(month, level)
                if monthFilter is None:
                    continue
                # The operator comes from the fixed set returned by _monthFilter;
                # every value is still bound as a parameter.
                comparison, boundaryMonth = monthFilter
                gameLogs = cursor.execute(
                    f'SELECT {_GAME_LOG_COLUMNS} FROM Player_Pitcher_GameLog WHERE mlbId=? AND Year=? AND Month{comparison}? AND Level=?',
                    (mlbId, year, boundaryMonth, level)).fetchall()

                if len(gameLogs) == 0:
                    continue
                dbData.append((mlbId, year, month, level, *_aggregateGameLogs(gameLogs, year)))

            # j + 1 so that finishing the last player of the last season reaches 100%
            _setProgress(progressBar, (i + (j + 1) / len(playerLevels)) / len(seasons))

        # One transaction per season: all of the season's rows are committed together.
        with db:
            cursor.executemany("INSERT INTO Player_Pitcher_MonthStats VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", dbData)
        # Also advances the bar for seasons that had no players at all.
        _setProgress(progressBar, (i + 1) / len(seasons))

100%|█████████▉| 999/1000 [01:14<00:00, 13.40it/s]