In [1]:
import sqlite3

In [2]:
# Seasons processed run from START_YEAR through CURRENT_SEASON, inclusive.
START_YEAR = 2005
CURRENT_SEASON = 2024

SPORT_IDS = [1, 11, 12, 13, 14, 15, 16]

# Months that get their own row: April (4) through September (9).
# March folded into April, October into September.
MONTHS = list(range(4, 10))

In [3]:
# Open the SQLite stats database; the cells below reuse this connection and cursor.
db = sqlite3.connect("BaseballStats.db")
cursor = db.cursor()

Get all League Factors

In [17]:
# Nested lookup built from League_Factors:
#   LeagueFactors[leagueId][year] -> {"RunFactor": ..., "HRFactor": ...}
LeagueFactors = {}
lfData = cursor.execute(
    "SELECT LeagueId, Year, RunFactor, HRFactor FROM League_Factors"
).fetchall()
for league, year, rFac, hrFac in lfData:
    # Create the per-league dict on first sight, then file this season under it.
    factorsByYear = LeagueFactors.setdefault(league, {})
    factorsByYear[year] = {"RunFactor": rFac, "HRFactor": hrFac}

Get all park factors, each multiplied by its league's factor for the same year

In [18]:
# Nested lookup built from Park_Factors:
#   ParkFactors[teamId][year] -> {"RunFactor": ..., "HRFactor": ...}
# Each stored value is the park's factor multiplied by the factor of the
# park's league for the same year.
ParkFactors = {}
pfData = cursor.execute(
    "SELECT TeamId, LeagueId, Year, RunFactor, HRFactor FROM Park_Factors"
).fetchall()
for team, league, year, rFac, hrFac in pfData:
    parkYears = ParkFactors.setdefault(team, {})
    # NOTE(review): raises KeyError when League_Factors has no row for this
    # league/year -- confirm every park row has a matching league row.
    leagueYear = LeagueFactors[league][year]
    parkYears[year] = {
        "RunFactor": rFac * leagueYear["RunFactor"],
        "HRFactor": hrFac * leagueYear["HRFactor"],
    }

Generate Month Statistics

In [19]:
from tqdm import tqdm

In [24]:
# Rebuild Player_Hitter_MonthStats from Player_Hitter_GameLog, one season at a time.
# Each output row is one (player, season, month, level) with summed counting stats,
# AB-weighted park factors, and a count of games per fielding position.
# Past seasons that already have rows are skipped; CURRENT_SEASON is always rebuilt.
db.rollback()  # discard any transaction left open by an earlier, interrupted run
cursor = db.cursor()

PROGRESS_TOTAL = 1000
years = range(START_YEAR, CURRENT_SEASON + 1)

# Comparison used against the Month column for each output month: month 4 also
# collects everything earlier and month 9 everything later (March folded into
# April, October into September); every other month matches exactly.
MONTH_OPERATORS = {4: "<=", 9: ">="}
GAME_LOG_SQL = {
    month: (
        'SELECT AB,H,"2B","3B",HR,K,BB,SB,CS,HBP,Position,HomeTeamId '
        "FROM Player_Hitter_GameLog "
        f"WHERE mlbId=? AND Year=? AND Month{MONTH_OPERATORS.get(month, '=')}? AND Level=?"
    )
    for month in MONTHS
}
# 25 values per row: 4 keys + 10 counting stats + 2 park factors + 9 position counts.
INSERT_SQL = "INSERT INTO Player_Hitter_MonthStats VALUES(" + ",".join("?" * 25) + ")"

with tqdm(total=PROGRESS_TOTAL) as progressBar:
    for i, year in enumerate(years):
        if year != CURRENT_SEASON:
            existingRows = cursor.execute(
                "SELECT COUNT(*) FROM Player_Hitter_MonthStats WHERE Year=?", (year,)
            ).fetchone()[0]
            if existingRows > 0:
                continue

        playerLevels = cursor.execute(
            "SELECT DISTINCT mlbId, Level FROM Player_Hitter_GameLog WHERE Year=?", (year,)
        ).fetchall()
        dbData = []
        for j, (mlbId, level) in enumerate(playerLevels):
            for month in MONTHS:
                gameLogs = cursor.execute(
                    GAME_LOG_SQL[month], (mlbId, year, month, level)
                ).fetchall()
                if len(gameLogs) == 0:
                    continue

                totalAb = 0
                totalH = 0
                total2B = 0
                total3B = 0
                totalHR = 0
                totalK = 0
                totalBB = 0
                totalHBP = 0
                totalSB = 0
                totalCS = 0
                # Slots 0-7 count games at positions 2-9; the last slot counts
                # every other position value.
                totalPositions = [0] * 9
                totalRunFactor = 0
                totalHRFactor = 0

                for ab, h, doubles, triples, hr, k, bb, sb, cs, hbp, position, homeTeamId in gameLogs:
                    totalAb += ab
                    totalH += h
                    total2B += doubles
                    total3B += triples
                    totalHR += hr
                    totalK += k
                    totalBB += bb
                    totalHBP += hbp
                    totalSB += sb
                    totalCS += cs
                    if 2 <= position <= 9:
                        totalPositions[position - 2] += 1
                    else:
                        totalPositions[-1] += 1

                    # Weight each game's park factor by its at-bats. Only a park
                    # with no factor for this season is treated as neutral (1);
                    # any other error is allowed to surface instead of being hidden.
                    parkYear = ParkFactors.get(homeTeamId, {}).get(year)
                    if parkYear is None:  # Not enough data on this park
                        totalRunFactor += ab
                        totalHRFactor += ab
                    else:
                        totalRunFactor += ab * parkYear["RunFactor"]
                        totalHRFactor += ab * parkYear["HRFactor"]

                if totalAb > 0:
                    totalRunFactor /= totalAb
                    totalHRFactor /= totalAb
                else:
                    # No at-bats means nothing to weight by; fall back to neutral.
                    totalRunFactor = 1
                    totalHRFactor = 1

                dbData.append((
                    mlbId, year, month, level,
                    totalAb, totalH, total2B, total3B, totalHR,
                    totalK, totalBB, totalSB, totalCS, totalHBP,
                    totalRunFactor, totalHRFactor,
                    *totalPositions,
                ))

            # Progress = seasons finished so far plus the fraction of this season's players done.
            progressBar.n = int(PROGRESS_TOTAL * (i + j / len(playerLevels)) / len(years))
            progressBar.last_print_n = progressBar.n
            progressBar.refresh()

        # Replace the season atomically: the connection context manager commits on
        # success and rolls back on error. Deleting first keeps a re-run of
        # CURRENT_SEASON from inserting its rows a second time.
        with db:
            cursor.execute("DELETE FROM Player_Hitter_MonthStats WHERE Year=?", (year,))
            cursor.executemany(INSERT_SQL, dbData)

    # Seasons that were skipped never advance the bar, so mark it complete here.
    progressBar.n = PROGRESS_TOTAL
    progressBar.last_print_n = progressBar.n
    progressBar.refresh()

  0%|          | 0/1000 [06:50<?, ?it/s]
  8%|▊         | 78/1000 [00:03<00:43, 21.17it/s]

KeyError: 2006

In [25]:
# Debug inspection: homeTeamId is the loop variable of the game-log aggregation
# loop, so this presumably shows the park being processed when that cell stopped.
homeTeamId

635

In [26]:
# Debug inspection: year is the season loop variable, so this presumably shows
# the season being processed when the month-stats cell stopped.
year

2006