In [None]:
from DBTypes import *
from DBEnums import *
import sqlite3
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Open the SQLite stats database (path is relative to the notebook's working directory).
# `cursor` is reused by the queries in the later cells.
db = sqlite3.connect('../Db/BaseballStats.db')
cursor = db.cursor()

In [None]:
# Plot colour (hex string) for every HitZone code this notebook knows about.
_ZONE_COLORS = {
    0: "#000000",
    1: "#C6AEFF",
    2: "#BDD8FF",
    3: "#BDFFB2",
    4: "#0ADFDC",
    5: "#FFC8AE",
    6: "#FF9FBB",
    7: "#980000",
    78: "#984500",
    8: "#a77f03",
    89: "#0c8900",
    9: "#004b83",
}


def colorMap(zone):
    """Return the hex colour used to draw hits for ``zone``.

    Known zone codes are 0-9 plus the combined codes 78 and 89. Any other
    value yields ``None``.
    """
    return _ZONE_COLORS.get(zone)
    

In [None]:
# Every distinct LeagueId present in Player_Hitter_MonthStats, ascending.
# fetchall() returns one 1-tuple per row, which is why later loops unpack with `for league, in leagues`.
leagues = cursor.execute("SELECT DISTINCT LeagueId FROM Player_Hitter_MonthStats ORDER BY LeagueId ASC").fetchall()

In [None]:
def IsGameSkewed(events : list[DB_GamePlayByPlay]) -> tuple[bool, int]:
    """Report whether a game's hit x-coordinates sit too far to the high-x side.

    For each HitZone 1-9 the mean ``HitCoordX`` of that zone's events is
    compared with a per-zone reference x plus a fixed tolerance. A zone with
    no events is treated as sitting exactly on its reference x, so it never
    counts as exceeding.

    Args:
        events: play-by-play events exposing ``HitZone`` and ``HitCoordX``.

    Returns:
        ``(is_skewed, n_zones_over)`` where ``n_zones_over`` is the number of
        zones whose mean x is strictly above its cutoff, and ``is_skewed`` is
        True when at least 5 zones are over.
    """
    # Reference x per zone; index 0 is zone 1, index 8 is zone 9.
    reference_xs = (125, 125, 160, 145, 90, 110, 60, 125, 190)
    tolerance = 30

    zones_over = 0
    for zone, reference_x in enumerate(reference_xs, start=1):
        xs = [ev.HitCoordX for ev in events if ev.HitZone == zone]
        mean_x = sum(xs) / len(xs) if xs else reference_x
        # Only shifts toward larger x are counted (one-sided check).
        if mean_x > reference_x + tolerance:
            zones_over += 1

    return (zones_over >= 5, zones_over)

In [None]:
def RemoveImpossibleLocations(events : list[DB_GamePlayByPlay]) -> list[DB_GamePlayByPlay]:
    """Return the events whose hit coordinates pass the location sanity check.

    An event is kept when ``HitCoordY + |HitCoordX - 125| < 230``; input order
    is preserved and the input list is not modified.
    """
    return [
        ev for ev in events
        if ev.HitCoordY + abs(ev.HitCoordX - 125) < 230
    ]

In [None]:
from sklearn.cluster import DBSCAN
from sklearn.ensemble import IsolationForest
import numpy as np
import random
import math

In [None]:
def GetHitAngle(hitEvent : DB_GamePlayByPlay) -> float:
    """Return the hit's angle in radians, measured around the point (125.42, 198.27).

    The angle is 0 for a hit at the same x with a smaller y than that point,
    positive for larger x and negative for smaller x.
    """
    x_offset = hitEvent.HitCoordX - 125.42
    # Flipped so that smaller y values give a positive offset.
    y_offset = 198.27 - hitEvent.HitCoordY
    return math.atan2(x_offset, y_offset)

In [None]:
def GetDistance(hitEvent : DB_GamePlayByPlay) -> float:
    """Return the Euclidean distance, in coordinate units, from (125.42, 198.27) to the hit location."""
    x_offset = hitEvent.HitCoordX - 125.42
    y_offset = 198.27 - hitEvent.HitCoordY
    return math.sqrt(x_offset ** 2 + y_offset ** 2)

In [None]:
# Stacked histogram of hit angles for one league-season (HitTrajectory=1 events),
# with one colour per HitZone 1-9.
hitEvents : list[DB_GamePlayByPlay] = DB_GamePlayByPlay.Select_From_DB(cursor, "WHERE HitZone IS NOT NULL AND HitHardness IS NOT NULL AND HitTrajectory=1 AND HitCoordX IS NOT NULL AND HitCoordY IS NOT NULL AND Year=? AND LeagueId=?", (2006, 113))
angles = [[] for _ in range(9)]      # angles[z - 1] holds the angles for zone z
distances = [[] for _ in range(9)]   # distances[z - 1] holds the distances for zone z
colors = [colorMap(i) for i in range(1, 10)]
for hitEvent in hitEvents:
    zone = hitEvent.HitZone
    # Only zones 1-9 have a bucket. colorMap also knows zones 0, 78 and 89:
    # without this guard zone 0 would index -1 and be counted as zone 9, and
    # 78/89 would raise IndexError.
    if not 1 <= zone <= 9:
        continue

    dist = GetDistance(hitEvent)
    distances[zone - 1].append(dist)

    # Angles are only histogrammed for hits more than 30 units from the origin
    # point, and only when they fall strictly inside (-pi/2, pi/2).
    if dist > 30:
        angle = GetHitAngle(hitEvent)
        if -math.pi / 2 < angle < math.pi / 2:
            angles[zone - 1].append(angle)

plt.hist(angles, 30, color=colors, histtype='bar', stacked=True, range=(-math.pi/4*1.1, math.pi/4*1.1))
plt.show()

In [None]:
from scipy import signal, ndimage

In [None]:
# Hit-probability map over (angle, distance) bins for one league-season of
# HitTrajectory=2 events, home runs excluded.
year = 2006
league = 130

hitEvents : list[DB_GamePlayByPlay] = DB_GamePlayByPlay.Select_From_DB(cursor, "WHERE HitZone IS NOT NULL AND HitHardness IS NOT NULL AND HitTrajectory=2 AND HitCoordX IS NOT NULL AND HitCoordY IS NOT NULL AND Year=? AND LeagueId=?", (year, league))
colors = [colorMap(i) for i in range(1, 10)]

range_angles = (-math.pi/4*1.05, math.pi/4*1.05)
range_dists = (60,180)

angle_size = 32
dist_size = 22

angle_edges = np.linspace(range_angles[0], range_angles[1], angle_size + 1)
dists_edges = np.linspace(range_dists[0], range_dists[1], dist_size + 1)

# Collect angle, distance and hit/out flag for every usable event.
angles = []
distances = []
hit_flags = []
hit_result_mask = PBP_Events.SINGLE | PBP_Events.DOUBLE | PBP_Events.TRIPLE | PBP_Events.HR
for hitEvent in hitEvents:
    if hitEvent.Result & PBP_Events.HR != 0:
        continue

    angle = GetHitAngle(hitEvent)
    if not (-math.pi / 2 < angle < math.pi / 2):
        continue

    angles.append(angle)
    distances.append(GetDistance(hitEvent))
    hit_flags.append((hitEvent.Result & hit_result_mask) != 0)

# Bin hits and outs with the same edges plt.hist2d uses below, so the counts
# and the probabilities refer to identical cells. np.histogram2d assigns each
# event to exactly one bin (a manual "edge <= v <= edge" search counts a value
# lying on a shared edge twice) and avoids a Python loop over every bin.
angles_arr = np.asarray(angles, dtype=float)
dists_arr = np.asarray(distances, dtype=float)
is_hit = np.asarray(hit_flags, dtype=bool)

Hits, _, _ = np.histogram2d(angles_arr[is_hit], dists_arr[is_hit], bins=(angle_edges, dists_edges))
Outs, _, _ = np.histogram2d(angles_arr[~is_hit], dists_arr[~is_hit], bins=(angle_edges, dists_edges))

fig, ax = plt.subplots(figsize=(18, 12))

# Smooth both count grids with the same 3x3 kernel. The kernel is not
# normalised, which is fine because only the Hits / (Hits + Outs) ratio is used.
kernel = ([[0.05, 0.2, 0.05],
           [0.2, 1, 0.2],
           [0.05, 0.2, 0.05]])

Hits = signal.convolve2d(Hits, kernel, mode='same', boundary='fill', fillvalue=0)
Outs = signal.convolve2d(Outs, kernel, mode='same', boundary='fill', fillvalue=0)

# Cells with no (smoothed) events divide 0 by 0; they are mapped to 100 below.
with np.errstate(divide='ignore', invalid='ignore'):
    Probs = (Hits * 100) / (Hits + Outs)
Probs = np.nan_to_num(Probs, nan=100, posinf=100, neginf=100)

H, xedges, yedges, im = plt.hist2d(x=angles, y=distances, bins=(angle_size,dist_size), range=(range_angles, range_dists))

# Annotate every non-empty bin with its raw event count and smoothed hit percentage.
max_count = H.max()
for i in range(len(xedges)-1):
    for j in range(len(yedges)-1):
        count = H[i, j]
        if count <= 0:  # optional: hide zero counts
            continue

        # Center of the bin
        x_center = (xedges[i] + xedges[i+1]) / 2
        y_center = (yedges[j] + yedges[j+1]) / 2

        prob = f"{Probs[i, j]:.0f}%"
        ax.text(
            x_center, y_center,
            f'{int(count)}\n{prob}',
            ha='center', va='center',
            color='black' if count < max_count/2 else 'white',  # good contrast
            fontsize=9,
            fontweight='bold' if count > 15 else 'normal'
        )

plt.show()

# Second figure: the probability grid itself (transposed so angle runs along x).
fig, ax = plt.subplots(figsize=(18, 12))

im = ax.imshow(
    Probs.T,
    cmap='viridis',
    interpolation='nearest',
    origin='upper'
)
# Flip the y axis so the first distance bin is drawn at the bottom.
ax.invert_yaxis()
plt.show()

In [None]:
# Groundball spray charts: one PNG per league with one panel per season.
# Games flagged by IsGameSkewed are dropped, impossible locations are removed,
# and within each HitZone only the events DBSCAN puts in cluster 0 are kept.
for league, in tqdm(leagues):
    years = cursor.execute("SELECT DISTINCT Year FROM Player_Hitter_MonthStats WHERE LeagueId=?", (league,)).fetchall()
    if not years:
        # Nothing to draw, and plt.subplots cannot build a grid with 0 rows.
        continue

    N = 4
    M = (len(years) + N - 1) // N
    # squeeze=False keeps axs two-dimensional even when M == 1; otherwise
    # axs[row, col] raises IndexError for leagues with four or fewer seasons.
    figs, axs = plt.subplots(M, N, sharex=False, sharey=False, figsize=(N * 4, M * 4), constrained_layout=True, squeeze=False)
    for y, (year,) in enumerate(years):
        gameIds = cursor.execute("SELECT DISTINCT gameId FROM Player_Hitter_GameLog WHERE Year=? AND LeagueId=?", (year, league)).fetchall()
        nonSkewedHitEvents = []
        for game, in gameIds:
            hitEvents : list[DB_GamePlayByPlay] = DB_GamePlayByPlay.Select_From_DB(cursor, "WHERE HitZone IS NOT NULL AND HitHardness IS NOT NULL AND HitTrajectory IS NOT NULL AND HitCoordX IS NOT NULL AND HitCoordY IS NOT NULL AND GameId=?", (game,))
            # Groundballs only; home runs are excluded.
            hitEvents = [he for he in hitEvents if (he.Result & PBP_Events.HR) == 0 and he.HitTrajectory == PBP_HitTrajectory.Groundball]
            if not IsGameSkewed(hitEvents)[0]:
                nonSkewedHitEvents += RemoveImpossibleLocations(hitEvents)

        nonOutlierEvents = []
        for i in range(1, 10):
            selectedEvents = [e for e in nonSkewedHitEvents if e.HitZone == i]
            if not selectedEvents:
                # DBSCAN.fit rejects an empty sample array.
                continue

            X = np.array([(he.HitCoordX, he.HitCoordY) for he in selectedEvents])
            # min_samples must be at least 1; len // 10 is 0 for zones with fewer than 10 events.
            min_samples = max(1, len(selectedEvents) // 10)
            clustering = DBSCAN(eps=20 if i <= 6 else 30, min_samples=min_samples).fit(X)
            # Keep only cluster 0. Noise (label -1) and any further clusters are dropped.
            for j, label in enumerate(clustering.labels_):
                if label == 0:
                    nonOutlierEvents.append(selectedEvents[j])

        # Shuffle so that no zone is systematically drawn on top of the others.
        random.shuffle(nonOutlierEvents)
        hit_event_x = [he.HitCoordX for he in nonOutlierEvents]
        hit_event_y = [he.HitCoordY for he in nonOutlierEvents]
        hit_event_colors = [colorMap(he.HitZone) for he in nonOutlierEvents]
        ax = axs[y // N, y % N]
        ax.set_aspect('equal')

        ax.set_xlim(0, 250)
        ax.set_ylim(250, 0)  # inverted y axis: y grows downward

        ax.scatter(hit_event_x, hit_event_y, s=1, c=hit_event_colors, edgecolors=hit_event_colors, zorder=10)

        ax.set_title(f"League={league}, Year={year}")

    figs.savefig(f"ModifiedSprayCharts/groundball-spray-{league}.png")
    # Release the figure; otherwise every league's figure stays open until the loop ends.
    plt.close(figs)

In [None]:
# Debug view for a single league-season: one scatter per HitZone with the
# events outside DBSCAN cluster 0 drawn in black, then a combined scatter of
# the kept events.
test_league = 123
test_year = 2012

# Alternative game selections kept for reference:
#gameIds = cursor.execute(f"SELECT DISTINCT GameId FROM GamePlayByPlay WHERE Year={test_year} AND LeagueId={test_league} AND HitZone=6 AND HitCoordX>160").fetchall()
#gameIds = cursor.execute(f"SELECT DISTINCT GameId FROM GamePlayByPlay WHERE Year={test_year} AND LeagueId={test_league}").fetchall()
# Use the variables above; the year and league were previously hard-coded in the
# SQL, so editing test_year / test_league had no effect.
gameIds = cursor.execute("SELECT DISTINCT gameId FROM Player_Hitter_GameLog WHERE Year=? AND LeagueId=?", (test_year, test_league)).fetchall()
nonSkewedHitEvents = []
for game, in gameIds:
    hitEvents : list[DB_GamePlayByPlay] = DB_GamePlayByPlay.Select_From_DB(cursor, "WHERE HitZone IS NOT NULL AND HitHardness IS NOT NULL AND HitTrajectory IS NOT NULL AND HitCoordX IS NOT NULL AND HitCoordY IS NOT NULL AND GameId=?", (game,))
    hitEvents = [he for he in hitEvents if (he.Result & PBP_Events.HR) == 0]
    if not IsGameSkewed(hitEvents)[0]:
        nonSkewedHitEvents += RemoveImpossibleLocations(hitEvents)

nonOutlierEvents = []
for i in range(1, 10):
    selectedEvents = [e for e in nonSkewedHitEvents if e.HitZone == i]
    if not selectedEvents:
        # Nothing to cluster or plot; DBSCAN.fit rejects an empty sample array.
        continue

    hit_event_x = [he.HitCoordX for he in selectedEvents]
    hit_event_y = [he.HitCoordY for he in selectedEvents]
    hit_event_colors = [colorMap(he.HitZone) for he in selectedEvents]

    X = np.array(list(zip(hit_event_x, hit_event_y)))
    # min_samples must be at least 1; len // 10 is 0 for zones with fewer than 10 events.
    min_samples = max(1, len(selectedEvents) // 10)
    clustering = DBSCAN(eps=20 if i <= 6 else 30, min_samples=min_samples).fit(X)
    # Cluster 0 is kept; noise (label -1) and any other cluster is painted black.
    for j, label in enumerate(clustering.labels_):
        if label != 0:
            hit_event_colors[j] = "#000000"
        else:
            nonOutlierEvents.append(selectedEvents[j])
    fig, ax = plt.subplots()
    ax.set_xlim(0, 250)
    ax.set_ylim(250, 0)  # inverted y axis: y grows downward
    ax.scatter(hit_event_x, hit_event_y, s=1, c=hit_event_colors, edgecolors=hit_event_colors, zorder=10)
    plt.show()

# Shuffle so that no zone is systematically drawn on top of the others.
random.shuffle(nonOutlierEvents)
hit_event_x = [he.HitCoordX for he in nonOutlierEvents]
hit_event_y = [he.HitCoordY for he in nonOutlierEvents]
hit_event_colors = [colorMap(he.HitZone) for he in nonOutlierEvents]
fig, ax = plt.subplots()
ax.set_xlim(0, 250)
ax.set_ylim(250, 0)
ax.scatter(hit_event_x, hit_event_y, s=1, c=hit_event_colors, edgecolors=hit_event_colors, zorder=10)
plt.show()

In [None]:
# Unfiltered spray charts: one PNG per league with one panel per season,
# each hit coloured by its HitZone.
for league, in tqdm(leagues):
    years = cursor.execute("SELECT DISTINCT Year FROM Player_Hitter_MonthStats WHERE LeagueId=?", (league,)).fetchall()
    if not years:
        # Nothing to draw, and plt.subplots cannot build a grid with 0 rows.
        continue

    N = 4
    M = (len(years) + N - 1) // N
    # squeeze=False keeps axs two-dimensional even when M == 1; otherwise
    # axs[row, col] raises IndexError for leagues with four or fewer seasons.
    figs, axs = plt.subplots(M, N, sharex=False, sharey=False, figsize=(N * 4, M * 4), constrained_layout=True, squeeze=False)
    for i, (year,) in enumerate(years):
        hitEvents : list[DB_GamePlayByPlay] = DB_GamePlayByPlay.Select_From_DB(cursor, "WHERE HitZone IS NOT NULL AND HitHardness IS NOT NULL AND HitTrajectory IS NOT NULL AND HitCoordX IS NOT NULL AND HitCoordY IS NOT NULL AND Year=? AND LeagueId=?", (year, league))
        # Drops events whose HitTrajectory is falsy (e.g. 0).
        # NOTE(review): confirm this is the intended filter.
        hitEvents = [he for he in hitEvents if he.HitTrajectory]
        hit_event_x = [he.HitCoordX for he in hitEvents]
        hit_event_y = [he.HitCoordY for he in hitEvents]
        hit_event_colors = [colorMap(he.HitZone) for he in hitEvents]
        ax = axs[i // N, i % N]
        ax.set_aspect('equal')

        ax.set_xlim(0, 250)
        ax.set_ylim(250, 0)  # inverted y axis: y grows downward

        ax.scatter(hit_event_x, hit_event_y, s=1, c=hit_event_colors, edgecolors=hit_event_colors, zorder=10)

        ax.set_title(f"League={league}, Year={year}")

    figs.savefig(f"SprayCharts/spray-{league}.png")
    # Release the figure; otherwise every league's figure stays open until the loop ends.
    plt.close(figs)