In [1]:
import pandas as pd
import sys
import os
from pathlib import Path
import sqlite3

# Resolve the directory two levels above the current working directory and
# put it on sys.path so the project-local `src` package can be imported.
project_root = os.path.abspath('../../')
already_importable = project_root in sys.path
if not already_importable:
    sys.path.append(project_root)
    
from src.data.utils import DBConnection
from src.config import DBConfig

In [2]:
class NotebookDBConnection(DBConnection):
    """DBConnection bound to this project's ``data/pitcher_stats.db`` file.

    The database path is built from ``project_root`` so notebook cells can
    open a connection without spelling out the path each time.
    """

    def __init__(self):
        # Build <project_root>/data/pitcher_stats.db and hand it to the base class.
        database_file = os.path.join(project_root, 'data', 'pitcher_stats.db')
        super().__init__(db_name=database_file)

In [3]:
# Raise pandas' column display limit so wide query results are not elided.
MAX_DISPLAY_COLUMNS = 150
pd.set_option('display.max_columns', MAX_DISPLAY_COLUMNS)

In [4]:
# List the tables in the database by querying the sqlite_master catalog.
list_tables_sql = "SELECT name FROM sqlite_master WHERE type='table';"

with NotebookDBConnection() as conn:
    cursor = conn.cursor()
    cursor.execute(list_tables_sql)
    tables = cursor.fetchall()
    # The query selects a single column, so the name is each row's first field.
    table_names = [row[0] for row in tables]
    print(table_names)

['statcast_pitchers', 'statcast_batters', 'train_predictive_pitch_features', 'test_predictive_pitch_features', 'train_batter_predictive_features', 'test_batter_predictive_features', 'team_mapping', 'pitcher_mapping', 'team_season_features', 'prediction_features', 'team_batting', 'game_level_pitchers', 'game_level_batters', 'predictive_pitch_features', 'batter_predictive_features', 'train_combined_features', 'test_combined_features', 'mlb_api']


In [5]:
# Preview statcast_pitchers: the first five rows when sorted by game_date descending.
# pd.read_sql_query executes the statement on the connection itself, so no
# separate cursor is created here (the previous one was never used).
with NotebookDBConnection() as conn:
    df = pd.read_sql_query("SELECT * FROM statcast_pitchers ORDER BY game_date DESC LIMIT 5", conn)

df

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,spin_dir,spin_rate_deprecated,break_angle_deprecated,break_length_deprecated,zone,des,game_type,stand,p_throws,home_team,away_team,type,hit_location,bb_type,balls,strikes,game_year,pfx_x,pfx_z,plate_x,plate_z,on_3b,on_2b,on_1b,outs_when_up,inning,inning_topbot,hc_x,hc_y,tfs_deprecated,tfs_zulu_deprecated,umpire,sv_id,vx0,vy0,vz0,ax,ay,az,sz_top,sz_bot,hit_distance_sc,launch_speed,launch_angle,effective_speed,release_spin_rate,release_extension,game_pk,fielder_2,fielder_3,fielder_4,fielder_5,fielder_6,fielder_7,fielder_8,fielder_9,release_pos_y,estimated_ba_using_speedangle,estimated_woba_using_speedangle,woba_value,woba_denom,babip_value,iso_value,launch_speed_angle,at_bat_number,pitch_number,pitch_name,home_score,away_score,bat_score,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,spin_axis,delta_home_win_exp,delta_run_exp,bat_speed,swing_length,estimated_slg_using_speedangle,delta_pitcher_run_exp,hyper_speed,home_score_diff,bat_score_diff,home_win_exp,bat_win_exp,age_pit_legacy,age_bat_legacy,age_pit,age_bat,n_thruorder_pitcher,n_priorpa_thisgame_player_at_bat,pitcher_days_since_prev_game,batter_days_since_prev_game,pitcher_days_until_next_game,batter_days_until_next_game,api_break_z_with_gravity,api_break_x_arm,api_break_x_batter_in,arm_angle,pitcher_id,season
0,FC,2025-04-11,86.1,-2.19,6.14,"Bibee, Tanner",677951,676440,walk,blocked_ball,,,,,14.0,Bobby Witt Jr. walks.,R,R,R,CLE,KC,B,,,3,1,2025,0.8,-0.04,1.63,-0.33,666023.0,,,2,5,Top,,,,,,,7.545133,-124.896557,-9.030158,6.625834,27.631418,-30.818203,3.58,1.68,,,,86.7,2663.0,7.1,778375,595978,467793,672356,608070,677587,680757,682657,678877,53.43,,0.695181,0.7,1.0,0.0,0.0,,40,5,Cutter,3,0,0,3,0,3,0,3,Standard,Standard,70.0,-0.016,0.087,,,,-0.087,,3,-3,0.862,0.138,26,25,26,25,3,2,,,,,3.16,-0.8,-0.8,,676440,2025
1,FC,2025-04-11,85.2,-2.17,6.23,"Bibee, Tanner",677951,676440,,ball,,,,,14.0,Bobby Witt Jr. walks.,R,R,R,CLE,KC,B,,,2,1,2025,0.87,0.12,0.93,1.86,666023.0,,,2,5,Top,,,,,,,5.623052,-123.905826,-4.232735,7.858942,25.133811,-30.275752,3.58,1.68,,,,86.4,2662.0,7.1,778375,595978,467793,672356,608070,677587,680757,682657,678877,53.39,,,,,,,,40,4,Cutter,3,0,0,3,0,3,0,3,Infield shade,Standard,62.0,0.0,0.042,,,,-0.042,,3,-3,0.862,0.138,26,25,26,25,3,2,,,,,3.03,-0.87,-0.87,,676440,2025
2,FF,2025-04-11,95.9,-1.75,6.2,"Bibee, Tanner",677951,676440,,ball,,,,,14.0,Bobby Witt Jr. walks.,R,R,R,CLE,KC,B,,,1,1,2025,-0.06,1.46,2.02,1.9,666023.0,,,2,5,Top,,,,,,,10.250854,-138.951629,-9.116701,-3.248692,32.95142,-11.227116,3.53,1.69,,,,96.8,2342.0,7.2,778375,595978,467793,672356,608070,677587,680757,682657,678877,53.34,,,,,,,,40,3,4-Seam Fastball,3,0,0,3,0,3,0,3,Standard,Standard,201.0,0.0,0.047,,,,-0.047,,3,-3,0.862,0.138,26,25,26,25,3,2,,,,,1.05,0.06,0.06,,676440,2025
3,FC,2025-04-11,84.5,-2.25,6.14,"Bibee, Tanner",677951,676440,,foul_tip,,,,,6.0,Bobby Witt Jr. walks.,R,R,R,CLE,KC,S,,,1,0,2025,1.06,0.41,0.76,2.51,666023.0,,,2,5,Top,,,,,,,4.977695,-122.959944,-2.896887,9.647866,25.83797,-27.679332,3.52,1.68,,,,85.6,2617.0,7.1,778375,595978,467793,672356,608070,677587,680757,682657,678877,53.35,,,,,,,,40,2,Cutter,3,0,0,3,0,3,0,3,Standard,Standard,61.0,0.0,-0.039,71.6,7.8,,0.039,,3,-3,0.862,0.138,26,25,26,25,3,2,,,,,2.8,-1.06,-1.06,,676440,2025
4,FC,2025-04-11,86.1,-2.19,6.2,"Bibee, Tanner",677951,676440,,ball,,,,,14.0,Bobby Witt Jr. walks.,R,R,R,CLE,KC,B,,,0,0,2025,0.68,0.07,1.38,2.57,666023.0,,,2,5,Top,,,,,,,7.186869,-125.277826,-2.548674,5.801146,24.341261,-31.137674,3.51,1.66,,,,87.2,2719.0,6.9,778375,595978,467793,672356,608070,677587,680757,682657,678877,53.58,,,,,,,,40,1,Cutter,3,0,0,3,0,3,0,3,Infield shade,Standard,81.0,0.0,0.032,,,,-0.032,,3,-3,0.862,0.138,26,25,26,25,3,2,,,,,3.0,-0.68,-0.68,,676440,2025


In [6]:
# Read every row of the mlb_api table into a DataFrame.
mlb_api_sql = "SELECT * FROM mlb_api"

with NotebookDBConnection() as conn:
    df_pred = pd.read_sql_query(sql=mlb_api_sql, con=conn)

In [7]:
# Display the rows loaded from the mlb_api table.
df_pred

Unnamed: 0,gamePk,game_date,home_team_id,home_team_name,home_team_abbr,away_team_id,away_team_name,away_team_abbr,home_probable_pitcher_id,home_probable_pitcher_name,away_probable_pitcher_id,away_probable_pitcher_name
0,778348,2025-04-12,142,Minnesota Twins,MIN,116,Detroit Tigers,DET,663978,Chris Paddack,695549,Jackson Jobe
1,778347,2025-04-12,138,St. Louis Cardinals,STL,143,Philadelphia Phillies,PHI,571945,Miles Mikolas,650911,Cristopher Sánchez
2,778355,2025-04-12,147,New York Yankees,NYY,137,San Francisco Giants,SF,701542,Will Warren,663855,Jordan Hicks
3,778346,2025-04-12,133,Athletics,ATH,121,New York Mets,NYM,669372,J.T. Ginn,656849,David Peterson
4,778362,2025-04-12,110,Baltimore Orioles,BAL,141,Toronto Blue Jays,TOR,608372,Tomoyuki Sugano,670102,Bowden Francis
5,778349,2025-04-12,145,Chicago White Sox,CWS,111,Boston Red Sox,BOS,527048,Martín Pérez,690916,Richard Fitts
6,778353,2025-04-12,139,Tampa Bay Rays,TB,144,Atlanta Braves,ATL,656876,Drew Rasmussen,700363,AJ Smith-Shawver
7,778354,2025-04-12,146,Miami Marlins,MIA,120,Washington Nationals,WSH,645261,Sandy Alcantara,592866,Trevor Williams
8,778359,2025-04-12,114,Cleveland Guardians,CLE,118,Kansas City Royals,KC,682847,Luis L. Ortiz,547179,Michael Lorenzen
9,778357,2025-04-12,113,Cincinnati Reds,CIN,134,Pittsburgh Pirates,PIT,671096,Andrew Abbott,571760,Andrew Heaney
