In [25]:
import pandas as pd
import sys
import os
from pathlib import Path
import sqlite3

# Resolve the directory two levels above the current working directory and
# register it on the import path (once) so the project-local imports that
# follow can be found.
project_root = os.path.abspath('../../')
already_importable = project_root in sys.path
if not already_importable:
    sys.path.append(project_root)
    
from src.data.db import DBConnection, execute_query, get_pitcher_data
from config import DBConfig

In [2]:
class NotebookDBConnection(DBConnection):
    """DBConnection preconfigured for this notebook's database file.

    Takes no arguments: the path ``<project_root>/data/pitcher_stats.db`` is
    always passed to the parent initializer as ``db_name``.
    """

    def __init__(self):
        # Anchor the database location at project_root rather than spelling
        # out a relative path at every call site.
        super().__init__(
            db_name=os.path.join(project_root, 'data', 'pitcher_stats.db')
        )

In [3]:
# Show up to 150 columns when rendering DataFrames.
pd.options.display.max_columns = 150

In [26]:
# Query the sqlite_master catalog for the names of all tables in the database.
TABLE_LIST_SQL = "SELECT name FROM sqlite_master WHERE type='table';"

with NotebookDBConnection() as conn:
    cursor = conn.cursor()
    cursor.execute(TABLE_LIST_SQL)
    tables = cursor.fetchall()
    # Each fetched row holds one selected column (the table name); print
    # only the names.
    table_names = [row[0] for row in tables]
    print(table_names)

['pitcher_ids', 'sqlite_sequence', 'teams', 'pitcher_mapping', 'statcast_pitchers', 'team_batting', 'statcast_batters']


In [27]:
# Queries for the working frames. The two statcast tables are capped at
# 25,000 rows each; team_batting is read in full.
BATTERS_SQL = "SELECT * FROM statcast_batters LIMIT 25000;"
PITCHERS_SQL = "SELECT * FROM statcast_pitchers LIMIT 25000;"
TEAM_BATTING_SQL = "SELECT * FROM team_batting;"

# Read all three frames through one connection.
with NotebookDBConnection() as conn:
    df_batters = pd.read_sql_query(BATTERS_SQL, conn)
    df_pitchers = pd.read_sql_query(PITCHERS_SQL, conn)
    df_team_batting = pd.read_sql_query(TEAM_BATTING_SQL, conn)
    

In [29]:
# Preview the first five rows of the pitcher-level frame.
df_pitchers.head(n=5)

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,spin_dir,spin_rate_deprecated,break_angle_deprecated,break_length_deprecated,zone,des,game_type,stand,p_throws,home_team,away_team,type,hit_location,bb_type,balls,strikes,game_year,pfx_x,pfx_z,plate_x,plate_z,on_3b,on_2b,on_1b,outs_when_up,inning,inning_topbot,hc_x,hc_y,tfs_deprecated,tfs_zulu_deprecated,umpire,sv_id,vx0,vy0,vz0,ax,ay,az,sz_top,sz_bot,hit_distance_sc,launch_speed,launch_angle,effective_speed,release_spin_rate,release_extension,game_pk,fielder_2,fielder_3,fielder_4,fielder_5,fielder_6,fielder_7,fielder_8,fielder_9,release_pos_y,estimated_ba_using_speedangle,estimated_woba_using_speedangle,woba_value,woba_denom,babip_value,iso_value,launch_speed_angle,at_bat_number,pitch_number,pitch_name,home_score,away_score,bat_score,fld_score,post_away_score,post_home_score,post_bat_score,post_fld_score,if_fielding_alignment,of_fielding_alignment,spin_axis,delta_home_win_exp,delta_run_exp,bat_speed,swing_length,estimated_slg_using_speedangle,delta_pitcher_run_exp,hyper_speed,home_score_diff,bat_score_diff,home_win_exp,bat_win_exp,age_pit_legacy,age_bat_legacy,age_pit,age_bat,n_thruorder_pitcher,n_priorpa_thisgame_player_at_bat,pitcher_days_since_prev_game,batter_days_since_prev_game,pitcher_days_until_next_game,batter_days_until_next_game,api_break_z_with_gravity,api_break_x_arm,api_break_x_batter_in,arm_angle,pitcher_id,season
0,FC,2021-10-08,97.0,-1.01,5.95,"Burnes, Corbin",645277,669203,strikeout,called_strike,,,,,1.0,Ozzie Albies called out on strikes.,D,L,R,MIL,ATL,S,2.0,,3,2,2021,0.19,1.27,-0.74,3.2,,,,2,6,Top,,,,,,,0.279137,-141.353044,-4.813216,2.500205,28.209943,-14.214561,3.19,1.54,,,,98.1,2658.0,6.5,660917,553882,642133,543939,500871,642715,592885,456715,541645,53.96,,0.0,0.0,1.0,0.0,0.0,,38,7,Cutter,0,0,0,0,0,0,0,0,Infield shift,Standard,194.0,0.016,-0.348,,,,0.348,,0,0,0.558,0.442,26,24,27,24,3,2,6.0,5.0,,1.0,1.12,-0.19,0.19,39.2,669203,2021
1,CU,2021-10-08,83.9,-0.64,6.14,"Burnes, Corbin",645277,669203,,foul,,,,,14.0,Ozzie Albies called out on strikes.,D,L,R,MIL,ATL,S,,,3,2,2021,0.69,-0.4,0.91,1.02,,,,2,6,Top,,,,,,,2.229462,-122.120994,-4.66496,6.42296,24.051011,-35.49205,3.26,1.48,19.0,86.4,-2.0,84.1,2946.0,6.4,660917,553882,642133,543939,500871,642715,592885,456715,541645,54.09,,,,,,,,38,6,Curveball,0,0,0,0,0,0,0,0,Infield shift,Standard,42.0,0.0,0.0,,,,0.0,88.0,0,0,0.558,0.442,26,24,27,24,3,2,6.0,5.0,,1.0,3.64,-0.69,0.69,47.9,669203,2021
2,FC,2021-10-08,96.4,-0.91,5.98,"Burnes, Corbin",645277,669203,,ball,,,,,11.0,Ozzie Albies called out on strikes.,D,L,R,MIL,ATL,B,,,2,2,2021,0.4,1.08,-0.45,3.38,,,,2,6,Top,,,,,,,0.318922,-140.452278,-3.865182,5.246654,28.506826,-17.222603,3.15,1.48,,,,97.2,2624.0,6.4,660917,553882,642133,543939,500871,642715,592885,456715,541645,54.08,,,,,,,,38,5,Cutter,0,0,0,0,0,0,0,0,Infield shift,Standard,196.0,0.0,0.112,,,,-0.112,,0,0,0.558,0.442,26,24,27,24,3,2,6.0,5.0,,1.0,1.35,-0.4,0.4,39.8,669203,2021
3,SI,2021-10-08,98.8,-0.86,5.96,"Burnes, Corbin",645277,669203,,ball,,,,,11.0,Ozzie Albies called out on strikes.,D,L,R,MIL,ATL,B,,,1,2,2021,-1.05,0.73,-1.37,2.51,,,,2,6,Top,,,,,,,1.061222,-143.846673,-5.686037,-14.73179,34.505461,-20.8707,3.13,1.48,,,,99.1,2536.0,6.5,660917,553882,642133,543939,500871,642715,592885,456715,541645,54.0,,,,,,,,38,4,Sinker,0,0,0,0,0,0,0,0,Infield shift,Standard,204.0,0.0,0.048,,,,-0.048,,0,0,0.558,0.442,26,24,27,24,3,2,6.0,5.0,,1.0,1.61,1.05,-1.05,39.8,669203,2021
4,CU,2021-10-08,83.2,-0.77,6.11,"Burnes, Corbin",645277,669203,,swinging_strike,,,,,13.0,Ozzie Albies called out on strikes.,D,L,R,MIL,ATL,S,,,1,1,2021,0.75,-0.69,-0.24,1.06,,,,2,6,Top,,,,,,,-0.236329,-121.159548,-3.765989,7.379327,24.325473,-38.464955,3.26,1.48,,,,83.4,2886.0,6.4,660917,553882,642133,543939,500871,642715,592885,456715,541645,54.07,,,,,,,,38,3,Curveball,0,0,0,0,0,0,0,0,Infield shift,Standard,49.0,0.0,-0.069,,,,0.069,,0,0,0.558,0.442,26,24,27,24,3,2,6.0,5.0,,1.0,3.99,-0.75,0.75,46.3,669203,2021


In [24]:
# Write each loaded frame to a CSV file in the current working directory,
# using to_csv's default settings.
csv_exports = {
    'team_batting.csv': df_team_batting,
    'statcast_batters_subset.csv': df_batters,
    'statcast_pitchers_subset.csv': df_pitchers,
}
for csv_name, frame in csv_exports.items():
    frame.to_csv(csv_name)

In [24]:
# Print the column schema of the pitcher-level Statcast table.
# The table name must be `statcast_pitchers` (plural), matching the name the
# database reports and the name the data-loading query reads from. PRAGMA
# table_info does not raise for an unknown table; it just yields no rows, so
# a misspelled name would silently print an empty list.
with NotebookDBConnection() as conn:
    cursor = conn.cursor()
    cursor.execute("PRAGMA table_info(statcast_pitchers)")
    print(cursor.fetchall())
    

[(0, 'pitch_type', 'TEXT', 0, None, 0), (1, 'game_date', 'TEXT', 0, None, 0), (2, 'release_speed', 'REAL', 0, None, 0), (3, 'release_pos_x', 'REAL', 0, None, 0), (4, 'release_pos_z', 'REAL', 0, None, 0), (5, 'player_name', 'TEXT', 0, None, 0), (6, 'batter', 'INTEGER', 0, None, 0), (7, 'pitcher', 'INTEGER', 0, None, 0), (8, 'events', 'TEXT', 0, None, 0), (9, 'description', 'TEXT', 0, None, 0), (10, 'spin_dir', 'INTEGER', 0, None, 0), (11, 'spin_rate_deprecated', 'INTEGER', 0, None, 0), (12, 'break_angle_deprecated', 'INTEGER', 0, None, 0), (13, 'break_length_deprecated', 'INTEGER', 0, None, 0), (14, 'zone', 'INTEGER', 0, None, 0), (15, 'des', 'TEXT', 0, None, 0), (16, 'game_type', 'TEXT', 0, None, 0), (17, 'stand', 'TEXT', 0, None, 0), (18, 'p_throws', 'TEXT', 0, None, 0), (19, 'home_team', 'TEXT', 0, None, 0), (20, 'away_team', 'TEXT', 0, None, 0), (21, 'type', 'TEXT', 0, None, 0), (22, 'hit_location', 'INTEGER', 0, None, 0), (23, 'bb_type', 'TEXT', 0, None, 0), (24, 'balls', 'INTEGER'