# Creating Bullpen Features
- Bullpen = relief pitchers not in starting rotation
- Bullpen dynamics are very complicated
- To simplify, we will just consider the team bullpen
- For each game, we can look at the performance of the bullpen (by subtracting the starting pitcher from the overall stats)
- Then we can do $n$ game lookbacks (similar to how we get team hitting stats) to create features based on recent bullpen performance
- This will not account for which pitchers are rested / available

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.max_columns',5000)
pd.set_option('display.max_rows',5000)

In [15]:
df = pd.read_csv('df_bp5.csv')
df.shape

  df = pd.read_csv('df_bp5.csv')


(92946, 451)

# Plan of Attack
- For each game, we have summary stats of the starting pitchers performance
- Also have (or can calculate) those stats for the overall game
- Difference between those two will be accountable to the bullpen


In [16]:
df.sample(5)

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,Strt_GS_h,Strt_GS_v,Strt_IP_h,Strt_IP_v,Strt_H_h,Strt_H_v,Strt_BFP_h,Strt_BFP_v,Strt_HR_h,Strt_HR_v,Strt_R_h,Strt_R_v,Strt_ER_h,Strt_ER_v,Strt_BB_h,Strt_BB_v,Strt_IB_h,Strt_IB_v,Strt_SO_h,Strt_SO_v,Strt_SH_h,Strt_SH_v,Strt_SF_h,Strt_SF_v,Strt_WP_h,Strt_WP_v,Strt_HBP_h,Strt_HBP_v,Strt_BK_h,Strt_BK_v,Strt_2B_h,Strt_2B_v,Strt_3B_h,Strt_3B_v,Strt_IP_real_h,Strt_IP_real_v,Strt_rollsum_IP_real_10_h,Strt_rollsum_IP_real_10_v,Strt_rollsum_H_10_h,Strt_rollsum_H_10_v,Strt_rollsum_BFP_10_h,Strt_rollsum_BFP_10_v,Strt_rollsum_HR_10_h,Strt_rollsum_HR_10_v,Strt_rollsum_R_10_h,Strt_rollsum_R_10_v,Strt_rollsum_ER_10_h,Strt_rollsum_ER_10_v,Strt_rollsum_BB_10_h,Strt_rollsum_BB_10_v,Strt_rollsum_IB_10_h,Strt_rollsum_IB_10_v,Strt_rollsum_SO_10_h,Strt_rollsum_SO_10_v,Strt_rollsum_SH_10_h,Strt_rollsum_SH_10_v,Strt_rollsum_SF_10_h,Strt_rollsum_SF_10_v,Strt_rollsum_WP_10_h,Strt_rollsum_WP_10_v,Strt_rollsum_HBP_10_h,Strt_rollsum_HBP_10_v,Strt_rollsum_BK_10_h,Strt_rollsum_BK_10_v,Strt_rollsum_2B_10_h,Strt_rollsum_2B_10_v,Strt_rollsum_3B_10_h,Strt_rollsum_3B_10_v,Strt_rollsum_IP_real_35_h,Strt_rollsum_IP_real_35_v,Strt_rollsum_H_35_h,Strt_rollsum_H_35_v,Strt_rollsum_BFP_35_h,Strt_rollsum_BFP_35_v,Strt_rollsum_HR_35_h,Strt_rollsum_HR_35_v,Strt_rollsum_R_35_h,Strt_rollsum_R_35_v,Strt_rollsum_ER_35_h,Strt_rollsum_ER_35_v,Strt_rollsum_BB_35_h,Strt_rollsum_BB_35_v,Strt_rollsum_IB_35_h,Strt_rollsum_IB_35_v,Strt_rollsum_SO_35_h,Strt_rollsum_SO_35_v,Strt_rollsum_SH_35_h,Strt_rollsum_SH_35_v,Strt_rollsum_SF_35_h,Strt_rollsum_SF_35_v,Strt_rollsum_WP_35_h,Strt_rollsum_WP_35_v,Strt_rollsum_HBP_35_h,Strt_rollsum_HBP_35_v,Strt_rollsum_BK_35_h,Strt_rollsum_BK_35_v,Strt_rollsum_2B_35_h,Strt_rollsum_2B_35_v,Strt_rollsum_3B_35_h,Strt_rollsum_3B_35_v,Strt_rollsum_IP_real_75_h,Strt_rollsum_IP_real_75_v,Strt_rollsum_H_75_h,Strt_rollsum_H_75_v,Strt_rollsum_BFP_75_h,Strt_rollsum_BFP_75_v,Strt_rollsum_HR_75_h,Strt_rollsum_HR_75_v,Strt_rollsum_R_75_h,Strt_rollsum_R_75_v,Strt_rollsum_ER_75_h,Strt_rollsum_ER_75_v,Strt_rollsum_BB_75_h,Strt_rollsum_BB_75_v,Strt_rollsum_IB_75_h,Strt_rollsum_IB_75_v,Strt_rollsum_SO_75_h,Strt_rollsum_SO_75_v,Strt_rollsum_SH_75_h,Strt_rollsum_SH_75_v,Strt_rollsum_SF_75_h,Strt_rollsum_SF_75_v,Strt_rollsum_WP_75_h,Strt_rollsum_WP_75_v,Strt_rollsum_HBP_75_h,Strt_rollsum_HBP_75_v,Strt_rollsum_BK_75_h,Strt_rollsum_BK_75_v,Strt_rollsum_2B_75_h,Strt_rollsum_2B_75_v,Strt_rollsum_3B_75_h,Strt_rollsum_3B_75_v,Strt_H_BB_roll_10_h,Strt_H_BB_roll_10_v,Strt_XB_roll_10_h,Strt_XB_roll_10_v,Strt_TB_roll_10_h,Strt_TB_roll_10_v,Strt_IP_mod_10_h,Strt_IP_mod_10_v,Strt_BF_mod_10_h,Strt_BF_mod_10_v,Strt_ER_mod_10_h,Strt_ER_mod_10_v,Strt_FIP_numer_10_h,Strt_FIP_numer_10_v,Strt_FIP_numer_mod_10_h,Strt_FIP_numer_mod_10_v,Strt_FIP_numer_mod2_10_h,Strt_FIP_numer_mod2_10_v,Strt_H_BB_mod_10_h,Strt_H_BB_mod_10_v,Strt_H_BB_mod2_10_h,Strt_H_BB_mod2_10_v,Strt_SO_mod_10_h,Strt_SO_mod_10_v,Strt_TB_BB_mod_10_h,Strt_TB_BB_mod_10_v,Strt_ERA_10_h,Strt_ERA_10_v,Strt_FIP_10_h,Strt_FIP_10_v,Strt_FIP_perc_10_h,Strt_FIP_perc_10_v,Strt_WHIP_10_h,Strt_WHIP_10_v,Strt_SO_perc_10_h,Strt_SO_perc_10_v,Strt_TB_BB_perc_10_h,Strt_TB_BB_perc_10_v,Strt_H_BB_perc_10_h,Strt_H_BB_perc_10_v,Strt_H_BB_roll_35_h,Strt_H_BB_roll_35_v,Strt_XB_roll_35_h,Strt_XB_roll_35_v,Strt_TB_roll_35_h,Strt_TB_roll_35_v,Strt_IP_mod_35_h,Strt_IP_mod_35_v,Strt_BF_mod_35_h,Strt_BF_mod_35_v,Strt_ER_mod_35_h,Strt_ER_mod_35_v,Strt_FIP_numer_35_h,Strt_FIP_numer_35_v,Strt_FIP_numer_mod_35_h,Strt_FIP_numer_mod_35_v,Strt_FIP_numer_mod2_35_h,Strt_FIP_numer_mod2_35_v,Strt_H_BB_mod_35_h,Strt_H_BB_mod_35_v,Strt_H_BB_mod2_35_h,Strt_H_BB_mod2_35_v,Strt_SO_mod_35_h,Strt_SO_mod_35_v,Strt_TB_BB_mod_35_h,Strt_TB_BB_mod_35_v,Strt_ERA_35_h,Strt_ERA_35_v,Strt_FIP_35_h,Strt_FIP_35_v,Strt_FIP_perc_35_h,Strt_FIP_perc_35_v,Strt_WHIP_35_h,Strt_WHIP_35_v,Strt_SO_perc_35_h,Strt_SO_perc_35_v,Strt_TB_BB_perc_35_h,Strt_TB_BB_perc_35_v,Strt_H_BB_perc_35_h,Strt_H_BB_perc_35_v,Strt_H_BB_roll_75_h,Strt_H_BB_roll_75_v,Strt_XB_roll_75_h,Strt_XB_roll_75_v,Strt_TB_roll_75_h,Strt_TB_roll_75_v,Strt_IP_mod_75_h,Strt_IP_mod_75_v,Strt_BF_mod_75_h,Strt_BF_mod_75_v,Strt_ER_mod_75_h,Strt_ER_mod_75_v,Strt_FIP_numer_75_h,Strt_FIP_numer_75_v,Strt_FIP_numer_mod_75_h,Strt_FIP_numer_mod_75_v,Strt_FIP_numer_mod2_75_h,Strt_FIP_numer_mod2_75_v,Strt_H_BB_mod_75_h,Strt_H_BB_mod_75_v,Strt_H_BB_mod2_75_h,Strt_H_BB_mod2_75_v,Strt_SO_mod_75_h,Strt_SO_mod_75_v,Strt_TB_BB_mod_75_h,Strt_TB_BB_mod_75_v,Strt_ERA_75_h,Strt_ERA_75_v,Strt_FIP_75_h,Strt_FIP_75_v,Strt_FIP_perc_75_h,Strt_FIP_perc_75_v,Strt_WHIP_75_h,Strt_WHIP_75_v,Strt_SO_perc_75_h,Strt_SO_perc_75_v,Strt_TB_BB_perc_75_h,Strt_TB_BB_perc_75_v,Strt_H_BB_perc_75_h,Strt_H_BB_perc_75_v
35881,19970628,0,Sat,SFN,NL,79,COL,NL,80,2,9,51,N,,,,DEN02,48273.0,149,010001000,30312000x,32,6,1,0,1,2,0,0,0,3,0,5,2,0,1,0,6,4,9,9,0,0,24,13,1,0,0,0,32,9,1,1,4,9,1,0,0,6,1,5,1,0,0,0,6,1,2,2,1,0,27,12,0,0,1,0,riekr901,Rich Rieker,drecb901,Bruce Dreckman,poncl901,Larry Poncino,marsr901,Randy Marsh,,(none),,(none),baked002,Dusty Baker,bayld001,Don Baylor,ritzk001,Kevin Ritz,gardm001,Mark Gardner,,(none),bichd001,Dante Bichette,gardm001,Mark Gardner,ritzk001,Kevin Ritz,hamid001,Darryl Hamilton,8,vizcj001,Jose Vizcaino,6,hillg001,Glenallen Hill,9,bondb001,Barry Bonds,7,kentj001,Jeff Kent,4,snowj001,J.T. Snow,3,lewim001,Mark Lewis,5,wilkr001,Rick Wilkins,2,gardm001,Mark Gardner,1,mccrq001,Quinton McCracken,8,peren001,Neifi Perez,4,walkl001,Larry Walker,9,galaa001,Andres Galarraga,3,bichd001,Dante Bichette,7,reedj001,Jeff Reed,2,castv001,Vinny Castilla,5,weisw001,Walt Weiss,6,ritzk001,Kevin Ritz,1,,Y,1997,7,1,11,199706280,0.286047,0.250695,0.348586,0.330184,0.46767,0.38346,0.816256,0.713644,193.0,110.0,71.0,64.0,124.0,139.0,0.28839,0.262425,0.353191,0.334529,0.467228,0.442346,0.82042,0.776875,26.0,12.0,14.0,11.0,17.0,25.0,0.0,0.0,0.5,0.0,,1.0,1.0,9.0,2.0,6.0,5.0,35.0,14.0,1.0,2.0,2.0,6.0,2.0,6.0,3.0,3.0,0.0,0.0,5.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,9.0,2.0,57.0,63.666667,76.0,66.0,265.0,278.0,7.0,8.0,42.0,31.0,40.0,27.0,30.0,25.0,2.0,2.0,28.0,38.0,2.0,5.0,3.0,2.0,5.0,0.0,1.0,1.0,0.0,1.0,17.0,9.0,2.0,4.0,213.0,222.666667,265.0,245.0,969.0,964.0,28.0,31.0,141.0,122.0,134.0,108.0,91.0,72.0,4.0,6.0,111.0,160.0,7.0,11.0,5.0,6.0,9.0,1.0,9.0,7.0,1.0,3.0,47.0,36.0,7.0,6.0,441.666667,358.333333,501.0,376.0,1973.0,1542.0,52.0,51.0,276.0,185.0,254.0,156.0,198.0,118.0,9.0,10.0,242.0,283.0,16.0,17.0,12.0,8.0,22.0,3.0,18.0,12.0,1.0,4.0,96.0,65.0,12.0,9.0,106.0,91.0,42.0,41.0,118.0,107.0,57.0,63.666667,265.0,278.0,40.0,27.0,353.0,301.0,353.0,301.0,353.0,301.0,106.0,91.0,106.0,91.0,28.0,38.0,148.0,132.0,6.315789,3.816754,6.192982,4.727749,1.332075,1.082734,1.859649,1.429319,0.10566,0.136691,0.558491,0.47482,0.4,0.327338,356.0,317.0,145.0,141.0,410.0,386.0,213.0,222.666667,969.0,964.0,134.0,108.0,1210.0,1034.0,1210.0,1034.0,1210.0,1034.0,356.0,317.0,356.0,317.0,111.0,160.0,501.0,458.0,5.661972,4.365269,5.680751,4.643713,1.24871,1.072614,1.671362,1.423653,0.114551,0.165975,0.517028,0.475104,0.367389,0.328838,699.0,494.0,276.0,236.0,777.0,612.0,441.666667,358.333333,1973.0,1542.0,254.0,156.0,2289.0,1579.0,2289.0,1579.0,2289.0,1579.0,699.0,494.0,699.0,494.0,242.0,283.0,975.0,730.0,5.175849,3.91814,5.182642,4.406512,1.160162,1.023995,1.582642,1.378605,0.122656,0.183528,0.494171,0.473411,0.354283,0.320363
54398,20050429,0,Fri,SEA,AL,23,OAK,AL,23,4,2,54,N,,,,OAK01,18545.0,179,002100010,100000010,36,8,1,1,1,4,0,0,0,3,0,12,0,0,0,0,8,5,2,2,0,0,27,12,0,0,1,0,35,9,1,0,0,2,0,0,1,5,0,2,0,0,1,0,12,4,3,3,1,0,27,8,1,0,0,0,relic901,Charlie Reliford,herna901,Angel Hernandez,gibsg901,Greg Gibson,rungb901,Brian Runge,,(none),,(none),hargm001,Mike Hargrove,machk101,Ken Macha,selea001,Aaron Sele,hared001,Dan Haren,guare001,Eddie Guardado,reedj004,Jeremy Reed,selea001,Aaron Sele,hared001,Dan Haren,suzui001,Ichiro Suzuki,9,reedj004,Jeremy Reed,8,belta001,Adrian Beltre,5,sexsr001,Richie Sexson,3,boonb002,Bret Boone,4,ibanr001,Raul Ibanez,10,winnr001,Randy Winn,7,olivm001,Miguel Olivo,2,valdw001,Wilson Valdez,6,kotsm001,Mark Kotsay,8,kendj001,Jason Kendall,2,chave001,Eric Chavez,5,hatts001,Scott Hatteberg,3,durae001,Erubiel Durazo,10,ellim001,Mark Ellis,4,swisn001,Nick Swisher,9,scutm001,Marco Scutaro,6,thomc001,Charles Thomas,7,,Y,2005,-2,0,6,200504290,0.266432,0.26921,0.33609,0.327073,0.424605,0.394936,0.760695,0.722008,43.0,112.0,23.0,45.0,93.0,107.0,0.228771,0.264966,0.294977,0.322172,0.333666,0.373896,0.628644,0.696068,4.0,17.0,4.0,11.0,20.0,19.0,0.0,0.0,0.5,0.0,,1.0,1.0,7.0,6.0,6.0,7.0,31.0,27.0,0.0,0.0,3.0,1.0,2.0,1.0,3.0,2.0,0.0,0.0,9.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,7.0,6.0,44.666667,47.0,39.0,65.0,189.0,217.0,2.0,3.0,18.0,39.0,15.0,35.0,17.0,19.0,0.0,2.0,39.0,17.0,2.0,0.0,2.0,4.0,2.0,1.0,2.0,1.0,0.0,0.0,11.0,11.0,2.0,2.0,142.333333,167.333333,149.0,200.0,620.0,745.0,15.0,20.0,82.0,107.0,76.0,97.0,52.0,65.0,2.0,2.0,95.0,69.0,8.0,3.0,5.0,10.0,5.0,4.0,8.0,6.0,0.0,2.0,35.0,30.0,5.0,4.0,142.333333,388.666667,149.0,445.0,620.0,1729.0,15.0,48.0,82.0,241.0,76.0,226.0,52.0,154.0,2.0,4.0,95.0,174.0,8.0,10.0,5.0,21.0,5.0,11.0,8.0,20.0,0.0,2.0,35.0,70.0,5.0,9.0,56.0,84.0,21.0,24.0,60.0,89.0,44.666667,47.0,189.0,217.0,15.0,35.0,116.0,257.0,116.0,257.0,116.0,257.0,56.0,84.0,56.0,84.0,39.0,17.0,77.0,108.0,3.022388,6.702128,2.597015,5.468085,0.613757,1.184332,1.253731,1.787234,0.206349,0.078341,0.407407,0.497696,0.296296,0.387097,201.0,265.0,90.0,98.0,239.0,298.0,142.333333,167.333333,620.0,745.0,76.0,97.0,608.0,917.0,608.0,917.0,608.0,917.0,201.0,265.0,201.0,265.0,95.0,69.0,291.0,363.0,4.805621,5.217131,4.271663,5.48008,0.980645,1.230872,1.412178,1.583665,0.153226,0.092617,0.469355,0.487248,0.324194,0.355705,201.0,599.0,90.0,232.0,239.0,677.0,225.0,388.666667,900.0,1729.0,121.925926,226.0,608.0,2073.0,980.992,2073.0,916.0,2073.0,325.0,599.0,304.6,599.0,151.0,174.0,417.0,831.0,4.877037,5.233276,4.359964,5.333619,1.089991,1.198959,1.444444,1.541166,0.167778,0.100636,0.463333,0.480625,0.338444,0.346443
32003,19950821,2,Mon,KCA,AL,105,MIL,AL,107,18,9,54,N,,,,MIL05,13920.0,195,300(11)02110,040100112,43,19,3,2,0,16,0,1,2,7,0,7,2,0,2,0,8,3,9,9,0,0,27,6,0,0,0,0,39,13,5,1,3,9,0,1,1,3,0,6,0,0,0,0,8,4,17,17,0,0,27,9,3,0,2,0,cedeg901,Gary Cederstrom,tscht901,Tim Tschida,craft901,Terry Craft,shulj901,John Shulock,,(none),,(none),boonb001,Bob Boone,garnp001,Phil Garner,appik001,Kevin Appier,scanb001,Bob Scanlan,,(none),damoj001,Johnny Damon,appik001,Kevin Appier,scanb001,Bob Scanlan,damoj001,Johnny Damon,8,goodt001,Tom Goodwin,7,joynw001,Wally Joyner,3,gaetg001,Gary Gaetti,5,lockk001,Keith Lockhart,4,tuckm001,Michael Tucker,10,gagng001,Greg Gagne,6,nunnj001,Jon Nunnally,9,merch001,Henry Mercedes,2,hamid001,Darryl Hamilton,8,listp001,Pat Listach,4,surhb001,B.J. Surhoff,3,nilsd001,Dave Nilsson,10,olivj001,Joe Oliver,2,cirij001,Jeff Cirillo,5,valej003,Jose Valentin,6,miesm001,Matt Mieske,9,hulsd001,David Hulse,7,,Y,1995,-9,0,27,199508212,0.268397,0.261138,0.338901,0.326202,0.416637,0.400436,0.755538,0.726639,112.0,155.0,47.0,82.0,108.0,97.0,0.265761,0.264565,0.339442,0.333333,0.42192,0.413563,0.761362,0.746896,14.0,19.0,7.0,9.0,18.0,21.0,0.0,0.0,0.5,0.0,,1.0,1.0,3.1,5.0,5.0,7.0,20.0,26.0,0.0,0.0,9.0,5.0,8.0,5.0,4.0,3.0,0.0,0.0,1.0,5.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,4.0,1.0,1.0,3.333333,5.0,57.666667,57.0,63.0,56.0,264.0,247.0,6.0,7.0,36.0,35.0,33.0,35.0,30.0,23.0,1.0,0.0,20.0,52.0,0.0,2.0,3.0,1.0,2.0,2.0,5.0,4.0,0.0,0.0,11.0,11.0,2.0,4.0,148.666667,240.333333,165.0,182.0,650.0,979.0,17.0,11.0,83.0,87.0,75.0,83.0,54.0,93.0,3.0,5.0,78.0,219.0,1.0,9.0,5.0,6.0,4.0,12.0,9.0,9.0,1.0,0.0,28.0,34.0,4.0,11.0,196.0,509.666667,219.0,413.0,860.0,2080.0,21.0,28.0,112.0,188.0,101.0,178.0,71.0,186.0,7.0,9.0,108.0,452.0,2.0,15.0,7.0,13.0,5.0,18.0,12.0,11.0,2.0,1.0,43.0,78.0,6.0,15.0,93.0,79.0,33.0,40.0,96.0,96.0,57.666667,57.0,264.0,247.0,33.0,35.0,317.0,224.0,317.0,224.0,317.0,224.0,93.0,79.0,93.0,79.0,20.0,52.0,126.0,119.0,5.150289,5.526316,5.49711,3.929825,1.200758,0.906883,1.612717,1.385965,0.075758,0.210526,0.477273,0.481781,0.352273,0.319838,219.0,275.0,87.0,89.0,252.0,271.0,148.666667,240.333333,650.0,979.0,75.0,83.0,722.0,530.0,722.0,530.0,722.0,530.0,219.0,275.0,219.0,275.0,78.0,219.0,306.0,364.0,4.540359,3.108183,4.856502,2.20527,1.110769,0.541369,1.473094,1.144244,0.12,0.223698,0.470769,0.371808,0.336923,0.280899,290.0,599.0,118.0,192.0,337.0,605.0,225.0,509.666667,900.0,2080.0,117.111111,178.0,927.0,1257.0,1057.848,1257.0,971.0,1257.0,333.5,599.0,304.8,599.0,116.0,452.0,426.0,791.0,4.684444,3.143231,4.701547,2.466318,1.175387,0.604327,1.482222,1.175278,0.128889,0.217308,0.473333,0.380288,0.338667,0.287981
19558,19890729,0,Sat,CIN,NL,103,ATL,NL,105,4,1,54,N,,,,ATL01,31470.0,136,000102001,000100000,31,6,0,1,2,4,2,0,0,3,0,5,0,0,0,0,5,3,1,1,0,0,27,8,1,0,1,0,31,5,1,0,1,1,1,0,0,2,1,4,0,0,1,0,6,2,4,4,0,0,27,16,0,0,0,0,rennd901,Dutch Rennert,brocf901,Fred Brocklander,engeb901,Bob Engel,hallt901,Tom Hallion,,(none),,(none),rosep001,Pete Rose,nixor101,Russ Nixon,browt001,Tom Browning,smolj001,John Smoltz,franj001,John Franco,grifk001,Ken Griffey,browt001,Tom Browning,smolj001,John Smoltz,duncm001,Mariano Duncan,6,madis001,Scotti Madison,5,davie001,Eric Davis,8,grifk001,Ken Griffey,7,roomr001,Rolando Roomes,9,benzt001,Todd Benzinger,3,reedj001,Jeff Reed,2,oestr001,Ron Oester,4,browt001,Tom Browning,1,blauj001,Jeff Blauser,4,gregt001,Tommy Gregg,3,smitl002,Lonnie Smith,7,murpd001,Dale Murphy,8,thoma001,Andres Thomas,6,berrg001,Geronimo Berroa,9,davij001,Jody Davis,2,white003,Ed Whited,5,smolj001,John Smoltz,1,,Y,1989,-3,0,5,198907290,0.231275,0.250183,0.291821,0.308876,0.332664,0.367755,0.624484,0.676631,77.0,149.0,63.0,80.0,158.0,117.0,0.225366,0.23249,0.292335,0.291741,0.321951,0.346304,0.614286,0.638045,10.0,19.0,11.0,21.0,32.0,24.0,0.0,0.0,0.5,0.0,,1.0,1.0,7.0,7.1,5.0,4.0,28.0,29.0,1.0,1.0,3.0,1.0,3.0,1.0,2.0,2.0,0.0,1.0,4.0,3.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,7.0,7.333333,75.333333,67.666667,55.0,62.0,295.0,266.0,5.0,5.0,18.0,25.0,16.0,24.0,20.0,10.0,0.0,0.0,66.0,38.0,3.0,3.0,2.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,9.0,9.0,1.0,0.0,220.666667,238.666667,185.0,219.0,918.0,979.0,20.0,32.0,84.0,108.0,76.0,92.0,81.0,60.0,4.0,5.0,165.0,113.0,9.0,13.0,6.0,9.0,7.0,2.0,3.0,1.0,3.0,2.0,40.0,45.0,4.0,2.0,220.666667,506.666667,185.0,451.0,918.0,2064.0,20.0,67.0,84.0,216.0,76.0,197.0,81.0,138.0,4.0,9.0,165.0,265.0,9.0,21.0,6.0,18.0,7.0,5.0,3.0,11.0,3.0,7.0,40.0,96.0,4.0,6.0,75.0,72.0,26.0,24.0,81.0,86.0,75.333333,67.666667,295.0,266.0,16.0,24.0,158.0,205.0,158.0,205.0,158.0,205.0,75.0,72.0,75.0,72.0,66.0,38.0,101.0,96.0,1.911504,3.192118,2.097345,3.029557,0.535593,0.770677,0.995575,1.064039,0.223729,0.142857,0.342373,0.360902,0.254237,0.270677,266.0,279.0,108.0,145.0,293.0,364.0,220.666667,238.666667,918.0,979.0,76.0,92.0,728.0,1027.0,728.0,1027.0,728.0,1027.0,266.0,279.0,266.0,279.0,165.0,113.0,374.0,424.0,3.099698,3.469274,3.299094,4.303073,0.793028,1.04903,1.205438,1.168994,0.179739,0.115424,0.407407,0.433095,0.28976,0.284985,266.0,589.0,108.0,309.0,293.0,760.0,225.0,506.666667,918.0,2064.0,78.407407,197.0,728.0,2108.0,747.552,2108.0,728.0,2108.0,272.5,589.0,266.0,589.0,165.0,265.0,374.0,898.0,3.136296,3.499342,3.322453,4.160526,0.814327,1.021318,1.211111,1.1625,0.179739,0.128391,0.407407,0.435078,0.28976,0.285368
39596,19990412,0,Mon,KCA,AL,7,CLE,AL,7,2,5,58,D,,,,CLE08,42798.0,193,0011000000,0000000203,35,8,1,1,1,2,0,0,0,2,0,9,1,0,2,0,5,4,5,5,0,0,28,11,0,0,3,0,36,11,1,0,2,5,0,0,0,7,1,7,0,0,3,0,10,3,2,2,0,0,30,14,0,0,3,0,merrd901,Durwood Merrill,hirsj901,John Hirschbeck,phild901,Dave Phillips,roe-r901,Rocky Roe,,(none),,(none),muset101,Tony Muser,hargm001,Mike Hargrove,shuep001,Paul Shuey,santj002,Jose Santiago,,(none),frymt001,Travis Fryman,rosaj001,Jose Rosado,burbd001,Dave Burba,beltc001,Carlos Beltran,8,randj002,Joe Randa,5,damoj001,Johnny Damon,7,kingj001,Jeff King,3,sweem002,Mike Sweeney,10,dye-j001,Jermaine Dye,9,kreuc001,Chad Kreuter,2,sancr001,Rey Sanchez,6,feblc001,Carlos Febles,4,loftk001,Kenny Lofton,8,wilse001,Enrique Wilson,6,alomr001,Roberto Alomar,4,ramim002,Manny Ramirez,9,thomj002,Jim Thome,3,cordw001,Wil Cordero,7,frymt001,Travis Fryman,5,sexsr001,Richie Sexson,10,aloms001,Sandy Alomar,2,,Y,1999,3,1,7,199904120,0.27508,0.262213,0.348311,0.321328,0.451613,0.399964,0.799924,0.721292,141.0,133.0,55.0,51.0,107.0,125.0,0.274194,0.258929,0.350594,0.325812,0.468691,0.404762,0.819285,0.730574,31.0,27.0,7.0,11.0,16.0,23.0,0.0,0.0,0.5,0.0,,1.0,1.0,6.1,6.1,7.0,6.0,27.0,26.0,1.0,0.0,2.0,0.0,2.0,0.0,2.0,2.0,0.0,0.0,5.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,6.333333,6.333333,58.333333,62.333333,65.0,61.0,248.0,268.0,12.0,9.0,33.0,39.0,29.0,39.0,16.0,23.0,0.0,2.0,40.0,47.0,1.0,1.0,2.0,1.0,1.0,3.0,1.0,3.0,0.0,0.0,7.0,11.0,0.0,2.0,222.0,174.0,227.0,179.0,947.0,753.0,32.0,24.0,108.0,102.0,100.0,87.0,75.0,56.0,4.0,2.0,149.0,135.0,3.0,1.0,10.0,3.0,7.0,6.0,7.0,5.0,0.0,1.0,41.0,27.0,3.0,4.0,441.333333,399.666667,436.0,419.0,1906.0,1737.0,62.0,54.0,225.0,235.0,207.0,209.0,178.0,134.0,19.0,5.0,329.0,278.0,12.0,7.0,17.0,14.0,18.0,12.0,16.0,10.0,0.0,3.0,82.0,67.0,7.0,9.0,81.0,84.0,43.0,42.0,108.0,103.0,58.333333,62.333333,248.0,268.0,29.0,39.0,319.0,275.0,319.0,275.0,319.0,275.0,81.0,84.0,81.0,84.0,40.0,47.0,124.0,126.0,4.474286,5.631016,5.468571,4.411765,1.28629,1.026119,1.388571,1.347594,0.16129,0.175373,0.5,0.470149,0.326613,0.313433,302.0,235.0,143.0,107.0,370.0,286.0,222.0,174.0,947.0,753.0,100.0,87.0,1024.0,747.0,1024.0,747.0,1024.0,747.0,302.0,235.0,302.0,235.0,149.0,135.0,445.0,342.0,4.054054,4.5,4.612613,4.293103,1.081309,0.992032,1.36036,1.350575,0.157339,0.179283,0.469905,0.454183,0.318902,0.312085,614.0,553.0,282.0,247.0,718.0,666.0,441.333333,399.666667,1906.0,1737.0,207.0,209.0,1990.0,1805.0,1990.0,1805.0,1990.0,1805.0,614.0,553.0,614.0,553.0,329.0,278.0,896.0,800.0,4.221299,4.706422,4.509063,4.516264,1.044071,1.039148,1.391239,1.383653,0.172613,0.160046,0.470094,0.460564,0.322141,0.318365


In [17]:
for i in df.columns:
    print(i)

date
dblheader_code
day_of_week
team_v
league_v
game_no_v
team_h
league_h
game_no_h
runs_v
runs_h
outs_total
day_night
completion_info
forfeit_info
protest_info
ballpark_id
attendance
game_minutes
linescore_v
linescore_h
AB_v
H_v
2B_v
3B_v
HR_v
RBI_v
SH_v
SF_v
HBP_v
BB_v
IBB_v
SO_v
SB_v
CS_v
GIDP_v
CI_v
LOB_v
P_num_v
ERind_v
ERteam_v
WP_v
balk_v
PO_v
ASST_v
ERR_v
PB_v
DP_v
TP_v
AB_h
H_h
2B_h
3B_h
HR_h
RBI_h
SH_h
SF_h
HBP_h
BB_h
IBB_h
SO_h
SB_h
CS_h
GIDP_h
CI_h
LOB_h
P_num_h
ERind_h
ERteam_h
WP_h
balk_h
PO_h
ASST_h
ERR_h
PB_h
DP_h
TP_h
ump_HB_id
ump_HB_name
ump_1B_id
ump_1B_name
ump_2B_id
ump_2B_name
ump_3B_id
ump_3B_name
ump_LF_id
ump_LF_name
ump_RF_id
ump_RF_name
mgr_id_v
mgr_name_v
mgr_id_h
mgr_name_h
pitcher_id_w
pitcher_name_w
pitcher_id_l
pitcher_name_l
pitcher_id_s
pitcher_name_s
GWRBI_id
GWRBI_name
pitcher_start_id_v
pitcher_start_name_v
pitcher_start_id_h
pitcher_start_name_h
batter1_name_v
batter1_id_v
batter1_pos_v
batter2_name_v
batter2_id_v
batter2_pos_v
batter3_name_v
batt

In [18]:
## Calculate some game level stats, specifically about
## relative stats for starting pitcher vs bullpen
full_innings = (df.outs_total//6)
additional_outs = df.outs_total - 6*full_innings
added_innings_pitched_h = np.minimum(additional_outs,3)/3
added_innings_pitched_v = (np.maximum(additional_outs,3)-3)/3
df['innings_pitched_h'] = full_innings + added_innings_pitched_h
df['innings_pitched_v'] = full_innings + added_innings_pitched_v
df['Bpen_IP_h'] = df['innings_pitched_h']-df['Strt_IP_real_h']
df['Bpen_IP_v'] = df['innings_pitched_v']-df['Strt_IP_real_v']
df['Bpen_BFP_h'] = df['AB_v']+df['BB_v']+df['HBP_v']-df['Strt_BFP_h']
df['Bpen_BFP_v'] = df['AB_h']+df['BB_h']+df['HBP_h']-df['Strt_BFP_v']
df['Bpen_R_h'] = df['runs_v']-df['Strt_R_h']
df['Bpen_R_v'] = df['runs_h']-df['Strt_R_v']
df['Bpen_H_h'] = df['H_v']-df['Strt_H_h']
df['Bpen_H_v'] = df['H_h']-df['Strt_H_v']
df['Bpen_HR_h'] = df['HR_v']-df['Strt_HR_h']
df['Bpen_HR_v'] = df['HR_h']-df['Strt_HR_v']
df['Bpen_2B_h'] = df['2B_v']-df['Strt_2B_h']
df['Bpen_2B_v'] = df['2B_h']-df['Strt_2B_v']
df['Bpen_3B_h'] = df['3B_v']-df['Strt_3B_h']
df['Bpen_3B_v'] = df['3B_h']-df['Strt_3B_v']
df['Bpen_BB_h'] = df['BB_v']-df['Strt_BB_h']
df['Bpen_BB_v'] = df['BB_h']-df['Strt_BB_v']
df['Bpen_HBP_h'] = df['HBP_v']-df['Strt_HBP_h']
df['Bpen_HBP_v'] = df['HBP_h']-df['Strt_HBP_v']
df['Bpen_SO_h'] = df['SO_v']-df['Strt_SO_h']
df['Bpen_SO_v'] = df['SO_h']-df['Strt_SO_v']

In [19]:
def roll_column(df, col, winsize):
    # do the standard Pandas rolling calc
    t_col = df[col].rolling(winsize, closed='left').sum().to_numpy()
    
    # for the early columns, just do a rolling sum from the beginning
    t_col[:winsize] = np.concatenate(([0],df[col].iloc[:(winsize)].cumsum().to_numpy()[:-1]))

    return(t_col)

def strip_suffix(x, suff):
    if x.endswith(suff):
        return(x[:-len(suff)])
    else:
        return(x)

In [20]:
## Process df again to get bullpen stats
def get_bullpen_team_df(team):
    visit_cols = [col for col in df.columns if not col.endswith('_h')]
    visit_cols_stripped = [strip_suffix(col,'_v') for col in visit_cols]
    home_cols = [col for col in df.columns if not col.endswith('_v')]
    home_cols_stripped = [strip_suffix(col,'_h') for col in home_cols]    
    df_team_v = df[(df.team_v==team)]
    opponent = df_team_v['team_h']
    df_team_v = df_team_v[visit_cols]
    df_team_v.columns = visit_cols_stripped
    df_team_v['home_game'] = 0
    df_team_v['opponent'] = opponent

    df_team_h = df[(df.team_h==team)]
    opponent = df_team_h['team_v']
    df_team_h = df_team_h[home_cols]
    df_team_h.columns = home_cols_stripped
    df_team_h['home_game'] = 1
    df_team_h['opponent'] = opponent

    df_team = pd.concat((df_team_h, df_team_v))
    df_team.sort_values(['date_dblhead'],inplace=True)

    er_per_ip_def = (5/9)
    h_bb_per_ip_def = 1.5
    h_bb_per_bf_def = .37
    so_per_bf_def = .2
    ip_per_game_def = 2
    bf_per_game_def = 6
    tb_bb_perc_def = .45

    cols_to_agg = ['IP', 'H','BFP', 'HR', 'R',  'BB', 'SO',  'HBP',
       '2B', '3B']
    winsizes = [10,35,75]
    for winsize in winsizes:
        for raw_col in cols_to_agg:
            col_agg = 'Bpen_'+raw_col
            new_colname = 'Bpen_rollsum_'+raw_col+'_'+str(winsize)        
            df_team[new_colname] = roll_column(df_team, col_agg, winsize)

        hit_col = 'Bpen_rollsum_H_'+str(winsize)
        bb_col = 'Bpen_rollsum_BB_'+str(winsize)
        h_bb_col = 'Bpen_H_BB_roll_'+str(winsize)
        double_col = 'Bpen_rollsum_2B_'+str(winsize)
        triple_col = 'Bpen_rollsum_3B_'+str(winsize)
        hr_col = 'Bpen_rollsum_HR_'+str(winsize)
        xb_col = 'Bpen_XB_roll_'+str(winsize)
        tb_col = 'Bpen_TB_roll_'+str(winsize)
        so_col = 'Bpen_rollsum_SO_'+str(winsize)
        so_mod_col = 'Bpen_SO_mod_'+str(winsize)
        ip_col = 'Bpen_rollsum_IP_'+str(winsize)
        ip_mod_col = 'Bpen_IP_mod_'+str(winsize)
        bf_col = 'Bpen_rollsum_BFP_'+str(winsize)
        bf_mod_col = 'Bpen_BF_mod_'+str(winsize)
        whip_col = 'Bpen_WHIP_'+str(winsize)
        so_perc_col = 'Bpen_SO_perc_'+str(winsize)
        h_bb_perc_col = 'Bpen_H_BB_perc_'+str(winsize)
        h_bb_mod_col = 'Bpen_H_BB_mod_'+str(winsize)
        h_bb_mod2_col = 'Bpen_Bpen_H_BB_mod2_'+str(winsize)
        tb_bb_mod_col = 'Bpen_TB_BB_mod_'+str(winsize)
        tb_bb_perc_col = 'Bpen_TB_BB_perc_'+str(winsize)
        df_team[h_bb_col] = df_team[hit_col]+df_team[bb_col]
        df_team[xb_col] = df_team[double_col]+2*df_team[triple_col]+3*df_team[hr_col]
        df_team[tb_col] = df_team[hit_col]+df_team[xb_col]
        df_team[ip_mod_col] = np.maximum(df_team[ip_col], winsize*ip_per_game_def)
        df_team[bf_mod_col] = np.maximum(df_team[bf_col], winsize*bf_per_game_def)
        df_team[h_bb_mod_col] = df_team[h_bb_col] + h_bb_per_ip_def*(df_team[ip_mod_col]-df_team[ip_col])
        df_team[h_bb_mod2_col] = df_team[h_bb_col] + h_bb_per_bf_def*(df_team[bf_mod_col]-df_team[bf_col])
        df_team[so_mod_col] = df_team[so_col] + so_per_bf_def*(df_team[bf_mod_col]-df_team[bf_col])
        df_team[tb_bb_mod_col] = (df_team[tb_col] + df_team[bb_col])+ tb_bb_perc_def*(df_team[bf_mod_col]-df_team[bf_col])
        df_team[whip_col] = df_team[h_bb_mod_col]/df_team[ip_mod_col]
        df_team[so_perc_col] = df_team[so_mod_col]/df_team[bf_mod_col]
        df_team[tb_bb_perc_col] = df_team[tb_bb_mod_col]/df_team[bf_mod_col]
        df_team[h_bb_perc_col] = df_team[h_bb_mod2_col]/df_team[bf_mod_col]
       
    df_team.set_index('date_dblhead', inplace=True)
    return(df_team)

In [21]:
bullpen_team_data_dict = {}
for team in df.team_v.unique():
    bullpen_team_data_dict[team] = get_bullpen_team_df(team)

  df_team[h_bb_col] = df_team[hit_col]+df_team[bb_col]
  df_team[xb_col] = df_team[double_col]+2*df_team[triple_col]+3*df_team[hr_col]
  df_team[tb_col] = df_team[hit_col]+df_team[xb_col]
  df_team[ip_mod_col] = np.maximum(df_team[ip_col], winsize*ip_per_game_def)
  df_team[bf_mod_col] = np.maximum(df_team[bf_col], winsize*bf_per_game_def)
  df_team[h_bb_mod_col] = df_team[h_bb_col] + h_bb_per_ip_def*(df_team[ip_mod_col]-df_team[ip_col])
  df_team[h_bb_mod2_col] = df_team[h_bb_col] + h_bb_per_bf_def*(df_team[bf_mod_col]-df_team[bf_col])
  df_team[so_mod_col] = df_team[so_col] + so_per_bf_def*(df_team[bf_mod_col]-df_team[bf_col])
  df_team[tb_bb_mod_col] = (df_team[tb_col] + df_team[bb_col])+ tb_bb_perc_def*(df_team[bf_mod_col]-df_team[bf_col])
  df_team[whip_col] = df_team[h_bb_mod_col]/df_team[ip_mod_col]
  df_team[so_perc_col] = df_team[so_mod_col]/df_team[bf_mod_col]
  df_team[tb_bb_perc_col] = df_team[tb_bb_mod_col]/df_team[bf_mod_col]
  df_team[h_bb_perc_col] = df_team[h_bb_mod2_c

In [22]:
np.array(bullpen_team_data_dict['NYN'].columns)

array(['date', 'dblheader_code', 'day_of_week', 'team', 'league',
       'game_no', 'runs', 'outs_total', 'day_night', 'completion_info',
       'forfeit_info', 'protest_info', 'ballpark_id', 'attendance',
       'game_minutes', 'linescore', 'AB', 'H', '2B', '3B', 'HR', 'RBI',
       'SH', 'SF', 'HBP', 'BB', 'IBB', 'SO', 'SB', 'CS', 'GIDP', 'CI',
       'LOB', 'P_num', 'ERind', 'ERteam', 'WP', 'balk', 'PO', 'ASST',
       'ERR', 'PB', 'DP', 'TP', 'ump_HB_id', 'ump_HB_name', 'ump_1B_id',
       'ump_1B_name', 'ump_2B_id', 'ump_2B_name', 'ump_3B_id',
       'ump_3B_name', 'ump_LF_id', 'ump_LF_name', 'ump_RF_id',
       'ump_RF_name', 'mgr_id', 'mgr_name', 'pitcher_id_w',
       'pitcher_name_w', 'pitcher_id_l', 'pitcher_name_l', 'pitcher_id_s',
       'pitcher_name_s', 'GWRBI_id', 'GWRBI_name', 'pitcher_start_id',
       'pitcher_start_name', 'batter1_name', 'batter1_id', 'batter1_pos',
       'batter2_name', 'batter2_id', 'batter2_pos', 'batter3_name',
       'batter3_id', 'batter3_pos'

In [23]:
raw_cols_to_add = ['Bpen_IP', 'Bpen_BFP', 'Bpen_R', 'Bpen_H', 'Bpen_HR', 'Bpen_2B',
       'Bpen_3B', 'Bpen_BB', 'Bpen_HBP', 'Bpen_SO',  'Bpen_rollsum_IP_10', 'Bpen_rollsum_H_10',
       'Bpen_rollsum_BFP_10', 'Bpen_rollsum_HR_10', 'Bpen_rollsum_R_10',
       'Bpen_rollsum_BB_10', 'Bpen_rollsum_SO_10', 'Bpen_rollsum_HBP_10',
       'Bpen_rollsum_2B_10', 'Bpen_rollsum_3B_10', 'Bpen_H_BB_roll_10',
       'Bpen_XB_roll_10', 'Bpen_TB_roll_10', 'Bpen_IP_mod_10',
       'Bpen_BF_mod_10', 'Bpen_H_BB_mod_10', 'Bpen_Bpen_H_BB_mod2_10',
       'Bpen_SO_mod_10', 'Bpen_TB_BB_mod_10', 'Bpen_WHIP_10',
       'Bpen_SO_perc_10', 'Bpen_TB_BB_perc_10', 'Bpen_H_BB_perc_10',
       'Bpen_rollsum_IP_35', 'Bpen_rollsum_H_35', 'Bpen_rollsum_BFP_35',
       'Bpen_rollsum_HR_35', 'Bpen_rollsum_R_35', 'Bpen_rollsum_BB_35',
       'Bpen_rollsum_SO_35', 'Bpen_rollsum_HBP_35', 'Bpen_rollsum_2B_35',
       'Bpen_rollsum_3B_35', 'Bpen_H_BB_roll_35', 'Bpen_XB_roll_35',
       'Bpen_TB_roll_35', 'Bpen_IP_mod_35', 'Bpen_BF_mod_35',
       'Bpen_H_BB_mod_35', 'Bpen_Bpen_H_BB_mod2_35', 'Bpen_SO_mod_35',
       'Bpen_TB_BB_mod_35', 'Bpen_WHIP_35', 'Bpen_SO_perc_35',
       'Bpen_TB_BB_perc_35', 'Bpen_H_BB_perc_35', 'Bpen_rollsum_IP_75',
       'Bpen_rollsum_H_75', 'Bpen_rollsum_BFP_75', 'Bpen_rollsum_HR_75',
       'Bpen_rollsum_R_75', 'Bpen_rollsum_BB_75', 'Bpen_rollsum_SO_75',
       'Bpen_rollsum_HBP_75', 'Bpen_rollsum_2B_75', 'Bpen_rollsum_3B_75',
       'Bpen_H_BB_roll_75', 'Bpen_XB_roll_75', 'Bpen_TB_roll_75',
       'Bpen_IP_mod_75', 'Bpen_BF_mod_75', 'Bpen_H_BB_mod_75',
       'Bpen_Bpen_H_BB_mod2_75', 'Bpen_SO_mod_75', 'Bpen_TB_BB_mod_75',
       'Bpen_WHIP_75', 'Bpen_SO_perc_75', 'Bpen_TB_BB_perc_75',
       'Bpen_H_BB_perc_75']

cols_to_add = [col+suff for col in raw_cols_to_add for suff in ['_h','_v']]
col_add_dict = {col:np.zeros(df.shape[0]) for col in cols_to_add}

In [24]:
for i in range(df.shape[0]):
    row = df.iloc[i,:]
    if i%1000==0:
        print(i)
    home_team = row['team_h']
    visit_team = row['team_v']
    date_dblhead = row['date_dblhead']
    curr_df = bullpen_team_data_dict[home_team]
    if date_dblhead in curr_df.index:
        for col in raw_cols_to_add:
            col_add_dict[col+'_h'][i] = curr_df.loc[date_dblhead,col]
    else:
            print(f'no match for {home_team} date {date_dblhead}')
    curr_df = bullpen_team_data_dict[visit_team]
    if date_dblhead in curr_df.index:
        for col in raw_cols_to_add:
            col_add_dict[col+'_v'][i] = curr_df.loc[date_dblhead,col]
    else:
        print(f'no match for {visit_team} date {date_dblhead}')

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000


In [25]:
for col in cols_to_add:
    df[col] = col_add_dict[col]

  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] 

In [26]:
df.to_csv('df_bp7.csv', index=False)