## Baseball Prediction: 5b - Adding Starting Pitching Features
Now that we have raw game-level data for each pitcher, we can derive features based on the starting pitchers to help our prediction model for individual games.

In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('display.max_columns',1000)
pd.set_option('display.max_rows',1000)



## Overall Plan of Attack
- For each starting pitcher we will load their raw data, create features for each game based on their previous performance, and then save the dataframe in a dictionary structure for easy lookup
- Then we can iterate through our game-level dataframe, add in the features for each starting pitcher, and use those to improve our model


In [25]:
df_gooden = pd.read_csv('pitching_data_goodd001.csv')

In [26]:
df_gooden.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 430 entries, 0 to 429
Data columns (total 31 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   at_vs        430 non-null    object 
 1   Opponent     430 non-null    object 
 2   League       430 non-null    object 
 3   GS           430 non-null    int64  
 4   CG           430 non-null    int64  
 5   SHO          430 non-null    int64  
 6   GF           430 non-null    int64  
 7   SV           430 non-null    int64  
 8   IP           430 non-null    float64
 9   H            430 non-null    int64  
 10  BFP          430 non-null    int64  
 11  HR           430 non-null    int64  
 12  R            430 non-null    int64  
 13  ER           430 non-null    int64  
 14  BB           430 non-null    int64  
 15  IB           430 non-null    int64  
 16  SO           430 non-null    int64  
 17  SH           430 non-null    int64  
 18  SF           430 non-null    int64  
 19  WP      

In [27]:
df_gooden.head(20)

Unnamed: 0,at_vs,Opponent,League,GS,CG,SHO,GF,SV,IP,H,BFP,HR,R,ER,BB,IB,SO,SH,SF,WP,HBP,BK,2B,3B,GDP,ROE,W,L,ERA,Date,dblhead_num
0,AT,HOU,N,1,0,0,0,0,5.0,3,20,0,1,1,2,0,5,0,0,0,0,0,0,0,0,0,1,0,1.8,4- 7-1984,
1,AT,CHI,N,1,0,0,0,0,3.1,7,20,0,6,6,3,0,4,0,0,0,0,0,1,0,0,0,0,1,7.56,4-13-1984,
2,VS,MON,N,1,0,0,0,0,5.0,5,26,0,4,0,4,0,7,0,0,0,1,0,0,0,0,1,0,0,4.72,4-19-1984,
3,AT,MON,N,1,0,0,0,0,7.0,2,24,0,1,0,1,0,10,0,0,0,0,1,0,0,0,1,0,0,3.1,4-25-1984,
4,VS,CHI,N,1,0,0,0,0,7.0,4,30,0,1,1,5,0,10,0,0,1,0,0,2,0,0,0,1,0,2.63,5- 1-1984,
5,VS,HOU,N,1,0,0,0,0,2.1,6,15,0,8,8,2,0,4,0,0,0,0,1,0,0,0,0,0,1,4.85,5- 6-1984,
6,AT,LA,N,1,1,1,0,0,9.0,4,32,0,0,0,2,0,11,0,0,0,0,1,0,0,0,0,1,0,3.72,5-11-1984,
7,AT,SD,N,1,0,0,0,0,4.2,5,22,0,4,4,3,0,8,0,0,0,0,0,1,1,0,0,0,1,4.15,5-18-1984,
8,VS,LA,N,1,0,0,0,0,8.0,3,30,1,1,1,3,0,14,0,0,0,0,0,1,0,0,0,1,0,3.68,5-25-1984,
9,VS,STL,N,1,0,0,0,0,7.0,4,28,0,1,1,3,0,7,1,1,0,0,0,0,0,0,0,0,0,3.39,6- 1-1984,


In [28]:
df_gooden['IP_real'] = df_gooden['IP_real'] = (df_gooden.IP - (df_gooden.IP % 1)) + (df_gooden.IP % 1) * (10/3)
df_gooden.head(10)

Unnamed: 0,at_vs,Opponent,League,GS,CG,SHO,GF,SV,IP,H,BFP,HR,R,ER,BB,IB,SO,SH,SF,WP,HBP,BK,2B,3B,GDP,ROE,W,L,ERA,Date,dblhead_num,IP_real
0,AT,HOU,N,1,0,0,0,0,5.0,3,20,0,1,1,2,0,5,0,0,0,0,0,0,0,0,0,1,0,1.8,4- 7-1984,,5.0
1,AT,CHI,N,1,0,0,0,0,3.1,7,20,0,6,6,3,0,4,0,0,0,0,0,1,0,0,0,0,1,7.56,4-13-1984,,3.333333
2,VS,MON,N,1,0,0,0,0,5.0,5,26,0,4,0,4,0,7,0,0,0,1,0,0,0,0,1,0,0,4.72,4-19-1984,,5.0
3,AT,MON,N,1,0,0,0,0,7.0,2,24,0,1,0,1,0,10,0,0,0,0,1,0,0,0,1,0,0,3.1,4-25-1984,,7.0
4,VS,CHI,N,1,0,0,0,0,7.0,4,30,0,1,1,5,0,10,0,0,1,0,0,2,0,0,0,1,0,2.63,5- 1-1984,,7.0
5,VS,HOU,N,1,0,0,0,0,2.1,6,15,0,8,8,2,0,4,0,0,0,0,1,0,0,0,0,0,1,4.85,5- 6-1984,,2.333333
6,AT,LA,N,1,1,1,0,0,9.0,4,32,0,0,0,2,0,11,0,0,0,0,1,0,0,0,0,1,0,3.72,5-11-1984,,9.0
7,AT,SD,N,1,0,0,0,0,4.2,5,22,0,4,4,3,0,8,0,0,0,0,0,1,1,0,0,0,1,4.15,5-18-1984,,4.666667
8,VS,LA,N,1,0,0,0,0,8.0,3,30,1,1,1,3,0,14,0,0,0,0,0,1,0,0,0,1,0,3.68,5-25-1984,,8.0
9,VS,STL,N,1,0,0,0,0,7.0,4,28,0,1,1,3,0,7,1,1,0,0,0,0,0,0,0,0,0,3.39,6- 1-1984,,7.0


In [29]:
df_gooden.columns

Index(['at_vs', 'Opponent', 'League', 'GS', 'CG', 'SHO', 'GF', 'SV', 'IP', 'H',
       'BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP', 'HBP', 'BK',
       '2B', '3B', 'GDP', 'ROE', 'W', 'L', 'ERA', 'Date', 'dblhead_num',
       'IP_real'],
      dtype='object')

### Generating Features for a Starting Pitcher
- Want to creature features based on a lookback across $n$ games for each pitcher
- Need to aggregate statistics like innings pitched (IP), runs or earned runs given up, hits and walks given up, strikeouts, and so on
- We will also need to decide how to handle pitchers early in their career when they have relatively few games on which to base their performance.

In [30]:
def roll_column(df, col, winsize):
    # do the standard Pandas rolling calc
    t_col = df[col].rolling(winsize, closed='left').sum().to_numpy()
    
    # for the early columns, just do a rolling sum from the beginning
    t_col[:winsize] = np.concatenate(([0],df[col].iloc[:(winsize)].cumsum().to_numpy()[:-1]))

    return(t_col)

In [31]:
def load_and_process_pitch_df(p_id, filepath=''):
    fname = filepath+'pitching_data_'+p_id+'.csv'
    pitch_df = pd.read_csv(fname)
    
    # Convert date, fix dblhead_num to be 0,1,2
    pitch_df['Date'] = (pd.to_datetime(pitch_df.Date, format="%m-%d-%Y", errors="coerce")
                    .astype(str)
                    .str.replace('-', '')
                    .astype(int))
    pitch_df.dblhead_num.fillna(0, inplace=True)
    pitch_df['dblhead_num'] = pitch_df['dblhead_num'].astype(int)
    
    # Convert IP to proper mathematical format
    pitch_df['IP_real'] = (pitch_df.IP - (pitch_df.IP % 1)) + (pitch_df.IP % 1) * (10/3)
    
    cols_to_agg = ['IP_real', 'H','BFP', 'HR', 'R', 'ER', 'BB', 'IB', 'SO', 'SH', 'SF', 'WP', 'HBP', 'BK',
       '2B', '3B']
    winsizes = [10,35]
    for winsize in winsizes:
        for raw_col in cols_to_agg:
            new_colname = 'rollsum_'+raw_col+'_'+str(winsize)        
            pitch_df[new_colname] = roll_column(pitch_df, raw_col, winsize)

    
    er_per_ip_def = (5/9)
    h_bb_per_ip_def = 1.5
    h_bb_per_bf_def = .37
    so_per_bf_def = .2
    ip_per_game_def = 3
    bf_per_game_def = 12
    tb_bb_perc_def = .45
    for winsize in winsizes:
        hit_col = 'rollsum_H_'+str(winsize)
        bb_col = 'rollsum_BB_'+str(winsize)
        h_bb_col = 'H_BB_roll_'+str(winsize)
        double_col = 'rollsum_2B_'+str(winsize)
        triple_col = 'rollsum_3B_'+str(winsize)
        hr_col = 'rollsum_HR_'+str(winsize)
        xb_col = 'XB_roll_'+str(winsize)
        tb_col = 'TB_roll_'+str(winsize)
        so_col = 'rollsum_SO_'+str(winsize)
        so_mod_col = 'SO_mod_'+str(winsize)
        ip_col = 'rollsum_IP_real_'+str(winsize)
        ip_mod_col = 'IP_mod_'+str(winsize)
        er_col = 'rollsum_ER_'+str(winsize)
        er_mod_col = 'ER_mod_'+str(winsize)
        bf_col = 'rollsum_BFP_'+str(winsize)
        bf_mod_col = 'BF_mod_'+str(winsize)
        era_col = 'ERA_'+str(winsize)
        whip_col = 'WHIP_'+str(winsize)
        so_perc_col = 'SO_perc_'+str(winsize)
        h_bb_perc_col = 'H_BB_perc_'+str(winsize)
        h_bb_mod_col = 'H_BB_mod_'+str(winsize)
        h_bb_mod2_col = 'H_BB_mod2_'+str(winsize)
        tb_bb_mod_col = 'TB_BB_mod_'+str(winsize)
        tb_bb_perc_col = 'TB_BB_perc_'+str(winsize)
        pitch_df[h_bb_col] = pitch_df[hit_col]+pitch_df[bb_col]
        pitch_df[xb_col] = pitch_df[double_col]+2*pitch_df[triple_col]+2*pitch_df[hr_col]
        pitch_df[tb_col] = pitch_df[hit_col]+pitch_df[xb_col]
        pitch_df[ip_mod_col] = np.maximum(pitch_df[ip_col], winsize*ip_per_game_def)
        pitch_df[bf_mod_col] = np.maximum(pitch_df[bf_col], winsize*bf_per_game_def)
        pitch_df[er_mod_col] = pitch_df[er_col] + er_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[h_bb_mod_col] = pitch_df[h_bb_col] + h_bb_per_ip_def*(pitch_df[ip_mod_col]-pitch_df[ip_col])
        pitch_df[h_bb_mod2_col] = pitch_df[h_bb_col] + h_bb_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[so_mod_col] = pitch_df[so_col] + so_per_bf_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[tb_bb_mod_col] = (pitch_df[tb_col] + pitch_df[bb_col])+ tb_bb_perc_def*(pitch_df[bf_mod_col]-pitch_df[bf_col])
        pitch_df[era_col] = (pitch_df[er_mod_col]/pitch_df[ip_mod_col])*9
        pitch_df[whip_col] = pitch_df[h_bb_mod_col]/pitch_df[ip_mod_col]
        pitch_df[so_perc_col] = pitch_df[so_mod_col]/pitch_df[bf_mod_col]
        pitch_df[tb_bb_perc_col] = pitch_df[tb_bb_mod_col]/pitch_df[bf_mod_col]
        pitch_df[h_bb_perc_col] = pitch_df[h_bb_mod2_col]/pitch_df[bf_mod_col]
    pitch_df['date_dblhead'] = (pitch_df['Date'].astype(str) + pitch_df['dblhead_num'].astype(str)).astype(int)
    pitch_df.set_index('date_dblhead', inplace=True)
    return(pitch_df)

In [32]:
df_gooden.head(50)

Unnamed: 0,at_vs,Opponent,League,GS,CG,SHO,GF,SV,IP,H,BFP,HR,R,ER,BB,IB,SO,SH,SF,WP,HBP,BK,2B,3B,GDP,ROE,W,L,ERA,Date,dblhead_num,IP_real
0,AT,HOU,N,1,0,0,0,0,5.0,3,20,0,1,1,2,0,5,0,0,0,0,0,0,0,0,0,1,0,1.8,4- 7-1984,,5.0
1,AT,CHI,N,1,0,0,0,0,3.1,7,20,0,6,6,3,0,4,0,0,0,0,0,1,0,0,0,0,1,7.56,4-13-1984,,3.333333
2,VS,MON,N,1,0,0,0,0,5.0,5,26,0,4,0,4,0,7,0,0,0,1,0,0,0,0,1,0,0,4.72,4-19-1984,,5.0
3,AT,MON,N,1,0,0,0,0,7.0,2,24,0,1,0,1,0,10,0,0,0,0,1,0,0,0,1,0,0,3.1,4-25-1984,,7.0
4,VS,CHI,N,1,0,0,0,0,7.0,4,30,0,1,1,5,0,10,0,0,1,0,0,2,0,0,0,1,0,2.63,5- 1-1984,,7.0
5,VS,HOU,N,1,0,0,0,0,2.1,6,15,0,8,8,2,0,4,0,0,0,0,1,0,0,0,0,0,1,4.85,5- 6-1984,,2.333333
6,AT,LA,N,1,1,1,0,0,9.0,4,32,0,0,0,2,0,11,0,0,0,0,1,0,0,0,0,1,0,3.72,5-11-1984,,9.0
7,AT,SD,N,1,0,0,0,0,4.2,5,22,0,4,4,3,0,8,0,0,0,0,0,1,1,0,0,0,1,4.15,5-18-1984,,4.666667
8,VS,LA,N,1,0,0,0,0,8.0,3,30,1,1,1,3,0,14,0,0,0,0,0,1,0,0,0,1,0,3.68,5-25-1984,,8.0
9,VS,STL,N,1,0,0,0,0,7.0,4,28,0,1,1,3,0,7,1,1,0,0,0,0,0,0,0,0,0,3.39,6- 1-1984,,7.0


## Load in our game level data

In [33]:
df=pd.read_csv('df_bp3.csv')

  df=pd.read_csv('df_bp3.csv')


In [34]:
start_pitchers_h = df.pitcher_start_id_h.unique()
start_pitchers_v = df.pitcher_start_id_v.unique()
start_pitchers_all = np.union1d(start_pitchers_h, start_pitchers_v)


In [35]:
pitcher_data_dict = {}
for i, p_id in enumerate(start_pitchers_all):
    if i%100==0:
        print(i)
    pitcher_data_dict[p_id] = load_and_process_pitch_df(p_id)

0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

100


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

200


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

300


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

400


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

500


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

600


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

700


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

800


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

900


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1000


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1100


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1200


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1300


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1400


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1500


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1600


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1700


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1800


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

1900


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2000


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2100


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2200


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2300


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2400


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2500


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2600


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2700


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2800


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

2900


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  pitch_df.dblhead_num.fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

In [36]:
raw_cols_to_add = ['IP_real', 'rollsum_IP_real_10', 'rollsum_H_10',
       'rollsum_BFP_10', 'rollsum_HR_10', 'rollsum_R_10', 'rollsum_ER_10',
       'rollsum_BB_10', 'rollsum_IB_10', 'rollsum_SO_10', 'rollsum_SH_10',
       'rollsum_SF_10', 'rollsum_WP_10', 'rollsum_HBP_10',
       'rollsum_BK_10', 'rollsum_2B_10', 'rollsum_3B_10',
       'rollsum_IP_real_35', 'rollsum_H_35', 'rollsum_BFP_35',
       'rollsum_HR_35', 'rollsum_R_35', 'rollsum_ER_35', 'rollsum_BB_35',
       'rollsum_IB_35', 'rollsum_SO_35', 'rollsum_SH_35', 'rollsum_SF_35',
       'rollsum_WP_35', 'rollsum_HBP_35', 'rollsum_BK_35',
       'rollsum_2B_35', 'rollsum_3B_35', 'H_BB_roll_10', 'XB_roll_10',
       'TB_roll_10', 'IP_mod_10', 'BF_mod_10', 'ER_mod_10', 'H_BB_mod_10', 'H_BB_perc_10',
       'SO_mod_10', 'TB_BB_mod_10', 'ERA_10', 'WHIP_10', 'SO_perc_10',
       'TB_BB_perc_10', 'H_BB_roll_35', 'XB_roll_35', 'TB_roll_35',
       'IP_mod_35', 'BF_mod_35', 'ER_mod_35', 'H_BB_mod_35','H_BB_perc_35', 'SO_mod_35',
       'TB_BB_mod_35', 'ERA_35', 'WHIP_35', 'SO_perc_35', 'TB_BB_perc_35']
cols_to_add = [col+suff for col in raw_cols_to_add for suff in ['_h','_v']]

col_add_dict = {col:np.zeros(df.shape[0]) for col in cols_to_add}

In [37]:
for i in range(df.shape[0]):
    row = df.iloc[i,:]
    if i%1000==0:
        print(i)
    sp_id_v = row['pitcher_start_id_v']
    sp_id_h = row['pitcher_start_id_h']
    date_dblhead = row['date_dblhead']
    if sp_id_v in pitcher_data_dict.keys():
        curr_df = pitcher_data_dict[sp_id_v]
        if date_dblhead in curr_df.index:
            for col in raw_cols_to_add:
                col_add_dict[col+'_v'][i] = curr_df.loc[date_dblhead,col]
        else:
            print(f'no match for {sp_id_v} date {date_dblhead}')
    if sp_id_h in pitcher_data_dict.keys():
        curr_df = pitcher_data_dict[sp_id_h]
        if date_dblhead in curr_df.index:
            for col in raw_cols_to_add:
                col_add_dict[col+'_h'][i] = curr_df.loc[date_dblhead,col]
        else:
            print(f'no match for {sp_id_h} date {date_dblhead}')

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000
57000
58000
59000
60000
61000
62000
63000
64000
65000
66000
67000
68000
69000
70000
71000
72000
73000
74000
75000
76000
77000
78000
79000
80000
81000
82000
83000
84000
85000
86000
87000
88000
89000
90000
91000
92000


In [38]:
for col in cols_to_add:
    df[col] = col_add_dict[col]

  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]
  df[col] = col_add_dict[col]


In [39]:
df.sample(5)

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,IP_real_h,IP_real_v,rollsum_IP_real_10_h,rollsum_IP_real_10_v,rollsum_H_10_h,rollsum_H_10_v,rollsum_BFP_10_h,rollsum_BFP_10_v,rollsum_HR_10_h,rollsum_HR_10_v,rollsum_R_10_h,rollsum_R_10_v,rollsum_ER_10_h,rollsum_ER_10_v,rollsum_BB_10_h,rollsum_BB_10_v,rollsum_IB_10_h,rollsum_IB_10_v,rollsum_SO_10_h,rollsum_SO_10_v,rollsum_SH_10_h,rollsum_SH_10_v,rollsum_SF_10_h,rollsum_SF_10_v,rollsum_WP_10_h,rollsum_WP_10_v,rollsum_HBP_10_h,rollsum_HBP_10_v,rollsum_BK_10_h,rollsum_BK_10_v,rollsum_2B_10_h,rollsum_2B_10_v,rollsum_3B_10_h,rollsum_3B_10_v,rollsum_IP_real_35_h,rollsum_IP_real_35_v,rollsum_H_35_h,rollsum_H_35_v,rollsum_BFP_35_h,rollsum_BFP_35_v,rollsum_HR_35_h,rollsum_HR_35_v,rollsum_R_35_h,rollsum_R_35_v,rollsum_ER_35_h,rollsum_ER_35_v,rollsum_BB_35_h,rollsum_BB_35_v,rollsum_IB_35_h,rollsum_IB_35_v,rollsum_SO_35_h,rollsum_SO_35_v,rollsum_SH_35_h,rollsum_SH_35_v,rollsum_SF_35_h,rollsum_SF_35_v,rollsum_WP_35_h,rollsum_WP_35_v,rollsum_HBP_35_h,rollsum_HBP_35_v,rollsum_BK_35_h,rollsum_BK_35_v,rollsum_2B_35_h,rollsum_2B_35_v,rollsum_3B_35_h,rollsum_3B_35_v,H_BB_roll_10_h,H_BB_roll_10_v,XB_roll_10_h,XB_roll_10_v,TB_roll_10_h,TB_roll_10_v,IP_mod_10_h,IP_mod_10_v,BF_mod_10_h,BF_mod_10_v,ER_mod_10_h,ER_mod_10_v,H_BB_mod_10_h,H_BB_mod_10_v,H_BB_perc_10_h,H_BB_perc_10_v,SO_mod_10_h,SO_mod_10_v,TB_BB_mod_10_h,TB_BB_mod_10_v,ERA_10_h,ERA_10_v,WHIP_10_h,WHIP_10_v,SO_perc_10_h,SO_perc_10_v,TB_BB_perc_10_h,TB_BB_perc_10_v,H_BB_roll_35_h,H_BB_roll_35_v,XB_roll_35_h,XB_roll_35_v,TB_roll_35_h,TB_roll_35_v,IP_mod_35_h,IP_mod_35_v,BF_mod_35_h,BF_mod_35_v,ER_mod_35_h,ER_mod_35_v,H_BB_mod_35_h,H_BB_mod_35_v,H_BB_perc_35_h,H_BB_perc_35_v,SO_mod_35_h,SO_mod_35_v,TB_BB_mod_35_h,TB_BB_mod_35_v,ERA_35_h,ERA_35_v,WHIP_35_h,WHIP_35_v,SO_perc_35_h,SO_perc_35_v,TB_BB_perc_35_h,TB_BB_perc_35_v
29406,19940514,0,Sat,PIT,NL,34,PHI,NL,36,2,3,51,N,,,,PHI12,39916.0,128,10001000,00200100x,31,8,1,1,0,2,0,1,0,4,0,4,0,0,2,0,7,1,3,3,0,0,24,13,0,0,2,0,28,7,1,0,1,3,1,0,0,2,0,4,1,0,2,0,4,3,2,2,0,0,27,11,0,0,3,0,wintm901,Mike Winters,bellw901,Wally Bell,crawj901,Jerry Crawford,poncl901,Larry Poncino,,(none),,(none),leylj801,Jim Leyland,fregj101,Jim Fregosi,willm005,Mike Williams,cooks001,Steve Cooke,joned001,Doug Jones,holld001,Dave Hollins,cooks001,Steve Cooke,willm005,Mike Williams,marta001,Al Martin,7,garcc001,Carlos Garcia,4,vansa001,Andy Van Slyke,8,kingj001,Jeff King,5,merco002,Orlando Merced,9,huntb001,Brian Hunter,3,parrl001,Lance Parrish,2,folet001,Tom Foley,6,cooks001,Steve Cooke,1,dyksl001,Lenny Dykstra,8,duncm001,Mariano Duncan,4,holld001,Dave Hollins,5,incap001,Pete Incaviglia,7,jordr001,Ricky Jordan,3,chamw001,Wes Chamberlain,9,pratt001,Todd Pratt,2,batik002,Kim Batiste,6,willm005,Mike Williams,1,,Y,1994,1,1,5,199405140,0.277106,0.267357,0.352323,0.333829,0.428972,0.393312,0.781294,0.727141,86.0,89.0,27.0,53.0,146.0,109.0,0.263006,0.272727,0.337662,0.350397,0.403661,0.383399,0.741323,0.733796,14.0,12.0,4.0,6.0,23.0,20.0,0.0,0.0,0.5,0.0,,7.0,8.0,30.333333,58.0,29.0,71.0,132.0,257.0,1.0,8.0,13.0,35.0,12.0,34.0,16.0,21.0,4.0,3.0,25.0,28.0,1.0,3.0,1.0,3.0,2.0,2.0,0.0,2.0,0.0,0.0,6.0,17.0,0.0,3.0,87.666667,230.0,87.0,235.0,377.0,970.0,8.0,25.0,54.0,112.0,49.0,103.0,35.0,68.0,5.0,7.0,43.0,142.0,3.0,15.0,2.0,8.0,2.0,4.0,0.0,4.0,0.0,2.0,21.0,61.0,3.0,7.0,45.0,92.0,8.0,39.0,37.0,110.0,30.333333,58.0,132.0,257.0,12.0,34.0,45.0,92.0,0.340909,0.357977,25.0,28.0,53.0,131.0,3.56044,5.275862,1.483516,1.586207,0.189394,0.108949,0.401515,0.509728,122.0,303.0,43.0,125.0,130.0,360.0,105.0,230.0,420.0,970.0,58.62963,103.0,148.0,303.0,0.328357,0.312371,51.6,142.0,184.35,428.0,5.025397,4.030435,1.409524,1.317391,0.122857,0.146392,0.438929,0.441237
85629,20170930,0,Sat,CIN,NL,161,CHN,NL,161,0,9,51,D,,,,CHI11,41493.0,184,0,02200410x,33,8,1,0,0,0,0,0,1,1,0,15,0,1,1,0,8,4,9,9,3,0,24,10,1,0,1,0,33,10,1,0,2,9,0,1,0,7,0,7,0,1,0,0,8,5,0,0,1,0,27,6,0,0,1,0,wintm901,Mike Winters,fostm901,Marty Foster,tumpj901,John Tumpane,muchm901,Mike Muchlinski,,(none),,(none),pricb801,Bryan Price,maddj801,Joe Maddon,lestj001,Jon Lester,stepj002,Jackson Stephens,,(none),schwk001,Kyle Schwarber,stepj002,Jackson Stephens,lestj001,Jon Lester,hamib001,Billy Hamilton,8,suare001,Eugenio Suarez,5,vottj001,Joey Votto,3,genns001,Scooter Gennett,4,duvaa001,Adam Duvall,7,sches001,Scott Schebler,9,peraj003,Jose Peraza,6,turns002,Stuart Turner,2,stepj002,Jackson Stephens,1,happi001,Ian Happ,8,bryak001,Kris Bryant,5,rizza001,Anthony Rizzo,3,contw001,Willson Contreras,2,schwk001,Kyle Schwarber,7,russa002,Addison Russell,6,heywj001,Jason Heyward,9,baezj001,Javier Baez,4,lestj001,Jon Lester,1,,Y,2017,9,1,9,201709300,0.255636,0.253331,0.33181,0.323802,0.438,0.433108,0.76981,0.75691,63.0,125.0,30.0,37.0,94.0,78.0,0.271556,0.244376,0.350133,0.318266,0.435084,0.400818,0.785217,0.719084,14.0,19.0,7.0,7.0,11.0,15.0,0.0,0.0,0.5,0.0,,5.0,4.0,52.666667,21.0,61.0,15.0,238.0,83.0,9.0,5.0,36.0,9.0,33.0,9.0,22.0,6.0,1.0,0.0,47.0,16.0,1.0,0.0,2.0,0.0,3.0,0.0,2.0,1.0,0.0,0.0,10.0,3.0,1.0,0.0,202.333333,21.0,193.0,15.0,848.0,83.0,27.0,5.0,107.0,9.0,93.0,9.0,65.0,6.0,3.0,0.0,199.0,16.0,5.0,0.0,5.0,0.0,3.0,0.0,6.0,1.0,0.0,0.0,37.0,3.0,4.0,0.0,83.0,21.0,30.0,13.0,91.0,28.0,52.666667,30.0,238.0,120.0,33.0,14.0,83.0,34.5,0.348739,0.289083,47.0,23.4,113.0,50.65,5.639241,4.2,1.575949,1.15,0.197479,0.195,0.47479,0.422083,258.0,21.0,99.0,13.0,292.0,28.0,202.333333,105.0,848.0,420.0,93.0,55.666667,258.0,147.0,0.304245,0.346881,199.0,83.4,357.0,185.65,4.136738,4.771429,1.275124,1.4,0.23467,0.198571,0.420991,0.442024
75993,20140404,0,Fri,MIN,AL,4,CLE,AL,4,2,7,51,N,,,,CLE08,41274.0,181,200000000,00000340x,33,9,3,0,1,2,0,1,0,3,1,11,0,1,0,0,8,4,7,7,0,0,24,12,0,0,0,0,33,10,3,0,2,7,1,0,0,4,1,5,0,0,0,0,7,5,2,2,0,0,27,7,0,0,1,0,conrc901,Chris Conroy,bakej902,Jordan Baker,mealj901,Jerry Meals,emmep901,Paul Emmel,,(none),,(none),gardr001,Ron Gardenhire,frant001,Terry Francona,outmj001,Josh Outman,pelfm001,Mike Pelfrey,,(none),swisn001,Nick Swisher,pelfm001,Mike Pelfrey,salad001,Danny Salazar,dozib001,Brian Dozier,4,mauej001,Joe Mauer,3,willj004,Josh Willingham,7,colac001,Chris Colabello,10,plout001,Trevor Plouffe,5,arcio001,Oswaldo Arcia,9,suzuk001,Kurt Suzuki,2,hicka001,Aaron Hicks,8,florp001,Pedro Florimon,6,morgn001,Nyjer Morgan,8,swisn001,Nick Swisher,3,kipnj001,Jason Kipnis,4,santc002,Carlos Santana,5,branm003,Michael Brantley,7,cabra002,Asdrubal Cabrera,6,murpd005,David Murphy,9,gomey001,Yan Gomes,2,chisl001,Lonnie Chisenhall,10,,Y,2014,5,1,9,201404040,0.253625,0.242115,0.323407,0.307969,0.407231,0.379749,0.730638,0.687718,119.0,52.0,35.0,33.0,97.0,80.0,0.262575,0.250239,0.336652,0.319167,0.417505,0.366762,0.754157,0.68593,24.0,10.0,7.0,6.0,13.0,23.0,0.0,0.0,0.5,0.0,,5.666667,5.333333,52.0,54.333333,44.0,62.0,211.0,247.0,7.0,3.0,18.0,32.0,18.0,30.0,15.0,23.0,0.0,0.0,65.0,42.0,1.0,0.0,0.0,5.0,3.0,1.0,0.0,3.0,0.0,0.0,6.0,9.0,1.0,1.0,52.0,188.333333,44.0,233.0,211.0,838.0,7.0,14.0,18.0,109.0,18.0,103.0,15.0,60.0,0.0,1.0,65.0,123.0,1.0,3.0,0.0,8.0,3.0,2.0,0.0,6.0,0.0,0.0,6.0,40.0,1.0,5.0,59.0,85.0,22.0,17.0,66.0,79.0,52.0,54.333333,211.0,247.0,18.0,30.0,59.0,85.0,0.279621,0.34413,65.0,42.0,81.0,102.0,3.115385,4.969325,1.134615,1.564417,0.308057,0.17004,0.383886,0.412955,59.0,293.0,22.0,78.0,66.0,311.0,105.0,188.333333,420.0,838.0,47.444444,103.0,138.5,293.0,0.324595,0.349642,106.8,123.0,175.05,371.0,4.066667,4.922124,1.319048,1.555752,0.254286,0.146778,0.416786,0.442721
75851,20130923,0,Mon,MIL,NL,156,ATL,NL,156,5,0,54,N,,,,ATL02,19893.0,149,100020020,000000000,38,11,2,0,3,4,0,0,0,0,0,8,0,0,0,0,6,3,0,0,1,0,27,13,0,0,1,0,29,3,0,1,0,0,0,0,0,3,0,6,1,0,1,0,5,3,4,4,0,0,27,8,1,0,0,0,herna901,Angel Hernandez,demud901,Dana DeMuth,nauep901,Paul Nauert,eddid901,Doug Eddings,,(none),,(none),roenr001,Ron Roenicke,gonzf801,Fredi Gonzalez,estrm001,Marco Estrada,minom001,Mike Minor,,(none),gomec002,Carlos Gomez,estrm001,Marco Estrada,minom001,Mike Minor,aokin001,Nori Aoki,9,gomec002,Carlos Gomez,8,lucrj001,Jonathan Lucroy,2,ramia001,Aramis Ramirez,5,davik003,Khris Davis,7,halts002,Sean Halton,3,betay001,Yuniesky Betancourt,4,bianj001,Jeff Bianchi,6,estrm001,Marco Estrada,1,heywj001,Jason Heyward,8,schaj002,Jordan Schafer,9,uptoj001,Justin Upton,7,johnc003,Chris Johnson,5,gatte001,Evan Gattis,2,simma001,Andrelton Simmons,6,terdj001,Joey Terdoslavich,3,janip001,Paul Janish,4,minom001,Mike Minor,1,,Y,2013,-5,0,5,201309230,0.247193,0.251693,0.315252,0.303696,0.398859,0.398133,0.714111,0.701829,64.0,142.0,32.0,50.0,84.0,115.0,0.229592,0.252546,0.294393,0.319741,0.366327,0.382892,0.660719,0.702633,16.0,24.0,8.0,8.0,12.0,17.0,0.0,0.0,0.5,0.0,,7.0,7.0,61.666667,62.666667,53.0,49.0,244.0,238.0,6.0,8.0,26.0,23.0,25.0,23.0,15.0,9.0,0.0,0.0,46.0,58.0,3.0,0.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,11.0,7.0,0.0,1.0,222.666667,204.333333,177.0,192.0,877.0,833.0,21.0,26.0,77.0,93.0,71.0,87.0,52.0,46.0,2.0,0.0,197.0,195.0,6.0,9.0,7.0,4.0,6.0,6.0,2.0,2.0,0.0,1.0,45.0,37.0,3.0,5.0,68.0,58.0,23.0,25.0,76.0,74.0,61.666667,62.666667,244.0,238.0,25.0,23.0,68.0,58.0,0.278689,0.243697,46.0,58.0,91.0,83.0,3.648649,3.303191,1.102703,0.925532,0.188525,0.243697,0.372951,0.348739,229.0,238.0,93.0,99.0,270.0,291.0,222.666667,204.333333,877.0,833.0,71.0,87.0,229.0,238.0,0.261117,0.285714,197.0,195.0,322.0,337.0,2.86976,3.831974,1.028443,1.164763,0.224629,0.234094,0.367161,0.404562
82685,20160824,0,Wed,SFN,NL,126,LAN,NL,126,0,1,51,N,,,,LOS03,43957.0,167,0,00010000x,33,6,0,0,0,0,0,0,0,2,0,7,0,0,0,0,8,3,1,1,0,0,24,12,0,0,1,0,28,5,1,0,1,1,0,0,0,1,0,8,0,1,0,0,4,5,0,0,0,0,27,7,0,0,0,0,torrc901,Carlos Torres,holbs901,Sam Holbrook,davig901,Gerry Davis,drakr901,Rob Drake,,(none),,(none),bochb002,Bruce Bochy,robed001,Dave Roberts,hillr001,Rich Hill,cuetj001,Johnny Cueto,jansk001,Kenley Jansen,turnj001,Justin Turner,cuetj001,Johnny Cueto,hillr001,Rich Hill,nunee002,Eduardo Nunez,5,pagaa001,Angel Pagan,7,poseb001,Buster Posey,2,pench001,Hunter Pence,9,crawb001,Brandon Crawford,6,herng001,Gorkys Hernandez,8,beltb001,Brandon Belt,3,adrie001,Ehire Adrianza,4,cuetj001,Johnny Cueto,1,utlec001,Chase Utley,4,seagc001,Corey Seager,6,turnj001,Justin Turner,5,gonza003,Adrian Gonzalez,3,grany001,Yasmani Grandal,2,reddj001,Josh Reddick,9,pedej001,Joc Pederson,8,kendh001,Howie Kendrick,7,hillr001,Rich Hill,1,,Y,2016,1,1,1,201608240,0.249458,0.261276,0.316793,0.330238,0.408123,0.402156,0.724915,0.732394,64.0,93.0,26.0,41.0,76.0,84.0,0.276794,0.259013,0.329879,0.334753,0.476235,0.388994,0.806114,0.723747,8.0,16.0,4.0,8.0,15.0,14.0,0.0,0.0,0.5,0.0,,6.0,6.0,57.0,64.333333,35.0,69.0,225.0,270.0,1.0,10.0,12.0,32.0,12.0,31.0,19.0,12.0,0.0,1.0,61.0,56.0,0.0,3.0,1.0,2.0,0.0,1.0,4.0,2.0,0.0,0.0,5.0,10.0,1.0,1.0,110.666667,233.0,77.0,241.0,448.0,962.0,4.0,23.0,29.0,99.0,26.0,94.0,39.0,46.0,1.0,1.0,135.0,191.0,0.0,7.0,1.0,4.0,1.0,3.0,11.0,9.0,0.0,1.0,9.0,42.0,2.0,4.0,54.0,81.0,9.0,32.0,44.0,101.0,57.0,64.333333,225.0,270.0,12.0,31.0,54.0,81.0,0.24,0.3,61.0,56.0,63.0,113.0,1.894737,4.336788,0.947368,1.259067,0.271111,0.207407,0.28,0.418519,116.0,287.0,21.0,96.0,98.0,337.0,110.666667,233.0,448.0,962.0,26.0,94.0,116.0,287.0,0.258929,0.298337,135.0,191.0,137.0,383.0,2.114458,3.630901,1.048193,1.23176,0.301339,0.198545,0.305804,0.398129


In [40]:
df.shape

(92946, 321)

In [41]:
(df.IP_mod_10_h==0).sum()

0

In [42]:
(df.IP_mod_10_v==0).sum()

0

In [43]:
df.loc[df.IP_mod_10_v==0]

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,IP_real_h,IP_real_v,rollsum_IP_real_10_h,rollsum_IP_real_10_v,rollsum_H_10_h,rollsum_H_10_v,rollsum_BFP_10_h,rollsum_BFP_10_v,rollsum_HR_10_h,rollsum_HR_10_v,rollsum_R_10_h,rollsum_R_10_v,rollsum_ER_10_h,rollsum_ER_10_v,rollsum_BB_10_h,rollsum_BB_10_v,rollsum_IB_10_h,rollsum_IB_10_v,rollsum_SO_10_h,rollsum_SO_10_v,rollsum_SH_10_h,rollsum_SH_10_v,rollsum_SF_10_h,rollsum_SF_10_v,rollsum_WP_10_h,rollsum_WP_10_v,rollsum_HBP_10_h,rollsum_HBP_10_v,rollsum_BK_10_h,rollsum_BK_10_v,rollsum_2B_10_h,rollsum_2B_10_v,rollsum_3B_10_h,rollsum_3B_10_v,rollsum_IP_real_35_h,rollsum_IP_real_35_v,rollsum_H_35_h,rollsum_H_35_v,rollsum_BFP_35_h,rollsum_BFP_35_v,rollsum_HR_35_h,rollsum_HR_35_v,rollsum_R_35_h,rollsum_R_35_v,rollsum_ER_35_h,rollsum_ER_35_v,rollsum_BB_35_h,rollsum_BB_35_v,rollsum_IB_35_h,rollsum_IB_35_v,rollsum_SO_35_h,rollsum_SO_35_v,rollsum_SH_35_h,rollsum_SH_35_v,rollsum_SF_35_h,rollsum_SF_35_v,rollsum_WP_35_h,rollsum_WP_35_v,rollsum_HBP_35_h,rollsum_HBP_35_v,rollsum_BK_35_h,rollsum_BK_35_v,rollsum_2B_35_h,rollsum_2B_35_v,rollsum_3B_35_h,rollsum_3B_35_v,H_BB_roll_10_h,H_BB_roll_10_v,XB_roll_10_h,XB_roll_10_v,TB_roll_10_h,TB_roll_10_v,IP_mod_10_h,IP_mod_10_v,BF_mod_10_h,BF_mod_10_v,ER_mod_10_h,ER_mod_10_v,H_BB_mod_10_h,H_BB_mod_10_v,H_BB_perc_10_h,H_BB_perc_10_v,SO_mod_10_h,SO_mod_10_v,TB_BB_mod_10_h,TB_BB_mod_10_v,ERA_10_h,ERA_10_v,WHIP_10_h,WHIP_10_v,SO_perc_10_h,SO_perc_10_v,TB_BB_perc_10_h,TB_BB_perc_10_v,H_BB_roll_35_h,H_BB_roll_35_v,XB_roll_35_h,XB_roll_35_v,TB_roll_35_h,TB_roll_35_v,IP_mod_35_h,IP_mod_35_v,BF_mod_35_h,BF_mod_35_v,ER_mod_35_h,ER_mod_35_v,H_BB_mod_35_h,H_BB_mod_35_v,H_BB_perc_35_h,H_BB_perc_35_v,SO_mod_35_h,SO_mod_35_v,TB_BB_mod_35_h,TB_BB_mod_35_v,ERA_35_h,ERA_35_v,WHIP_35_h,WHIP_35_v,SO_perc_35_h,SO_perc_35_v,TB_BB_perc_35_h,TB_BB_perc_35_v


In [44]:
df.loc[df.IP_mod_10_h==0]

Unnamed: 0,date,dblheader_code,day_of_week,team_v,league_v,game_no_v,team_h,league_h,game_no_h,runs_v,runs_h,outs_total,day_night,completion_info,forfeit_info,protest_info,ballpark_id,attendance,game_minutes,linescore_v,linescore_h,AB_v,H_v,2B_v,3B_v,HR_v,RBI_v,SH_v,SF_v,HBP_v,BB_v,IBB_v,SO_v,SB_v,CS_v,GIDP_v,CI_v,LOB_v,P_num_v,ERind_v,ERteam_v,WP_v,balk_v,PO_v,ASST_v,ERR_v,PB_v,DP_v,TP_v,AB_h,H_h,2B_h,3B_h,HR_h,RBI_h,SH_h,SF_h,HBP_h,BB_h,IBB_h,SO_h,SB_h,CS_h,GIDP_h,CI_h,LOB_h,P_num_h,ERind_h,ERteam_h,WP_h,balk_h,PO_h,ASST_h,ERR_h,PB_h,DP_h,TP_h,ump_HB_id,ump_HB_name,ump_1B_id,ump_1B_name,ump_2B_id,ump_2B_name,ump_3B_id,ump_3B_name,ump_LF_id,ump_LF_name,ump_RF_id,ump_RF_name,mgr_id_v,mgr_name_v,mgr_id_h,mgr_name_h,pitcher_id_w,pitcher_name_w,pitcher_id_l,pitcher_name_l,pitcher_id_s,pitcher_name_s,GWRBI_id,GWRBI_name,pitcher_start_id_v,pitcher_start_name_v,pitcher_start_id_h,pitcher_start_name_h,batter1_name_v,batter1_id_v,batter1_pos_v,batter2_name_v,batter2_id_v,batter2_pos_v,batter3_name_v,batter3_id_v,batter3_pos_v,batter4_name_v,batter4_id_v,batter4_pos_v,batter5_name_v,batter5_id_v,batter5_pos_v,batter6_name_v,batter6_id_v,batter6_pos_v,batter7_name_v,batter7_id_v,batter7_pos_v,batter8_name_v,batter8_id_v,batter8_pos_v,batter9_name_v,batter9_id_v,batter9_pos_v,batter1_name_h,batter1_id_h,batter1_pos_h,batter2_name_h,batter2_id_h,batter2_pos_h,batter3_name_h,batter3_id_h,batter3_pos_h,batter4_name_h,batter4_id_h,batter4_pos_h,batter5_name_h,batter5_id_h,batter5_pos_h,batter6_name_h,batter6_id_h,batter6_pos_h,batter7_name_h,batter7_id_h,batter7_pos_h,batter8_name_h,batter8_id_h,batter8_pos_h,batter9_name_h,batter9_id_h,batter9_pos_h,misc_info,acqui_info,season,run_diff,home_victory,run_total,date_dblhead,BATAVG_162_h,BATAVG_162_v,OBP_162_h,OBP_162_v,SLG_162_h,SLG_162_v,OBS_162_h,OBS_162_v,SB_162_h,SB_162_v,CS_162_h,CS_162_v,ERR_162_h,ERR_162_v,BATAVG_30_h,BATAVG_30_v,OBP_30_h,OBP_30_v,SLG_30_h,SLG_30_v,OBS_30_h,OBS_30_v,SB_30_h,SB_30_v,CS_30_h,CS_30_v,ERR_30_h,ERR_30_v,implied_prob_h,implied_prob_v,implied_prob_h_mid,over_under_line,over_under_result,IP_real_h,IP_real_v,rollsum_IP_real_10_h,rollsum_IP_real_10_v,rollsum_H_10_h,rollsum_H_10_v,rollsum_BFP_10_h,rollsum_BFP_10_v,rollsum_HR_10_h,rollsum_HR_10_v,rollsum_R_10_h,rollsum_R_10_v,rollsum_ER_10_h,rollsum_ER_10_v,rollsum_BB_10_h,rollsum_BB_10_v,rollsum_IB_10_h,rollsum_IB_10_v,rollsum_SO_10_h,rollsum_SO_10_v,rollsum_SH_10_h,rollsum_SH_10_v,rollsum_SF_10_h,rollsum_SF_10_v,rollsum_WP_10_h,rollsum_WP_10_v,rollsum_HBP_10_h,rollsum_HBP_10_v,rollsum_BK_10_h,rollsum_BK_10_v,rollsum_2B_10_h,rollsum_2B_10_v,rollsum_3B_10_h,rollsum_3B_10_v,rollsum_IP_real_35_h,rollsum_IP_real_35_v,rollsum_H_35_h,rollsum_H_35_v,rollsum_BFP_35_h,rollsum_BFP_35_v,rollsum_HR_35_h,rollsum_HR_35_v,rollsum_R_35_h,rollsum_R_35_v,rollsum_ER_35_h,rollsum_ER_35_v,rollsum_BB_35_h,rollsum_BB_35_v,rollsum_IB_35_h,rollsum_IB_35_v,rollsum_SO_35_h,rollsum_SO_35_v,rollsum_SH_35_h,rollsum_SH_35_v,rollsum_SF_35_h,rollsum_SF_35_v,rollsum_WP_35_h,rollsum_WP_35_v,rollsum_HBP_35_h,rollsum_HBP_35_v,rollsum_BK_35_h,rollsum_BK_35_v,rollsum_2B_35_h,rollsum_2B_35_v,rollsum_3B_35_h,rollsum_3B_35_v,H_BB_roll_10_h,H_BB_roll_10_v,XB_roll_10_h,XB_roll_10_v,TB_roll_10_h,TB_roll_10_v,IP_mod_10_h,IP_mod_10_v,BF_mod_10_h,BF_mod_10_v,ER_mod_10_h,ER_mod_10_v,H_BB_mod_10_h,H_BB_mod_10_v,H_BB_perc_10_h,H_BB_perc_10_v,SO_mod_10_h,SO_mod_10_v,TB_BB_mod_10_h,TB_BB_mod_10_v,ERA_10_h,ERA_10_v,WHIP_10_h,WHIP_10_v,SO_perc_10_h,SO_perc_10_v,TB_BB_perc_10_h,TB_BB_perc_10_v,H_BB_roll_35_h,H_BB_roll_35_v,XB_roll_35_h,XB_roll_35_v,TB_roll_35_h,TB_roll_35_v,IP_mod_35_h,IP_mod_35_v,BF_mod_35_h,BF_mod_35_v,ER_mod_35_h,ER_mod_35_v,H_BB_mod_35_h,H_BB_mod_35_v,H_BB_perc_35_h,H_BB_perc_35_v,SO_mod_35_h,SO_mod_35_v,TB_BB_mod_35_h,TB_BB_mod_35_v,ERA_35_h,ERA_35_v,WHIP_35_h,WHIP_35_v,SO_perc_35_h,SO_perc_35_v,TB_BB_perc_35_h,TB_BB_perc_35_v


In [45]:
df.drop(df.index[df.IP_mod_10_v==0],inplace=True)
df.drop(df.index[df.IP_mod_10_h==0],inplace=True)
df.shape

(92946, 321)

In [46]:
df.reset_index(drop=True, inplace=True)

In [47]:
df.to_csv('df_bp5.csv', index=False)