In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression as LinReg
import warnings
warnings.filterwarnings("ignore")

In [2]:
batting = pd.read_csv('baseball-databank/Batting.csv')

In [3]:
b = batting[batting['yearID'] == 2015]

In [4]:
b.head(3)

Unnamed: 0,playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,...,RBI,SB,CS,BB,SO,IBB,HBP,SH,SF,GIDP
99846,aardsda01,2015,1,ATL,NL,33,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
99847,abadfe01,2015,1,OAK,AL,62,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99848,abreujo02,2015,1,CHA,AL,154,613.0,88.0,178.0,34.0,...,101.0,0.0,0.0,39.0,140.0,11.0,15.0,0.0,1.0,16.0


In [5]:
b.columns

Index(['playerID', 'yearID', 'stint', 'teamID', 'lgID', 'G', 'AB', 'R', 'H',
       '2B', '3B', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'IBB', 'HBP', 'SH',
       'SF', 'GIDP'],
      dtype='object')

### Find the wOBA weight League wOBA(the wOBA column) and wOBA scale here
#### https://www.fangraphs.com/guts.aspx?type=cn
### Find the League wOBA and PA here
#### https://www.fangraphs.com/leaders.aspx?pos=all&stats=bat&lg=all&qual=0&type=c,4,6,11,12,13,21,-1,34,35,40,41,-1,23,37,38,50,61,-1,111,-1,203,199,58&season=2015&month=0&season1=2015&ind=0&team=0,ss&rost=0&age=&filter=&players=0
### The wOBA formula in FanGraphs
#### https://www.fangraphs.com/library/offense/woba/

In [6]:
lgwOBA = .313
wOBAScale = 1.251
wBB = .687
wHBP = .718
w1B = .881
w2B = 1.256
w3B = 1.594
wHR = 2.065
runSB = .200
runCS = -.392
lgRoverPA = .113
lgRoverW = 9.421
cFIP = 3.134
PA = 183627
ALwRC = 10593
NLwRC = 10112
ALPA = 91485
NLPA = 86736

###  lgwOBA can calculated by yourself
lgwOBA = (wBB \* (14073-951) + wHBP \* 1602 + w1B \* 28016 + w2B \* 8242
    + w3B \* 939 + wHR \* 4909) / (165488 + 14073 - 951 + 1232 + 1602)

In [7]:
b['uBB'] = b['BB']-b['IBB']
b['1B'] = b['H'] - b['2B'] - b['3B'] - b['HR']
b['wOBA'] = (wBB * b['uBB'] + wHBP * b['HBP'] + w1B * b['1B'] + w2B * b['2B'] + w3B * b['3B'] 
             + wHR * b['HR']) / (b['AB'] + b['BB'] - b['IBB'] + b['SF'] + b['HBP'])
b['PA'] = b['AB'] + b['BB'] + b['HBP'] + b['SH'] + b['SF'] # plate appearance
b['wRAA'] = ((b['wOBA'] - lgwOBA)/wOBAScale) * b['PA']

### Download the Park Factors here,
#### https://www.fangraphs.com/guts.aspx?type=pf&season=2015&teamid=0
### Add short form from,
#### https://www.baseball-reference.com/about/team_IDs.shtml

In [8]:
pf = pd.read_csv('FanGraphs Leaderboard Park Factors.csv')
pf['teamID'] = ['ANA', 'BAL', 'BOS', 'CHW', 'CLE', 'DET', 'KCR', 'MIN',
                'NYY', 'OAK', 'SEA', 'TBR', 'TEX', 'TOR', 'ARI', 'ATL',
                'CHC', 'CIN', 'COL', 'MIA', 'HOU', 'LAD', 'MIL', 'WSN',
                'NYM', 'PHI', 'PIT', 'STL', 'SDP', 'SFG']
pf = pf[['teamID', 'Basic']].rename(columns={'Basic':'PF'})
b = pd.merge(b, pf, on='teamID')

In [9]:
b['lgwRC'] = [ALwRC if r == 'AL' else NLwRC for r in b['lgID']]
b['lgPA'] = [ALPA if r == 'AL' else NLPA for r in b['lgID']]
b['BattingRuns'] = b['wRAA'] + (lgRoverPA - (b['PF'] / 100 * lgRoverPA)) * \
    b['PA'] + (lgRoverPA - b['lgwRC'] / b['lgPA']) * b['PA']

In [10]:
#lgSB, lgCS = 2505, 1064
#_1B, BB, HBP, IBB = 28016, 14073, 1602, 951
#lgVS = 2 * Runs / Out + 0.075
#lgwSB = (lgSB * runSB + lgCS * runCS) / (_1B + BB + HBP - IBB)
lgwSB = 0.0

In [11]:
b['wSB'] = b['SB'] * runSB + b['CS'] * runCS - \
    lgwSB * (b['1B'] + b['BB'] + b['HBP'] - b['IBB'])

In [12]:
b.head()

Unnamed: 0,playerID,yearID,stint,teamID,lgID,G,AB,R,H,2B,...,uBB,1B,wOBA,PA,wRAA,PF,lgwRC,lgPA,BattingRuns,wSB
0,aardsda01,2015,1,ATL,NL,33,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,-0.2502,99,10112,86736,-0.252653,0.0
1,avilalu01,2015,1,ATL,NL,50,0.0,0.0,0.0,0.0,...,0.0,0.0,,0.0,,99,10112,86736,,0.0
2,banuema01,2015,1,ATL,NL,7,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,7.0,-1.751399,99,10112,86736,-1.768574,0.0
3,bethach01,2015,1,ATL,NL,48,155.0,16.0,31.0,8.0,...,4.0,21.0,0.222811,160.0,-11.534923,99,10112,86736,-11.927508,-0.192
4,bournmi01,2015,2,ATL,NL,46,136.0,10.0,30.0,3.0,...,17.0,26.0,0.257723,156.0,-6.893107,99,10112,86736,-7.275878,0.016
