In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import plotly.express as px
import boxball_loader as bbl
import baseball_stats_utils as bsu

In [2]:
# Seasons 1980 through 2016 (the range end is exclusive), the same span that
# Baseball Prospectus examined in:
# https://www.baseballprospectus.com/news/article/41203/prospectus-feature-ops-and-woba-briefly-revisited/
teams = bbl.load_batting(
    range(1980, 2017),
    coalesce_type=bbl.CoalesceMode.SEASON_TEAM,
)
# NOTE(review): the return value is discarded and the rate columns (ba, obp,
# slg, ops, r27, woba) appear on `teams` in the output below, so this helper
# appears to add them in place — confirm against baseball_stats_utils.
bsu.add_batting_rate_stats(teams)
teams

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ab,r,h,_2b,_3b,hr,rbi,sb,cs,bb,...,pa,outs,tb,ob,ba,obp,slg,ops,r27,woba
yr,team_id,lg_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1980,ATL,NL,5402,630,1352,226,22,144,597.0,73.0,52.0,434,...,5889.0,4135.0,2054,1806.0,0.250278,0.306673,0.380230,0.686903,4.113664,0.292718
1980,BAL,AL,5585,805,1523,258,29,156,751.0,111.0,38.0,587,...,6239.0,4146.0,2307,2131.0,0.272695,0.341561,0.413071,0.754632,5.242402,0.323020
1980,BOS,AL,5603,757,1588,297,36,162,717.0,79.0,48.0,475,...,6160.0,4113.0,2443,2095.0,0.283420,0.340097,0.436016,0.776114,4.969365,0.328440
1980,CAL,AL,5443,698,1442,236,32,106,655.0,91.0,63.0,539,...,6063.0,4113.0,2060,2013.0,0.264927,0.332014,0.378468,0.710482,4.582057,0.306362
1980,CHA,AL,5444,587,1408,255,38,91,547.0,68.0,54.0,399,...,5933.0,4141.0,2012,1846.0,0.258633,0.311141,0.369581,0.680722,3.827336,0.290871
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016,SLN,NL,5548,779,1415,299,32,225,745.0,35.0,26.0,526,...,6185.0,4200.0,2453,2011.0,0.255047,0.325141,0.442141,0.767283,5.007857,0.325244
2016,TBA,AL,5481,672,1333,288,32,216,647.0,60.0,37.0,449,...,6027.0,4213.0,2333,1851.0,0.243204,0.307118,0.425652,0.732770,4.306670,0.311077
2016,TEX,AL,5525,765,1446,257,23,215,746.0,99.0,36.0,436,...,6071.0,4155.0,2394,1952.0,0.261719,0.321529,0.433303,0.754832,4.971119,0.320152
2016,TOR,AL,5479,759,1358,276,18,221,728.0,54.0,24.0,632,...,6206.0,4185.0,2333,2045.0,0.247855,0.329520,0.425808,0.755327,4.896774,0.323488


In [3]:
# List every column available on the team-season frame (the wide display above
# elides the middle columns with "...").
teams.columns

Index(['ab', 'r', 'h', '_2b', '_3b', 'hr', 'rbi', 'sb', 'cs', 'bb', 'so',
       'ibb', 'hbp', 'sh', 'sf', 'gidp', 'pa', 'outs', 'tb', 'ob', 'ba', 'obp',
       'slg', 'ops', 'r27', 'woba'],
      dtype='object')

In [4]:
# Team-season scatter of on-base percentage against runs per 27 outs. The title
# and readable axis labels let the figure stand alone when the notebook is
# skimmed; the plotted data is unchanged.
px.scatter(
    teams,
    x='r27',
    y='obp',
    title='OBP vs. runs per 27 outs (team seasons, 1980-2016)',
    labels={'r27': 'Runs per 27 outs', 'obp': 'On-base percentage (OBP)'},
)

In [5]:
# Team-season scatter of slugging percentage against runs per 27 outs. The
# title and readable axis labels let the figure stand alone when the notebook
# is skimmed; the plotted data is unchanged.
px.scatter(
    teams,
    x='r27',
    y='slg',
    title='SLG vs. runs per 27 outs (team seasons, 1980-2016)',
    labels={'r27': 'Runs per 27 outs', 'slg': 'Slugging percentage (SLG)'},
)

In [6]:
# Team-season scatter of OPS (OBP + SLG) against runs per 27 outs. The title
# and readable axis labels let the figure stand alone when the notebook is
# skimmed; the plotted data is unchanged.
px.scatter(
    teams,
    x='r27',
    y='ops',
    title='OPS vs. runs per 27 outs (team seasons, 1980-2016)',
    labels={'r27': 'Runs per 27 outs', 'ops': 'OPS (OBP + SLG)'},
)

In [7]:
# Two derived columns, each independent of the other:
#   rpa - runs scored per plate appearance
#   ots - "on-base times slugging", the product of OBP and SLG
teams['rpa'] = teams['r'].div(teams['pa'])
teams['ots'] = teams['obp'].mul(teams['slg'])

In [8]:
# Team-season scatter of OBP * SLG (the `ots` column) against runs per 27 outs.
# The title and readable axis labels let the figure stand alone when the
# notebook is skimmed; the plotted data is unchanged.
px.scatter(
    teams,
    x='r27',
    y='ots',
    title='OBP * SLG vs. runs per 27 outs (team seasons, 1980-2016)',
    labels={'r27': 'Runs per 27 outs', 'ots': 'OBP * SLG'},
)

In [9]:
# Regression inputs: the target is runs per 27 outs, and the predictors are
# the two components of OPS.
feature_cols = ['obp', 'slg']
y = teams.loc[:, 'r27']
X = teams.loc[:, feature_cols]

In [10]:
# Construct an unfitted linear regression estimator with default arguments.
lr = LinearRegression()
# Show the estimator's class as the cell output.
type(lr)

sklearn.linear_model._base.LinearRegression

In [11]:
# Fit the model: regress r27 (y) on the obp and slg columns (X).
lr.fit(X, y)

LinearRegression()

In [12]:
# Print the coefficients. `coef_` is an array rather than a single number
# because the model learns one coefficient per feature column; the values are
# in the same order as feature_cols (obp first, then slg).
print(f'coefficients: {lr.coef_}')

# Print the intercept.
print(f'intercept: {lr.intercept_}')

coefficients: [20.51844704 10.05006216]
intercept: -6.089762749547565


### How about correlations with run-scoring?

Inspired by this reddit thread, which implies that OPS correlates with scoring better than wOBA:
https://www.reddit.com/r/Sabermetrics/comments/skt0nb/why_is_ops_more_predictive_than_woba/

There's also this Cyril Morong article from 2013 (https://cybermetric.blogspot.com/2013/07/how-well-do-ops-and-woba-predict-team.html). Morong uses runs per game, whereas the table below uses runs per 27 outs (`r27`) and runs per plate appearance (`rpa`).

In [13]:
# Pairwise correlations between the two run-scoring rates (rpa, r27) and the
# three offensive metrics (ops, woba, ots), printed as a markdown table.
corr_cols = ['rpa', 'r27', 'ops', 'woba', 'ots']
corr_table = teams[corr_cols].corr()
print(corr_table.to_markdown())

|      |      rpa |      r27 |      ops |     woba |      ots |
|:-----|---------:|---------:|---------:|---------:|---------:|
| rpa  | 1        | 0.995267 | 0.943038 | 0.941863 | 0.944594 |
| r27  | 0.995267 | 1        | 0.954645 | 0.957921 | 0.959443 |
| ops  | 0.943038 | 0.954645 | 1        | 0.996172 | 0.998674 |
| woba | 0.941863 | 0.957921 | 0.996172 | 1        | 0.996942 |
| ots  | 0.944594 | 0.959443 | 0.998674 | 0.996942 | 1        |
