# Who are all the all-time leaders in Strikeout %?

Depending on which batters-faced (BF) threshold you choose, you will get a different all-time strikeout% leader.  Who are all the players who have a claim to the record (i.e., there exists some BF threshold for which this player is the all-time leader in strikeout%)? 

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import boxball_loader as bbl
import baseball_stats_utils as bsu

In [2]:
# Career pitching totals, one row per player, ordered from most to fewest `bfp`
# (batters faced). NOTE(review): the exact effect of `drop_cols=['bfp']` is defined
# by boxball_loader and is not visible here -- confirm against that module.
careers = (
    bbl.load_pitching(coalesce_type=bbl.CoalesceMode.PLAYER_CAREER, drop_cols=['bfp'])
    .sort_values('bfp', ascending=False)
)
# Display name for each player, looked up from the player_id.
careers['name'] = bsu.get_player_names_df(careers, 'player_id')
# Strikeout rate: strikeouts divided by batters faced.
careers['k_rt'] = careers['so'].div(careers['bfp'])
careers

Unnamed: 0_level_0,w,l,g,gs,cg,sho,sv,ip_outs,h,er,...,hbp,bk,bfp,gf,r,sh,sf,gidp,name,k_rt
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
youngcy01,511,315,906,815,749,76,18,22068,7092,2147,...,161.0,3,29579.0,84,3167,0.0,0.0,0.0,Cy Young,0.094763
galvipu01,365,310,705,688,646,57,2,18010,6405,1903,...,61.0,2,25415.0,19,3352,0.0,0.0,0.0,Pud Galvin,0.071100
johnswa01,417,279,802,666,531,110,34,17744,4913,1424,...,203.0,4,23642.0,129,1902,0.0,0.0,0.0,Walter Johnson,0.148422
niekrph01,318,274,864,716,245,45,29,16213,5044,2012,...,123.0,42,22677.0,83,2337,167.0,87.0,292.0,Phil Niekro,0.147374
ryanno01,324,292,807,773,222,61,3,16158,3923,1911,...,158.0,33,22575.0,13,2178,191.0,140.0,263.0,Nolan Ryan,0.253112
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
valenda01,0,0,1,0,0,0,0,1,0,0,...,0.0,0,1.0,1,0,0.0,0.0,0.0,Danny Valencia,1.000000
kellydo01,0,0,1,0,0,0,0,1,0,0,...,0.0,0,1.0,1,0,0.0,0.0,0.0,Don Kelly,0.000000
chaseha01,0,0,1,0,0,0,0,1,0,0,...,0.0,0,1.0,0,0,0.0,0.0,0.0,Hal Chase,0.000000
gonzaal03,0,0,1,0,0,0,0,1,0,0,...,0.0,0,1.0,1,0,0.0,0.0,0.0,Alberto Gonzalez,0.000000


In [3]:
# A player has a claim to the record if nobody with at least as many batters faced (BF)
# has a higher K%. Walk the careers from most to fewest BF while tracking the best K%
# seen so far; a player whose own K% equals that running maximum is the leader for the
# threshold equal to his own BF.
#
# Ties in BF are ordered by K% (highest first) so that each player is also compared
# against everyone with exactly the same BF, not only against rows that happened to
# sort above him. Without the secondary key, a lower-K% player could be flagged as a
# leader merely because he preceded a tied-BF player with a higher K%.
ranked = careers.sort_values(['bfp', 'k_rt'], ascending=[False, False])
cummax_k_rt = ranked['k_rt'].cummax()
all_time_leaders = (
    ranked.loc[ranked['k_rt'] == cummax_k_rt, ['name', 'bfp', 'so', 'k_rt']]
    .sort_values('k_rt', ascending=False)
)
all_time_leaders

Unnamed: 0_level_0,name,bfp,so,k_rt
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
sheldsc01,Scott Sheldon,1.0,1,1.0
seitzke01,Kevin Seitzer,1.0,1,1.0
valenda01,Danny Valencia,1.0,1,1.0
niesoch01,Chuck Nieson,8.0,5,0.625
karinja01,James Karinchak,131.0,61,0.465649
haderjo01,Josh Hader,861.0,380,0.441347
chapmar01,Aroldis Chapman,2196.0,905,0.412113
kimbrcr01,Craig Kimbrel,2252.0,926,0.41119
janseke01,Kenley Jansen,2502.0,936,0.374101
wagnebi02,Billy Wagner,3600.0,1196,0.332222


In [4]:
def write_output_row(row):
    """Print one fixed-width leaderboard line for a single player.

    `row` is any mapping-like object providing 'name', 'k_rt', 'so' and 'bfp'.
    The line shows the name (left-aligned, 18 wide), the strikeout rate as a
    percentage with one decimal, then strikeouts and batters faced as integers.
    Returns None.
    """
    name, kpct = row['name'], 100*row["k_rt"]
    # Counts may arrive as floats (e.g. 8.0); show them as whole numbers.
    k, bfp = int(row['so']), int(row['bfp'])
    line = f'{name:<18} {kpct:>6.1f}% {k:>5} {bfp:>6}'
    print(line)

# Print one formatted line per leader, in the table's existing row order.
for player_id, leader in all_time_leaders.iterrows():
    write_output_row(leader)

Scott Sheldon       100.0%     1      1
Kevin Seitzer       100.0%     1      1
Danny Valencia      100.0%     1      1
Chuck Nieson         62.5%     5      8
James Karinchak      46.6%    61    131
Josh Hader           44.1%   380    861
Aroldis Chapman      41.2%   905   2196
Craig Kimbrel        41.1%   926   2252
Kenley Jansen        37.4%   936   2502
Billy Wagner         33.2%  1196   3600
Chris Sale           30.7%  2007   6544
Max Scherzer         29.1%  2784   9573
Randy Johnson        28.6%  4875  17067
Nolan Ryan           25.3%  5714  22575
Walter Johnson       14.8%  3509  23642
Cy Young              9.5%  2803  29579


In [5]:
# Every career as a point (K% against batters faced), with the all-time leaders
# joined by a line so the record "frontier" stands out.
fig1 = px.scatter(careers, x='bfp', y='k_rt', hover_name='name', hover_data=['so', 'k_rt'])
# Give the leader line its own colour; by default it would match the scatter points.
fig2 = px.line(all_time_leaders, x='bfp', y='k_rt', hover_name='name',
               color_discrete_sequence=['crimson'])
# Combining only the traces discards the axis titles Plotly Express put in each
# figure's layout, so the title and axis labels are set explicitly here.
go.Figure(data=fig1.data + fig2.data).update_layout(
    title='Career strikeout rate vs. batters faced (line: all-time K% leaders)',
    xaxis_title='Batters faced (career)',
    yaxis_title='Strikeout rate (SO / BF)',
).show()


In [6]:
# Every career as a point (strikeout total against batters faced), with the
# all-time K% leaders joined by a line.
fig1 = px.scatter(careers, x='bfp', y='so', hover_name='name', hover_data=['so', 'k_rt'])
# Give the leader line its own colour; by default it would match the scatter points.
fig2 = px.line(all_time_leaders, x='bfp', y='so', hover_name='name',
               color_discrete_sequence=['crimson'])
# Combining only the traces discards the axis titles Plotly Express put in each
# figure's layout, so the title and axis labels are set explicitly here.
go.Figure(data=fig1.data + fig2.data).update_layout(
    title='Career strikeouts vs. batters faced (line: all-time K% leaders)',
    xaxis_title='Batters faced (career)',
    yaxis_title='Strikeouts (career)',
).show()