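# --- GitHub web-page residue (not part of the script); commented out so the file parses as R ---
# Permalink
# Switch branches/tags
# Nothing to show
# Find file Copy path
# Fetching contributors…
# Cannot retrieve contributors at this time
# 323 lines (266 sloc) 19.2 KB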
# Script to produce graphics used in blog post on backwards pitching
library(tidyverse)
library(lubridate)
library(ggrepel)
library(scales)
# use pitchRx::scrape() to download data in reasonable-sized chunks, and serialize them to the PITCHRX_DATA_DIRECTORY
# Directory holding the serialized pitchRx::scrape() output. For each season the
# code below reads pitchRx<year>.rds (game table), pitchRx<year>_1.rds (detail)
# and, when it exists, pitchRx<year>_2.rds (further detail).
PITCHRX_DATA_DIRECTORY <- '/opt/data/baseball/'
years <- c('2018', '2017', '2016', '2015', '2014')

# One list element per season; each element is a list of four tibbles:
# games, pitches, atBats and actions.
dfs <- map(years, function(yr) {
  # Path of one of this season's files ('' = game table, '_1' / '_2' = detail).
  seasonFile <- function(suffix) {
    file.path(PITCHRX_DATA_DIRECTORY, paste0('pitchRx', yr, suffix, '.rds'))
  }

  # Factor columns become character and the table becomes a tibble.
  asCharacterTibble <- function(tbl) {
    tbl %>% mutate_if(is.factor, as.character) %>% as_tibble()
  }

  # Each extractor returns NULL when its table is absent from the list element.
  extractAtBats <- function(dayList) {
    if (is.null(dayList$atbat)) {
      return(NULL)
    }
    asCharacterTibble(dayList$atbat) %>%
      select(gameday_link, num, inning, inning_side, pitcher, batter, pitcher_name, batter_name, p_throws, stand, atbat_des, event)
  }
  extractPitches <- function(dayList) {
    if (is.null(dayList$pitch)) {
      return(NULL)
    }
    asCharacterTibble(dayList$pitch) %>%
      select(gameday_link, num, inning, inning_side, tfs_zulu, count, result=type, pitch_type, start_speed, des, zone, spin_rate, px, pz)
  }
  extractActions <- function(dayList) {
    if (is.null(dayList$action)) {
      return(NULL)
    }
    asCharacterTibble(dayList$action) %>%
      select(gameday_link, num, inning, inning_side, tfs_zulu, des, event, b, s, o)
  }

  # Read one detail file and pull the three tables out of every element.
  readDetail <- function(suffix) {
    detail <- readRDS(seasonFile(suffix))
    list(
      atBats=map_dfr(detail, extractAtBats),
      pitches=map_dfr(detail, extractPitches),
      actions=map_dfr(detail, extractActions)
    )
  }

  # Game table: parse the start time, derive the date, prefix the game id.
  games <- readRDS(seasonFile('')) %>%
    mutate(
      datetime=ymd_hm(time_date),
      date=as_date(datetime),
      gameday_link=paste0('gid_', gameday_link)
    ) %>%
    select(gameday_link, away_code, home_code, away_team_name, home_team_name, date)

  # The first detail file is mandatory; the second is appended only if present.
  chunk <- readDetail('_1')
  atBats <- chunk$atBats
  pitches <- chunk$pitches
  actions <- chunk$actions
  if (file.exists(seasonFile('_2'))) {
    chunk <- readDetail('_2')
    atBats <- bind_rows(atBats, chunk$atBats)
    pitches <- bind_rows(pitches, chunk$pitches)
    actions <- bind_rows(actions, chunk$actions)
  }

  # Keep only games that have at least one pitch record.
  games <- semi_join(games, pitches, by='gameday_link')

  # regular_pitch is computed from the raw code before pitch_type is replaced by
  # its readable label. Codes without an entry keep their code; 'AB' becomes NA.
  pitches <- pitches %>%
    as_tibble() %>%
    mutate(
      count=as.character(count),
      regular_pitch=pitch_type %in% c('FA','FF','FT','FC','FS','SI','SL','CU','KC','EP','CH','SC','KN'),
      pitch_type=recode(
        pitch_type,
        FA='Fastball',
        FF='Four-seam Fastball',
        FT='Two-seam Fastball',
        FC='Cut Fastball',
        FS='Split-Fingered Fastball',
        FO='Pitch Out',
        PO='Pitch Out',
        SI='Sinker',
        SL='Slider',
        CU='Curveball',
        KC='Knuckle-curve',
        EP='Eephus',
        CH='Changeup',
        SC='Screwball',
        KN='Knuckleball',
        UN='Unidentified',
        IN='Intentional Ball',
        AB=NA_character_
      )
    )

  list(games=games, pitches=pitches, atBats=atBats, actions=actions)
})
# Stack the per-season tables into a single tibble per table type.
games <- map_dfr(dfs, 'games')
atBats <- map_dfr(dfs, 'atBats')
# Blank pitch timestamps become NA and the rest are parsed to date-times.
pitches <- map_dfr(dfs, 'pitches') %>%
  mutate(tfs_zulu=as_datetime(na_if(tfs_zulu, '')))
# num is made numeric -- presumably to match the type of num in atBats/pitches
# for the joins that follow; confirm against the raw data.
actions <- map_dfr(dfs, 'actions') %>%
  mutate(num=as.numeric(num))
# The per-season list is not needed once the tables are stacked.
rm(dfs)
# Outs made on the bases rather than by the batter (caught stealing, pickoffs,
# runner out). The raw tfs_zulu strings are parsed to date-times here (blank ->
# NA), the same way `pitches` is handled, so that the before/after comparison
# against the pitching-change time below is a real date-time comparison instead
# of an implicit character-to-POSIXct coercion in the local time zone.
nonAbOuts <- actions %>%
  filter(grepl(x=event, pattern='Caught Stealing|Picked off|Pickoff [123]|Runner Out')) %>%
  mutate(tfs_zulu=as_datetime(na_if(tfs_zulu, '')))

# clean up pitches and at-bats where a pitcher substituted after a 0-0 count
midAbPitchingChanges <- actions %>%
  filter(event=='Pitching Substitution') %>%
  filter(b+s > 0) %>%
  mutate(tfs_zulu=na_if(tfs_zulu, ''))

# Substitutions without a timestamp cannot be placed within the at-bat, so they
# are reported and then left out.
noTs <- nrow(filter(midAbPitchingChanges, is.na(tfs_zulu)))
if (noTs > 0) warning(paste0(noTs, ' mid AB pitch change records had no timestamp, so those ABs will not be edited. Nothing we can do!'))

# The substitution description has the form '<prefix>: <reliever> replaces <relieved>.<rest>'.
regex <- '.+: (.+) replaces (.+)\\..*'
midAbPitchingChanges <- midAbPitchingChanges %>%
  filter(!is.na(tfs_zulu)) %>%
  mutate(tfs_zulu=as_datetime(tfs_zulu)) %>%
  mutate(Reliever=gsub(x=des, pattern=regex, replacement='\\1'), Relieved=gsub(x=des, pattern=regex, replacement='\\2')) %>%
  # drop anything after a comma in the relieved pitcher's name
  mutate(Relieved=gsub(x=Relieved, pattern='(.+),.+', replacement='\\1')) %>%
  # the regex above consumes the period of a trailing 'Jr.'; put it back
  mutate_at(.vars=vars(Reliever, Relieved), .funs=gsub, pattern='Jr$', replacement='Jr.') %>%
  # 'A. J. Name' -> 'A.J. Name'
  mutate_at(.vars=vars(Reliever, Relieved), .funs=gsub, pattern='([A-Z]\\.)[ ]+([A-Z]\\.)[ ]+', replacement='\\1\\2 ') %>%
  mutate(Reliever=case_when(Reliever=='Daniel Coulombe' ~ 'Danny Coulombe', TRUE ~ Reliever)) %>%
  mutate(Reliever=case_when(Reliever=='Felipe Rivero' ~ 'Felipe Vazquez', TRUE ~ Reliever)) %>%
  filter(Reliever != Relieved)

# Look up the relieved pitcher's id by full name in each affected game's
# players.xml (network access via pitchRx).
players <- pitchRx::scrape(game.ids=unique(midAbPitchingChanges$gameday_link), suffix='players.xml')
players <- players$player %>% as_tibble() %>% mutate(fullName=paste0(first, ' ', last))
midAbPitchingChanges <- midAbPitchingChanges %>%
  inner_join(players %>% select(fullName, RelievedId=id, gameday_link) %>% distinct(), by=c('Relieved'='fullName', 'gameday_link'))

# Records of at-bats without a mid-AB change are kept untouched as segment 1.
retainAtBats <- atBats %>% anti_join(midAbPitchingChanges, by=c('gameday_link', 'num')) %>% mutate(num_seq=1)
retainPitches <- pitches %>% anti_join(midAbPitchingChanges, by=c('gameday_link', 'num')) %>% mutate(num_seq=1)
retainNonAbOuts <- nonAbOuts %>% anti_join(midAbPitchingChanges, by=c('gameday_link', 'num')) %>% mutate(num_seq=1)

# Records of at-bats that did have a mid-AB change.
fixAtBats <- atBats %>% semi_join(midAbPitchingChanges, by=c('gameday_link', 'num'))
fixPitches <- pitches %>% semi_join(midAbPitchingChanges, by=c('gameday_link', 'num'))
fixNonAbOuts <- nonAbOuts %>% semi_join(midAbPitchingChanges, by=c('gameday_link', 'num'))

# Each affected at-bat becomes two rows: num_seq 2 keeps the pitcher recorded on
# the at-bat, num_seq 1 is a copy carrying the relieved pitcher's id and name.
fixAtBatsFixed <- fixAtBats %>% mutate(num_seq=2) %>%
  bind_rows(fixAtBats %>% select(-pitcher, -pitcher_name) %>%
    inner_join(midAbPitchingChanges %>% select(gameday_link, num, pitcher=RelievedId, pitcher_name=Relieved), by=c('gameday_link', 'num')) %>%
    mutate(num_seq=1))

# Time of the pitching change for every affected at-bat.
changeTimes <- midAbPitchingChanges %>% select(gameday_link, num, PitchChangeTime=tfs_zulu)

# Tag time-stamped records as segment 1 (before the change) or segment 2 (at or
# after it). Rows whose own timestamp is NA match neither side and are dropped.
# The join is done once and reused for both segments.
splitAtChange <- function(df) {
  joined <- df %>% inner_join(changeTimes, by=c('gameday_link', 'num'))
  bind_rows(
    joined %>% filter(tfs_zulu < PitchChangeTime) %>% mutate(num_seq=1),
    joined %>% filter(tfs_zulu >= PitchChangeTime) %>% mutate(num_seq=2)
  ) %>% select(-PitchChangeTime)
}
fixPitchesFixed <- splitAtChange(fixPitches)
fixNonAbOutsFixed <- splitAtChange(fixNonAbOuts)

pitches <- bind_rows(retainPitches, fixPitchesFixed)
atBats <- bind_rows(retainAtBats, fixAtBatsFixed)
nonAbOuts <- bind_rows(retainNonAbOuts, fixNonAbOutsFixed)
rm(midAbPitchingChanges, fixAtBats, fixAtBatsFixed, fixPitches, fixPitchesFixed, retainAtBats, retainPitches, actions, retainNonAbOuts, fixNonAbOutsFixed, fixNonAbOuts,
   changeTimes, splitAtChange)
# Fill missing game dates from the id (gid_YYYY_MM_DD_...), then keep a single
# row per game: the latest date, and the first row if there is still a tie.
# The two filters stay separate because row_number() must be evaluated after
# the date filter. ungroup() keeps the grouping from leaking into later steps.
games <- games %>%
  mutate(date=case_when(
    is.na(date) ~ ymd(gsub(x=gsub(x=gameday_link, pattern='gid_([0-9_]+)_[a-z].+', replacement='\\1'), pattern='_', replacement='-')),
    TRUE ~ date
  )) %>%
  group_by(gameday_link) %>%
  filter(date==max(date)) %>%
  filter(row_number()==1) %>%
  ungroup()

# Attach game information and work out which team is pitching and which is
# batting: in the top of an inning the home team is on defense.
atBats <- atBats %>%
  inner_join(games, by='gameday_link') %>%
  mutate(
    defense_team_code=case_when(inning_side=='top' ~ home_code, TRUE ~ away_code),
    defense_team_name=case_when(inning_side=='top' ~ home_team_name, TRUE ~ away_team_name),
    offense_team_code=case_when(inning_side=='bottom' ~ home_code, TRUE ~ away_code),
    offense_team_name=case_when(inning_side=='bottom' ~ home_team_name, TRUE ~ away_team_name)
  ) %>%
  # Number of outs implied by the at-bat's event (0 for anything not listed).
  # NOTE(review): an at-bat split by a mid-AB pitching change appears twice
  # (num_seq 1 and 2) with the same event, so its outs are credited on both
  # rows -- confirm whether the relieved pitcher's row should count.
  mutate(outsRecorded=case_when(
    event %in% c('Strikeout', 'Groundout', 'Flyout', 'Lineout', 'Pop Out', 'Forceout', 'Sac Fly', 'Sac Bunt', 'Runner Out', 'Bunt Groundout', 'Fielders Choice Out', 'Bunt Pop Out',
                 'Batter Interference', 'Bunt Lineout') ~ 1,
    event %in% c('Grounded Into DP', 'Double Play', 'Strikeout - DP', 'Sac Fly DP') ~ 2,
    event == 'Triple Play' ~ 3,
    TRUE ~ 0)
  )

# Attach the responsible pitcher to every non-AB out. num_seq is part of the key
# because a split at-bat has two atBats rows (one per pitcher); joining on
# gameday_link and num alone would duplicate the out and credit both pitchers.
nonAbOuts <- nonAbOuts %>%
  inner_join(atBats %>% select(gameday_link, num, num_seq, pitcher), by=c('gameday_link', 'num', 'num_seq'))
rm(games)
# Pitch-level table with two flags: FirstPitch (count is 0-0) and Fastball
# (generic, four-seam or two-seam; NA when the pitch type is unknown), joined to
# the at-bat it belongs to.
pitches2 <- pitches %>%
  mutate(
    FirstPitch=count=='0-0',
    Fastball=if_else(
      is.na(pitch_type),
      NA,
      pitch_type %in% c('Fastball', 'Four-seam Fastball', 'Two-seam Fastball')
    )
  ) %>%
  inner_join(atBats %>% select(-inning, -inning_side), by=c('gameday_link', 'num', 'num_seq'))

# Monthly fastball share overall and on first pitches, for regular pitch types
# only and excluding March, October and November.
pitchMonth <- pitches2 %>%
  filter(!is.na(date), !(month(date) %in% c(3, 10, 11)), regular_pitch) %>%
  mutate(
    ym=format(date, '%Y-%m'),
    # Fastball flag on first pitches, NA on every other pitch
    FirstFastball=if_else(FirstPitch, Fastball, NA)
  ) %>%
  group_by(ym) %>%
  summarize(
    FirstFastball=mean(FirstFastball, na.rm=TRUE),
    Fastball=mean(Fastball, na.rm=TRUE),
    pitches=n()
  )

# Red: share of all pitches that were fastballs; green: share of first pitches.
ggplot(pitchMonth) +
  geom_line(aes(x=ym, y=Fastball, group=1), color='red') +
  geom_line(aes(x=ym, y=FirstFastball, group=1), color='green') +
  scale_y_continuous(limits=c(.2, .6), labels=percent) +
  scale_x_discrete(breaks=paste0(rep(2014:2018, each=2), '-', c('05','08'))) +
  labs(y='% fastballs', x=NULL)

# Number of pitches per month.
ggplot(pitchMonth) +
  geom_line(aes(x=ym, y=pitches, group=1))

# Pitch-type counts for April through September 2016, one panel per month.
pitches %>%
  mutate(y=year(tfs_zulu), m=month(tfs_zulu)) %>%
  filter(y==2016, m %in% 4:9, regular_pitch) %>%
  ggplot() +
  geom_bar(aes(x=pitch_type)) +
  coord_flip() +
  facet_wrap(vars(m))
## Some 2018 by-pitcher analyses...
# first have to correct for some missing pitcher names in pitchFx...
# Lookup from MLBAM id to 'First Last', read from the people register.
pdf <- read_csv('/opt/data/retrosheet/people.csv') %>%
  transmute(pitcher=key_mlbam, pitcher_name_mlbam=paste0(name_first, ' ', name_last))

# Rebuild pitches2 with a wider fastball definition (cutters and sinkers count
# too), the year of the pitch, and a pitcher name on every row.
pitches2 <- pitches %>%
  mutate(
    FirstPitch=count=='0-0',
    Fastball=if_else(
      is.na(pitch_type),
      NA,
      pitch_type %in% c('Fastball', 'Four-seam Fastball', 'Two-seam Fastball', 'Cut Fastball', 'Sinker')
    )
  ) %>%
  inner_join(atBats %>% select(-inning, -inning_side), by=c('gameday_link', 'num', 'num_seq')) %>%
  mutate(y=year(tfs_zulu)) %>%
  left_join(pdf, by='pitcher') %>%
  # use the register name only where the at-bat record has none
  mutate(pitcher_name=coalesce(pitcher_name, pitcher_name_mlbam)) %>%
  select(-pitcher_name_mlbam) %>%
  # spelling overrides for a handful of pitchers; all other names are unchanged
  mutate(pitcher_name=recode(
    pitcher_name,
    'CC Sabathia'='C.C. Sabathia',
    'Douglas Fister'='Doug Fister',
    'Hyunjin Ryu'='Hyun-Jin Ryu',
    'Michael Fiers'='Mike Fiers',
    'Jacob Faria'='Jake Faria'
  ))
# Collapse a vector of team codes into one upper-case, '/'-separated string of
# the distinct codes, in order of first appearance.
listTeams <- function(teamCol) {
  codes <- unique(toupper(teamCol))
  paste(codes, collapse='/')
}
# One row per out-producing event: at-bats that recorded outs, plus one out for
# every non-AB out (its date is taken from the event timestamp and its name from
# the people register).
pitcherOuts <- atBats %>%
  filter(outsRecorded != 0) %>%
  select(date, pitcher, pitcher_name, outsRecorded) %>%
  bind_rows(nonAbOuts %>% select(tfs_zulu, pitcher) %>% mutate(date=as_date(tfs_zulu), outsRecorded=1) %>% select(-tfs_zulu) %>%
    inner_join(pdf %>% select(pitcher, pitcher_name=pitcher_name_mlbam), by='pitcher')) %>%
  mutate(y=year(date))

# One row per at-bat that ended in a walk or a hit.
pitcherWalkHits <- atBats %>%
  mutate(WH=event %in% c('Walk', 'Single', 'Double', 'Triple', 'Home Run'), y=year(date)) %>%
  select(y, pitcher, pitcher_name, WH) %>%
  filter(WH)

# Per pitcher and year: batters faced, fastball shares, share of appearances
# that include the first inning, outs, walks+hits and the derived ratios.
#
# Outs and walks+hits are aggregated and joined by pitcher id (y, pitcher) only.
# pitches2 fills in and re-spells pitcher names, while pitcherOuts and
# pitcherWalkHits carry the raw at-bat / register names; keying those joins on
# pitcher_name as well would silently lose every record whose name is missing or
# spelled differently.
totalSum <- pitches2 %>%
  group_by(y, pitcher, pitcher_name) %>%
  summarize(BF=sum(FirstPitch), AllFastballs=mean(Fastball, na.rm=TRUE), Team=listTeams(defense_team_code)) %>%
  inner_join(
    pitches2 %>% group_by(y, pitcher, pitcher_name) %>% filter(FirstPitch) %>%
      summarize(FirstPitchFastballs=mean(Fastball, na.rm=TRUE)),
    by=c('y', 'pitcher', 'pitcher_name')
  ) %>%
  inner_join(
    # StartedGame is 1 when the pitcher threw at least one pitch in inning 1
    pitches2 %>% group_by(y, pitcher, pitcher_name, gameday_link) %>%
      mutate(fi=inning==1) %>%
      summarize(StartedGame=max(fi, na.rm=TRUE)) %>%
      group_by(y, pitcher, pitcher_name) %>%
      summarize(PctStart=mean(StartedGame, na.rm=TRUE)),
    by=c('y', 'pitcher', 'pitcher_name')
  ) %>%
  inner_join(
    pitcherOuts %>% group_by(y, pitcher) %>% summarize(OutsRecorded=sum(outsRecorded)),
    by=c('y', 'pitcher')
  ) %>%
  inner_join(
    pitcherWalkHits %>% group_by(y, pitcher) %>% summarize(WH=n()),
    by=c('y', 'pitcher')
  ) %>%
  filter(BF >= 30) %>%
  mutate(Starter=PctStart >= .8) %>%
  mutate(PitcherType=case_when(Starter ~ 'Starters', TRUE ~ 'Relievers')) %>%
  # above 1 means fewer fastballs on first pitches than overall
  mutate(BackwardsRatio=AllFastballs/FirstPitchFastballs) %>%
  mutate(PitcherNameTeam=paste0(pitcher_name, ' (', Team, ')')) %>%
  # three outs per inning, so innings = OutsRecorded / 3
  mutate(WHIP=WH*3/OutsRecorded) %>%
  ungroup()
# 2018 pitch counts and fastball shares side by side: all pitches, first
# pitches, and all remaining pitches.
bind_cols(
  pitches2 %>%
    filter(y==2018) %>%
    summarize(Pitches=n(), Fastballs=mean(Fastball, na.rm=TRUE)),
  pitches2 %>%
    filter(y==2018, FirstPitch) %>%
    summarize(FirstPitches=n(), FirstPitchFastballs=mean(Fastball, na.rm=TRUE)),
  pitches2 %>%
    filter(y==2018, !FirstPitch) %>%
    summarize(NonFirstPitches=n(), NonFirstPitchFastballs=mean(Fastball, na.rm=TRUE))
)
# Axis-label helper: format with scales::percent(), then cut any decimal part
# so that only the leading digits and the '%' sign remain.
integerPercent <- function(v) {
  gsub(pattern='([0-9]+)[^0-9].+\\%', replacement='\\1%', x=percent(v))
}
# 2018 scatter of overall fastball share (x) against first-pitch fastball share
# (y). The dashed diagonal marks equal shares; the five pitchers with the
# highest BackwardsRatio in each PitcherType group are labelled.
totalSum %>%
  filter(y==2018) %>%
  ggplot() +
  geom_point(aes(x=AllFastballs, y=FirstPitchFastballs, color=PitcherType)) +
  geom_abline(slope=1, intercept=0, linetype=2, color='grey50') +
  geom_label_repel(
    data=totalSum %>% filter(y==2018) %>% group_by(PitcherType) %>% top_n(n=5, wt=BackwardsRatio),
    mapping=aes(x=AllFastballs, y=FirstPitchFastballs, label=PitcherNameTeam),
    nudge_x=.2, nudge_y=-0.15, force=0.1, segment.alpha=.2, size=2.75
  ) +
  # geom_label_repel(data=totalSum %>% filter(y==2018) %>% filter(Team=='SEA') %>% filter(BackwardsRatio <= 1),
  # mapping=aes(x=AllFastballs, y=FirstPitchFastballs, label=PitcherNameTeam), nudge_x = -0.2, nudge_y=0.15, force=0.1, segment.alpha = .2, size=2.75) +
  # geom_label_repel(data=totalSum %>% filter(y==2018) %>% filter(Team=='SEA') %>% filter(BackwardsRatio > 1),
  # mapping=aes(x=AllFastballs, y=FirstPitchFastballs, label=PitcherNameTeam), nudge_x = 0.2, nudge_y=-0.15, force=0.1, segment.alpha = .2, size=2.75) +
  scale_x_continuous(labels=integerPercent, limits=c(0,1)) +
  scale_y_continuous(labels=integerPercent, limits=c(0,1)) +
  annotate('text', x=.15, y=.1, label='\u2193 Relatively Fewer First-Pitch Fastballs', size=3, alpha=.9, hjust='left') +
  annotate('text', x=.025, y=.30, label='\u2191 Relatively More First-Pitch Fastballs', size=3, alpha=.9, hjust='left') +
  theme_bw() +
  labs(
    color=NULL,
    x='% of all pitches that were fastballs',
    y='% of first pitches that were fastballs',
    title='"Backwards" Pitching on the First Pitch to Each Batter',
    subtitle='2018 Season Prior to the All-Star Break, Pitchers with 30 or More Batters Faced',
    caption='Source: MLBAM PitchFx Data\n"Fastball" is defined as a pitch identified by PitchFx as a 4-seam, 2-seam, or cut fastball, or sinker'
  )

# How many 2018 pitchers there are, and how many have a BackwardsRatio above 1.
totalSum %>%
  filter(y==2018) %>%
  summarize(AllPitchers=n(), BackwardsPitchers=sum(BackwardsRatio > 1))
# 2018 scatter of BackwardsRatio against WHIP. Pitchers with a ratio above 1 and
# a WHIP below 1 are labelled, starters nudged upward and relievers downward.
totalSum %>%
  filter(y==2018) %>%
  ggplot() +
  geom_point(aes(x=BackwardsRatio, y=WHIP, color=PitcherType)) +
  geom_label_repel(
    data=totalSum %>% filter(y==2018, BackwardsRatio > 1, WHIP < 1, PitcherType=='Starters'),
    mapping=aes(x=BackwardsRatio, y=WHIP, label=PitcherNameTeam),
    nudge_y=0.3, nudge_x=0.3, force=.5, segment.alpha=.2, size=2.75
  ) +
  geom_label_repel(
    data=totalSum %>% filter(y==2018, BackwardsRatio > 1, WHIP < 1, PitcherType=='Relievers'),
    mapping=aes(x=BackwardsRatio, y=WHIP, label=PitcherNameTeam),
    nudge_y=-0.4, nudge_x=0.3, force=.5, segment.alpha=.2, size=2.75
  ) +
  geom_vline(xintercept=1, alpha=.5, linetype='dashed') +
  theme_bw() +
  annotate('text', x=.98, y=3.4, label='Relatively More First-Pitch Fastballs \u2190', size=3, alpha=.9, hjust='right') +
  annotate('text', x=1.02, y=3.4, label='\u2192 Relatively Fewer First-Pitch Fastballs', size=3, alpha=.9, hjust='left') +
  scale_y_continuous(limits=c(0,3.5)) +
  labs(
    color=NULL,
    x='First-Pitch-Backwards Ratio (Fastball Percentage / First-Pitch Fastball Percentage)',
    y='Walks + Hits per Inning Pitched',
    title='Effectiveness of "Backwards" Pitching on the First Pitch to Each Batter',
    subtitle='2018 Season Prior to the All-Star Break, Pitchers with 30 or More Batters Faced',
    caption='Source: MLBAM PitchFx Data\n"Fastball" is defined as a pitch identified by PitchFx as a 4-seam, 2-seam, or cut fastball, or sinker'
  )
# Per-pitcher strike rates for 2018. A pitch counts as a strike unless its
# description is one of the listed ball / hit-by-pitch / pitchout outcomes.
# Pitchers are kept only if they are in totalSum and their non-fastball strike
# rate is above 1%.
strikeSum <- pitches2 %>%
  filter(y==2018) %>%
  mutate(
    Strike=!(des %in% c('Automatic Ball', 'Ball', 'Ball In Dirt', 'Hit By Pitch', 'Intent Ball', 'Pitchout')),
    NonFastball=!Fastball,
    NonFastballStrike=NonFastball*Strike
  ) %>%
  group_by(y, pitcher, pitcher_name) %>%
  summarize(
    NonFastball=sum(NonFastball, na.rm=TRUE),
    NonFastballStrikes=sum(NonFastballStrike, na.rm=TRUE),
    Pitches=n(),
    Strikes=sum(Strike),
    Team=listTeams(defense_team_code)
  ) %>%
  mutate(
    NonFastballStrikePct=NonFastballStrikes/NonFastball,
    StrikePct=Strikes/Pitches,
    NonStrikeDiff=NonFastballStrikePct/StrikePct
  ) %>%
  mutate(PitcherNameTeam=paste0(pitcher_name, ' (', Team, ')')) %>%
  inner_join(totalSum %>% select(y, pitcher, BackwardsRatio, PitcherType), by=c('y', 'pitcher')) %>%
  ungroup() %>%
  filter(NonFastballStrikePct > .01)

# BackwardsRatio against non-fastball strike rate. The linear fit uses only
# pitchers with a ratio below 1.5. Labels go to the five highest ratios and to
# the five best non-fastball strike rates among pitchers with a ratio above 1.
strikeSum %>%
  ggplot() +
  geom_point(aes(x=BackwardsRatio, y=NonFastballStrikePct, color=PitcherType)) +
  geom_vline(xintercept=1, alpha=.5, linetype='dashed') +
  geom_smooth(
    data=strikeSum %>% filter(BackwardsRatio < 1.5),
    mapping=aes(x=BackwardsRatio, y=NonFastballStrikePct),
    method=lm, se=FALSE
  ) +
  geom_label_repel(
    data=strikeSum %>% top_n(n=5, wt=BackwardsRatio),
    mapping=aes(x=BackwardsRatio, y=NonFastballStrikePct, label=PitcherNameTeam),
    nudge_x=0.05, nudge_y=-0.05, force=0.1, segment.alpha=.2, size=2.75
  ) +
  geom_label_repel(
    data=strikeSum %>% filter(BackwardsRatio > 1) %>% top_n(n=5, wt=NonFastballStrikePct),
    mapping=aes(x=BackwardsRatio, y=NonFastballStrikePct, label=PitcherNameTeam),
    nudge_x=0.4, nudge_y=0.08, force=0.1, segment.alpha=.2, size=2.75
  ) +
  theme_bw() +
  scale_y_continuous(labels=integerPercent, limits=c(.3,.8)) +
  labs(
    color=NULL,
    x='First-Pitch-Backwards Ratio (Fastball Percentage / First-Pitch Fastball Percentage)',
    y='% of Non-Fastballs Thrown for Strikes',
    title='First-Pitch-Backwards Pitching Tendency and Non-Fastball Command',
    subtitle='2018 Season Prior to the All-Star Break, Pitchers with 30 or More Batters Faced',
    caption='Source: MLBAM PitchFx Data\n"Fastball" is defined as a pitch identified by PitchFx as a 4-seam, 2-seam, or cut fastball, or sinker'
  )

# Correlation between the two plotted quantities.
cor(x=strikeSum$BackwardsRatio, y=strikeSum$NonFastballStrikePct)

# The ten backwards pitchers (ratio above 1) with the best non-fastball strike rate.
strikeSum %>%
  filter(BackwardsRatio > 1) %>%
  arrange(desc(NonFastballStrikePct)) %>%
  print(n=10)