In [1]:
library(tidyverse)
library(repr)
library(digest)
library(gridExtra)
library(cowplot)
library(dplyr)
library(tidymodels)
library(GGally)
library(splines)

“package ‘tidyverse’ was built under R version 4.1.2”
── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.0      [32m✔[39m [34mpurrr  [39m 0.3.4 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.0      [32m✔[39m [34mstringr[39m 1.4.0 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.1 
“package ‘ggplot2’ was built under R version 4.1.2”
“package ‘tibble’ was built under R version 4.1.2”
“package ‘tidyr’ was built under R version 4.1.2”
“package ‘readr’ was built under R version 4.1.2”
“package ‘dplyr’ was built under R version 4.1.2”
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
“package ‘repr’ was built under R version 4.

## Pitcher Team Factor Calculations

Similarily to team factor calculations for batters, pitcher team factor will standardize the average fantasy points per inning in 2022 to a normal distribution with mean at 1 and a very small standard deviation (~0.1). Unlike the model creation, fantasy points per inning will include team-based statistics such as wins, losses, saves and holds. These statistics are actually a crucial aspect of team factor, as we would like to account for the difference in opportunities to accumulate these statistics between teams. Once again, changes to the Blue Jays, Tigers and Mets ballpark dimensions are accounted for with an approximate estimate on how these changes will impact offensive production, and in turn pitching production. 

In [2]:
data <- read_csv("data/2022PitchBase.csv")
head(data)

[1mRows: [22m[34m470[39m [1mColumns: [22m[34m25[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): Name, Team
[32mdbl[39m (23): W, L, ERA, G, GS, CG, ShO, SV, HLD, BS, IP, TBF, H, R, ER, HR, BB,...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


Name,Team,W,L,ERA,G,GS,CG,ShO,SV,⋯,R,ER,HR,BB,IBB,HBP,WP,BK,SO,playerid
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Yency Almonte,LAD,0,0,1.02,33,0,0,0,1,⋯,4,4,2,10,1,4,0,0,33,15068
Evan Phillips,LAD,7,3,1.14,64,0,0,0,2,⋯,11,8,2,15,1,3,0,0,77,17734
Ryne Stanek,HOU,2,1,1.15,59,0,0,0,1,⋯,8,7,2,31,1,0,4,0,62,15947
Ryan Helsley,STL,9,1,1.25,54,0,0,0,19,⋯,11,9,6,20,1,0,1,0,94,18138
Edwin Diaz,NYM,3,1,1.31,61,0,0,0,32,⋯,9,9,3,18,1,2,2,0,118,14710
Emmanuel Clase,CLE,3,4,1.36,77,0,0,0,42,⋯,18,11,3,10,2,1,4,0,77,21032


In [3]:
cleaned <- data %>% separate(Name, c("first_name", "last_name")) %>% select(first_name, last_name, Team, W, L, SV, HLD, ER, SO, IP, BB, H) 
head(cleaned)

“Expected 2 pieces. Additional pieces discarded in 9 rows [22, 42, 72, 81, 120, 135, 269, 315, 461].”


first_name,last_name,Team,W,L,SV,HLD,ER,SO,IP,BB,H
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Yency,Almonte,LAD,0,0,1,8,4,33,35.1,10,18
Evan,Phillips,LAD,7,3,2,19,8,77,63.0,15,33
Ryne,Stanek,HOU,2,1,1,17,7,62,54.2,31,36
Ryan,Helsley,STL,9,1,19,7,9,94,64.2,20,28
Edwin,Diaz,NYM,3,1,32,4,9,118,62.0,18,34
Emmanuel,Clase,CLE,3,4,42,0,11,77,72.2,10,43


In [4]:
names <- cleaned %>% unite('player', last_name:first_name, sep = ", ", remove = T)
head(names)

player,Team,W,L,SV,HLD,ER,SO,IP,BB,H
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
"Almonte, Yency",LAD,0,0,1,8,4,33,35.1,10,18
"Phillips, Evan",LAD,7,3,2,19,8,77,63.0,15,33
"Stanek, Ryne",HOU,2,1,1,17,7,62,54.2,31,36
"Helsley, Ryan",STL,9,1,19,7,9,94,64.2,20,28
"Diaz, Edwin",NYM,3,1,32,4,9,118,62.0,18,34
"Clase, Emmanuel",CLE,3,4,42,0,11,77,72.2,10,43


In [5]:
pitchteam <- names %>% select(player, Team)
write_csv(pitchteam, "data/PitchTeam.csv")

In [6]:
fpoints <- names %>% mutate(FpointsperIP = (2*W - 2*L + 5*SV + 2*HLD - 2*ER + SO - BB - H + 3*IP)/IP)
head(fpoints)

player,Team,W,L,SV,HLD,ER,SO,IP,BB,H,FpointsperIP
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
"Almonte, Yency",LAD,0,0,1,8,4,33,35.1,10,18,3.512821
"Phillips, Evan",LAD,7,3,2,19,8,77,63.0,15,33,4.095238
"Stanek, Ryne",HOU,2,1,1,17,7,62,54.2,31,36,3.405904
"Helsley, Ryan",STL,9,1,19,7,9,94,64.2,20,28,5.383178
"Diaz, Edwin",NYM,3,1,32,4,9,118,62.0,18,34,6.548387
"Clase, Emmanuel",CLE,3,4,42,0,11,77,72.2,10,43,5.908587


In [7]:
## Calculating team averages (dividing by five to reduce variance of the standardized values)
team_avg <- fpoints %>% group_by(Team) %>% summarize(avg = mean(FpointsperIP)/5) %>% filter(Team != "- - -")
head(team_avg)

Team,avg
<chr>,<dbl>
ARI,0.3827195
ATL,0.562325
BAL,0.4160539
BOS,0.4026853
CHC,0.3997784
CHW,0.4726583


In [8]:
## Standardizing team average points to a N(1, sd^(1/100)) disribution
team_avg$standardized <- ((team_avg$avg - mean(team_avg$avg))/(sd(team_avg$avg))^(1/10000)) + 1
team_avg

Team,avg,standardized
<chr>,<dbl>,<dbl>
ARI,0.3827195,0.9348945
ATL,0.562325,1.1145462
BAL,0.4160539,0.9682375
BOS,0.4026853,0.9548654
CHC,0.3997784,0.9519578
CHW,0.4726583,1.0248564
CIN,0.3270639,0.8792245
CLE,0.5164953,1.0687047
COL,0.3332485,0.8854108
DET,0.387844,0.9400203


In [9]:
team_avg[10,]$standardized = team_avg[10,]$standardized - 0.03
team_avg[29,]$standardized = team_avg[29,]$standardized - 0.03
team_avg[18,]$standardized = team_avg[18,]$standardized - 0.015
team_avg

Team,avg,standardized
<chr>,<dbl>,<dbl>
ARI,0.3827195,0.9348945
ATL,0.562325,1.1145462
BAL,0.4160539,0.9682375
BOS,0.4026853,0.9548654
CHC,0.3997784,0.9519578
CHW,0.4726583,1.0248564
CIN,0.3270639,0.8792245
CLE,0.5164953,1.0687047
COL,0.3332485,0.8854108
DET,0.387844,0.9100203


In [10]:
write_csv(team_avg, "data/Pitchteamfactor.csv")