# Chapter 5 Value of Plays Using Run Expectancy

In [1]:
library(tidyverse)
library(Lahman)

── Attaching core tidyverse packages ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# Read the field dictionary; its `Header` column holds the column names
# that are applied to the play-by-play file below.
fields <- read_csv("datasets/fields.csv")

# Read the 2011 play-by-play data. Column names are supplied from
# `fields` instead of being taken from the file, and `na = character()`
# passes an empty set of NA strings so no field value is turned into NA.
data_2011 <- read_csv(
  "datasets/all2011.csv",
  col_names = fields$Header,
  na = character()
)

[1mRows: [22m[34m97[39m [1mColumns: [22m[34m3[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (2): Description, Header
[32mdbl[39m (1): Field number

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
“[1m[22mOne or more parsing issues, call `problems()` on your data frame for details, e.g.:
  dat <- vroom(...)
  problems(dat)”
[1mRows: [22m[34m191864[39m [1mColumns: [22m[34m97[39m
[36m──[39m [1mColumn specification[22m [36m──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (34): GAME_ID, AWAY_TEAM_ID, PITCH_SEQ_TX, BAT_ID, BAT_H

In [4]:
# Add three derived columns to every play:
#   RUNS        - combined away + home score on that row
#   HALF.INNING - key built from game id, inning number and BAT_HOME_ID
#   RUNS.SCORED - how many of the batter and the three base runners have a
#                 destination code above 3 (presumably "scored" - confirm
#                 against the Retrosheet field definitions)
data_2011 <- data_2011 %>%
  mutate(
    RUNS = AWAY_SCORE_CT + HOME_SCORE_CT,
    HALF.INNING = paste(GAME_ID, INN_CT, BAT_HOME_ID),
    RUNS.SCORED =
      (BAT_DEST_ID > 3) +
      (RUN1_DEST_ID > 3) +
      (RUN2_DEST_ID > 3) +
      (RUN3_DEST_ID > 3)
  )



## Next, compute the maximum total score (home and visitor runs combined) reached in each half-inning.

In [8]:
# Collapse the play-by-play rows to one row per half-inning:
#   Outs.Inning - total of EVENT_OUTS_CT over the half-inning
#   Runs.Inning - total of RUNS.SCORED over the half-inning
#   Runs.Start  - RUNS on the first row of the group (relies on the rows
#                 being in play order within each half-inning)
#   MAX.Runs    - Runs.Start plus Runs.Inning
half_innings <- data_2011 %>%
  group_by(HALF.INNING) %>%
  summarise(
    Outs.Inning = sum(EVENT_OUTS_CT),
    Runs.Inning = sum(RUNS.SCORED),
    Runs.Start = first(RUNS),
    MAX.Runs = Runs.Inning + Runs.Start
  )
           

In [9]:
# Preview the first ten half-inning summaries.
head(half_innings, n = 10)

HALF.INNING,Outs.Inning,Runs.Inning,Runs.Start,MAX.Runs
<chr>,<dbl>,<int>,<dbl>,<dbl>
ANA201104080 1 0,3,0,0,0
ANA201104080 1 1,3,1,0,1
ANA201104080 2 0,3,0,1,1
ANA201104080 2 1,3,0,1,1
ANA201104080 3 0,3,0,1,1
ANA201104080 3 1,3,1,1,2
ANA201104080 4 0,3,0,2,2
ANA201104080 4 1,3,0,2,2
ANA201104080 5 0,3,0,2,2
ANA201104080 5 1,3,0,2,2
