<a href="https://colab.research.google.com/github/michiWS1920/nfl_data/blob/master/playingaround.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
library(tidyverse)
library(ggplot2)


── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

✔ ggplot2 3.3.1     ✔ purrr   0.3.4
✔ tibble  3.0.1     ✔ dplyr   1.0.0
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.5.0

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()



In [20]:
# Install googledrive only when it is not already available, so re-running
# this cell does not download and reinstall the package every time.
if (!requireNamespace("googledrive", quietly = TRUE)) {
  install.packages("googledrive")
}


Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependency ‘gargle’




In [21]:
library(googledrive)

In [None]:
# Upload googledrive's bundled example file "chicken.csv" to Google Drive
# under the name "README-chicken.csv" and keep the value returned by
# drive_upload() in `chicken`.
chicken <- drive_upload(
  drive_example("chicken.csv"),
  "README-chicken.csv"
)

In [None]:
# Download the nflscrapR regular-season play-by-play files for every season
# from `first` to `last`, attach `week` and `season` from the matching games
# file, and stack all seasons into a single data frame `pbp_all`.
first <- 2009
last <- 2019
seasons <- first:last

# One slot per season, keyed by the season as a string. Indexing the list by
# the raw year number (e.g. datalist[[2009]]) would silently create thousands
# of empty leading slots.
datalist <- vector("list", length(seasons))
names(datalist) <- as.character(seasons)

for (yr in seasons) {
  # Play-by-play data for this season.
  pbp <- read_csv(url(paste0("https://github.com/ryurko/nflscrapR-data/raw/master/play_by_play_data/regular_season/reg_pbp_", yr, ".csv")))
  # Game-level data for this season (score, week, etc.).
  games <- read_csv(url(paste0("https://raw.githubusercontent.com/ryurko/nflscrapR-data/master/games_data/regular_season/reg_games_", yr, ".csv")))

  pbp <- pbp %>%
    # Natural join on whatever columns the two tables share
    # (presumably just game_id -- confirm against the data).
    inner_join(games %>% distinct(game_id, week, season)) %>%
    # Drop variables whose format is inconsistent across seasons; otherwise
    # the per-season tables cannot be row-bound.
    select(
      -fumble_recovery_2_yards,
      -blocked_player_id,
      -fumble_recovery_2_player_id
    )

  datalist[[as.character(yr)]] <- pbp
}

pbp_all <- dplyr::bind_rows(datalist)

In [6]:
# Recode team abbreviations that changed during the covered seasons so each
# franchise has one consistent code: JAX -> JAC, STL -> LA, SD -> LAC.
# Applied to every team column; all other values (including NA) pass through
# unchanged. Uses across() instead of the deprecated mutate_at()/funs() pair.
pbp_all <- pbp_all %>%
  mutate(across(
    c(home_team, away_team, posteam, defteam),
    ~ case_when(
      .x %in% "JAX" ~ "JAC",
      .x %in% "STL" ~ "LA",
      .x %in% "SD" ~ "LAC",
      TRUE ~ .x
    )
  ))

In [7]:
# Cache the combined play-by-play table on disk, then load it back from that
# file so later cells work from the stored copy.
saveRDS(object = pbp_all, file = "NFLdata.rds")
pbp_all <- readRDS(file = "NFLdata.rds")

In [10]:
# Build `pbp_all_rp`, the run/pass table used for further analysis.
#
# Steps:
#   1. Keep plays with a non-missing `epa` and `posteam` whose `play_type` is
#      "no_play", "pass" or "run".
#   2. Flag each play as pass or rush by scanning the play description
#      (`desc`) for keywords.
#   3. For "no_play" rows, take passer / receiver / rusher names from `desc`
#      instead of the existing name columns.
#   4. Keep only plays flagged as a pass or a rush.

# Run-direction keywords that mark a rushing play in `desc`.
run_gap_pattern <- paste0(
  "(left end)|(left tackle)|(left guard)|(up the middle)|",
  "(right guard)|(right tackle)|(right end)"
)

# Player name as it appears in `desc`: initial(s), a dot, optional space, a
# surname, and an optional " II" / " III" / " IV" suffix.
player_name_pattern <- "[A-Z][a-z]*\\.\\s?[A-Z][A-Za-z]+(\\s(I{2,3}|IV))?"

pbp_all_rp <- pbp_all %>%
  filter(
    !is.na(epa),
    !is.na(posteam),
    play_type %in% c("no_play", "pass", "run")
  ) %>%
  mutate(
    # A play is a pass if the description mentions a pass, sack or scramble.
    pass = if_else(str_detect(desc, "( pass)|(sacked)|(scramble)"), 1, 0),
    # A play is a rush if it names a run direction and is not already a pass.
    rush = if_else(str_detect(desc, run_gap_pattern) & pass == 0, 1, 0),
    success = ifelse(epa > 0, 1, 0),
    # For "no_play" rows, pull the name out of the description instead:
    # the passer is the name directly before "pass" / "sack" / "scramble".
    passer_player_name = ifelse(
      play_type == "no_play" & pass == 1,
      str_extract(
        desc,
        paste0(
          "(?<=\\s)", player_name_pattern,
          "(?=\\s((pass)|(sack)|(scramble)))"
        )
      ),
      passer_player_name
    ),
    # The receiver is the name directly after "to ".
    receiver_player_name = ifelse(
      play_type == "no_play" & str_detect(desc, "pass"),
      str_extract(desc, paste0("(?<=to\\s)", player_name_pattern)),
      receiver_player_name
    ),
    # The rusher is the name directly before a run-direction keyword.
    rusher_player_name = ifelse(
      play_type == "no_play" & rush == 1,
      str_extract(
        desc,
        paste0("(?<=\\s)", player_name_pattern, "(?=\\s(", run_gap_pattern, "))")
      ),
      rusher_player_name
    ),
    # Primary ball handler: the passer when there is one, otherwise the rusher.
    name = ifelse(!is.na(passer_player_name), passer_player_name, rusher_player_name),
    # Blank out yards gained on "no_play" rows (set to NA rather than dropped).
    yards_gained = ifelse(play_type == "no_play", NA, yards_gained),
    play = 1
  ) %>%
  # Only keep passes and rushes.
  filter(pass == 1 | rush == 1)

In [19]:
# Spot-check the cleaned table: show the plays whose rusher is "C.Newton",
# restricted to a handful of descriptive columns.
pbp_all_rp %>%
  filter(rusher_player_name == "C.Newton") %>%
  select(
    desc, yardline_100, home_team, defteam,
    play_type, down, season, week
  )

desc,yardline_100,home_team,defteam,play_type,down,season,week
<chr>,<dbl>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
(4:19) C.Newton up the middle to CAR 30 for -4 yards (R.Marshall).,66,ARI,ARI,run,2,2011,1
(:07) C.Newton scrambles right tackle to CAR 41 for 3 yards (P.Lenon).,62,ARI,ARI,run,1,2011,1
(5:11) (Shotgun) C.Newton scrambles right end to CAR 14 for 2 yards (P.Lenon).,88,ARI,ARI,run,3,2011,1
(1:03) (Shotgun) C.Newton scrambles right end pushed ob at CAR 38 for 12 yards (D.Washington).,74,ARI,ARI,run,1,2011,1
(6:29) C.Newton up the middle to ARI 1 for 1 yard (D.Williams).,2,ARI,ARI,run,1,2011,1
"(5:54) (Shotgun) C.Newton up the middle for 1 yard, TOUCHDOWN. PENALTY on CAR-C.Newton, Unsportsmanlike Conduct, 15 yards, enforced between downs.",1,ARI,ARI,run,2,2011,1
"(5:51) (Shotgun) C.Newton scrambles up the middle to CAR 36 for 2 yards (N.Eason, P.Lenon).",66,ARI,ARI,run,3,2011,1
(1:54) (Shotgun) C.Newton scrambles right end ran ob at ARI 45 for 1 yard (C.Campbell).,46,ARI,ARI,run,1,2011,1
(10:43) (Shotgun) C.Newton scrambles right end to GB 1 for 3 yards (M.Burnett).,4,CAR,GB,run,2,2011,2
(8:18) (Shotgun) C.Newton scrambles up the middle to GB 5 for 6 yards (J.Bush).,11,CAR,GB,run,2,2011,2
