Skip to content

Commit

Permalink
Feat: adding wnba_pbps() function with on_court parameter courtes…
Browse files Browse the repository at this point in the history
…y of @shufinskiy
  • Loading branch information
saiemgilani committed Nov 25, 2023
1 parent 2cde7be commit 3b9d439
Show file tree
Hide file tree
Showing 11 changed files with 457 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: wehoop
Title: Access Women's Basketball Play by Play Data
Version: 1.9.0
Version: 2.0.0
Authors@R: c(person('Saiem', 'Gilani', email = 'saiem.gilani@gmail.com',role = c('aut','cre')),
person('Geoffery','Hutchinson', email = 'geoffery.hutchinson@gmail.com', role = c('aut')))
Description: A utility for working with women's basketball data. A scraping and aggregating interface for the WNBA Stats API <https://stats.wnba.com/> and ESPN's <https://www.espn.com> women's college basketball and WNBA statistics. It provides users with the capability to access the game play-by-plays, box scores, standings and results to analyze the data for themselves.
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ export(wnba_leaguestandingsv3)
export(wnba_live_boxscore)
export(wnba_live_pbp)
export(wnba_pbp)
export(wnba_pbps)
export(wnba_playerawards)
export(wnba_playercareerbycollege)
export(wnba_playercareerbycollegerollup)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

* ```load_wnba_*()``` functions now use `sportsdataverse-data` releases url instead of `wehoop-data` repository URL
* ```load_wbb_*()``` functions now use `sportsdataverse-data` releases url instead of `wehoop-data` repository URL
* ```wnba_pbp()``` function and new ```wnba_pbps()``` function added with `on_court` (default `TRUE`) parameter to return on court players for each play event

# **wehoop 1.9.0**

Expand Down
326 changes: 322 additions & 4 deletions R/wnba_stats_pbp.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,211 @@

#' **Add players on court in WNBA Stats API play-by-play**
#' @name .players_on_court
NULL
#' @title
#' **Add players on court in WNBA Stats API play-by-play**
#' @author Vladislav Shufinskiy
#' @param pbp_data PlayByPlay data frame received `wnba_pbp` function
#' @return Returns a data frame: PlayByPlay
#'
#' |col_name |types |
#' |:-------------------------|:---------|
#' |game_id |character |
#' |event_num |character |
#' |event_type |character |
#' |event_action_type |character |
#' |period |numeric |
#' |minute_game |numeric |
#' |time_remaining |numeric |
#' |wc_time_string |character |
#' |time_quarter |character |
#' |minute_remaining_quarter |numeric |
#' |seconds_remaining_quarter |numeric |
#' |home_description |character |
#' |neutral_description |character |
#' |visitor_description |character |
#' |score |character |
#' |away_score |numeric |
#' |home_score |numeric |
#' |score_margin |character |
#' |person1type |character |
#' |player1_id |character |
#' |player1_name |character |
#' |player1_team_id |character |
#' |player1_team_city |character |
#' |player1_team_nickname |character |
#' |player1_team_abbreviation |character |
#' |person2type |character |
#' |player2_id |character |
#' |player2_name |character |
#' |player2_team_id |character |
#' |player2_team_city |character |
#' |player2_team_nickname |character |
#' |player2_team_abbreviation |character |
#' |person3type |character |
#' |player3_id |character |
#' |player3_name |character |
#' |player3_team_id |character |
#' |player3_team_city |character |
#' |player3_team_nickname |character |
#' |player3_team_abbreviation |character |
#' |video_available_flag |character |
#' |team_leading |character |
#' |away_player1 |numeric |
#' |away_player2 |numeric |
#' |away_player3 |numeric |
#' |away_player4 |numeric |
#' |away_player5 |numeric |
#' |home_player1 |numeric |
#' |home_player2 |numeric |
#' |home_player3 |numeric |
#' |home_player4 |numeric |
#' |home_player5 |numeric |
#'
#' @importFrom jsonlite fromJSON toJSON
#' @importFrom dplyr filter select rename bind_cols bind_rows as_tibble
#' @import rvest
#' @noRd
#' @family WNBA PBP Functions
.players_on_court <- function(pbp_data) {

pbp_data <- dplyr::mutate(pbp_data, PCTIMESTRING = ifelse(.data$period < 5, abs((.data$minute_remaining_quarter * 60 + .data$seconds_remaining_quarter) - 720 * .data$period),
abs((.data$minute_remaining_quarter * 60 + .data$seconds_remaining_quarter) - (2880 + 300 * (.data$period - 4)))))

l <- lapply(sort(unique(pbp_data$period)), function(x){

pbp_data_period <- dplyr::filter(pbp_data, .data$period == x)
all_id <- unique(c(pbp_data_period$player1_id[!pbp_data_period$event_type %in% c(9, 18) & !is.na(pbp_data_period$player1_name) & !pbp_data_period$person1type %in% c(6, 7)],
pbp_data_period$player2_id[!pbp_data_period$event_type %in% c(9, 18) & !is.na(pbp_data_period$player2_name) & !pbp_data_period$person2type %in% c(6, 7)],
pbp_data_period$player3_id[!pbp_data_period$event_type %in% c(9, 18) & !is.na(pbp_data_period$player3_name) & !pbp_data_period$person3type %in% c(6, 7)]))
all_id <- as.numeric(all_id)

all_id <- all_id[all_id != 0 & all_id < 1610612737]

sub_off <- as.numeric(unique(pbp_data_period$player1_id[pbp_data_period$event_type == 8]))
sub_on <- as.numeric(unique(pbp_data_period$player2_id[pbp_data_period$event_type == 8]))

'%!in%' <- Negate(`%in%`)
all_id <- all_id[all_id %!in% setdiff(sub_on, sub_off)]

sub_on_off <- dplyr::intersect(sub_on, sub_off)

for (i in sub_on_off){
on <- min(pbp_data_period$PCTIMESTRING[pbp_data_period$event_type == 8 & pbp_data_period$player2_id == i])
off <- min(pbp_data_period$PCTIMESTRING[pbp_data_period$event_type == 8 & pbp_data_period$player1_id == i])
if (off > on){
all_id <- all_id[all_id != i]
} else if (off == on){
on_event <- min(pbp_data_period$event_num[pbp_data_period$event_type == 8 & pbp_data_period$player2_id == i])
off_event <- min(pbp_data_period$event_num[pbp_data_period$event_type == 8 & pbp_data_period$player1_id == i])
if(off_event > on_event){
all_id <- all_id[all_id != i]
}
}
}

if((length(all_id) == 10)){
ord_all_id <- pbp_data_period %>%
dplyr::select("player1_id", "person1type") %>%
dplyr::filter(.data$player1_id != 0 & .data$person1type %in% c(4, 5)) %>%
dplyr::rename("player_id" = "player1_id", "persontype" = "person1type") %>%
dplyr::bind_rows(pbp_data_period %>%
dplyr::select("player2_id", "person2type") %>%
dplyr::filter(.data$player2_id != 0 & .data$person2type %in% c(4, 5)) %>%
dplyr::rename("player_id" = "player2_id", "persontype" = "person2type")) %>%
dplyr::bind_rows(pbp_data_period %>%
dplyr::select("player3_id", "person3type") %>%
dplyr::filter(.data$player3_id != 0 & .data$person3type %in% c(4, 5)) %>%
dplyr::rename("player_id" = "player3_id", "persontype" = "person3type")) %>%
dplyr::distinct() %>%
dplyr::arrange(dplyr::desc(.data$persontype)) %>%
dplyr::select("player_id") %>%
dplyr::mutate(player_id = as.numeric(.data$player_id)) %>%
dplyr::pull()

all_id <- ord_all_id[ord_all_id %in% all_id]
}

if(length(all_id) != 10){

if(inherits(pbp_data$game_id[1], "integer")){
tmp_gameid <- paste0("00", as.character(pbp_data$game_id[1]))
} else{
tmp_gameid <- pbp_data$game_id[1]
}

tmp_data <- wnba_boxscoretraditionalv2(game_id = tmp_gameid, start_period = x, end_period = x, range_type = 1)$PlayerStats

all_id <- as.integer(tmp_data$PLAYER_ID)

sub_off <- unique(pbp_data_period$player1_id[pbp_data_period$event_type == 8])
sub_on <- unique(pbp_data_period$player2_id[pbp_data_period$event_type == 8])

'%!in%' <- Negate(`%in%`)
all_id <- all_id[all_id %!in% setdiff(sub_on, sub_off)]

sub_on_off <- dplyr::intersect(sub_on, sub_off)

for (i in sub_on_off){
on <- min(pbp_data_period$PCTIMESTRING[pbp_data_period$event_type == 8 & pbp_data_period$player2_id == i])
off <- min(pbp_data_period$PCTIMESTRING[pbp_data_period$event_type == 8 & pbp_data_period$player1_id == i])
if (off > on){
all_id <- all_id[all_id != i]
} else if (off == on){
on_event <- min(pbp_data_period$even_num[pbp_data_period$event_type == 8 & pbp_data_period$player2_id == i])
off_event <- min(pbp_data_period$even_num[pbp_data_period$event_type == 8 & pbp_data_period$player1_id == i])
if(off_event > on_event){
all_id <- all_id[all_id != i]
}
}
}
}

columns <- paste0("player", seq(1, 10))
pbp_data_period[columns] <- NA

for(i in seq(1:10)){
pbp_data_period[columns][i] <- all_id[i]
}

for(column in paste0("player", seq(1, 10))){
i <- 1
repeat{
n <- nrow(pbp_data_period)
if(length(which(pbp_data_period$event_type == 8 & as.numeric(pbp_data_period$player1_id) == pbp_data_period[, column])) == 0){
break
}
i <- min(which(pbp_data_period$event_type == 8 & pbp_data_period[, column] == as.numeric(pbp_data_period$player1_id)))
player_on <- as.numeric(pbp_data_period$player2_id[i])
pbp_data_period[i:n, column] <- player_on
}
}
return(dplyr::select(pbp_data_period, -"PCTIMESTRING"))
})
return(dplyr::bind_rows(l) %>% dplyr::rename(
"away_player1" = 'player1',
"away_player2" = 'player2',
"away_player3" = 'player3',
"away_player4" = 'player4',
"away_player5" = 'player5',
"home_player1" = 'player6',
"home_player2" = 'player7',
"home_player3" = 'player8',
"home_player4" = 'player9',
"home_player5" = 'player10'
))
}

#' **Get WNBA Stats API play-by-play**
#' @name wnba_pbp
NULL
#' @title
#' **Get WNBA Stats API play-by-play**
#' @rdname wnba_pbp
#' @param game_id Game ID
#' @param on_court IF TRUE will be added ID of players on court
#' @param version Play-by-play version ("v2" available from 2016-17 onwards)
#' @param p Progress bar
#' @param ... Additional arguments passed to an underlying function like httr.
#' @return Returns a data frame: PlayByPlay
#'
Expand Down Expand Up @@ -61,10 +260,12 @@ NULL
#' @family WNBA PBP Functions
#' @details
#' ```r
#' wnba_pbp(game_id = "1022200034")
#' wnba_pbp(game_id = "1022200034", on_court = TRUE)
#' ```
wnba_pbp <- function(game_id,
version = "v2",
wnba_pbp <- function(game_id,
on_court = TRUE,
version = "v2",
p,
...){

if (version == "v2") {
Expand Down Expand Up @@ -147,7 +348,7 @@ wnba_pbp <- function(game_id,
(((
60 - .data$seconds_remaining_quarter
) / 60) - 1), 2),
time_remaining = 48 - round(((.data$period - 1) * 12) - (12 - .data$minute_remaining_quarter) -
time_remaining = 40 - round(((.data$period - 1) * 12) - (12 - .data$minute_remaining_quarter) -
((60 - .data$seconds_remaining_quarter) / 60 - 1), 2)
) %>%
dplyr::select(
Expand All @@ -157,6 +358,10 @@ wnba_pbp <- function(game_id,
dplyr::everything()
) %>%
make_wehoop_data("WNBA Game Play-by-Play Information from WNBA.com", Sys.time())

if(on_court){
data <- .players_on_court(data)
}
}
},
error = function(e) {
Expand All @@ -171,6 +376,119 @@ wnba_pbp <- function(game_id,
}


#' **Get WNBA Stats API play-by-play (Multiple Games)**
#' @name wnba_pbps
NULL
#' @title
#' **Get WNBA Stats API play-by-play (Multiple Games)**
#' @rdname wnba_pbps
#' @author Jason Lee
#' @param game_ids Game IDs
#' @param on_court IF TRUE will be added ID of players on court
#' @param version Play-by-play version ("v2" available from 2016-17 onwards)
#' @param nest_data If TRUE returns nested data by game
#' @param ... Additional arguments passed to an underlying function like httr.
#' @return Returns a data frame: PlayByPlay
#'
#' |col_name |types |
#' |:-------------------------|:---------|
#' |game_id |character |
#' |event_num |character |
#' |event_type |character |
#' |event_action_type |character |
#' |period |numeric |
#' |minute_game |numeric |
#' |time_remaining |numeric |
#' |wc_time_string |character |
#' |time_quarter |character |
#' |minute_remaining_quarter |numeric |
#' |seconds_remaining_quarter |numeric |
#' |home_description |character |
#' |neutral_description |character |
#' |visitor_description |character |
#' |score |character |
#' |away_score |numeric |
#' |home_score |numeric |
#' |score_margin |character |
#' |person1type |character |
#' |player1_id |character |
#' |player1_name |character |
#' |player1_team_id |character |
#' |player1_team_city |character |
#' |player1_team_nickname |character |
#' |player1_team_abbreviation |character |
#' |person2type |character |
#' |player2_id |character |
#' |player2_name |character |
#' |player2_team_id |character |
#' |player2_team_city |character |
#' |player2_team_nickname |character |
#' |player2_team_abbreviation |character |
#' |person3type |character |
#' |player3_id |character |
#' |player3_name |character |
#' |player3_team_id |character |
#' |player3_team_city |character |
#' |player3_team_nickname |character |
#' |player3_team_abbreviation |character |
#' |video_available_flag |character |
#' |team_leading |character |
#' |away_player1 |numeric |
#' |away_player2 |numeric |
#' |away_player3 |numeric |
#' |away_player4 |numeric |
#' |away_player5 |numeric |
#' |home_player1 |numeric |
#' |home_player2 |numeric |
#' |home_player3 |numeric |
#' |home_player4 |numeric |
#' |home_player5 |numeric |
#'
#' @export
#' @family WNBA PBP Functions
#' @details
#' ```r
#' y <- c("1022200034", "1022200035" )
#'
#' wnba_pbps(game_ids = y, version = "v2")
#' ```
wnba_pbps <- function(
game_ids = NULL,
on_court = TRUE,
version = "v2",
nest_data = FALSE,
...) {

old <- options(list(stringsAsFactors = FALSE, scipen = 999))
on.exit(options(old))

if (game_ids %>% purrr::is_null()) {
stop("Please enter game ids")
}


p <- NULL
if (is_installed("progressr")) p <- progressr::progressor(along = game_ids)
get_pbp_safe <- progressively(wnba_pbp, p)


all_data <-
game_ids %>%
purrr::map_dfr(function(game_id) {
get_pbp_safe(game_id = game_id, on_court = on_court, ..., p = p)
})

if (nest_data) {
all_data <-
all_data %>%
dplyr::group_by(.data$game_id) %>%
tidyr::nest()
}

return(all_data)
}


#' **Get WNBA Stats API Live play-by-play**
#' @name wnba_live_pbp
NULL
Expand Down
1 change: 1 addition & 0 deletions cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This is a minor release that:
* ```wnba_infographicfanduelplayer()``` function added.
* ```wnba_live_pbp()``` function added.
* ```wnba_live_boxscore()``` function added.
* ```wnba_pbps()``` function added.
* ```wnba_todays_scoreboard()``` function added.
* ```wnba_scoreboardv3()``` function added.
* ```wnba_boxscoretraditionalv3()``` function added.
Expand Down
Loading

0 comments on commit 3b9d439

Please sign in to comment.