-
Notifications
You must be signed in to change notification settings - Fork 3
/
update-depth-charts.R
119 lines (104 loc) · 3.61 KB
/
update-depth-charts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
most_rec_season <- stringi::stri_extract_all_regex(dir("data/seasons"), "depth_charts_[0-9]{4}") |>
unlist() |>
na.omit() |>
max() |>
(\(x)gsub("depth_charts_", "", x))()
most_rec_season <- ifelse(is.na(most_rec_season), 2001, most_rec_season)
seasons_to_scrape <- c(most_rec_season:nflreadr:::most_recent_season())
cli::cli_alert_info("Scraping teams...")
scrape_teams <- function(year) {
h <- httr::handle("https://www.nfl.info")
r <- httr::GET(
handle = h,
path = glue::glue(
"/nfldataexchange/dataexchange.asmx/getClubs?lseason={year}"
),
httr::authenticate("media", "media"),
url = NULL
)
teams_df <- httr::content(r) |>
XML::xmlParse() |>
XML::xmlToDataFrame()
rm(h) # close handle when finished, have had the api get mad when I don't close it
return(teams_df)
}
teams <- purrr::map_dfr(seasons_to_scrape, scrape_teams) |>
dplyr::filter(!(ClubCode %in% c("AFC", "NFC", "RIC", "SAN", "CRT", "IRV"))) |>
# remove all-star teams
dplyr::mutate(Season = as.integer(Season)) |>
dplyr::select(club_code = ClubCode, season = Season) |>
tidyr::expand_grid(season_type = c("REG", "POST"))
cli::cli_alert_info("Scraping depth charts...")
scrape_dc <- function(season, team, season_type) {
cli::cli_process_start("Loading {season} {team}, {season_type}")
h <- httr::handle("https://www.nfl.info")
r <- httr::GET(
handle = h,
path = glue::glue(
"/nfldataexchange/dataexchange.asmx/getGameDepthChart?lSeason={season}&lSeasonType={season_type}&lWeek=0&lClub={team}"
),
httr::authenticate("media", "media"),
url = NULL
)
dc_df <- httr::content(r) |>
XML::xmlParse() |>
XML::xmlToDataFrame()
rm(h)
cli::cli_process_done()
return(dc_df)
}
dc_df <- purrr::pmap_dfr(list(teams$season, teams$club_code, teams$season_type), scrape_dc)
if (nrow(dc_df) > 0){
dc_df <- dc_df |>
dplyr::mutate(
ClubCode = dplyr::case_when(
ClubCode == "ARZ" ~ "ARI",
ClubCode == "BLT" ~ "BAL",
ClubCode == "CLV" ~ "CLE",
ClubCode == "HST" ~ "HOU",
ClubCode == "SL" ~ "STL",
T ~ ClubCode
),
full_name = paste(FootballName, LastName),
dplyr::across(c(Season, Week, DepthTeam, JerseyNumber), as.integer),
Week = dplyr::case_when(SeasonType == 'POST' ~ as.integer(Week) + max(as.integer(Week[SeasonType == 'REG'])),
T~as.integer(Week)),
) |>
dplyr::select(
season = Season,
week = Week,
team = ClubCode,
season_type = SeasonType,
position = Position,
depth_chart_position = DepthPosition,
formation = Formation,
depth_team = DepthTeam,
jersey_number = JerseyNumber,
full_name,
first_name = FootballName,
last_name = LastName,
gsis_id = GsisID
)
cli::cli_alert_info("Saving depth charts...")
dc_split <- dc_df |>
dplyr::group_split(season)
purrr::walk(dc_split, function(x) {
saveRDS(x, glue::glue("data/seasons/depth_charts_{unique(x$season)}.rds"))
readr::write_csv(x, glue::glue("data/seasons/depth_charts_{unique(x$season)}.csv.gz"))
})
full_dc_df <- list.files("data/seasons", pattern = "depth_charts_[0-9]+\\.rds", full.names = TRUE) |>
purrr::map_dfr(readRDS)
saveRDS(full_dc_df, "data/nflfastR-depth_charts.rds")
readr::write_csv(full_dc_df, "data/nflfastR-depth_charts.csv.gz")
qs::qsave(
full_dc_df,
"data/nflfastR-depth_charts.qs",
preset = "custom",
algorithm = "zstd_stream",
compress_level = 22,
shuffle_control = 15
)
cli::cli_alert_success("Finished scraping depth charts!")
} else {
cli::cli_alert_info("No data to save!")
}