/
realtime.R
217 lines (189 loc) · 7.77 KB
/
realtime.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# Copyright 2017 Province of British Columbia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
#' Download a tibble of realtime river data from the last 30 days from the Meteorological Service of Canada datamart
#'
#' Download realtime river data from the last 30 days from the Meteorological Service of Canada (MSC) datamart.
#' The function will prioritize downloading data collected at the highest resolution. In instances where data is
#' not available at high (hourly or higher) resolution daily averages are used. Currently, if a station does not
#' exist or is not found, no data is returned.
#'
#' @inheritParams hy_stations
#'
#' @return A tibble of water flow and level values. The date and time of the query (in UTC) is also
#' stored as an attribute.
#'
#' @format A tibble with 8 variables:
#' \describe{
#' \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number}
#' \item{PROV_TERR_STATE_LOC}{The province, territory or state in which the station is located}
#' \item{Date}{Observation date and time for last thirty days. Formatted as a POSIXct class in UTC for consistency.}
#' \item{Parameter}{Parameter being measured. Only possible values are Flow and Level}
#' \item{Value}{Value of the measurement. If Parameter equals Flow the units are m^3/s.
#' If Parameter equals Level the units are metres.}
#' \item{Grade}{reserved for future use}
#' \item{Symbol}{reserved for future use}
#' \item{Code}{quality assurance/quality control flag for the discharge}
#' \item{station_tz}{Station timezone based on tidyhydat::allstations$station_tz}
#' }
#'
#' @examples
#' \dontrun{
#' ## Download from multiple provinces
#' realtime_dd(station_number = c("01CD005", "08MF005"))
#'
#' ## To download all stations in Prince Edward Island:
#' pei <- realtime_dd(prov_terr_state_loc = "PE")
#'
#' ## Access the time of query
#' attributes(pei)$query_time
#' }
#'
#' @family realtime functions
#' @export
realtime_dd <- function(station_number = NULL, prov_terr_state_loc = NULL) {
if (!has_internet()) stop("No access to internet", call. = FALSE)
## If station number isn't and user wants the province
if (is.null(station_number)) {
realtime_data <- lapply(prov_terr_state_loc, all_realtime_station)
} else {
realtime_data <- lapply(station_number, single_realtime_station)
}
as.realtime(dplyr::bind_rows(realtime_data))
}
#' Download a tibble of active realtime stations
#'
#' An up to date dataframe of all stations in the Realtime Water Survey of Canada
#' hydrometric network operated by Environment and Climate Change Canada
#'
#' @inheritParams hy_stations
#'
#' @family realtime functions
#'
#' @format A tibble with 6 variables:
#' \describe{
#' \item{STATION_NUMBER}{Unique 7 digit Water Survey of Canada station number}
#' \item{STATION_NAME}{Official name for station identification}
#' \item{LATITUDE}{North-South Coordinates of the gauging station in decimal degrees}
#' \item{LONGITUDE}{East-West Coordinates of the gauging station in decimal degrees}
#' \item{PROV_TERR_STATE_LOC}{The province, territory or state in which the station is located}
#' \item{TIMEZONE}{Timezone of the station}
#' }
#'
#' @export
#'
#' @examples
#' \dontrun{
#' ## Available inputs for prov_terr_state_loc argument:
#' unique(realtime_stations()$prov_terr_state_loc)
#'
#' realtime_stations(prov_terr_state_loc = "BC")
#' realtime_stations(prov_terr_state_loc = c("QC", "PE"))
#' }
#'
realtime_stations <- function(prov_terr_state_loc = NULL) {
if (!has_internet()) stop("No access to internet", call. = FALSE)
prov <- prov_terr_state_loc
realtime_link <- "https://dd.weather.gc.ca/hydrometric/doc/hydrometric_StationList.csv"
url_check <- httr::GET(realtime_link, httr::user_agent("https://github.com/ropensci/tidyhydat"))
## Checking to make sure the link is valid
if (httr::http_error(url_check) == "TRUE") {
stop(paste0(realtime_link, " is not a valid url. Datamart may be down or the url has changed."))
}
if (is_mac()) {
# temporary patch to work around vroom 1.6.4 bug
readr::local_edition(1)
}
net_tibble <- httr::content(url_check,
type = "text/csv",
encoding = "UTF-8",
skip = 1,
col_names = c(
"STATION_NUMBER",
"STATION_NAME",
"LATITUDE",
"LONGITUDE",
"PROV_TERR_STATE_LOC",
"TIMEZONE"
),
col_types = readr::cols(
STATION_NUMBER = readr::col_character(),
STATION_NAME = readr::col_character(),
LATITUDE = readr::col_double(),
LONGITUDE = readr::col_double(),
PROV_TERR_STATE_LOC = readr::col_character(),
TIMEZONE = readr::col_character()
)
)
if (is.null(prov)) {
return(net_tibble)
}
as.realtime(dplyr::filter(net_tibble, PROV_TERR_STATE_LOC %in% prov))
}
#' Add local datetime column to realtime tibble
#'
#' Adds `local_datetime` and `tz_used` columns based on either the most common timezone in the original data or
#' a user supplied timezone. This function is meant to used in a pipe with the `realtime_dd()` function.
#'
#' @param .data Tibble created by \code{realtime_dd}
#' @param set_tz A timezone string in the format of \code{OlsonNames()}
#'
#' @details `Date` from `realtime_dd` is supplied in UTC which is the easiest format to work with across timezones. This function
#' does not change `Date` from UTC. Rather `station_tz` specifies the local timezone name and is useful in instances where
#' `realtime_add_local_datetime` adjusts local_datetime to a common timezone that is not the `station_tz`. This function is most
#' useful when all stations exist within the same timezone.
#'
#' @examples
#' \dontrun{
#'
#' realtime_dd(c("08MF005", "02LA004")) %>%
#' realtime_add_local_datetime()
#' }
#'
#' @export
realtime_add_local_datetime <- function(.data, set_tz = NULL) {
timezone_data <- dplyr::left_join(.data, tidyhydat::allstations[, c("STATION_NUMBER", "station_tz")], by = c("STATION_NUMBER"))
tz_used <- names(sort(table(timezone_data$station_tz), decreasing = TRUE)[1])
if (dplyr::n_distinct(timezone_data$station_tz) > 1) {
warning(paste0("Multiple timezones detected. All times in local_time have been adjusted to ", tz_used), call. = FALSE)
}
if (!is.null(set_tz)) {
message(paste0("Using ", set_tz, " timezones"))
tz_used <- set_tz
}
timezone_data$local_datetime <- lubridate::with_tz(timezone_data$Date, tz = tz_used)
timezone_data$tz_used <- tz_used
dplyr::select(
timezone_data, STATION_NUMBER, PROV_TERR_STATE_LOC, Date,
station_tz, local_datetime, tz_used, dplyr::everything()
)
}
#' Calculate daily means from higher resolution realtime data
#'
#' This function is meant to be used within a pipe as a means of easily moving from higher resolution
#' data to daily means.
#'
#' @param .data A data argument that is designed to take only the output of realtime_dd
#' @param na.rm a logical value indicating whether NA values should be stripped before the computation proceeds.
#'
#' @examples
#' \dontrun{
#' realtime_dd("08MF005") %>% realtime_daily_mean()
#' }
#'
#' @export
realtime_daily_mean <- function(.data, na.rm = FALSE) {
df_mean <- dplyr::mutate(.data, Date = as.Date(Date))
df_mean <- dplyr::group_by(df_mean, STATION_NUMBER, PROV_TERR_STATE_LOC, Date, Parameter)
df_mean <- dplyr::summarise(df_mean, Value = mean(Value, na.rm = na.rm))
df_mean <- dplyr::arrange(df_mean, Parameter)
dplyr::ungroup(df_mean)
}