/
swdi-transform.R
182 lines (160 loc) · 8.15 KB
/
swdi-transform.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#' Setup the column names for NOAA SWDI
#'
#' @return (array): character array with the variable names
#' @export
noaa_swdi_transform_col_names <- function(){
TFORM_NOAA_SWDI_DAILY_COLNAMES <- c("record_dt",
"geo_lon",
"geo_lat",
"tot_count")
return(TFORM_NOAA_SWDI_DAILY_COLNAMES)
}
#' Provides a helper dataframe to list all of the files extracted for NOAA SWDI
#' for a specified year of download
#'
#' @param ds_source (character) : name of the data source, default to "noaa_swdi"
#' @param dl_date (date) : Date in which the file were downloaded. This is
#' going to look for a folder in the `data/{ds_source}/` named
#' as this date
#' @param yr (integer) : The specifics NOAA SWDI year CSV we want to
#' transform
#' @param noaa_swdi_ind_tiles (integer) : A value in \code{{0, 1}}, with 1
#' indicating that tiles data is to be downloaded and extracted,
#' and 0 indicating non-tiles data is to be downloaded and extracted. Currently
#' only a value of 1 i.e. tiles is supported.
#' @param noaa_swdi_type (character) : Specifying the type of NOAA-SWDI data to
#' download and extract. Can be one of the following values:
#' \code{hail, mda, meso, nldn, plsr, structure, tvs, warn}
#'
#' @return A tibble with filenames
#' @export
get_noaa_swdi_mtda_paths <- function(ds_source = 'noaa_swdi',
dl_date,
yr,
noaa_swdi_ind_tiles,
noaa_swdi_type){
# NOTE: Only tiles transform is supported currently
assertthat::assert_that(noaa_swdi_ind_tiles == 1)
tiles_path <- dplyr::if_else(noaa_swdi_ind_tiles == 1, "tiles", "no_tiles")
# Define key directories and metadata file paths
outpath_mtda <- here::here("data", ds_source, base::format(dl_date,
"%Y%m%d"),
tiles_path,
noaa_swdi_type,
stringr::str_c(ds_source,
"metadata.csv",
sep = "_"))
outpath_mtda_all <- here::here("data", ds_source, base::format(dl_date,
"%Y%m%d"),
tiles_path,
noaa_swdi_type,
stringr::str_c(ds_source,
"metadata_all.csv",
sep = "_"))
outdir_mtda <- outpath_mtda %>% fs::path_dir(path = .)
outdir_mtda_all <- outpath_mtda_all %>% fs::path_dir(path = .)
outdir_mtda_yr_dir <- fs::path_join(parts = c(outdir_mtda, yr))
outdir_mtda_yr_files <- outdir_mtda_yr_dir %>%
# TODO: Check if we can set recurse = FALSE and only specify the
# unzipped directory once we extract to a specified path
fs::dir_ls(glob = "*", recurse = TRUE) %>%
tibble::enframe(x = ., name = NULL, value = "fpath") %>%
dplyr::mutate(fextn = fs::path_ext(fpath),
fname = fs::path_file(fpath),
# Get different shape file type indicators
# https://www.earthdatascience.org/courses/earth-analytics/spatial-data-r/shapefile-structure/
ind_shp =
backburner::ind_re_match(string = fextn,
pattern = "shp$"),
ind_prj =
backburner::ind_re_match(string = fextn,
pattern = "prj$"),
ind_shx =
backburner::ind_re_match(string = fextn,
pattern = "shx$"),
ind_dbf =
backburner::ind_re_match(string = fextn,
pattern = "dbf$"),
ind_sqlite =
backburner::ind_re_match(string = fextn,
pattern = "sqlite$"),
ind_csv =
backburner::ind_re_match(string = fextn,
pattern = "csv$"),
year = yr)
base::return(outdir_mtda_yr_files)
}
#' Transform the NOAA SWDI file
#'
#' TODO: Need to add assertions to check the dimension of the column names and
#' also the values. Somehow need to parse in the metadata from the web
#' and get the field values in an automated manner.
#'
#' @param fpath (character) : The path to the shapefile we want to transform
#' @param new_colnames (character) : New list of colnames we want to set for
#' our transformed output shapefile. If \code{NULL} the column names will get
#' converted to lower case and spaces replaced by underscores via the
#' \code{janitor} package
#'
#' @return (sf object) : transformed shapefile
#' @export
get_transform_noaa_swdi <- function(fpath, new_colnames){
# Read the NOAA SWDI file efficiently using vroom
csv_df <- vroom::vroom(file = fpath,
skip = 3, # We need to skip 3 rows for the SWDI tiles datasets
delim = ",",
col_names = FALSE,
col_types = "cddd")
csv_df <- csv_df %>%
magrittr::set_colnames(x = ., value = new_colnames) %>%
dplyr::mutate(record_dt = lubridate::ymd(record_dt))
# Convert to sf object (for geo lat/long pairs) under standard projection
csv_sf <- sf::st_as_sf(csv_df,
coords = c("geo_lon", "geo_lat"),
crs = 4326,
agr = "constant") %>%
dplyr::select(record_dt, geometry, tot_count)
base::return(csv_sf)
}
#' Wrapper for NOAA SWDI transformation pipeline.
#'
#' @param ds_source (character) : data names. Default to "noaa_swdi"
#' @param dl_date (date) : Date in which the file were downloaded. This is
#' going to look for a folder in the `data/{ds_source}/` named
#' as this date
#' @param yrs (array): numerical array with the years which needs to be transformed
#' @return (list) : A list of transformed shapefiles, in which the names is the year
#'
#' @export
noaa_swdi_transform <- function(dl_date,
yrs,
ds_source = "noaa_swdi",
noaa_swdi_ind_tiles = 1,
noaa_swdi_type){
# NOTE: Only tiles transform is supported currently
assertthat::assert_that(noaa_swdi_ind_tiles == 1)
tform_columns <- noaa_swdi_transform_col_names()
outdir_mtda_yr_files <- yrs %>%
purrr::map_df(.x = ., .f =
~get_noaa_swdi_mtda_paths(
ds_source = ds_source,
dl_date = dl_date,
yr = .x,
noaa_swdi_ind_tiles = noaa_swdi_ind_tiles,
noaa_swdi_type = noaa_swdi_type)) %>%
dplyr::arrange(year)
out_mtda_fpaths <- outdir_mtda_yr_files %>%
dplyr::filter(ind_csv == 1) %>%
dplyr::select(fpath) %>%
base::unlist(x = ., use.names = FALSE) %>%
base::as.list(x = .)
# Created a repeated list of column names
# TODO: May be a way to do this much more elegantly with `purrr::pmap`
out_mtda_colnames <- purrr::map(.x = 1:length(out_mtda_fpaths),
function(x) {tform_columns})
outdir_mtda_yr_out <- purrr::map2(.x = out_mtda_fpaths, .y = out_mtda_colnames,
.f = ~get_transform_noaa_swdi(fpath = .x,
new_colnames = .y))
names(outdir_mtda_yr_out) <- yrs
return(outdir_mtda_yr_out)
}