/
form_schema_ext.R
574 lines (497 loc) · 19.3 KB
/
form_schema_ext.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
#' Show the extended schema of one form.
#'
#' `r lifecycle::badge("experimental")`
#'
#' @details ODK Central has introduced a new API endpoint in version 0.8 which
#' returns a parsed and flattened list of fields. This replaces the nested
#' form schema which is challenging to parse. This list is returned
#' by \code{\link{form_schema}}.
#'
#' However this still misses important elements, in particular \code{labels} and
#' \code{choice_lists}.
#'
#' \code{\link{form_schema_ext}} returns the same object as
#' \code{\link{form_schema}}
#' adding \code{labels} and \code{choice lists} in all languages available.
#' This is done by using the return object from \code{\link{form_xml}}.
#'
#' It has the exact function signature as \code{\link{form_schema}}.
#' In that sense, any call to \code{\link{form_schema}} can be replaced
#' by \code{\link{form_schema_ext}}
#'
#' This function, however, has been prepared with ODK Central version 0.8 or
#' higher. If you use it with an earlier version, a warning will be given.
#'
#'
#' @param flatten Whether to flatten the resulting list of lists (\code{TRUE})
#' or not (\code{FALSE}, default). Only applies to ODK Central version < 0.8.
#' @param odata Whether to sanitise the field names to match the way they will
#' be outputted for OData. While the original field names as given in the
#' XForms definition may be used as-is for CSV output, OData has some
#' restrictions related to the domain-qualified identifier syntax it uses.
#' Only applies to ODK Central version < 0.8.
#' Default: \code{FALSE}.
#' @param parse Whether to parse the form schema into a tibble of form field
#' type and name. This uses \code{\link{form_schema_parse}} internally.
#' If used together with `flatten=TRUE`, \code{\link{form_schema}} will raise
#' a warning and return the unparsed, flattened form schema.
#' Only applies to ODK Central version < 0.8.
#' Default: TRUE.
#' @template param-pid
#' @template param-fid
#' @template param-url
#' @template param-auth
#' @template param-retries
#' @template param-odkcv
#' @template param-verbose
#' @return A tibble containing the form definition.
#' For ODK Central 0.8, and with default parameters
#' (\code{parse=TRUE}) for ODK Central 0.7, \code{\link{form_schema}} returns
#' a tibble with the columns:
#'
#' \itemize{
#' \item \code{name} The field name as given in the form schema.
#' \item \code{type} The field type, e.g. "string", "select1", etc.
#' \item \code{path} The XForms path of the field,
#' \item \code{ruodk_name} The predicted field name as generated by
#' \code{\link{odata_submission_get}}, prefixed by the path, additionally
#' cleaned with \code{\link[janitor]{make_clean_names}} to match the
#' cleaned column names from \code{\link{odata_submission_rectangle}}.
#' \item \code{label} The field label as given in the form schema.
#' If specific languages are available,
#' this column will return the \code{default} language or it will be empty
#' if this is not specified.
#' \item \code{label_lang} The field label in languange \emph{_lang} as
#' given in the form schema.
#' \item \code{choices} A list of lists containing at least \code{values} and,
#' if available, \code{labels} of the choices as given in the form schema.
#' If specific languages are available, this column will return the
#' \code{default} language or it will be empty if this is not specified.
#' Please notice that whenever choice filters are applied, this will return
#' the unfiltered choice list.
#' \item \code{choices_lang} A list of lists containing at least
#' \code{values} and, if available, \code{labels} of the choices in language
#' \emph{_lang} as given in the form schema.
#' Please notice that whenever choice filters are applied, this will return
#' the unfiltered choice list.
#'
#' }
# nolint start
#' @seealso \url{https://odkcentral.docs.apiary.io/#reference/forms-and-submissions/'-individual-form/getting-form-schema-fields}
#' @seealso \url{https://odkcentral.docs.apiary.io/#reference/forms-and-submissions/'-individual-form/retrieving-form-schema-json}
#' @seealso \url{https://odkcentral.docs.apiary.io/#reference/forms-and-submissions/'-individual-form/retrieving-form-xml}
# nolint end
#' @family form-management
#' @export
#' @examples
#' \dontrun{
#' # See vignette("setup") for setup and authentication options
#' # ruODK::ru_setup(svc = "....svc", un = "me@email.com", pw = "...")
#'
#' # With current ODK Central (>0.7)
#' # get extended schema:
#' fsx <- form_schema_ext()
#'
#' # print choice list in english:
#' fsx[fsx$name == "test_yn", "choices_english_(en)"][[1]]
#'
#' # view the extended schema:
#' fsx
#' }
form_schema_ext <- function(flatten = FALSE,
odata = FALSE,
parse = TRUE,
pid = get_default_pid(),
fid = get_default_fid(),
url = get_default_url(),
un = get_default_un(),
pw = get_default_pw(),
odkc_version = get_default_odkc_version(),
retries = get_retries(),
verbose = get_ru_verbose()) {
# version warning
# nocov start
if (odkc_version < 0.8) {
"Form Schema Extended works better with ODK Central 0.8 and above" %>%
ru_msg_warn()
}
# nocov end
# gets basic schema
frm_schema <- form_schema(
flatten = flatten,
odata = odata,
parse = parse,
url = url,
pid = pid,
fid = fid,
un = un,
pw = pw,
odkc_version = odkc_version,
retries = retries,
verbose = verbose
)
# get xml representation
frm_xml <- form_xml(
parse = FALSE,
url = url,
pid = pid,
fid = fid,
un = un,
pw = pw,
retries = retries
) %>%
xml2::xml_ns_strip()
### parse translations
all_translations <- xml2::xml_find_all(frm_xml, "//text")
all_translations_ids <- xml2::xml_attr(all_translations, "id")
# initialize dataframe
extension <- data.frame(
path = character(0),
label = character(0),
stringsAsFactors = FALSE
)
### PART 1: parse labels
raw_labels <- xml2::xml_find_all(frm_xml, "//label")
# iterate through labels
for (i in seq_along(raw_labels)) {
## read label
this_rawlabel <- raw_labels[i]
## path
# get ref from parent, without leading "/data"
this_path <- sub(
"/data", "",
xml2::xml_attr(xml2::xml_parent(this_rawlabel), "ref")
)
# ensure this is a valid path
if (!is.na(this_path)) {
# add new empty row:
extension[nrow(extension) + 1, ] <- rep(NA, ncol(extension))
# add path
extension[nrow(extension), "path"] <- this_path
# first check if label is mapped with a translation function
has_translation <- xml2::xml_has_attr(this_rawlabel, "ref")
if (has_translation) {
# find all translations related to this path:
id <- sub(
"')",
"",
sub("jr:itext\\('", "", xml2::xml_attr(this_rawlabel, "ref"))
)
translations <- all_translations[
all_translations_ids == id
]
# iterate through translations
for (j in seq_along(translations)) {
this_translation <- translations[j]
# First check this is a regular text labels.
#
# Questions in ODK can have video, image and audio "labels",
# which will be skipped. This is identified by the presence of
# the 'form' attribute:
is_regular_label <- !xml2::xml_has_attr(
xml2::xml_find_first(this_translation, "./value"), "form"
)
if (is_regular_label) {
# read the parent node to identify language:
translation_parent <- xml2::xml_parent(this_translation)
this_lang <- gsub(" ", "_", tolower(xml2::xml_attr(
translation_parent, "lang"
)))
# decide if 'default' language or specific language
if (this_lang == "default") {
# if 'default' language, save under column 'label':
extension[nrow(extension), "label"] <- xml2::xml_text(
xml2::xml_find_first(this_translation, "./value")
)
} else {
# check if language already exists in the datafram
if (!(paste0("label_", this_lang) %in% colnames(extension))) {
# if not, create new column
extension <- cbind(
extension,
data.frame(
new_lang = rep(NA, nrow(extension))
)
)
colnames(extension)[ncol(extension)] <- paste0(
"label_", this_lang
)
}
# add the first value content of the translation
extension[
nrow(extension),
paste0("label_", this_lang)
] <- xml2::xml_text(
xml2::xml_find_first(this_translation, "./value")
)
}
}
}
} else {
# extract content
extension[nrow(extension), "label"] <- xml2::xml_text(this_rawlabel)
}
### PART 1.1: parse choice labels
## check existence of choice list:
choice_items <- xml2::xml_find_all(
xml2::xml_parent(this_rawlabel), "./item"
)
if (length(choice_items) > 0) {
# check if 'choices' column already exist
if (!("choices" %in% colnames(extension))) {
# if not, create new column
extension <- cbind(extension, data.frame(
choices = rep(NA, nrow(extension))
))
}
# initialize lists
choice_values <- list()
choice_labels <- list()
# iterate through choice list:
for (jj in seq_along(choice_items)) {
## read choice item
this_choiceitem <- choice_items[jj]
# value
this_choicevalue <- xml2::xml_text(
xml2::xml_find_first(this_choiceitem, "./value")
)
choice_values[jj] <- this_choicevalue
# raw label
this_rawchoicelabel <- xml2::xml_find_first(
this_choiceitem, "./label"
)
# first check if choice label is mapped with a translation function
has_translation_choice <- xml2::xml_has_attr(
this_rawchoicelabel, "ref"
)
if (has_translation_choice) {
id_choice <- sub(
"')",
"",
sub(
"jr:itext\\('",
"",
xml2::xml_attr(this_rawchoicelabel, "ref")
)
)
choice_translations <- all_translations[
all_translations_ids == id_choice
]
# iterate through choice translations
for (kk in seq_along(choice_translations)) {
# read translation
this_choicetranslation <- choice_translations[kk]
# first check this is a regular text labels.
# Questions in ODK can have video, image and audio "labels",
# which will be skipped.
# This is identified by the presence of the 'form' attribute:
is_regular_choicelabel <- !xml2::xml_has_attr(
xml2::xml_find_first(this_choicetranslation, "./value"), "form"
)
if (is_regular_choicelabel) {
# read the parent node to identify language:
choice_translation_parent <- xml2::xml_parent(
this_choicetranslation
)
this_choicelang <- gsub(" ", "_", tolower(xml2::xml_attr(
choice_translation_parent, "lang"
)))
# decide if 'default' language or specific language
if (this_choicelang == "default") {
# if 'default' language, save under 'choice':
choice_labels[["base"]][jj] <- xml2::xml_text(
xml2::xml_find_first(this_choicetranslation, "./value")
)
} else {
# check if language already exists in the dataframe
if (!(paste0("choices_", this_choicelang) %in%
colnames(extension))) {
# if not, create new column
extension <- cbind(extension, data.frame(
new_choicelang = rep(NA, nrow(extension))
))
colnames(extension)[ncol(extension)] <- paste0(
"choices_", this_choicelang
)
}
# add the first value content of the translation
choice_labels[[paste0(
"choices_",
this_choicelang
)]][jj] <- xml2::xml_text(
xml2::xml_find_first(
this_choicetranslation, "./value"
)
)
}
}
}
} else {
choice_labels[["base"]][jj] <- xml2::xml_text(this_rawchoicelabel)
}
}
# add to the extended table
for (this_choicelang in names(choice_labels)) {
these_choicelabels <- choice_labels[[this_choicelang]]
if (this_choicelang == "base") {
this_choicelang_colname <- "choices"
} else {
this_choicelang_colname <- this_choicelang
}
extension[nrow(extension), this_choicelang_colname] <- list(list(list(
values = unlist(choice_values),
labels = unlist(these_choicelabels)
)))
}
}
### PART 1.2: parse complex choice labels,
# i.e. in the presence of choice filters
## check existence of choice itemset:
choice_itemset <- xml2::xml_find_all(
xml2::xml_parent(this_rawlabel), "./itemset"
)
if (length(choice_itemset) > 0) {
# identify value node
choicevalue_node <- xml2::xml_attr(
xml2::xml_find_first(choice_itemset, "./value"),
"ref"
)
# identify label node
choicelabel_node <- xml2::xml_attr(
xml2::xml_find_first(choice_itemset, "./label"),
"ref"
)
# check if labels have translations
has_translation_choice <- grepl("jr:itext", choicelabel_node)
if (has_translation_choice) {
# update choicelabel_node
choicelabel_node <- sub(
")",
"",
sub(
"jr:itext\\(",
"",
choicelabel_node
)
)
}
# extract content from the itemset:
choice_nodeset <- xml2::xml_attr(choice_itemset, "nodeset")
choice_nodeset_id <- substr(
choice_nodeset,
regexpr("\\(", choice_nodeset)[1] + 1,
regexpr(")", choice_nodeset)[1] - 1
)
choice_itemset_content <- xml2::xml_find_all(
frm_xml,
paste0(
".//instance[@id=",
choice_nodeset_id,
"]//item"
)
)
# check if 'choices' column already exist
if (!("choices" %in% colnames(extension))) {
# if not, create new column
extension <- cbind(extension, data.frame(
choices = rep(NA, nrow(extension))
))
}
# initialize lists
choice_values <- list()
choice_labels <- list()
# iterate through choice list:
for (jj in seq_along(choice_itemset_content)) {
## read choice item
this_choiceitem <- choice_itemset_content[jj]
# value
this_choicevalue <- xml2::xml_text(
xml2::xml_find_first(this_choiceitem, paste0("./", choicevalue_node))
)
choice_values[jj] <- this_choicevalue
if (has_translation_choice) {
id_choice <- xml2::xml_text(
xml2::xml_find_first(this_choiceitem, paste0("./", choicelabel_node))
)
choice_translations <- all_translations[
all_translations_ids == id_choice
]
# iterate through choice translations
for (kk in seq_along(choice_translations)) {
# read translation
this_choicetranslation <- choice_translations[kk]
# first check this is a regular text labels.
# Questions in ODK can have video, image and audio "labels",
# which will be skipped.
# This is identified by the presence of the 'form' attribute:
is_regular_choicelabel <- !xml2::xml_has_attr(
xml2::xml_find_first(this_choicetranslation, "./value"), "form"
)
if (is_regular_choicelabel) {
# read the parent node to identify language:
choice_translation_parent <- xml2::xml_parent(
this_choicetranslation
)
this_choicelang <- gsub(" ", "_", tolower(xml2::xml_attr(
choice_translation_parent, "lang"
)))
# decide if 'default' language or specific language
if (this_choicelang == "default") {
# if 'default' language, save under 'choice':
choice_labels[["base"]][jj] <- xml2::xml_text(
xml2::xml_find_first(this_choicetranslation, "./value")
)
} else {
# check if language already exists in the dataframe
if (!(paste0("choices_", this_choicelang) %in%
colnames(extension))) {
# if not, create new column
extension <- cbind(extension, data.frame(
new_choicelang = rep(NA, nrow(extension))
))
colnames(extension)[ncol(extension)] <- paste0(
"choices_", this_choicelang
)
}
# add the first value content of the translation
choice_labels[[paste0(
"choices_",
this_choicelang
)]][jj] <- xml2::xml_text(
xml2::xml_find_first(
this_choicetranslation, "./value"
)
)
}
}
}
} else {
choice_labels[["base"]][jj] <- xml2::xml_text(
xml2::xml_find_first(
this_choiceitem,
paste0("./", choicelabel_node)
)
)
}
}
# add to the extended table
for (this_choicelang in names(choice_labels)) {
these_choicelabels <- choice_labels[[this_choicelang]]
if (this_choicelang == "base") {
this_choicelang_colname <- "choices"
} else {
this_choicelang_colname <- this_choicelang
}
extension[nrow(extension), this_choicelang_colname] <- list(list(list(
values = unlist(choice_values),
labels = unlist(these_choicelabels)
)))
}
}
}
}
# join
fs_ext <- frm_schema %>% dplyr::left_join(extension, by = "path")
return(fs_ext)
}
# usethis::use_test("form_schema_ext") # nolint