/
data_import_functions.R
603 lines (547 loc) 路 23.4 KB
/
data_import_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
## Part of the pathviewr package
## Last updated: 2020-09-17 VBB
############################### read_motive_csv ################################
#' Import data from a CSV exported from Optitrack's Motive software
#'
#' \code{read_motive_csv()} is designed to import data from a CSV that has been
#' exported from Optitrack's Motive software. The resultant object is a tibble
#' that additionally has important metadata stored as attributes (see Details).
#'
#' @param file_name A file (or path to file) in CSV format
#' @param file_id (Optional) identifier for this file. If not supplied, this
#' defaults to \code{basename(file_name)}.
#' @param simplify_marker_naming If Markers are encountered, should they be
#' renamed from "Subject:marker" to "marker"? Defaults to TRUE
#' @param ... Additional arguments passed from other \code{pathviewr} functions
#'
#' @details Uses \code{data.table::fread()} to import data from a CSV file and
#' ultimately store it in a tibble. This object is also labeled with the
#' attribute \code{pathviewr_steps} with value \code{viewr} to indicate that it
#' has been imported by \code{pathviewr} and should be friendly towards use with
#' other functions in our package. Additionally, the following metadata are
#' stored in the tibble's attributes: header information from the Motive CSV
#' file (\code{header}), original IDs for each object (\code{Motive_IDs}), the
#' name of each subject in each data column (\code{subject_names_full}) and
#' unique values of subject names (\code{subject_names_simple}), the type of
#' data (rigid body or marker) that appears in each column
#' (\code{data_types_full}) and overall (\code{data_types_simple}), and original
#' data column names in the CSV (\code{d1, d2}). See Example below for example
#' code to inspect attributes.
#'
#' @section Warning:
#' This function was written to read CSVs exported using Motive's Format Version
#' 1.23 and is not guaranteed to work with those from other versions. Please
#' file an Issue on our Github page if you encounter any problems.
#'
#' @return A tibble with numerical data in columns. The first two columns will
#' have frame numbers and time (assumed to be in secs), respectively. Columns 3
#' and beyond will contain the numerical data on the position or rotation of
#' rigid bodies and/or markers that appear in the Motive CSV file. Each row
#' corresponds to the position or rotation of all objects at a given time
#' (frame).
#'
#' @export
#'
#' @author Vikram B. Baliga
#'
#' @family data import functions
#' @seealso \code{\link{read_flydra_mat}} for importing Flydra data
#'
#' @examples
#' library(pathviewr)
#'
#' ## Import the example Motive data included in the package
#' motive_data <-
#' read_motive_csv(system.file("extdata", "pathviewr_motive_example_data.csv",
#' package = 'pathviewr'))
#'
#' ## Names of variables in the resulting tibble
#' names(motive_data)
#'
#' ## A variety of metadata are stored as attributes. Of particular interest:
#' attr(motive_data, "pathviewr_steps")
#' attr(motive_data, "file_id")
#' attr(motive_data, "header")
#' attr(motive_data, "Motive_IDs")
#' attr(motive_data, "subject_names_full")
#' attr(motive_data, "subject_names_simple")
#' attr(motive_data, "motive_data_names")
#' attr(motive_data, "motive_data_types_full")
#' attr(motive_data, "motive_data_types_simple")
#'
#' ## Of course, all attributes can be viewed as a (long) list via:
#' attributes(motive_data)
#'
read_motive_csv <-
function(file_name,
file_id = NA,
simplify_marker_naming = TRUE,
...) {
## Import checks
if (missing(file_name))
stop("A file_name is required")
## Open connection to file for reading in text mode
file_con <- file(file_name, "r")
## Match file_id to file_name if no file_id is supplied
if (is.na(file_id)) file_id <- basename(file_name)
## Get maketime of file (may not be accurate...use with caution!)
mtime <- file.info(file_name)$mtime
## Setup for reading in file
header <- c()
names_line <- c()
data_names_part_one <- c()
data_names_part_two <- c()
data_names <- c()
## Read in header; usually just line 1 but grepl() makes this flexbile
while (!grepl(",Type", (l <- readLines(file_con, 1)))) {
header <- c(header, l)
}
## Split the character vector and make a data.frame of it
header <-
## Chopping out the last entry of header via `length(header)-1` because
## the final line is (usually) empty. Revise here if we see errors!!!
strsplit(header[[1]], ",")[[length(header) - 1]] %>%
data.frame(stringsAsFactors = FALSE)
## Convert the 1-col data frame into a 2-col data frame:
odds <- base::seq_along(header$.) %% 2 == 1
metadata <- header$.[odds] # odd-numbered rows are metadata
value <-
header$.[!odds] # even-numbered rows are corresponding values
header <- data.frame(metadata, value,
stringsAsFactors = FALSE)
## Warn about Motive version
if (!value[1] == "1.23") {
warning(
"This function was written to read CSVs exported using Motive's
Format Version 1.23 and is not guaranteed to work with those from other
versions. Please file an Issue on our Github page if you encounter any
problems.",
call. = FALSE)
}
## The next line (already read and stored into `l`) should contain info
## on the type of data in each column (hopefully either "Rigid Body" or
## "Marker")
type_line <- l
type_vec <- strsplit(type_line[1], ",")[[1]]
types <- type_vec[-c(1, 2)]
#### NOTE!!
## Add a check here for Types. They must be either "Rigid Body" or
## "Marker". If another type is found, the function needs to stop
## and then tell the user that a non-Rigid Body non-Marker type
## was encountered and our software is not designed to handle these
## cases.
## Rigid body & marker names (line 4)
while (!grepl(",ID", (l <- readLines(file_con, 1)))) {
names_line <- c(names_line, l)
}
## Save the line itself for use later
names_vec <- strsplit(names_line[1], ",")[[1]]
## Make a list of the unique rigid bodies and markers
names <- base::unique(names_vec[-c(1, 2)])
## The ID line (typically line 5) is stored in `l` currently
id_line <- l
id_vec <- strsplit(id_line[1], ",")[[1]]
ids <- id_vec[-c(1, 2)]
## Create a data.frame that lays out object IDs, types, and names
ids_types_names <- data.frame(id_vec,
type_vec,
names_vec,
simplify = NA, # container to be filled
subjects = NA, # container to be filled
stringsAsFactors = FALSE
)
## Now add the actual markers' names to the "simplify" column of
## ids_types_names. I'm sure this can be re-written to avoid a
## for() loop, but I'm keeping it like this for now because it
## makes sense to me.
for (i in seq_len(nrow(ids_types_names))) {
## If i-th entry of $type_vec is "Marker"
base::ifelse(
ids_types_names$type_vec[i] == "Marker",
## Then place the marker name in $simplify
ids_types_names$simplify[i] <-
stringr::str_split(ids_types_names$names_vec[i], ":")[[1]][2],
# 2nd half
## Otherwise, copy the contents of $names_vec
ids_types_names$simplify[i] <-
ids_types_names$names_vec[i]
)
}
## Similarly, use the subject ID in subjects column
for (i in seq_len(nrow(ids_types_names))) {
## If i-th entry of $type_vec is not a "Rigid Body
base::ifelse(
ids_types_names$type_vec[i] != "Rigid Body",
## Then place the marker name in $subjects
ids_types_names$subjects[i] <-
stringr::str_split(ids_types_names$names_vec[i], ":")[[1]][1],
# 1st half
## Otherwise, copy the contents of $names_vec
ids_types_names$subjects[i] <-
ids_types_names$names_vec[i]
)
}
## Data names line 6
l <- readLines(file_con, 1)
data_names_part_one <- strsplit(l, ",")[[1]]
## Data names line 7
l <- readLines(file_con, 1)
data_names_part_two <- strsplit(l, ",")[[1]]
## Make variables same length
## Often arises when final column is "Marker Quality" in line 6 with
## nothing in line 7
if (length(data_names_part_two) == length(data_names_part_one) - 1) {
data_names_part_two <-
c(data_names_part_two, "") # add blank character
} else {
data_names_part_two <- data_names_part_two # keep the same
}
## Handle choice of simplify_marker_naming = TRUE or FALSE
if (simplify_marker_naming == TRUE) {
marker_id_line <- ids_types_names$simplify
} else {
marker_id_line <- names_vec
}
## Make data frame to help with data names
data_names_frame <- data.frame(v1 = marker_id_line,
v2 = data_names_part_one,
v3 = data_names_part_two,
stringsAsFactors = FALSE)
## v2 (Position) seems useless, but I'm keeping it in for now and ignoring
## it in the next step
## Stitch it together
for (i in seq_len(nrow(data_names_frame))) {
data_names[[i]] <- paste0(data_names_frame[i, 1],
"_",
data_names_frame[i, 2],
"_",
data_names_frame[i, 3])
}
## Some hacky cleanup
## Not ideal but it works
data_names <- sub("__Frame", "frame", data_names,
ignore.case = FALSE)
data_names <- sub(" ", "_", data_names,
ignore.case = FALSE)
data_names <- sub("Name__Time_\\(Seconds\\)", "time_sec", data_names,
ignore.case = FALSE)
## We'd like to snake_case-ify names of variables, but I currently am
## averse to altering the names of subjects, too. So I will opt to
## be inconsistent here re:converting to lower case for the moment.
## Should we opt to extend snake_case-ing to subjects, I expect
## most of what follows can be written with code that's more efficient
data_names <- sub("Mean Marker Error_", "mean_marker_error", data_names,
ignore.case = FALSE)
data_names <- sub("Mean_Marker Error_", "mean_marker_error", data_names,
ignore.case = FALSE)
data_names <- sub("Position_X", "position_x", data_names,
ignore.case = FALSE)
data_names <- sub("Position_Y", "position_y", data_names,
ignore.case = FALSE)
data_names <- sub("Position_Z", "position_z", data_names,
ignore.case = FALSE)
data_names <- sub("Rotation_X", "rotation_x", data_names,
ignore.case = FALSE)
data_names <- sub("Rotation_Y", "rotation_y", data_names,
ignore.case = FALSE)
data_names <- sub("Rotation_Z", "rotation_z", data_names,
ignore.case = FALSE)
data_names <- sub("Rotation_W", "rotation_w", data_names,
ignore.case = FALSE)
## Read in data
dataz <- data.table::fread(
text = readLines(file_con),
header = FALSE,
sep = ",",
dec = ".",
stringsAsFactors = FALSE
)
## Quickly check for non-numerics and warn if found
if (any(!apply(dataz, 2, is.numeric))) {
warning("The csv file includes non-numeric data.
\nPlease ensure that this is intentional before proceeding.")
}
## Rename columns
colnames(dataz) <- data_names
## Be neat and close up the file connection
close(file_con)
## Make the object (a tibble)
data <- tibble::as_tibble(dataz)
## Add metadata as attributes()
attr(data, "pathviewr_steps") <- "viewr"
attr(data, "file_id") <- file_id
attr(data, "file_mtime") <- mtime
attr(data, "frame_rate") <- header$value[5] %>% as.numeric()
attr(data, "header") <- header
attr(data, "Motive_IDs") <- ids
attr(data, "subject_names_full") <- names_vec[-c(1, 2)]
attr(data, "subject_names_simple") <- names
attr(data, "data_names") <- data_names
attr(data, "data_types_full") <- types
attr(data, "data_types_simple") <- base::unique(types)
attr(data, "d1") <- data_names_part_one
attr(data, "d2") <- data_names_part_two
attr(data, "import_method") <- "motive"
## Export
return(data)
}
################################# read_flydra_mat #############################
## Time is now encoded as a function of frame_rate and the specific labeling
## of frames within the imported flydra object
#' Import data from a MAT file exported from Flydra software
#'
#' \code{read_flydra_mat()} is designed to import data from a \code{.mat} file
#' that has been exported from Flydra software. The resultant object is a tibble
#' that additionally has important metadata stored as attributes (see Details).
#'
#' @param mat_file A file (or path to file) in .mat format, exported from Flydra
#' @param file_id (Optional) identifier for this file. If not supplied, this
#' defaults to \code{basename(file_name)}.
#' @param subject_name Name that will be assigned to the subject
#' @param frame_rate The capture frame rate of the session
#' @param ... Additional arguments that may be passed from other pathviewr
#' functions
#'
#' @return A tibble with numerical data in columns. The first two columns will
#' have frame numbers and time (assumed to be in secs), respectively. Columns
#' 3 through 5 will contain position data. Note that unlike the behavior of
#' \code{read_motive_csv()} this function produces "tidy" data that have
#' already been gathered into key-value pairs based on subject.
#'
#' @author Vikram B. Baliga
#'
#' @family data import functions
#'
#' @seealso \code{\link{read_motive_csv}} for importing Motive data
#'
#' @export
#' @examples
#' library(pathviewr)
#'
#' ## Import the example Flydra data included in the package
#' flydra_data <-
#' read_flydra_mat(system.file("extdata", "pathviewr_flydra_example_data.mat",
#' package = 'pathviewr'),
#' subject_name = "birdie_wooster")
#'
#' ## Names of variables in the resulting tibble
#' names(flydra_data)
#'
#' ## A variety of metadata are stored as attributes. Of particular interest:
#' attr(flydra_data, "pathviewr_steps")
read_flydra_mat <-
function(mat_file,
file_id = NA,
subject_name,
frame_rate = 100, ## in Hz
...) {
## Import checks
if (missing(mat_file))
stop("A mat_file is required")
## For now, we will assume that only one subject (one individual
## hummingbird) is present in the data. Since these subject names were not
## stored in the flydra data or accompanying H5 files (as far as I can see)
## this will need to be supplied by the user
if (missing(subject_name))
stop("A subject_name is required")
## Match file_id to mat_file if no file_id is supplied
if (is.na(file_id)) file_id <-
basename(mat_file)
## Get maketime of file (may not be accurate...use with caution!)
mtime <-
file.info(mat_file)$mtime
## Read the MAT file via R.matlab::readMat()
mat_read <-
R.matlab::readMat(mat_file)
## The data we'd like to tibble-ize is spread across various components
## of the list. We need to put it together manually.
## First extract the kalmanized componenets
kalm_tib <-
tibble::tibble(
kalman.frame = mat_read$kalman.frame,
kalman.x = mat_read$kalman.x,
kalman.y = mat_read$kalman.y,
kalman.z = mat_read$kalman.z,
kalman.xvel = mat_read$kalman.xvel,
kalman.yvel = mat_read$kalman.yvel,
kalman.zvel = mat_read$kalman.zvel,
) %>%
## For some reason, frames may be out of order
## So sort this tibble by kalman.frame
dplyr::arrange(by = kalman.frame)
## There may be duplication of frames or rows entirely.
## These duplications should be removed via distinct()
kalm_distinct <-
dplyr::distinct(kalm_tib, NAME = kalman.frame, .keep_all = TRUE)
## Now extract all the frames
framez <- kalm_distinct$kalman.frame
frame_tib <- tibble::tibble(frame = framez)
## Because frames may have been dropped, we will first generate a
## full sequence that begins and ends on the min and max frames.
## Then we'll generate time stamps that correspond to those frames.
## Finally, we'll drop time stamps for any frames that were dropped
## in the exported matlab file.
frame_first <- min(framez)
frame_last <- max(framez)
frame_seq <- seq(from = frame_first, to = frame_last, by = 1)
time_interval <- 1 / frame_rate
## We'll set time to start at 0 and then increase by time_interval until
## it hits the same vector length as length(frame_seq)
time_seq <- seq(from = 0,
by = time_interval,
length.out = length(frame_seq))
## Now combine
frame_time_seqs <- tibble::tibble(frame = frame_seq,
time = time_seq)
## Need to join it back to frame_tib in case frame ordering shifts
## or is duplicated within the data
joined_frame_time_seq <-
dplyr::left_join(frame_tib, frame_time_seqs,
by = "frame")
## Now put the data together
data <-
tibble::tibble(
## Using kalman frame instead of observed frame
frame = as.numeric(frame_tib$frame),
time_sec = as.numeric(joined_frame_time_seq$time),
subject = subject_name,
position_length = as.numeric(kalm_distinct$kalman.x),
position_width = as.numeric(kalm_distinct$kalman.y),
position_height = as.numeric(kalm_distinct$kalman.z),
velocity = as.numeric(
sqrt((kalm_distinct$kalman.xvel ^ 2) +
(kalm_distinct$kalman.yvel ^ 2) +
(kalm_distinct$kalman.zvel ^ 2))
),
length_inst_vel = as.numeric(kalm_distinct$kalman.xvel),
width_inst_vel = as.numeric(kalm_distinct$kalman.yvel),
height_inst_vel = as.numeric(kalm_distinct$kalman.zvel)
)
## Add metadata as attributes()
attr(data, "pathviewr_steps") <-
c("viewr", "renamed_tunnel", "gathered_tunnel")
## Adding "renamed_tunnel" and "gathered" because axes are renamed as the
## tibble is being created above and we are basically already in gathered
## format.
attr(data, "file_id") <- file_id
attr(data, "file_mtime") <- mtime
attr(data, "frame_rate") <- frame_rate
## Re-enable the following line in the future if you would like to store
## the original matlab file as an attribute:
# attr(data, "flydra_mat") <- mat_read
attr(data, "header") <- attr(mat_read, "header")
attr(data, "import_method") <- "flydra"
## Export
return(data)
}
################################### as_viewr ###################################
#' Convert data from another format into a viewr object
#'
#' Should you have data from a non-Motive, non-Flydra source, this function can
#' be used to ensure your data are put into the right format to work with other
#' pathviewr functions.
#'
#' @param obj_name A tibble or data frame containing movement trajectories
#' @param frame_rate Must be a single numeric value indicating capture frame
#' rate in frames per second.
#' @param frame_col Column number of obj_name that contains frame numbers
#' @param time_col Column number of obj_name that contains time (must be in
#' seconds)
#' @param subject_col Column number of obj_name that contains subject name(s)
#' @param position_length_col Column number of obj_name that contains
#' length-axis position values
#' @param position_width_col Column number of obj_name that contains width-axis
#' position values
#' @param position_height_col Column number of obj_name that contains
#' height-axis position values
#' @param include_rotation Are rotation data included? Defaults to FALSE
#' @param rotation_real_col Column number of obj_name that contains the "real"
#' axis of quaternion rotation data
#' @param rotation_length_col Column number of obj_name that contains the length
#' axis of quaternion rotation data
#' @param rotation_width_col Column number of obj_name that contains the width
#' axis of quaternion rotation data
#' @param rotation_height_col Column number of obj_name that contains the height
#' axis of quaternion rotation data
#'
#' @return A tibble that is organized to be compliant with other
#' \code{pathviewr} functions and that contains the attributes
#' \code{pathviewr_steps} with entries set to \code{c("viewr",
#' "renamed_tunnel", "gathered_tunnel")}
#'
#' @export
#'
#' @family data import functions
#'
#' @author Vikram B. Baliga
#'
#' @examples
#'
#' ## Create a dummy data frame with simulated (nonsense) data
#' df <- data.frame(frame = seq(1, 100, by = 1),
#' time_sec = seq(0, by = 0.01, length.out = 100),
#' subject = "birdie_sanders",
#' z = rnorm(100),
#' x = rnorm(100),
#' y = rnorm(100))
#'
#' ## Use as_viewr() to convert it into a viewr object
#' test <-
#' as_viewr(
#' df,
#' frame_rate = 100,
#' frame_col = 1,
#' time_col = 2,
#' subject_col = 3,
#' position_length_col = 5,
#' position_width_col = 6,
#' position_height_col = 4
#' )
as_viewr <- function(obj_name,
frame_rate = 100,
frame_col,
time_col,
subject_col,
position_length_col,
position_width_col,
position_height_col,
include_rotation = FALSE,
rotation_real_col,
rotation_length_col,
rotation_width_col,
rotation_height_col
) {
## Check that obj_name is a tibble or data.frame
if (!tibble::is_tibble(obj_name))
if (!is.data.frame(obj_name))
stop("A tibble or data.frame must be supplied to the obj_name argument.")
## Extract each variable
data <- data.frame(
frame = obj_name[, frame_col],
time_sec = obj_name[, time_col],
subject = obj_name[, subject_col],
position_length = obj_name[, position_length_col],
position_width = obj_name[, position_width_col],
position_height = obj_name[, position_height_col]
)
colnames(data) <- c("frame", "time_sec", "subject",
"position_length", "position_width", "position_height")
data <- tibble::tibble(data)
## Optional arguments, depending on data
if (include_rotation == TRUE) {
data$rotation_real <- obj_name[, rotation_real_col]
data$rotation_length <- obj_name[, rotation_length_col]
data$rotation_width <- obj_name[, rotation_width_col]
data$rotation_height <- obj_name[, rotation_height_col]
}
## Add metadata as attributes()
attr(data, "pathviewr_steps") <-
c("viewr", "renamed_tunnel", "gathered_tunnel")
## Adding "renamed_tunnel" and "gathered" because axes are renamed as the
## tibble is being created above and we are basically already in gathered
## format.
attr(data, "file_id") <- deparse(quote(obj_name))
attr(data, "frame_rate") <- frame_rate
attr(data, "import_method") <- "as_viewr"
return(data)
}