Skip to content

Commit

Permalink
ENH: area2catena()/prof_class(): added possibility to export/import c…
Browse files Browse the repository at this point in the history
…ompressed RData files as catena_out/catena_file to speed up writing/reading operations
  • Loading branch information
tpilz committed Jul 20, 2018
1 parent 3fc4c67 commit 24e281c
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 20 deletions.
25 changes: 18 additions & 7 deletions R/area2catena.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@
#' @param dir_out Character string specifying output directory (will be created;
#' nothing will be overwritten).
#' @param catena_out Output: Name of output file containing mean catena information
#' as input for \code{\link[lumpR]{prof_class}}.
#' as input for \code{\link[lumpR]{prof_class}}. To save computation time and
#' memory in cases of large catchments and/or high resolution, specify a file
#' name with ending \code{*.RData}. In such a case, a compressed RData file will be
#' written instead of a text file.
#' @param catena_head_out Output: Name of output header file containing meta-information
#' as input for \code{\link[lumpR]{prof_class}}; manual adjustment necessary.
#' @param ridge_thresh Integer specifying threshold of flow accumulation, below
Expand Down Expand Up @@ -74,7 +77,13 @@
#' \bold{IMPORTANT:} Herein, when specifying the GRASS input maps, please do
#' explicitly refer to the mapset if it is different from the mapset given in
#' initGRASS() (even PERMANENT!), as otherwise internally used readRAST() command
#' resulted in errors under Windows.
#' resulted in errors under Windows.
#'
#' In case of \bold{long computation times or memory issues}, try \code{plot_catena = FALSE}
#' and specify an RData file as \code{catena_out}. Furthermore, make sure your
#' raster maps are not overly large (i.e. containing lots of NULL values around
#' the region of interest) and are of type \code{CELL} (i.e. integer) where it makes
#' sense (check with \code{r.info} in GRASS).
#'
#' GUIs such as RStudio may not produce some runtime messages (within parallel
#' foreach loop).
Expand Down Expand Up @@ -395,16 +404,18 @@ area2catena <- function(



# write output
# write output #---------------------------------------------------------------
#out_pre <- mapply(logdata[,c(3:length(logdata))], FUN=function(x) formatC(x, format="f", digits=3))
#out_fmt <- cbind(logdata[,c(1,2)], out_pre)
#format output to reasonable number of digits
logdata <- round(logdata,3)

write.table(logdata, paste(dir_out,catena_out, sep="/"), col.names=F, row.names=F, quote=F, sep="\t")

if(grepl(".RData$", catena_out)) {
save(logdata, file=paste(dir_out,catena_out, sep="/"))
} else {
write.table(logdata, paste(dir_out,catena_out, sep="/"), col.names=F, row.names=F, quote=F, sep="\t")
}

# write output #---------------------------------------------------------------
# write header file
write("#This file works as a header to the output of area2catena. Don't add additional headerlines.",
file=paste(dir_out,catena_head_out, sep="/"), append=F)
Expand Down Expand Up @@ -435,7 +446,7 @@ area2catena <- function(
#Generate output files for reclassification (input class-IDs vs. internally used IDs)
#(area2catena creates continuous class numbering; restoring the orginal classes will require these files)

if (grass_files | "svc" %in% supp_qual) {
if (grass_files | any(grepl("svc", supp_qual))) {
for (i in supp_qual) {
write(c("new_id", "original_id"),
file=paste(dir_out, "/reclass_", i, ".txt", sep=""), ncolumns=2, append=F, sep="\t")
Expand Down
38 changes: 28 additions & 10 deletions R/prof_class.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@
#' Default: \code{FALSE}.
#' @param plot_silhouette \code{logical}. Shall a silhouette plot (illustrating the clustering
#' process) be generated? Consumes much memory and processing time and should be disabled,
#' if a memory error is thrown. Default: \code{TRUE}.
#' if a memory error is thrown. Will be \code{FALSE} if \code{make_plots = FALSE}.
#' Default: \code{TRUE}.
#'
#' @return Function returns nothing. Output files are written into output directory
#' as specified in arguments.
Expand All @@ -85,6 +86,9 @@
#' as each landscape unit refers to the representative EHA. The gaps can be filled
#' with GRASS function \code{r.grow}.
#'
#' In case of \bold{long computation times or memory issues}, try \code{make_plots = FALSE}
#' and specify an RData file as \code{catena_file} (already in \code{\link[lumpR]{area2catena}}).
#'
#' @details This function first resamples the catenas derived from \code{\link[lumpR]{area2catena}}
#' to a common length (\code{com_length} or the median number of support points
#' of all catenas but not more than \code{max_com_length}). Second, k-means clustering
Expand Down Expand Up @@ -254,9 +258,13 @@ prof_class <- function(
# com_length <- -1

# load standard catena data
stats <- scan(catena_file, nlines = 1, what=numeric(), sep = "\t", quiet = TRUE) #read first line only
stats <- read.table(file = catena_file, colClasses = c("numeric", rep("NULL", length(stats)-1)), sep = "\t") #read first column only

if(grepl(".RData$", catena_file)) {
load(catena_file)
stats <- logdata[,1,drop=F]
} else {
stats <- scan(catena_file, nlines = 1, what=numeric(), sep = "\t", quiet = TRUE) #read first line only
stats <- read.table(file = catena_file, colClasses = c("numeric", rep("NULL", length(stats)-1)), sep = "\t") #read first column only
}

profpoints <- table(stats[,1]) #count number of points of each catena
p_id_unique = unique(stats[,1]) #get unique IDs
Expand Down Expand Up @@ -310,7 +318,8 @@ prof_class <- function(
main="Original catenas", xlab="horizontal length [m]", ylab="elevation [m]")
}
#read and resample profiles (done at the same time to avoid duplicates in memory)
testcon <- file(catena_file,open="r")
# TODO: This mess needs to be improved!
if(!grepl(".RData$", catena_file)) testcon <- file(catena_file,open="r")
if(!silent) #for printing progress indicator
pb <- txtProgressBar(min = 0, max = length(p_id_unique), style = 3)

Expand All @@ -326,10 +335,14 @@ prof_class <- function(
p_pos = which(names(profpoints)==cur_p_id) #position of current profile in list

skiplines = sum(profpoints[(total_read+1) : p_pos]) - profpoints[p_pos] #compute number of lines to skip to reach the next selected profile
tt = readLines(con = testcon, n = skiplines)

tt = scan(file=testcon, what=numeric(), nlines = profpoints[p_pos], quiet = TRUE) #read single profile
tt = matrix(tt, nrow = c(profpoints[p_pos]), byrow = TRUE) #reshape matrix
if(grepl(".RData$", catena_file)) {
tt <- as.matrix(logdata[which(logdata[,1] == cur_p_id),])
} else {
tt = readLines(con = testcon, n = skiplines)
tt = scan(file=testcon, what=numeric(), nlines = profpoints[p_pos], quiet = TRUE) #read single profile
tt = matrix(tt, nrow = c(profpoints[p_pos]), byrow = TRUE) #reshape matrix
}

total_read = p_pos #we are now just at the desired profile

if (any(tt[,1]!= cur_p_id)) stop(paste0("Format error in ",catena_file))
Expand Down Expand Up @@ -385,9 +398,14 @@ prof_class <- function(
if(!silent) # close progress bar
close(pb)

close(testcon)
if(grepl(".RData$", catena_file)) {
rm(logdata)
} else {
close(testcon)
}
# remove not needed objects to save memory
rm(list = c("p_supp", "p_resampled", "tt"))
gc(verbose = F);gc(verbose = F)

#save(file="debug.RData", list = ls(all.names = TRUE)) #for debugging only

Expand Down
13 changes: 11 additions & 2 deletions man/area2catena.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/prof_class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 24e281c

Please sign in to comment.