Skip to content

Commit

Permalink
Fix .cosoilmoist_prep() RE: #241, #242, #253
Browse files Browse the repository at this point in the history
  • Loading branch information
brownag committed Jun 29, 2022
1 parent 529aa5c commit 732255b
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 42 deletions.
37 changes: 29 additions & 8 deletions R/uncode.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,37 @@ uncode <- function(df,

if (!invert) {
# replace values with ChoiceName, try filling NA with replace based on ChoiceLabel
nc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[name_col]])
lc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[label_col]])
nc[is.na(nc)] <- lc[is.na(nc)]
df[[i]] <- nc
} else {
# do not explicitly set `levels` if none of the values in value_col (numeric) are present
if (any(df[[i]] %in% sub[[value_col]])) {
nc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[name_col]])
lc <- factor(df[[i]], levels = sub[[value_col]], labels = sub[[label_col]])
if (all(is.na(nc))) {
df[[i]] <- lc
} else {
nc[is.na(nc)] <- lc[is.na(nc)]
df[[i]] <- nc
}
df[[i]] <- nc
} else {
nc <- factor(df[[i]], levels = sub[[name_col]], labels = sub[[name_col]])
lc <- factor(df[[i]], levels = sub[[label_col]], labels = sub[[label_col]])
if (all(is.na(nc))) {
df[[i]] <- lc
} else {
nc[is.na(nc)] <- lc[is.na(nc)]
df[[i]] <- nc
}
}
} else if (invert) {
# replace values with ChoiceName, try filling NA with replace based on ChoiceLabel
nc <- factor(df[[i]], levels = sub[[name_col]], labels = sub[[value_col]])
lc <- factor(df[[i]], levels = sub[[label_col]], labels = sub[[value_col]])
nc[is.na(nc)] <- lc[is.na(nc)]
df[[i]] <- nc
if (all(is.na(nc))) {
df[[i]] <- lc
} else {
nc[is.na(nc)] <- lc[is.na(nc)]
df[[i]] <- nc
}
}
}

Expand Down Expand Up @@ -219,7 +240,7 @@ get_NASIS_metadata <- function(dsn = NULL) {
#' Get NASIS metadata entries for specific domains or choices
#'
#' @param x character vector to match in NASIS metadata
#' @param what Column to match `x` against. Default "ColumnPhysicalName"; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"`
#' @param what Column to match `x` against. Default `"ColumnPhysicalName"`; alternate options include `"DomainID"`, `"DomainName"`, `"DomainRanked"`, `"DisplayLabel"`, `"ChoiceSequence"`, `"ChoiceValue"`, `"ChoiceName"`, `"ChoiceLabel"`, `"ChoiceObsolete"`, `"ChoiceDescription"`, `"ColumnLogicalName"`
#' @return a `data.frame` containing selected NASIS metadata sorted first on `DomainID` and then on `ChoiceSequence`
#' @export
#' @rdname get_NASIS_metadata
Expand Down
69 changes: 35 additions & 34 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -734,47 +734,48 @@
orig_names <- names(df)

# relabel names
names(df) <- gsub("^soimoist", "", names(df))
old_names <- "stat"
new_names <- "status"
names(df)[names(df) %in% old_names] <- new_names

# names(df) <- gsub("^soimoist", "", names(df))
# old_names <- "stat"
# new_names <- "status"
# names(df)[names(df) %in% old_names] <- new_names

# setting frequency levels and order

# NOTE: the next block of code require factor levels be set, regardless of package options
# NOTE: the SDA domains for flooding and ponding have different levels, and "Common" is obsolete
# TODO: replace with ordering derived from NASIS ChoiceSequence
flod_lev <- factor(df$flodfreqcl, levels = c("None", "Very rare", "Rare", "Occasional", "Common", "Frequent", "Very frequent"))
pond_lev <- factor(df$pondfreqcl, levels = c("None", "Rare", "Occasional", "Common", "Frequent"))

# .:. ordering implied in get_NASIS_column_metadata result from NASIS ChoiceSequence
flod_lev <- factor(df$flodfreqcl, levels = get_NASIS_column_metadata("flodfreqcl")$ChoiceLabel)
pond_lev <- factor(df$pondfreqcl, levels = get_NASIS_column_metadata("pondfreqcl")$ChoiceLabel)
mois_lev <- factor(df$soimoiststat, levels = get_NASIS_column_metadata("soimoiststat")$ChoiceLabel)

# impute NA freqcl values, default = "not populated"
if (impute == TRUE) {
if (impute) {

missing <- "Not populated"
lev_flodfreqcl <- c(missing, levels(df$flodfreqcl))
lev_pondfreqcl <- c(missing, levels(df$pondfreqcl))
lev_status <- c(missing, levels(df$status))

df <- within(df, {
# replace NULL RV depths with 201 cm if pondfreqcl or flodqcl is not NULL
dept_r[is.na(dept_r) & (!is.na(pondfreqcl) | !is.na(flodfreqcl))] = 201
depb_r[is.na(depb_r) & (!is.na(pondfreqcl) | !is.na(flodfreqcl))] = 201

# replace NULL L and H depths with the RV
dept_l = ifelse(is.na(dept_l), dept_r, dept_l)
dept_h = ifelse(is.na(dept_h), dept_r, dept_h)

depb_l = ifelse(is.na(depb_l), depb_r, depb_l)
depb_h = ifelse(is.na(depb_h), depb_r, depb_h)

# replace NULL freqcl with "Not_Populated"
status = factor(levels(status)[status], levels = lev_status)
flodfreqcl = factor(levels(flodfreqcl)[flodfreqcl], levels = lev_flodfreqcl)
pondfreqcl = factor(levels(pondfreqcl)[pondfreqcl], levels = lev_flodfreqcl)

status[is.na(status)] <- missing
flodfreqcl[is.na(flodfreqcl)] <- missing
pondfreqcl[is.na(pondfreqcl)] <- missing
})
lev_flodfreqcl <- c(missing, levels(flod_lev))
lev_pondfreqcl <- c(missing, levels(pond_lev))
lev_status <- c(missing, levels(mois_lev))

# replace NULL RV depths with 201 cm if pondfreqcl or flodqcl is not NULL
df$soimoistdept_r[is.na(df$soimoistdept_r) & (!is.na(df$pondfreqcl) | !is.na(df$flodfreqcl))] <- 201
df$soimoistdepb_r[is.na(df$soimoistdepb_r) & (!is.na(df$pondfreqcl) | !is.na(df$flodfreqcl))] <- 201

# replace NULL L and H depths with the RV
df$soimoistdept_l <- ifelse(is.na(df$soimoistdept_l), df$soimoistdept_r, df$soimoistdept_l)
df$soimoistdept_h <- ifelse(is.na(df$soimoistdept_h), df$soimoistdept_r, df$soimoistdept_h)
df$soimoistdepb_l <- ifelse(is.na(df$soimoistdepb_l), df$soimoistdepb_r, df$soimoistdepb_l)
df$soimoistdepb_h <- ifelse(is.na(df$soimoistdepb_h), df$soimoistdepb_r, df$soimoistdepb_h)

# relevel factors with "Not populated" as first level
df$soimoiststat <- factor(as.character(df$soimoiststat), levels = lev_status)
df$flodfreqcl <- factor(as.character(df$flodfreqcl), levels = lev_flodfreqcl)
df$pondfreqcl <- factor(as.character(df$pondfreqcl), levels = lev_flodfreqcl)

# replace NULL moist state and frequency class with "Not populated"
df$soimoiststat[is.na(df$soimoiststat)] <- missing
df$flodfreqcl[is.na(df$flodfreqcl)] <- missing
df$pondfreqcl[is.na(df$pondfreqcl)] <- missing
}

# convert factors to strings
Expand Down

0 comments on commit 732255b

Please sign in to comment.