added several variable functions and a probs function to generate ran…

…om probabilities.
trinker · Apr 17, 2015 · 32f2be6 · 32f2be6
1 parent 984a0aa
commit 32f2be6
Show file tree

Hide file tree

Showing 56 changed files with 1,350 additions and 358 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -21,15 +21,19 @@ Collate:
     'utils.R'
     'r_sample.R'
     'age.R'
+    'r_sample_factor.R'
+    'animal.R'
     'r_sample_binary.R'
     'answer.R'
     'area.R'
     'coin.R'
+    'color.R'
     'r_sample_logical.R'
     'death.R'
     'dice.R'
     'dna.R'
     'dummy.R'
+    'education.R'
     'grade.R'
     'grade_level.R'
     'group.R'
@@ -44,17 +48,21 @@ Collate:
     'likert.R'
     'lorem_ipsum.R'
     'marital.R'
+    'month.R'
+    'wakefield-package.R'
+    'r_sample_replace.R'
+    'name.R'
     'political.R'
+    'probs.R'
     'r_data.R'
     'r_data_frame.R'
     'r_list.R'
-    'r_sample_factor.R'
     'r_sample_integer.R'
     'race.R'
     'sat.R'
+    'sentence.R'
     'sex.R'
     'string.R'
     'upper.R'
     'valid.R'
     'varname.R'
-    'wakefield-package.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,14 +2,17 @@
 
 S3method(print,variable)
 export(age)
+export(animal)
 export(answer)
 export(area)
 export(coin)
+export(color)
 export(death)
 export(dice)
 export(died)
 export(dna)
 export(dummy)
+export(education)
 export(gpa)
 export(grade)
 export(grade_letter)
@@ -31,10 +34,15 @@ export(lorem_ipsum)
 export(lower)
 export(lower_factor)
 export(marital)
+export(month)
+export(name)
 export(normal)
 export(normal_round)
 export(paragraph)
+export(pet)
 export(political)
+export(primary)
+export(probs)
 export(r_data)
 export(r_data_frame)
 export(r_list)
@@ -45,8 +53,10 @@ export(r_sample_factor)
 export(r_sample_integer)
 export(r_sample_logical)
 export(r_sample_ordered)
+export(r_sample_replace)
 export(race)
 export(sat)
+export(sentence)
 export(sex)
 export(string)
 export(upper)

diff --git a/R/animal.R b/R/animal.R
@@ -0,0 +1,53 @@
+#' Generate Random Vector of animals
+#'
+#' \code{animal} - Generate a random vector of animals.
+#'
+#' @inheritParams color
+#' @return Returns a random factor vector of animal elements.
+#' @keywords animal pet
+#' @export
+#' @rdname animal
+#' @include utils.R r_sample_factor.R
+#' @family variable functions
+#' @examples
+#' animal(10)
+#' pie(table(animal(10000)))
+#'
+#' pet(10)
+#' pie(table(pet(10000)))
+animal <- function(n, k = 10, x = wakefield::animal_list, prob = NULL, name = "Animal"){
+
+    stopifnot(k < length(x) || k > 0)
+    stopifnot(length(prob) != k)
+
+    out <- sample(x = lvls <- gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", sample(x, k),
+        perl=TRUE), size = n, replace = TRUE, prob = prob)
+    out <- factor(out, levels = lvls)
+    varname(out, name)
+
+}
+
+#' Generate Random Vector of animals
+#'
+#' \code{pet} - Generate a random vector of pets.
+#'
+#' @details The household pets and probabilities:
+#'
+#' \tabular{ll}{
+#'   Dog   \tab 36.5 \%\cr
+#'   Cat   \tab 30.4 \%\cr
+#'   None  \tab 25.8 \%\cr
+#'   Bird  \tab 3.1 \% \cr
+#'   Horse \tab 1.5 \% \cr
+#' }
+#'
+#' @export
+#' @rdname animal
+pet <- hijack(r_sample_factor,
+    name = "Pet",
+    x = c("Dog", "Cat", "None", "Bird", "Horse"),
+    prob = c(.365, .304, .258, .031, .015)
+)
+
+
+
diff --git a/R/color.R b/R/color.R
@@ -0,0 +1,52 @@
+#' Generate Random Vector of Colors
+#'
+#' \code{color} - Generate a random vector of colors (sampled from \code{colors()}).
+#'
+#' @param n The number elements to generate.  This can be globally set within
+#' the environment of \code{r_data_frame} or \code{r_list}.
+#' @param k The number of the elements of x to sample from (uses \code{sample(x, k)}).
+#' @param x A vector of elements to chose from.
+#' @param prob A vector of probabilities to chose from.
+#' @param name The name to assign to the output vector's \code{varname}
+#' attribute.  This is used to auto assign names to the column/vector name when
+#' used inside of \code{r_data_frame} or \code{r_list}.
+#' @return Returns a random factor vector of color elements.
+#' @keywords color
+#' @export
+#' @rdname color
+#' @include utils.R r_sample_factor.R
+#' @family variable functions
+#' @examples
+#' color(10)
+#' pie(tab <- table(color(10000)), col = names(tab))
+#'
+#' primary(10)
+#' pie(tab <- table(primary(10000)), col = names(tab))
+color <- function(n, k = 10, x = colors(), prob = NULL, name = "Color"){
+
+    stopifnot(k < length(x) || k > 0)
+    stopifnot(length(prob) != k)
+
+    out <- sample(x = lvls <- gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", sample(x, k),
+        perl=TRUE), size = n, replace = TRUE, prob = prob)
+    out <- factor(out, levels = lvls)
+    varname(out, name)
+
+}
+
+#' Generate Random Vector of Colors
+#'
+#' \code{color} - Generate a random vector of
+#' \href{http://en.wikipedia.org/wiki/Primary_color}{\emph{psycological primary}}
+#' colors (sampled from \code{colors()}).
+#'
+#' @export
+#' @rdname color
+primary <- hijack(r_sample_factor,
+    name = "Color",
+    x = c("Red", "Green", "Blue", "Yellow", "Black", "White")
+)
+
+
+
+
diff --git a/R/education.R b/R/education.R
@@ -0,0 +1,70 @@
+#' Generate Random Vector of Educational Attainment Level
+#'
+#' Generate a random vector of educational attainment level.
+#'
+#' @details The educational attainments and probabilities used match approximate
+#' U.S. educational attainment make-up (\url{http://www.census.gov}):
+#'
+#' \tabular{lr}{
+#'   \bold{ Highest Attainment}        \tab \bold{Percent} \cr
+#'   No Schooling Completed                   \tab 1.3 \% \cr
+#'   Nursery School to 8th Grade              \tab 5 \%   \cr
+#'   9th Grade to 12th Grade, No Diploma      \tab 8.5 \% \cr
+#'   Regular High School Diploma              \tab 24.6 \%\cr
+#'   GED or Alternative Credential            \tab 3.9 \% \cr
+#'   Some College, Less than 1 Year           \tab 6.4 \% \cr
+#'   Some College, 1 or More Years, No Degree \tab 15 \%  \cr
+#'   Associate's Degree                       \tab 7.5 \% \cr
+#'   Bachelor's Degree                        \tab 17.6 \%\cr
+#'   Master's Degree                          \tab 7.2 \% \cr
+#'   Professional School Degree               \tab 1.9 \% \cr
+#'   Doctorate Degree                         \tab 1.2 \% \cr
+#' }
+#'
+#' @inheritParams r_sample_factor
+#' @return Returns a random vector of educational attainment level elements.
+#' @keywords education
+#' @export
+#' @include utils.R r_sample.R
+#' @references \url{http://www.census.gov}
+#' @family variable functions
+#' @examples
+#' education(10)
+#' pie(table(education(10000)))
+education <- hijack(r_sample,
+    name = "Education",
+    x = c("No Schooling Completed", "Nursery School to 8th Grade", "9th Grade to 12th Grade, No Diploma",
+        "Regular High School Diploma", "GED or Alternative Credential",
+        "Some College, Less than 1 Year", "Some College, 1 or More Years, No Degree",
+        "Associate's Degree", "Bachelor's Degree", "Master's Degree",
+        "Professional School Degree", "Doctorate Degree"),
+    prob = c(0.013, 0.05, 0.085, 0.246, 0.039, 0.064, 0.15, 0.075, 0.176,
+    0.072, 0.019, 0.012)
+)
+
+
+
+## out <- dplyr::data_frame(
+##     Attainment = rm_default(o, pattern = "^.+?(?=\\s\\.)", extract = TRUE) %>%
+##     unlist %>%
+##     gsub("’", "'", .) %>%
+##     TC %>%
+##     unlist %>%
+##     gsub("Ged", "GED", .) %>%
+##     gsub("(?<=\\d)Th", "th", ., perl=TRUE) %>%
+##     rm_non_ascii(replacement="<<NA>>") %>%
+##     gsub("<<NA>><<NA>><<NA>>", "'", .),
+##
+##     Proportion = sapply(strsplit(sapply(strsplit(o, " –|(\\. )+"), "[[", 2), " "), "[[", 2) %>%
+##     as_numeric %>%
+##     sapply(., function(x) x/100),
+##     Percent = paste(Proportion * 100, "\\%")
+## )
+##
+## out %>%
+##    dplyr::select(-2) %>%
+##    as.data.frame %>%
+##    tabular() %>%
+##    cat(file="clipboard")
+##    roxann()
+
diff --git a/R/month.R b/R/month.R
@@ -0,0 +1,23 @@
+#' Generate Random Vector of Months
+#'
+#' Generate a random factor vector of months.
+#'
+#' @inheritParams r_sample_factor
+#' @return Returns a random character vector of month elements.
+#' @keywords month
+#' @export
+#' @family variable functions
+#' @examples
+#' month(10)
+#' pie(table(month(10000, prob = probs(12))))
+month <-
+function (n, x = month.name, prob = NULL, name = "month") {
+
+    if (missing(n)) stop("`n` is missing")
+    out <- sample(x = x, size = n, replace = TRUE, prob = prob)
+    out <- factor(out, levels = x)
+    varname(out, name)
+
+}
+
+
diff --git a/R/name.R b/R/name.R
@@ -0,0 +1,22 @@
+#' Generate Random Vector of Names
+#'
+#' Generate a random vector of first names (Gender Neutral-ish).
+#'
+#' @inheritParams r_sample_replace
+#' @return Returns a random vector of name elements.
+#' @keywords name
+#' @export
+#' @include wakefield-package.R utils.R r_sample_replace.R
+#' @family variable functions
+#' @examples
+#' name(10)
+#' name(100)
+#' name(1000, replace = TRUE)
+name <- function (n, x = wakefield::name_neutral, prob = NULL, replace = FALSE,
+    name = "Name") {
+
+    if (missing(n)) stop("`n` is missing")
+    out <- sample(x = x, size = n, replace = replace, prob = prob)
+    varname(out, name)
+
+}
diff --git a/R/probs.R b/R/probs.R
@@ -0,0 +1,22 @@
+#' Generate a Random Vector of Probabilities.
+#'
+#' Generate a random vector of probabilities that sum to 1.
+#'
+#' @param j An integer of number of probability elements (typically performs
+#' best at j < 4000).
+#' @param upper \code{probs} works by sampling from \code{1:upper} j times and
+#' then dividing each sample by the sum of all samples.
+#' @return Returns a vector of probabilities summing to 1.
+#' @keywords probability percent
+#' @export
+#' @examples
+#' probs(10)
+#' sum(probs(100))
+#' pie(table(month(10000, prob = probs(12))))
+probs <- function(j, upper = 1000000) {
+    m <- sample(1:upper, j, TRUE)
+    out <- suppressWarnings(m/sum(m))
+    if (any(is.na(out))) stop("`j` is too large.  `j` typically performs best at < 4000")
+    out
+}
+
diff --git a/R/r_sample_replace.R b/R/r_sample_replace.R
@@ -0,0 +1,30 @@
+#' Generate Random Vector (Without Replacement)
+#'
+#' Generate a random vector without replacement.
+#'
+#' @param n The number elements to generate.  This can be globally set within
+#' the environment of \code{r_data_frame} or \code{r_list}.
+#' @param x A vector of elements to chose from.
+#' @param prob A vector of probabilities to chose from.
+#' @param replace logical.  If \code{TRUE} samping is done with replacement.
+#' Default is without replacement.
+#' @param name The name to assign to the output vector's \code{varname}
+#' attribute.  This is used to auto assign names to the column/vector name when
+#' used inside of \code{r_data_frame} or \code{r_list}.
+#' @return Returns a random vector of elements.
+#' @include utils.R
+#' @seealso \code{\link[base]{sample}}
+#' @export
+#' @examples
+#' r_sample(100, name = "Var")
+#' table(r_sample(x = c("Dog", "Cat", "Fish", "Bird"), n=1000))
+#' r_sample(x = c("B", "W"), prob = c(.7, .3), n = 25, name = "Race")
+#' r_sample(25, x = c(TRUE, FALSE))
+r_sample_replace <-
+function (n, x = 1:100, prob = NULL, replace = FALSE, name = "Sample") {
+
+    if (missing(n)) stop("`n` is missing")
+    out <- sample(x = x, size = n, replace = replace, prob = prob)
+    varname(out, name)
+
+}
diff --git a/R/sentence.R b/R/sentence.R
@@ -0,0 +1,20 @@
+#' Generate Random Vector of Sentences
+#'
+#' Generate a random vector of sentences from the
+#' \code{\link[wakefield]{presidential_debates_2012}}.
+#'
+#' @inheritParams r_sample_factor
+#' @return Returns a random character vector of sentence elements.
+#' @keywords sentence
+#' @export
+#' @family variable functions
+#' @examples
+#' sentence(10)
+sentence <-
+function (n, x = wakefield::presidential_debates_2012, prob = NULL, name = "Sentence") {
+
+    if (missing(n)) stop("`n` is missing")
+    out <- sample(x = x, size = n, replace = TRUE, prob = prob)
+    varname(out, name)
+
+}