-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added several variable functions and a probs function to generate ran…
…om probabilities.
- Loading branch information
Showing
56 changed files
with
1,350 additions
and
358 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#' Generate Random Vector of animals | ||
#' | ||
#' \code{animal} - Generate a random vector of animals. | ||
#' | ||
#' @inheritParams color | ||
#' @return Returns a random factor vector of animal elements. | ||
#' @keywords animal pet | ||
#' @export | ||
#' @rdname animal | ||
#' @include utils.R r_sample_factor.R | ||
#' @family variable functions | ||
#' @examples | ||
#' animal(10) | ||
#' pie(table(animal(10000))) | ||
#' | ||
#' pet(10) | ||
#' pie(table(pet(10000))) | ||
animal <- function(n, k = 10, x = wakefield::animal_list, prob = NULL, name = "Animal"){ | ||
|
||
stopifnot(k < length(x) || k > 0) | ||
stopifnot(length(prob) != k) | ||
|
||
out <- sample(x = lvls <- gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", sample(x, k), | ||
perl=TRUE), size = n, replace = TRUE, prob = prob) | ||
out <- factor(out, levels = lvls) | ||
varname(out, name) | ||
|
||
} | ||
|
||
#' Generate Random Vector of animals | ||
#' | ||
#' \code{pet} - Generate a random vector of pets. | ||
#' | ||
#' @details The household pets and probabilities: | ||
#' | ||
#' \tabular{ll}{ | ||
#' Dog \tab 36.5 \%\cr | ||
#' Cat \tab 30.4 \%\cr | ||
#' None \tab 25.8 \%\cr | ||
#' Bird \tab 3.1 \% \cr | ||
#' Horse \tab 1.5 \% \cr | ||
#' } | ||
#' | ||
#' @export | ||
#' @rdname animal | ||
pet <- hijack(r_sample_factor, | ||
name = "Pet", | ||
x = c("Dog", "Cat", "None", "Bird", "Horse"), | ||
prob = c(.365, .304, .258, .031, .015) | ||
) | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#' Generate Random Vector of Colors | ||
#' | ||
#' \code{color} - Generate a random vector of colors (sampled from \code{colors()}). | ||
#' | ||
#' @param n The number elements to generate. This can be globally set within | ||
#' the environment of \code{r_data_frame} or \code{r_list}. | ||
#' @param k The number of the elements of x to sample from (uses \code{sample(x, k)}). | ||
#' @param x A vector of elements to chose from. | ||
#' @param prob A vector of probabilities to chose from. | ||
#' @param name The name to assign to the output vector's \code{varname} | ||
#' attribute. This is used to auto assign names to the column/vector name when | ||
#' used inside of \code{r_data_frame} or \code{r_list}. | ||
#' @return Returns a random factor vector of color elements. | ||
#' @keywords color | ||
#' @export | ||
#' @rdname color | ||
#' @include utils.R r_sample_factor.R | ||
#' @family variable functions | ||
#' @examples | ||
#' color(10) | ||
#' pie(tab <- table(color(10000)), col = names(tab)) | ||
#' | ||
#' primary(10) | ||
#' pie(tab <- table(primary(10000)), col = names(tab)) | ||
color <- function(n, k = 10, x = colors(), prob = NULL, name = "Color"){ | ||
|
||
stopifnot(k < length(x) || k > 0) | ||
stopifnot(length(prob) != k) | ||
|
||
out <- sample(x = lvls <- gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", sample(x, k), | ||
perl=TRUE), size = n, replace = TRUE, prob = prob) | ||
out <- factor(out, levels = lvls) | ||
varname(out, name) | ||
|
||
} | ||
|
||
#' Generate Random Vector of Colors | ||
#' | ||
#' \code{color} - Generate a random vector of | ||
#' \href{http://en.wikipedia.org/wiki/Primary_color}{\emph{psycological primary}} | ||
#' colors (sampled from \code{colors()}). | ||
#' | ||
#' @export | ||
#' @rdname color | ||
primary <- hijack(r_sample_factor, | ||
name = "Color", | ||
x = c("Red", "Green", "Blue", "Yellow", "Black", "White") | ||
) | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#' Generate Random Vector of Educational Attainment Level | ||
#' | ||
#' Generate a random vector of educational attainment level. | ||
#' | ||
#' @details The educational attainments and probabilities used match approximate | ||
#' U.S. educational attainment make-up (\url{http://www.census.gov}): | ||
#' | ||
#' \tabular{lr}{ | ||
#' \bold{ Highest Attainment} \tab \bold{Percent} \cr | ||
#' No Schooling Completed \tab 1.3 \% \cr | ||
#' Nursery School to 8th Grade \tab 5 \% \cr | ||
#' 9th Grade to 12th Grade, No Diploma \tab 8.5 \% \cr | ||
#' Regular High School Diploma \tab 24.6 \%\cr | ||
#' GED or Alternative Credential \tab 3.9 \% \cr | ||
#' Some College, Less than 1 Year \tab 6.4 \% \cr | ||
#' Some College, 1 or More Years, No Degree \tab 15 \% \cr | ||
#' Associate's Degree \tab 7.5 \% \cr | ||
#' Bachelor's Degree \tab 17.6 \%\cr | ||
#' Master's Degree \tab 7.2 \% \cr | ||
#' Professional School Degree \tab 1.9 \% \cr | ||
#' Doctorate Degree \tab 1.2 \% \cr | ||
#' } | ||
#' | ||
#' @inheritParams r_sample_factor | ||
#' @return Returns a random vector of educational attainment level elements. | ||
#' @keywords education | ||
#' @export | ||
#' @include utils.R r_sample.R | ||
#' @references \url{http://www.census.gov} | ||
#' @family variable functions | ||
#' @examples | ||
#' education(10) | ||
#' pie(table(education(10000))) | ||
education <- hijack(r_sample, | ||
name = "Education", | ||
x = c("No Schooling Completed", "Nursery School to 8th Grade", "9th Grade to 12th Grade, No Diploma", | ||
"Regular High School Diploma", "GED or Alternative Credential", | ||
"Some College, Less than 1 Year", "Some College, 1 or More Years, No Degree", | ||
"Associate's Degree", "Bachelor's Degree", "Master's Degree", | ||
"Professional School Degree", "Doctorate Degree"), | ||
prob = c(0.013, 0.05, 0.085, 0.246, 0.039, 0.064, 0.15, 0.075, 0.176, | ||
0.072, 0.019, 0.012) | ||
) | ||
|
||
|
||
|
||
## out <- dplyr::data_frame( | ||
## Attainment = rm_default(o, pattern = "^.+?(?=\\s\\.)", extract = TRUE) %>% | ||
## unlist %>% | ||
## gsub("’", "'", .) %>% | ||
## TC %>% | ||
## unlist %>% | ||
## gsub("Ged", "GED", .) %>% | ||
## gsub("(?<=\\d)Th", "th", ., perl=TRUE) %>% | ||
## rm_non_ascii(replacement="<<NA>>") %>% | ||
## gsub("<<NA>><<NA>><<NA>>", "'", .), | ||
## | ||
## Proportion = sapply(strsplit(sapply(strsplit(o, " –|(\\. )+"), "[[", 2), " "), "[[", 2) %>% | ||
## as_numeric %>% | ||
## sapply(., function(x) x/100), | ||
## Percent = paste(Proportion * 100, "\\%") | ||
## ) | ||
## | ||
## out %>% | ||
## dplyr::select(-2) %>% | ||
## as.data.frame %>% | ||
## tabular() %>% | ||
## cat(file="clipboard") | ||
## roxann() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#' Generate Random Vector of Months | ||
#' | ||
#' Generate a random factor vector of months. | ||
#' | ||
#' @inheritParams r_sample_factor | ||
#' @return Returns a random character vector of month elements. | ||
#' @keywords month | ||
#' @export | ||
#' @family variable functions | ||
#' @examples | ||
#' month(10) | ||
#' pie(table(month(10000, prob = probs(12)))) | ||
month <- | ||
function (n, x = month.name, prob = NULL, name = "month") { | ||
|
||
if (missing(n)) stop("`n` is missing") | ||
out <- sample(x = x, size = n, replace = TRUE, prob = prob) | ||
out <- factor(out, levels = x) | ||
varname(out, name) | ||
|
||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#' Generate Random Vector of Names | ||
#' | ||
#' Generate a random vector of first names (Gender Neutral-ish). | ||
#' | ||
#' @inheritParams r_sample_replace | ||
#' @return Returns a random vector of name elements. | ||
#' @keywords name | ||
#' @export | ||
#' @include wakefield-package.R utils.R r_sample_replace.R | ||
#' @family variable functions | ||
#' @examples | ||
#' name(10) | ||
#' name(100) | ||
#' name(1000, replace = TRUE) | ||
name <- function (n, x = wakefield::name_neutral, prob = NULL, replace = FALSE, | ||
name = "Name") { | ||
|
||
if (missing(n)) stop("`n` is missing") | ||
out <- sample(x = x, size = n, replace = replace, prob = prob) | ||
varname(out, name) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#' Generate a Random Vector of Probabilities. | ||
#' | ||
#' Generate a random vector of probabilities that sum to 1. | ||
#' | ||
#' @param j An integer of number of probability elements (typically performs | ||
#' best at j < 4000). | ||
#' @param upper \code{probs} works by sampling from \code{1:upper} j times and | ||
#' then dividing each sample by the sum of all samples. | ||
#' @return Returns a vector of probabilities summing to 1. | ||
#' @keywords probability percent | ||
#' @export | ||
#' @examples | ||
#' probs(10) | ||
#' sum(probs(100)) | ||
#' pie(table(month(10000, prob = probs(12)))) | ||
probs <- function(j, upper = 1000000) { | ||
m <- sample(1:upper, j, TRUE) | ||
out <- suppressWarnings(m/sum(m)) | ||
if (any(is.na(out))) stop("`j` is too large. `j` typically performs best at < 4000") | ||
out | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#' Generate Random Vector (Without Replacement) | ||
#' | ||
#' Generate a random vector without replacement. | ||
#' | ||
#' @param n The number elements to generate. This can be globally set within | ||
#' the environment of \code{r_data_frame} or \code{r_list}. | ||
#' @param x A vector of elements to chose from. | ||
#' @param prob A vector of probabilities to chose from. | ||
#' @param replace logical. If \code{TRUE} samping is done with replacement. | ||
#' Default is without replacement. | ||
#' @param name The name to assign to the output vector's \code{varname} | ||
#' attribute. This is used to auto assign names to the column/vector name when | ||
#' used inside of \code{r_data_frame} or \code{r_list}. | ||
#' @return Returns a random vector of elements. | ||
#' @include utils.R | ||
#' @seealso \code{\link[base]{sample}} | ||
#' @export | ||
#' @examples | ||
#' r_sample(100, name = "Var") | ||
#' table(r_sample(x = c("Dog", "Cat", "Fish", "Bird"), n=1000)) | ||
#' r_sample(x = c("B", "W"), prob = c(.7, .3), n = 25, name = "Race") | ||
#' r_sample(25, x = c(TRUE, FALSE)) | ||
r_sample_replace <- | ||
function (n, x = 1:100, prob = NULL, replace = FALSE, name = "Sample") { | ||
|
||
if (missing(n)) stop("`n` is missing") | ||
out <- sample(x = x, size = n, replace = replace, prob = prob) | ||
varname(out, name) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#' Generate Random Vector of Sentences | ||
#' | ||
#' Generate a random vector of sentences from the | ||
#' \code{\link[wakefield]{presidential_debates_2012}}. | ||
#' | ||
#' @inheritParams r_sample_factor | ||
#' @return Returns a random character vector of sentence elements. | ||
#' @keywords sentence | ||
#' @export | ||
#' @family variable functions | ||
#' @examples | ||
#' sentence(10) | ||
sentence <- | ||
function (n, x = wakefield::presidential_debates_2012, prob = NULL, name = "Sentence") { | ||
|
||
if (missing(n)) stop("`n` is missing") | ||
out <- sample(x = x, size = n, replace = TRUE, prob = prob) | ||
varname(out, name) | ||
|
||
} |
Oops, something went wrong.