Skip to content

Commit

Permalink
added several variable functions and a probs function to generate ran…
Browse files Browse the repository at this point in the history
…om probabilities.
  • Loading branch information
trinker committed Apr 17, 2015
1 parent 984a0aa commit 32f2be6
Show file tree
Hide file tree
Showing 56 changed files with 1,350 additions and 358 deletions.
12 changes: 10 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,19 @@ Collate:
'utils.R'
'r_sample.R'
'age.R'
'r_sample_factor.R'
'animal.R'
'r_sample_binary.R'
'answer.R'
'area.R'
'coin.R'
'color.R'
'r_sample_logical.R'
'death.R'
'dice.R'
'dna.R'
'dummy.R'
'education.R'
'grade.R'
'grade_level.R'
'group.R'
Expand All @@ -44,17 +48,21 @@ Collate:
'likert.R'
'lorem_ipsum.R'
'marital.R'
'month.R'
'wakefield-package.R'
'r_sample_replace.R'
'name.R'
'political.R'
'probs.R'
'r_data.R'
'r_data_frame.R'
'r_list.R'
'r_sample_factor.R'
'r_sample_integer.R'
'race.R'
'sat.R'
'sentence.R'
'sex.R'
'string.R'
'upper.R'
'valid.R'
'varname.R'
'wakefield-package.R'
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@

S3method(print,variable)
export(age)
export(animal)
export(answer)
export(area)
export(coin)
export(color)
export(death)
export(dice)
export(died)
export(dna)
export(dummy)
export(education)
export(gpa)
export(grade)
export(grade_letter)
Expand All @@ -31,10 +34,15 @@ export(lorem_ipsum)
export(lower)
export(lower_factor)
export(marital)
export(month)
export(name)
export(normal)
export(normal_round)
export(paragraph)
export(pet)
export(political)
export(primary)
export(probs)
export(r_data)
export(r_data_frame)
export(r_list)
Expand All @@ -45,8 +53,10 @@ export(r_sample_factor)
export(r_sample_integer)
export(r_sample_logical)
export(r_sample_ordered)
export(r_sample_replace)
export(race)
export(sat)
export(sentence)
export(sex)
export(string)
export(upper)
Expand Down
53 changes: 53 additions & 0 deletions R/animal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#' Generate Random Vector of animals
#'
#' \code{animal} - Generate a random vector of animals.
#'
#' @inheritParams color
#' @return Returns a random factor vector of animal elements.
#' @keywords animal pet
#' @export
#' @rdname animal
#' @include utils.R r_sample_factor.R
#' @family variable functions
#' @examples
#' animal(10)
#' pie(table(animal(10000)))
#'
#' pet(10)
#' pie(table(pet(10000)))
animal <- function(n, k = 10, x = wakefield::animal_list, prob = NULL, name = "Animal"){

stopifnot(k < length(x) || k > 0)
stopifnot(length(prob) != k)

out <- sample(x = lvls <- gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", sample(x, k),
perl=TRUE), size = n, replace = TRUE, prob = prob)
out <- factor(out, levels = lvls)
varname(out, name)

}

#' Generate Random Vector of animals
#'
#' \code{pet} - Generate a random vector of pets.
#'
#' @details The household pets and probabilities:
#'
#' \tabular{ll}{
#' Dog \tab 36.5 \%\cr
#' Cat \tab 30.4 \%\cr
#' None \tab 25.8 \%\cr
#' Bird \tab 3.1 \% \cr
#' Horse \tab 1.5 \% \cr
#' }
#'
#' @export
#' @rdname animal
pet <- hijack(r_sample_factor,
name = "Pet",
x = c("Dog", "Cat", "None", "Bird", "Horse"),
prob = c(.365, .304, .258, .031, .015)
)



52 changes: 52 additions & 0 deletions R/color.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' Generate Random Vector of Colors
#'
#' \code{color} - Generate a random vector of colors (sampled from \code{colors()}).
#'
#' @param n The number elements to generate. This can be globally set within
#' the environment of \code{r_data_frame} or \code{r_list}.
#' @param k The number of the elements of x to sample from (uses \code{sample(x, k)}).
#' @param x A vector of elements to chose from.
#' @param prob A vector of probabilities to chose from.
#' @param name The name to assign to the output vector's \code{varname}
#' attribute. This is used to auto assign names to the column/vector name when
#' used inside of \code{r_data_frame} or \code{r_list}.
#' @return Returns a random factor vector of color elements.
#' @keywords color
#' @export
#' @rdname color
#' @include utils.R r_sample_factor.R
#' @family variable functions
#' @examples
#' color(10)
#' pie(tab <- table(color(10000)), col = names(tab))
#'
#' primary(10)
#' pie(tab <- table(primary(10000)), col = names(tab))
color <- function(n, k = 10, x = colors(), prob = NULL, name = "Color"){

stopifnot(k < length(x) || k > 0)
stopifnot(length(prob) != k)

out <- sample(x = lvls <- gsub("(\\w)(\\w*)", "\\U\\1\\L\\2", sample(x, k),
perl=TRUE), size = n, replace = TRUE, prob = prob)
out <- factor(out, levels = lvls)
varname(out, name)

}

#' Generate Random Vector of Colors
#'
#' \code{color} - Generate a random vector of
#' \href{http://en.wikipedia.org/wiki/Primary_color}{\emph{psycological primary}}
#' colors (sampled from \code{colors()}).
#'
#' @export
#' @rdname color
primary <- hijack(r_sample_factor,
name = "Color",
x = c("Red", "Green", "Blue", "Yellow", "Black", "White")
)




70 changes: 70 additions & 0 deletions R/education.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#' Generate Random Vector of Educational Attainment Level
#'
#' Generate a random vector of educational attainment level.
#'
#' @details The educational attainments and probabilities used match approximate
#' U.S. educational attainment make-up (\url{http://www.census.gov}):
#'
#' \tabular{lr}{
#' \bold{ Highest Attainment} \tab \bold{Percent} \cr
#' No Schooling Completed \tab 1.3 \% \cr
#' Nursery School to 8th Grade \tab 5 \% \cr
#' 9th Grade to 12th Grade, No Diploma \tab 8.5 \% \cr
#' Regular High School Diploma \tab 24.6 \%\cr
#' GED or Alternative Credential \tab 3.9 \% \cr
#' Some College, Less than 1 Year \tab 6.4 \% \cr
#' Some College, 1 or More Years, No Degree \tab 15 \% \cr
#' Associate's Degree \tab 7.5 \% \cr
#' Bachelor's Degree \tab 17.6 \%\cr
#' Master's Degree \tab 7.2 \% \cr
#' Professional School Degree \tab 1.9 \% \cr
#' Doctorate Degree \tab 1.2 \% \cr
#' }
#'
#' @inheritParams r_sample_factor
#' @return Returns a random vector of educational attainment level elements.
#' @keywords education
#' @export
#' @include utils.R r_sample.R
#' @references \url{http://www.census.gov}
#' @family variable functions
#' @examples
#' education(10)
#' pie(table(education(10000)))
education <- hijack(r_sample,
name = "Education",
x = c("No Schooling Completed", "Nursery School to 8th Grade", "9th Grade to 12th Grade, No Diploma",
"Regular High School Diploma", "GED or Alternative Credential",
"Some College, Less than 1 Year", "Some College, 1 or More Years, No Degree",
"Associate's Degree", "Bachelor's Degree", "Master's Degree",
"Professional School Degree", "Doctorate Degree"),
prob = c(0.013, 0.05, 0.085, 0.246, 0.039, 0.064, 0.15, 0.075, 0.176,
0.072, 0.019, 0.012)
)



## out <- dplyr::data_frame(
## Attainment = rm_default(o, pattern = "^.+?(?=\\s\\.)", extract = TRUE) %>%
## unlist %>%
## gsub("’", "'", .) %>%
## TC %>%
## unlist %>%
## gsub("Ged", "GED", .) %>%
## gsub("(?<=\\d)Th", "th", ., perl=TRUE) %>%
## rm_non_ascii(replacement="<<NA>>") %>%
## gsub("<<NA>><<NA>><<NA>>", "'", .),
##
## Proportion = sapply(strsplit(sapply(strsplit(o, " –|(\\. )+"), "[[", 2), " "), "[[", 2) %>%
## as_numeric %>%
## sapply(., function(x) x/100),
## Percent = paste(Proportion * 100, "\\%")
## )
##
## out %>%
## dplyr::select(-2) %>%
## as.data.frame %>%
## tabular() %>%
## cat(file="clipboard")
## roxann()

23 changes: 23 additions & 0 deletions R/month.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#' Generate Random Vector of Months
#'
#' Generate a random factor vector of months.
#'
#' @inheritParams r_sample_factor
#' @return Returns a random character vector of month elements.
#' @keywords month
#' @export
#' @family variable functions
#' @examples
#' month(10)
#' pie(table(month(10000, prob = probs(12))))
month <-
function (n, x = month.name, prob = NULL, name = "month") {

if (missing(n)) stop("`n` is missing")
out <- sample(x = x, size = n, replace = TRUE, prob = prob)
out <- factor(out, levels = x)
varname(out, name)

}


22 changes: 22 additions & 0 deletions R/name.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Generate Random Vector of Names
#'
#' Generate a random vector of first names (Gender Neutral-ish).
#'
#' @inheritParams r_sample_replace
#' @return Returns a random vector of name elements.
#' @keywords name
#' @export
#' @include wakefield-package.R utils.R r_sample_replace.R
#' @family variable functions
#' @examples
#' name(10)
#' name(100)
#' name(1000, replace = TRUE)
name <- function (n, x = wakefield::name_neutral, prob = NULL, replace = FALSE,
name = "Name") {

if (missing(n)) stop("`n` is missing")
out <- sample(x = x, size = n, replace = replace, prob = prob)
varname(out, name)

}
22 changes: 22 additions & 0 deletions R/probs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' Generate a Random Vector of Probabilities.
#'
#' Generate a random vector of probabilities that sum to 1.
#'
#' @param j An integer of number of probability elements (typically performs
#' best at j < 4000).
#' @param upper \code{probs} works by sampling from \code{1:upper} j times and
#' then dividing each sample by the sum of all samples.
#' @return Returns a vector of probabilities summing to 1.
#' @keywords probability percent
#' @export
#' @examples
#' probs(10)
#' sum(probs(100))
#' pie(table(month(10000, prob = probs(12))))
probs <- function(j, upper = 1000000) {
m <- sample(1:upper, j, TRUE)
out <- suppressWarnings(m/sum(m))
if (any(is.na(out))) stop("`j` is too large. `j` typically performs best at < 4000")
out
}

30 changes: 30 additions & 0 deletions R/r_sample_replace.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#' Generate Random Vector (Without Replacement)
#'
#' Generate a random vector without replacement.
#'
#' @param n The number elements to generate. This can be globally set within
#' the environment of \code{r_data_frame} or \code{r_list}.
#' @param x A vector of elements to chose from.
#' @param prob A vector of probabilities to chose from.
#' @param replace logical. If \code{TRUE} samping is done with replacement.
#' Default is without replacement.
#' @param name The name to assign to the output vector's \code{varname}
#' attribute. This is used to auto assign names to the column/vector name when
#' used inside of \code{r_data_frame} or \code{r_list}.
#' @return Returns a random vector of elements.
#' @include utils.R
#' @seealso \code{\link[base]{sample}}
#' @export
#' @examples
#' r_sample(100, name = "Var")
#' table(r_sample(x = c("Dog", "Cat", "Fish", "Bird"), n=1000))
#' r_sample(x = c("B", "W"), prob = c(.7, .3), n = 25, name = "Race")
#' r_sample(25, x = c(TRUE, FALSE))
r_sample_replace <-
function (n, x = 1:100, prob = NULL, replace = FALSE, name = "Sample") {

if (missing(n)) stop("`n` is missing")
out <- sample(x = x, size = n, replace = replace, prob = prob)
varname(out, name)

}
20 changes: 20 additions & 0 deletions R/sentence.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#' Generate Random Vector of Sentences
#'
#' Generate a random vector of sentences from the
#' \code{\link[wakefield]{presidential_debates_2012}}.
#'
#' @inheritParams r_sample_factor
#' @return Returns a random character vector of sentence elements.
#' @keywords sentence
#' @export
#' @family variable functions
#' @examples
#' sentence(10)
sentence <-
function (n, x = wakefield::presidential_debates_2012, prob = NULL, name = "Sentence") {

if (missing(n)) stop("`n` is missing")
out <- sample(x = x, size = n, replace = TRUE, prob = prob)
varname(out, name)

}

0 comments on commit 32f2be6

Please sign in to comment.