Skip to content

Commit

Permalink
Merge pull request #19 from neuropsychology/0.0.7b
Browse files Browse the repository at this point in the history
0.0.7b
  • Loading branch information
DominiqueMakowski committed Jan 4, 2018
2 parents fe22091 + 3cf818d commit 301a7e0
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 14 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
### Minor changes
- Added [`CONTRIBUTING.md`](https://github.com/neuropsychology/psycho.R/blob/master/CONTRIBUTING.md)
- Changed `format_digit`
- Added `except` parameter to `normalize`


# 0.0.6 (2017-12-07)
Expand Down
29 changes: 25 additions & 4 deletions R/normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,52 @@
#' Select numeric variables and normalize (Z-score) them.
#'
#' @param df Dataframe.
#' @param except Character vector (or list) of column names to exclude from normalization; these columns are returned unchanged.
#'
#' @return A dataframe in which the numeric columns have been Z-scored and the non-numeric columns are kept as they are.
#'
#' @examples
#' df <- data.frame(Participant = as.factor(rep(1:50,each=2)),
#' Condition = base::rep_len(c("A", "B"), 100), V1 = rnorm(100, 30, .2),
#' V2 = runif(100, 3, 5))
#' df <- data.frame(
#' Participant = as.factor(rep(1:50,each=2)),
#' Condition = base::rep_len(c("A", "B"), 100),
#' V1 = rnorm(100, 30, .2),
#' V2 = runif(100, 3, 5),
#' V3 = rnorm(100, 100, 10)
#' )
#'
#' dfZ <- normalize(df)
#' dfZ <- normalize(df, except="V3")
#' dfZ <- normalize(df, except=c("V1", "V2"))
#'
#' @author \href{https://dominiquemakowski.github.io/}{Dominique Makowski}
#'
#'
#' @import purrr
#' @import dplyr
#' @export
normalize <- function(df) {
normalize <- function(df, except = NULL) {
  # Z-score every numeric column of `df`, leaving non-numeric columns and the
  # columns named in `except` untouched.
  #
  # Args:
  #   df:     a dataframe.
  #   except: NULL, or a character vector / list of column names that must
  #           not be normalized. Names that are not columns of `df` are
  #           ignored.
  #
  # Returns:
  #   A dataframe with the columns ordered as: non-numeric columns, then the
  #   normalized numeric columns, then the excepted columns (in the order
  #   they were given in `except`).

  # `except` may hold several names, so it is reduced to the names actually
  # present in `df` instead of being used directly as an `if` condition
  # (a condition of length > 1 is an error in recent versions of R).
  except <- intersect(as.character(unlist(except)), names(df))

  is_except <- names(df) %in% except
  is_numeric <- vapply(df, is.numeric, logical(1))
  to_scale <- which(is_numeric & !is_except)

  # scale() centres each column on its mean and divides by its standard
  # deviation; as.numeric() drops the one-column matrix it returns.
  for (i in to_scale) {
    df[[i]] <- as.numeric(scale(df[[i]]))
  }

  # Rebuild the historical column layout by index, which also copes with
  # dataframes that have no non-numeric or no numeric columns.
  column_order <- c(
    which(!is_numeric & !is_except),
    to_scale,
    match(except, names(df))
  )
  df[column_order]
}
16 changes: 12 additions & 4 deletions man/normalize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 19 additions & 6 deletions tests/testthat/test-normalize.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
context("normalize")

test_that("Correct Value", {
  # One numeric and one non-numeric column: the numeric one is centred on 0.
  x <- data.frame(x = c(8, 10, 12), y = c("a", "b", "c"))
  x <- psycho::normalize(x)
  testthat::expect_equal(mean(x$x), 0)

  # Numeric-only dataframe, no exceptions.
  df <- data.frame(
    V1 = rnorm(100, 30, .2),
    V2 = runif(100, 3, 5),
    V3 = rnorm(100, 100, 10)
  )
  dfZ <- normalize(df)
  testthat::expect_equal(mean(dfZ$V1), 0)

  # Mixed dataframe used to exercise the `except` argument.
  df <- data.frame(
    Participant = as.factor(rep(1:50, each = 2)),
    Condition = base::rep_len(c("A", "B"), 100),
    V1 = rnorm(100, 30, .2),
    V2 = runif(100, 3, 5),
    V3 = rnorm(100, 100, 10)
  )

  # A single excepted column is left as it was; the others are normalized.
  dfZ <- normalize(df, except = "V3")
  testthat::expect_equal(mean(dfZ$V2), 0)
  testthat::expect_equal(dfZ$V3, df$V3)

  # Several excepted columns are all left as they were.
  dfZ <- normalize(df, except = c("V1", "V2"))
  testthat::expect_equal(mean(dfZ$V3), 0)
  testthat::expect_equal(dfZ$V1, df$V1)
  testthat::expect_equal(dfZ$V2, df$V2)

  # Dataframe with a single numeric column.
  x <- data.frame(x = c(8, 10, 12))
  x <- psycho::normalize(x)
  testthat::expect_equal(mean(x$x), 0)
})

1 comment on commit 301a7e0

@lintr-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

R/analyze.stanreg.R:141:1: style: lines should not be more than 80 characters.

​      warning("Interpreting effect size following Cohen (1977, 1988)... Make sure your variables were normalized!")
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/analyze.stanreg.R:290:1: style: lines should not be more than 80 characters.

​  names(summary) <- c("Variable", "MPE", "Median", "MAD", "Mean", "SD", paste0(CI, "_CI_lower"), paste0(CI, "_CI_higher"))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:59:1: style: lines should not be more than 80 characters.

corr <- psych::corr.test(df, y = df2, use = "pairwise", method = method, adjust = "none")
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:92:3: warning: local variable ‘n’ assigned but may not be used

n <- nrow(df)
  ^

R/correlation.R:97:1: style: lines should not be more than 80 characters.

p[lower.tri(p)] <- p.adjust(p[lower.tri(p)], method = adjust, n = choose(nrow(p), 2))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:98:1: style: lines should not be more than 80 characters.

p[upper.tri(p)] <- p.adjust(p[upper.tri(p)], method = adjust, n = choose(nrow(p), 2))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:133:1: style: lines should not be more than 80 characters.

table <- cbind(table[1:length(table) - 1]) # remove last column and return the matrix (which is now a data frame)
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:154:1: style: lines should not be more than 80 characters.

corr <- psych::corr.test(cbind(df, df2), use = "pairwise", method = method, adjust = "none")
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:157:1: style: lines should not be more than 80 characters.

p[lower.tri(p)] <- p.adjust(p[lower.tri(p)], method = adjust, n = choose(nrow(p), 2))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:158:1: style: lines should not be more than 80 characters.

p[upper.tri(p)] <- p.adjust(p[upper.tri(p)], method = adjust, n = choose(nrow(p), 2))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:159:1: style: lines should not be more than 80 characters.

​    warning("Due to the presence of two dataframes, the plot might be incorrect. Consider with caution.")
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:165:1: style: lines should not be more than 80 characters.

title = paste("A ", type, "'s correlation matrix (correction: ", adjust, ")\n", sep = ""),
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/correlation.R:179:1: style: lines should not be more than 80 characters.

title = paste("A ", type, "'s correlation matrix (correction: ", adjust, ")\n", sep = ""),
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/formatting.R:26:60: style: Put spaces around all infix operators.

formatted <- paste0(formatted, strrep("0", digits-1))
                                                          ~^~

R/n_factors.R:124:1: style: lines should not be more than 80 characters.

opt <- stats[stats$map == min, ]$n_factors[!is.na(stats[stats$map == min, ]$n_factors)]
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/n_factors.R:136:1: style: lines should not be more than 80 characters.

opt <- stats[stats$BIC == min, ]$n_factors[!is.na(stats[stats$BIC == min, ]$n_factors)]
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/n_factors.R:148:1: style: lines should not be more than 80 characters.

opt <- stats[stats$SABIC == min, ]$n_factors[!is.na(stats[stats$SABIC == min, ]$n_factors)]
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/n_factors.R:167:1: style: lines should not be more than 80 characters.

n_optimal = c(na.omit(cfit[cfit$cfit == max(cfit$cfit, na.rm = T), ])$n_factors)
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/n_factors.R:205:1: style: lines should not be more than 80 characters.

plot_data$n.Methods.Ratio <- plot_data$n.Methods.Ratio * (1 / max(plot_data$n.Methods.Ratio))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/n_factors.R:206:1: style: lines should not be more than 80 characters.

plot_data$area <- plot_data$n.Methods.Ratio / (max(plot_data$n.Methods.Ratio) / max(plot_data$Eigenvalues))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/n_factors.R:207:1: style: lines should not be more than 80 characters.

plot_data$var <- plot_data$Cum.Variance / (max(plot_data$Cum.Variance) / max(plot_data$Eigenvalues))
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

R/n_factors.R:227:1: style: lines should not be more than 80 characters.

trans = ~. * (max(eigenvalues$Cum.Variance) / max(eigenvalues$Eigenvalues)),
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

tests/testthat/test-normalize.R:13:42: style: Put spaces around all infix operators.

Participant = as.factor(rep(1:50,each=2)),
                                        ~^~

tests/testthat/test-normalize.R:19:30: style: Put spaces around all infix operators.

dfZ <- normalize(df, except="V3")
                            ~^~

tests/testthat/test-normalize.R:21:30: style: Put spaces around all infix operators.

dfZ <- normalize(df, except=c("V1", "V2"))
                            ~^~

Please sign in to comment.