Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
595 lines (499 sloc) 10.9 KB
#' Added variable plot data
#'
#' Data for generating the added variable plots.
#'
#' @param model An object of class \code{lm}.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt, data = mtcars)
#' ols_prep_avplot_data(model)
#'
#' @importFrom stats model.frame residuals as.formula
#' @importFrom dplyr bind_cols
#'
#' @export
#'
ols_prep_avplot_data <- function(model) {
m1 <-
model %>%
model.frame() %>%
as_data_frame()
m2 <-
model %>%
model.matrix() %>%
as_data_frame() %>%
select(-1)
m1 %>%
select(1) %>%
bind_cols(m2) %>%
as_data_frame()
}
#' Regress predictor on other predictors
#'
#' Regress a predictor in the model on all the other predictors.
#'
#' @param data A \code{data.frame}.
#' @param i A numeric vector (indicates the predictor in the model).
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt, data = mtcars)
#' data <- ols_prep_avplot_data(model)
#' ols_prep_regress_x(data, 1)
#'
#' @importFrom stats lsfit
#'
#' @export
#'
ols_prep_regress_x <- function(data, i) {
x <- remove_columns(data, i)
y <- select_columns(data, i)
lsfit(x, y) %>%
use_series(residuals)
}
#' Regress y on other predictors
#'
#' Regress y on all the predictors except the ith predictor.
#'
#' @param data A \code{data.frame}.
#' @param i A numeric vector (indicates the predictor in the model).
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt, data = mtcars)
#' data <- ols_prep_avplot_data(model)
#' ols_prep_regress_y(data, 1)
#'
#' @export
#'
ols_prep_regress_y <- function(data, i) {
x <- remove_columns(data, i)
y <- select_columns(data)
lsfit(x, y) %>%
use_series(residuals)
}
#' Cooks' D plot data
#'
#' Prepare data for cook's d bar plot.
#'
#' @param model An object of class \code{lm}.
#'
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt, data = mtcars)
#' ols_prep_cdplot_data(model)
#'
#' @importFrom dplyr if_else
#'
#' @export
#'
ols_prep_cdplot_data <- function(model) {
cd <- NULL
color <- NULL
cooksd <- cooks.distance(model)
n <- length(cooksd)
obs <- seq_len(n)
ckd <- tibble(obs = obs, cd = cooksd)
ts <- 4 / n
cooks_max <- max(cooksd)
ckd %<>%
mutate(
color = if_else(cd >= ts, "outlier", "normal"),
fct_color = color %>%
factor() %>%
ordered(levels = c("normal", "outlier"))
)
maxx <-
cooks_max %>%
multiply_by(0.01) %>%
add(cooks_max)
list(ckd = ckd, maxx = maxx, ts = ts)
}
#' Cooks' D outlier observations
#'
#' Identify outliers in cook's d plot.
#'
#' @param k Cooks' d bar plot data.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt, data = mtcars)
#' k <- ols_prep_cdplot_data(model)
#' ols_prep_outlier_obs(k)
#'
#' @export
#'
ols_prep_outlier_obs <- function(k) {
ckd <- NULL
color <- NULL
obs <- NULL
k %>%
use_series(ckd) %>%
mutate(
txt = ifelse(color == "outlier", obs, NA)
)
}
#' Cooks' d outlier data
#'
#' Outlier data for cook's d bar plot.
#'
#' @param k Cooks' d bar plot data.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt, data = mtcars)
#' k <- ols_prep_cdplot_data(model)
#' ols_prep_cdplot_outliers(k)
#'
#' @export
#'
ols_prep_cdplot_outliers <- function(k) {
color <- NULL
ckd <- NULL
obs <- NULL
cd <- NULL
k %>%
use_series(ckd) %>%
filter(
color == "outlier"
) %>%
select(obs, cd) %>%
set_colnames(c("observation", "cooks_distance"))
}
#' DFBETAs plot data
#'
#' Prepares the data for dfbetas plot.
#'
#' @param d A \code{tibble} or \code{data.frame} with dfbetas.
#' @param threshold The threshold for outliers.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars)
#' dfb <- dfbetas(model)
#' n <- nrow(dfb)
#' threshold <- 2 / sqrt(n)
#' dbetas <- dfb[, 1]
#' df_data <- tibble::tibble(obs = seq_len(n), dbetas = dbetas)
#' ols_prep_dfbeta_data(df_data, threshold)
#'
#' @export
#'
ols_prep_dfbeta_data <- function(d, threshold) {
color <- NULL
obs <- NULL
d %>%
mutate(
color = ifelse(((d$dbetas >= threshold) | (d$dbetas <= -threshold)),
c("outlier"), c("normal")),
fct_color = color %>%
factor() %>%
ordered(levels = c("normal", "outlier")),
txt = ifelse(color == "outlier", obs, NA)
)
}
#' DFBETAs plot outliers
#'
#' Data for identifying outliers in dfbetas plot.
#'
#' @param d A \code{tibble} or \code{data.frame}.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars)
#' dfb <- dfbetas(model)
#' n <- nrow(dfb)
#' threshold <- 2 / sqrt(n)
#' dbetas <- dfb[, 1]
#' df_data <- tibble::tibble(obs = seq_len(n), dbetas = dbetas)
#' d <- ols_prep_dfbeta_data(df_data, threshold)
#' ols_prep_dfbeta_outliers(d)
#'
#' @export
#'
ols_prep_dfbeta_outliers <- function(d) {
color <- NULL
obs <- NULL
dbetas <- NULL
d %>%
filter(
color == "outlier"
) %>%
select(obs, dbetas)
}
#' Deleted studentized residual plot data
#'
#' Generates data for deleted studentized residual vs fitted plot.
#'
#' @param model An object of class \code{lm}.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars)
#' ols_prep_dsrvf_data(model)
#'
#' @importFrom magrittr %<>%
#'
#' @export
#'
ols_prep_dsrvf_data <- function(model) {
dsr <- NULL
color <- NULL
pred <- fitted(model)
dsresid <-
model %>%
rstudent() %>%
unname()
n <- length(dsresid)
ds <- tibble(obs = seq_len(n), dsr = dsresid)
ds %<>%
mutate(
color = ifelse((abs(dsr) >= 2), "outlier", "normal"),
fct_color = color %>%
factor() %>%
ordered(levels = c("normal", "outlier"))
)
ds2 <- tibble(obs = seq_len(n),
pred = pred,
dsr = ds$dsr,
color = ds$color,
fct_color = ds$fct_color)
minx <-
ds2 %>%
use_series(dsr) %>%
min() %>%
subtract(1)
maxx <-
ds2 %>%
use_series(dsr) %>%
max() %>%
add(1)
cminx <- ifelse(minx < -2, minx, -2.5)
cmaxx <- ifelse(maxx > 2, maxx, 2.5)
list(ds = ds2, cminx = cminx, cmaxx = cmaxx)
}
#' Residual fit spread plot data
#'
#' Data for generating residual fit spread plot.
#'
#' @param model An object of class \code{lm}.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars)
#' ols_prep_rfsplot_fmdata(model)
#' ols_prep_rfsplot_rsdata(model)
#'
#' @export
#'
ols_prep_rfsplot_fmdata<- function(model) {
predicted <- fitted(model)
pred_m <- mean(predicted)
y <- predicted - pred_m
percenti <- ecdf(y)
x <- percenti(y)
tibble(x, y)
}
#' @rdname ols_prep_rfsplot_fmdata
#' @export
#'
ols_prep_rfsplot_rsdata <- function(model) {
y <- residuals(model)
residtile <- ecdf(y)
x <- residtile(y)
tibble(x, y)
}
#' Residual vs regressor plot data
#'
#' Data for generating residual vs regressor plot.
#'
#' @param model An object of class \code{lm}.
#'
#' @examples
#' model <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars)
#' ols_prep_rvsrplot_data(model)
#'
#' @export
#'
ols_prep_rvsrplot_data <- function(model) {
np <-
model %>%
coefficients() %>%
length() %>%
subtract(1)
dat <-
model %>%
model.frame() %>%
select(-1)
pnames <-
model %>%
coefficients() %>%
names() %>%
extract(-1)
list(np = np, dat = dat, pnames = pnames)
}
#' Studentized residual vs leverage plot data
#'
#' Generates data for studentized resiudual vs leverage plot.
#'
#' @param model An object of class \code{lm}.
#'
#' @examples
#' model <- lm(read ~ write + math + science, data = hsb)
#' ols_prep_rstudlev_data(model)
#'
#' @importFrom dplyr case_when
#'
#' @export
#'
ols_prep_rstudlev_data <- function(model) {
color <- NULL
leverage <-
model %>%
hatvalues() %>%
unname()
rstudent <-
model %>%
rstudent() %>%
unname()
k <-
model %>%
coefficients() %>%
length()
n <-
model %>%
model.frame() %>%
nrow()
lev_thrsh <-
2 %>%
multiply_by(k) %>%
divide_by(n)
rst_thrsh <- 2
miny <-
rstudent %>%
min() %>%
subtract(3)
maxy <-
rstudent %>%
max() %>%
add(3)
minx <- min(leverage)
maxx <- ifelse((max(leverage) > lev_thrsh), max(leverage),
(lev_thrsh + 0.05))
levrstud <-
tibble(obs = seq_len(n), leverage, rstudent) %>%
mutate(
color = case_when(
(leverage < lev_thrsh & abs(rstudent) < 2) ~ "normal",
(leverage > lev_thrsh & abs(rstudent) < 2) ~ "leverage",
(leverage < lev_thrsh & abs(rstudent) > 2) ~ "outlier",
TRUE ~ "outlier & leverage"
),
fct_color = color %>%
factor() %>%
ordered(
levels = c(
"normal", "leverage", "outlier",
"outlier & leverage"
)
)
)
list(levrstud = levrstud,
lev_thrsh = lev_thrsh,
minx = minx,
miny = miny,
maxx = maxx,
maxy = maxy
)
}
#' Studentized residual plot data
#'
#' Generates data for studentized residual plot.
#'
#' @param model An object of class \code{lm}.
#'
#' @examples
#' model <- lm(read ~ write + math + science, data = hsb)
#' ols_prep_srplot_data(model)
#'
#' @export
#'
ols_prep_srplot_data<- function(model) {
color <- NULL
dstud <-
model %>%
rstudent() %>%
unname()
obs <-
dstud %>%
length() %>%
seq_len(.)
dsr <-
tibble(obs = obs, dsr = dstud) %>%
mutate(
color = ifelse((abs(dsr) >= 3), "outlier", "normal"),
fct_color = color %>%
factor() %>%
ordered(levels = c("normal", "outlier"))
)
cminxx <-
dsr %>%
use_series(dsr) %>%
min() %>%
subtract(1) %>%
floor(.)
cmaxxx <-
dsr %>%
use_series(dsr) %>%
max() %>%
subtract(1) %>%
floor(.)
cminx <- ifelse(cminxx > -3, -3, cminxx)
cmaxx <- ifelse(cmaxxx < 3, 3, cmaxxx)
nseq <-
0 %>%
add(cminx) %>%
add(1) %>%
abs() %>%
seq_len(.) %>%
multiply_by(-1)
pseq <-
0 %>%
add(cmaxx) %>%
subtract(1) %>%
seq_len(.)
list(cminx = cminx,
cmaxx = cmaxx,
nseq = nseq,
pseq = pseq,
dsr = dsr)
}
#' Standardized residual chart data
#'
#' Generates data for standardized residual chart.
#'
#' @param model An object of class \code{lm}.
#'
#' @examples
#' model <- lm(read ~ write + math + science, data = hsb)
#' ols_prep_srchart_data(model)
#'
#' @importFrom magrittr is_greater_than
#'
#' @export
#'
ols_prep_srchart_data <- function(model) {
color <- NULL
sdres <- rstandard(model)
sdres_out <-
sdres %>%
abs() %>%
is_greater_than(2)
outlier <-
sdres %>%
extract(sdres_out)
obs <-
sdres %>%
length() %>%
seq_len(.)
tibble(obs = obs, sdres = sdres) %>%
mutate(
color = ifelse(((sdres >= 2) | (sdres <= -2)), c("outlier"), c("normal")),
fct_color = color %>%
factor() %>%
ordered(levels = c("normal", "outlier")),
txt = ifelse(color == "outlier", obs, NA)
)
}
You can’t perform that action at this time.