/
glm_mi.R
169 lines (165 loc) · 8.58 KB
/
glm_mi.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#' Pooling and model selection of Linear and Logistic regression
#' models across multiply imputed data.
#'
#' \code{glm_mi} Pooling and backward or forward selection of Linear and Logistic regression
#' models across multiply imputed data using selection methods RR, D1, D2, D3, D4 and MPR
#' (without use of with function).
#'
#' @param data Data frame with stacked multiple imputed datasets.
#' The original dataset that contains missing values must be excluded from the
#' dataset. The imputed datasets must be distinguished by an imputation variable,
#' specified under impvar, and starting by 1.
#' @param formula A formula object to specify the model as normally used by glm.
#' See under "Details" and "Examples" how these can be specified. If a formula
#' object is used set predictors, cat.predictors, spline.predictors or int.predictors
#' at the default value of NULL.
#' @param nimp A numerical scalar. Number of imputed datasets. Default is 5.
#' @param impvar A character vector. Name of the variable that distinguishes the
#' imputed datasets.
#' @param keep.predictors A single string or a vector of strings including the variables that are forced
#' in the model during predictor selection. All type of variables are allowed.
#' @param p.crit A numerical scalar. P-value selection criterium. A value of 1
#' provides the pooled model without selection.
#' @param method A character vector to indicate the pooling method for p-values to pool the
#' total model or used during predictor selection. This can be "RR", D1", "D2", "D3", "D4", or "MPR".
#' See details for more information. Default is "RR".
#' @param direction The direction of predictor selection, "BW" means backward selection and "FW"
#' means forward selection.
#' @param model_type A character vector for type of model, "binomial" is for logistic regression and
#' "linear" is for linear regression models.
#'
#' @details The basic pooling procedure to derive pooled coefficients, standard errors, 95
#' confidence intervals and p-values is Rubin's Rules (RR). However, RR is only possible when
#' the model includes continuous and dichotomous variables. Specific procedures are
#' available when the model also included categorical (> 2 categories) or restricted cubic spline
#' variables. These pooling methods are: “D1” is pooling of the total covariance matrix,
#' ”D2” is pooling of Chi-square values, “D3” and "D4" is pooling Likelihood ratio statistics
#' (method of Meng and Rubin) and “MPR” is pooling of median p-values (MPR rule).
#' Spline regression coefficients are defined by using the rcs function for restricted cubic
#' splines of the rms package. A minimum number of 3 knots as defined under knots is required.
#'
#' A typical formula object has the form \code{Outcome ~ terms}. Categorical variables has to
#' be defined as \code{Outcome ~ factor(variable)}, restricted cubic spline variables as
#' \code{Outcome ~ rcs(variable, 3)}. Interaction terms can be defined as
#' \code{Outcome ~ variable1*variable2} or \code{Outcome ~ variable1 + variable2 + variable1:variable2}.
#' All variables in the terms part have to be separated by a "+". If a formula
#' object is used set predictors, cat.predictors, spline.predictors or int.predictors
#' at the default value of NULL.
#'
#'@return An object of class \code{pmods} (multiply imputed models) from
#' which the following objects can be extracted:
#' \itemize{
#' \item \code{data} imputed datasets
#' \item \code{RR_model} pooled model at each selection step
#' \item \code{RR_model_final} final selected pooled model
#' \item \code{multiparm} pooled p-values at each step according to pooling method
#' \item \code{multiparm_final} pooled p-values at final step according to pooling method
#' \item \code{multiparm_out} (only when direction = "FW") pooled p-values of removed predictors
#' \item \code{formula_step} formula object at each step
#' \item \code{formula_final} formula object at final step
#' \item \code{formula_initial} formula object at final step
#' \item \code{predictors_in} predictors included at each selection step
#' \item \code{predictors_out} predictors excluded at each step
#' \item \code{impvar} name of variable used to distinguish imputed datasets
#' \item \code{nimp} number of imputed datasets
#' \item \code{Outcome} name of the outcome variable
#' \item \code{method} selection method
#' \item \code{p.crit} p-value selection criterium
#' \item \code{call} function call
#' \item \code{model_type} type of regression model used
#' \item \code{direction} direction of predictor selection
#' \item \code{predictors_final} names of predictors in final selection step
#' \item \code{predictors_initial} names of predictors in start model
#' \item \code{keep.predictors} names of predictors that were forced in the model
#' }
#'
#' @references Eekhout I, van de Wiel MA, Heymans MW. Methods for significance testing of categorical
#' covariates in logistic regression models after multiple imputation: power and applicability
#' analysis. BMC Med Res Methodol. 2017;17(1):129.
#' @references Enders CK (2010). Applied missing data analysis. New York: The Guilford Press.
#' @references Meng X-L, Rubin DB. Performing likelihood ratio tests with multiply-imputed data sets.
#' Biometrika.1992;79:103-11.
#' @references van de Wiel MA, Berkhof J, van Wieringen WN. Testing the prediction error difference between
#' 2 predictors. Biostatistics. 2009;10:550-60.
#' @references Marshall A, Altman DG, Holder RL, Royston P. Combining estimates of interest in prognostic
#' modelling studies after multiple imputation: current practice and guidelines. BMC Med Res Methodol.
#' 2009;9:57.
#' @references Van Buuren S. (2018). Flexible Imputation of Missing Data. 2nd Edition. Chapman & Hall/CRC
#' Interdisciplinary Statistics. Boca Raton.
#' @references EW. Steyerberg (2019). Clinical Prediction MOdels. A Practical Approach
#' to Development, Validation, and Updating (2nd edition). Springer Nature Switzerland AG.
#'
#' @references http://missingdatasolutions.rbind.io/
#'
#' @author Martijn Heymans, 2021
#'
#' @examples
#' pool_lr <- glm_mi(data=lbpmilr, formula = Chronic ~ Pain +
#' factor(Satisfaction) + rcs(Tampascale,3) + Radiation +
#' Radiation*factor(Satisfaction) + Age + Duration + BMI,
#' p.crit = 0.05, direction="FW", nimp=5, impvar="Impnr",
#' keep.predictors = c("Radiation*factor(Satisfaction)", "Age"),
#' method="D1", model_type="binomial")
#'
#' pool_lr$RR_model_final
#'
#' @export
glm_mi <- function(data,
formula = NULL,
nimp=5,
impvar=NULL,
keep.predictors=NULL,
p.crit=1,
method="RR",
direction=NULL,
model_type=NULL)
{
call <- match.call()
m_check <- check_model(data=data, formula = formula,
keep.predictors = keep.predictors,
impvar=impvar, p.crit=p.crit, method=method,
nimp=nimp, direction=direction,
model_type=model_type)
P <- m_check$P
keep.P <- m_check$keep.P
Outcome <- m_check$Outcome
if(p.crit==1){
if(model_type=="binomial")
pobjpool <-
glm_lr_bw(data = data, nimp=nimp, impvar = impvar, Outcome= Outcome,
P = P, p.crit = p.crit, method = method, keep.P = keep.P)
if(model_type=="linear")
pobjpool <-
glm_lm_bw(data = data, nimp=nimp, impvar = impvar, Outcome= Outcome,
P = P, p.crit = p.crit, method = method, keep.P = keep.P)
class(pobjpool) <-
"pmods"
return(pobjpool)
}
if(direction=="FW"){
if(model_type=="binomial")
pobjfw <-
glm_lr_fw(data = data, nimp = nimp, impvar = impvar, Outcome = Outcome, p.crit = p.crit,
P = P, keep.P = keep.P, method = method)
if(model_type=="linear")
pobjfw <-
glm_lm_fw(data = data, nimp = nimp, impvar = impvar, Outcome = Outcome, p.crit = p.crit,
P = P, keep.P = keep.P, method = method)
class(pobjfw) <-
"pmods"
return(pobjfw)
}
if(direction=="BW"){
if(model_type=="binomial")
pobjbw <-
glm_lr_bw(data = data, nimp = nimp, impvar = impvar, Outcome = Outcome, p.crit = p.crit,
P = P, keep.P = keep.P, method = method)
if(model_type=="linear")
pobjbw <-
glm_lm_bw(data = data, nimp = nimp, impvar = impvar, Outcome = Outcome, p.crit = p.crit,
P = P, keep.P = keep.P, method = method)
class(pobjbw) <-
"pmods"
return(pobjbw)
}
}