-
Notifications
You must be signed in to change notification settings - Fork 635
/
cforest.R
230 lines (182 loc) · 8.08 KB
/
cforest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
timestamp <- Sys.time()
library(caret)
library(plyr)
library(recipes)
library(dplyr)
model <- "cforest"
#########################################################################
set.seed(2)
training <- twoClassSim(50, linearVars = 2)
testing <- twoClassSim(500, linearVars = 2)
trainX <- training[, -ncol(training)]
trainY <- training$Class
rec_cls <- recipe(Class ~ ., data = training) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors())
seeds <- vector(mode = "list", length = nrow(training) + 1)
seeds <- lapply(seeds, function(x) 1:20)
cctrl1 <- trainControl(method = "cv", number = 3, returnResamp = "all",
classProbs = TRUE,
summaryFunction = twoClassSummary,
seeds = seeds)
cctrl2 <- trainControl(method = "LOOCV",
classProbs = TRUE, summaryFunction = twoClassSummary,
seeds = seeds)
cctrl3 <- trainControl(method = "oob",
seeds = seeds)
cctrl4 <- trainControl(method = "none",
classProbs = TRUE, summaryFunction = twoClassSummary,
seeds = seeds)
cctrlR <- trainControl(method = "cv", number = 3, returnResamp = "all", search = "random")
set.seed(849)
test_class_cv_model <- train(trainX, trainY,
method = "cforest",
trControl = cctrl1,
metric = "ROC",
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
set.seed(849)
test_class_cv_form <- train(Class ~ ., data = training,
method = "cforest",
trControl = cctrl1,
metric = "ROC",
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
test_class_pred <- predict(test_class_cv_model, testing[, -ncol(testing)])
test_class_prob <- predict(test_class_cv_model, testing[, -ncol(testing)], type = "prob")
test_class_pred_form <- predict(test_class_cv_form, testing[, -ncol(testing)])
test_class_prob_form <- predict(test_class_cv_form, testing[, -ncol(testing)], type = "prob")
set.seed(849)
test_class_rand <- train(trainX, trainY,
method = "cforest",
trControl = cctrlR,
tuneLength = 4)
set.seed(849)
test_class_loo_model <- train(trainX, trainY,
method = "cforest",
trControl = cctrl2,
metric = "ROC",
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
test_levels <- levels(test_class_cv_model)
if(!all(levels(trainY) %in% test_levels))
cat("wrong levels")
set.seed(849)
test_class_oob_model <- train(trainX, trainY,
method = "cforest",
trControl = cctrl3,
controls = party::cforest_unbiased(ntree = 20))
set.seed(849)
test_class_none_model <- train(trainX, trainY,
method = "cforest",
trControl = cctrl4,
tuneLength = 1,
metric = "ROC",
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
set.seed(849)
test_class_rec <- train(x = rec_cls,
data = training,
method = "cforest",
trControl = cctrl1,
metric = "ROC",
controls = party::cforest_unbiased(ntree = 20))
if(
!isTRUE(
all.equal(test_class_cv_model$results,
test_class_rec$results))
)
stop("CV weights not giving the same results")
test_class_imp_rec <- varImp(test_class_rec)
test_class_pred_rec <- predict(test_class_rec, testing[, -ncol(testing)])
test_class_prob_rec <- predict(test_class_rec, testing[, -ncol(testing)],
type = "prob")
test_class_none_pred <- predict(test_class_none_model, testing[, -ncol(testing)])
test_class_none_prob <- predict(test_class_none_model, testing[, -ncol(testing)], type = "prob")
#########################################################################
library(caret)
library(plyr)
library(recipes)
library(dplyr)
set.seed(1)
training <- SLC14_1(30)
testing <- SLC14_1(100)
trainX <- training[, -ncol(training)]
trainY <- training$y
rec_reg <- recipe(y ~ ., data = training) %>%
step_center(all_predictors()) %>%
step_scale(all_predictors())
testX <- trainX[, -ncol(training)]
testY <- trainX$y
rctrl1 <- trainControl(method = "cv", number = 3, returnResamp = "all", seeds = seeds)
rctrl2 <- trainControl(method = "LOOCV", seeds = seeds)
rctrl3 <- trainControl(method = "oob", seeds = seeds)
rctrl4 <- trainControl(method = "none", seeds = seeds)
rctrlR <- trainControl(method = "cv", number = 3, returnResamp = "all", search = "random")
set.seed(849)
test_reg_cv_model <- train(trainX, trainY,
method = "cforest",
trControl = rctrl1,
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
test_reg_pred <- predict(test_reg_cv_model, testX)
set.seed(849)
test_reg_cv_form <- train(y ~ ., data = training,
method = "cforest",
trControl = rctrl1,
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
test_reg_pred_form <- predict(test_reg_cv_form, testX)
set.seed(849)
test_reg_rand <- train(trainX, trainY,
method = "cforest",
trControl = rctrlR,
tuneLength = 4)
set.seed(849)
test_reg_loo_model <- train(trainX, trainY,
method = "cforest",
trControl = rctrl2,
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
set.seed(849)
test_reg_oob_model <- train(trainX, trainY,
method = "cforest",
trControl = rctrl3,
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
set.seed(849)
test_reg_none_model <- train(trainX, trainY,
method = "cforest",
trControl = rctrl4,
tuneLength = 1,
preProc = c("center", "scale"),
controls = party::cforest_unbiased(ntree = 20))
test_reg_none_pred <- predict(test_reg_none_model, testX)
set.seed(849)
test_reg_rec <- train(x = rec_reg,
data = training,
method = "cforest",
trControl = rctrl1,
controls = party::cforest_unbiased(ntree = 20))
if(
!isTRUE(
all.equal(test_reg_cv_model$results,
test_reg_rec$results))
)
stop("CV weights not giving the same results")
test_reg_imp_rec <- varImp(test_reg_rec)
test_reg_pred_rec <- predict(test_reg_rec, testing[, -ncol(testing)])
#########################################################################
test_class_predictors1 <- predictors(test_class_cv_model)
test_reg_predictors1 <- predictors(test_reg_cv_model)
#########################################################################
test_class_imp <- varImp(test_class_cv_model)
test_class_imp <- varImp(test_reg_cv_model)
#########################################################################
tests <- grep("test_", ls(), fixed = TRUE, value = TRUE)
sInfo <- sessionInfo()
timestamp_end <- Sys.time()
save(list = c(tests, "sInfo", "timestamp", "timestamp_end"),
file = file.path(getwd(), paste(model, ".RData", sep = "")))
if(!interactive())
q("no")