-
Notifications
You must be signed in to change notification settings - Fork 5
/
boston2.R
37 lines (31 loc) · 1.19 KB
/
boston2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
library(MASS) # For Boston
library(aglm)
## Read data
xy <- Boston # xy is a data.frame to be processed.
colnames(xy)[ncol(xy)] <- "y" # Let medv be the objective variable, y.
## Split data into train and test
n <- nrow(xy) # Sample size.
set.seed(2018) # For reproducibility.
test.id <- sample(n, round(n/4)) # ID numbders for test data.
test <- xy[test.id,] # test is the data.frame for testing.
train <- xy[-test.id,] # train is the data.frame for training.
x <- train[-ncol(xy)]
y <- train$y
newx <- test[-ncol(xy)]
y_true <- test$y
## Create bins
bins_list <- make.bins(x[, colnames(x) != "chas"])
bins_names <- colnames(x)[colnames(x) != "chas"]
## Set chas and rad variables as factors
x$chas <- as.factor(x$chas)
x$rad <- as.ordered(x$rad)
newx$chas <- factor(newx$chas, levels=levels(x$chas))
newx$rad <- ordered(newx$rad, levels=levels(x$rad))
## Select the best lambda
lambda.min <- cv.aglm(x, y, bins_list=bins_list, bins_names=bins_names)$lambda.min
cat("lambda.min: ", lambda.min, "\n")
## Predict y for newx
model <- aglm(x, y, lambda=lambda.min, bins_list=bins_list, bins_names=bins_names)
y_pred <- predict(model, newx=newx)
cat("RMSE: ", sqrt(mean((y_true - y_pred)^2)), "\n")
plot(y_pred, y_true)