-
Notifications
You must be signed in to change notification settings - Fork 2
/
rr.jl
142 lines (129 loc) · 3.92 KB
/
rr.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
    rr(X, Y; kwargs...)
    rr(X, Y, weights::Weight; kwargs...)
    rr!(X::Matrix, Y::Matrix, weights::Weight; kwargs...)
Ridge regression (RR) implemented by SVD factorization.
* `X` : X-data (n, p).
* `Y` : Y-data (n, q).
* `weights` : Weights (n) of the observations.
    Must be of type `Weight` (see e.g. function `mweight`).
Keyword arguments:
* `lb` : Ridge regularization parameter "lambda".
* `scal` : Boolean. If `true`, each column of `X`
    is scaled by its uncorrected standard deviation.
## References
Cule, E., De Iorio, M., 2012. A semi-automatic method
to guide the choice of ridge parameter in ridge regression.
arXiv:1205.0686.
Hastie, T., Tibshirani, R., 2004. Efficient quadratic
regularization for expression arrays. Biostatistics 5, 329-340.
https://doi.org/10.1093/biostatistics/kxh010
Hastie, T., Tibshirani, R., Friedman, J., 2009. The
elements of statistical learning: data mining,
inference, and prediction, 2nd ed. Springer, New York.
Hoerl, A.E., Kennard, R.W., 1970. Ridge Regression: Biased
Estimation for Nonorthogonal Problems. Technometrics 12, 55-67.
https://doi.org/10.1080/00401706.1970.10488634
## Examples
```julia
using JchemoData, JLD2, CairoMakie
path_jdat = dirname(dirname(pathof(JchemoData)))
db = joinpath(path_jdat, "data/cassav.jld2")
@load db dat
pnames(dat)
X = dat.X
y = dat.Y.tbc
year = dat.Y.year
tab(year)
s = year .<= 2012
Xtrain = X[s, :]
ytrain = y[s]
Xtest = rmrow(X, s)
ytest = rmrow(y, s)
lb = 1e-3
mod = model(rr; lb)
#mod = model(rrchol; lb)
fit!(mod, Xtrain, ytrain)
pnames(mod)
pnames(mod.fm)
coef(mod)
res = predict(mod, Xtest)
@head res.pred
@show rmsep(res.pred, ytest)
plotxy(res.pred, ytest; color = (:red, .5), bisect = true, xlabel = "Prediction",
    ylabel = "Observed").f
## Only for function 'rr' (not for 'rrchol')
coef(mod; lb = 1e-1)
res = predict(mod, Xtest; lb = [.1 ; .01])
@head res.pred[1]
@head res.pred[2]
```
"""
function rr(X, Y; kwargs...)
    ## Convenience method: build uniform observation weights
    ## (each of the n rows gets weight 1/n via `mweight`) and
    ## delegate to the weighted method.
    T = eltype(X[1, 1])
    n = nro(X)
    uniform = mweight(ones(T, n))
    rr(X, Y, uniform; kwargs...)
end
function rr(X, Y, weights::Weight; kwargs...)
    ## Copying front-end for `rr!`: coerce inputs to matrices and
    ## duplicate them so the caller's data are left untouched.
    Xc = copy(ensure_mat(X))
    Yc = copy(ensure_mat(Y))
    rr!(Xc, Yc, weights; kwargs...)
end
function rr!(X::Matrix, Y::Matrix, weights::Weight; kwargs...)
    ## In-place fit: X and Y are centered (and optionally scaled)
    ## in place, then the SVD of the weighted X is computed.
    par = recovkwargs(Par, kwargs)
    T = eltype(X)
    nvar = nco(X)
    wsqrt = sqrt.(weights.w)
    ## Weighted column means must be taken before centering/scaling.
    xmeans = colmean(X, weights)
    ymeans = colmean(Y, weights)
    xscales = ones(T, nvar)
    if par.scal
        xscales .= colstd(X, weights)
        fcscale!(X, xmeans, xscales)
    else
        fcenter!(X, xmeans)
    end
    fcenter!(Y, ymeans)
    D = Diagonal(wsqrt)
    ## SVD of the weighted, centered X:  D * X = U * S * V'
    usv = LinearAlgebra.svd!(D * X)
    sv = usv.S
    ## Cross-product T' D Y with scores T = D X V = U S, i.e. S U' D Y.
    TtDY = Diagonal(sv) * usv.U' * (D * Y)
    Rr(usv.V, TtDY, sv, xmeans, xscales, ymeans, weights, kwargs, par)
end
"""
    coef(object::Rr; lb = nothing)
Compute the b-coefficients of a fitted model.
* `object` : The fitted model.
* `lb` : Ridge regularization parameter
    "lambda".
"""
function coef(object::Rr; lb = nothing)
    ## Fall back on the lambda used at fitting time.
    if isnothing(lb)
        lb = object.par.lb
    end
    lb = convert(eltype(object.sv), lb)
    ## Eigenvalues of X'DX are the squared singular values.
    eig = object.sv.^2
    ## Ridge shrinkage factors 1 / (s_i^2 + lb^2).
    shrink = 1 ./ (eig .+ lb^2)
    beta = Diagonal(shrink) * object.TtDY
    ## Back-transform to the original (possibly scaled) X space.
    B = Diagonal(1 ./ object.xscales) * object.V * beta
    int = object.ymeans' .- object.xmeans' * B
    ## Model df = 1 (intercept) + trace of the smoother matrix.
    df = 1 + sum(eig .* shrink)
    (B = B, int = int, df = df)
end
"""
    predict(object::Rr, X; lb = nothing)
Compute Y-predictions from a fitted model.
* `object` : The fitted model.
* `X` : X-data for which predictions are computed.
* `lb` : Regularization parameter, or collection of
    regularization parameters, "lambda" to consider.
"""
function predict(object::Rr, X; lb = nothing)
    X = ensure_mat(X)
    T = eltype(X)
    ## Default to the lambda used at fitting time.
    if isnothing(lb)
        lb = object.par.lb
    end
    ## `lb` may be a scalar or a collection: one prediction matrix
    ## is produced per lambda value (scalars have length 1 in Julia).
    nlb = length(lb)
    pred = list(Matrix{T}, nlb)
    @inbounds for i = 1:nlb
        res = coef(object; lb = lb[i])
        pred[i] = res.int .+ X * res.B
    end
    ## With a single lambda, unwrap the list to a plain matrix.
    if nlb == 1
        pred = pred[1]
    end
    (pred = pred,)
end