#### LICENSE
These notes are released under the 
"Creative Commons Attribution-ShareAlike 4.0 International" license. 
See the **human-readable version** [here](https://creativecommons.org/licenses/by-sa/4.0/)
and the **real thing** [here](https://creativecommons.org/licenses/by-sa/4.0/legalcode). 

## Ridge regression 

### Standardization of responses and explanatory variables

The ridge penalty does not include the intercept. The effective amount of
regularization depends on the scale of the covariates (because the magnitude
of each regression coefficient depends on the "size", i.e. the units, of its feature).

We need to standardize the input variables. Otherwise, the results may be
determined by the size of one of the explanatory variables (which is 
arbitrary and can be modified without changing the model). 


In [None]:
library(glmnet)
data(alcohol, package = "robustbase")

# Build the design matrix for the full linear model and drop its first
# column (the intercept generated by model.matrix()).
x <- model.matrix(logSolubility ~ ., data = alcohol)
x <- x[, -1]
y <- alcohol$logSolubility
# Grid of 200 candidate penalties, equally spaced on the log scale from
# exp(-20) to exp(10). `length.out` is spelled out in full rather than
# relying on partial matching of `length`.
lambdas <- exp(seq(-20, 10, length.out = 200))

### w/o standardization
# Ridge fit (alpha = 0) with the covariates left in their original units.
# The seed is reset before each cv.glmnet() call so both fits use the same
# random cross-validation folds and differ only through the scale of x.
set.seed(123)
a <- cv.glmnet(
  x = x, y = y, family = "gaussian", alpha = 0,
  lambda = lambdas, standardize = FALSE
)
# Change the units of the 5th covariate: the underlying linear model is the
# same, but without standardization the penalized fit is not.
x[, 5] <- x[, 5] / 100000
set.seed(123)
b <- cv.glmnet(
  x = x, y = y, family = "gaussian", alpha = 0,
  lambda = lambdas, standardize = FALSE
)
# round(cbind(coef(b, s = 'lambda.1se'), coef(b, s='lambda.min')), 4)
# Coefficients at the CV-optimal penalty: original units (a) vs rescaled (b).
round(cbind(coef(a, s = "lambda.min"), coef(b, s = "lambda.min")), 4)
# Selected penalties for the two fits.
c(a$lambda.min, b$lambda.min)


### w/standardization
# Rebuild the design matrix from the data so that the 5th covariate is back
# in its original units (x was rescaled in place above).
x <- model.matrix(logSolubility ~ ., data = alcohol)
x <- x[, -1]
y <- alcohol$logSolubility
# Same penalty grid as before; `length.out` is spelled out in full rather
# than relying on partial matching of `length`.
lambdas <- exp(seq(-20, 10, length.out = 200))
# Ridge fit (alpha = 0) letting glmnet standardize the covariates internally.
# The seed is reset before each fit so the cross-validation folds coincide.
set.seed(123)
a2 <- cv.glmnet(
  x = x, y = y, family = "gaussian", alpha = 0,
  lambda = lambdas, standardize = TRUE
)
# Same change of units as in the unstandardized experiment.
x[, 5] <- x[, 5] / 100000
set.seed(123)
b2 <- cv.glmnet(
  x = x, y = y, family = "gaussian", alpha = 0,
  lambda = lambdas, standardize = TRUE
)
# Compare coefficients at the CV-optimal penalty: standardized fits on the
# original (a2) and rescaled (b2) covariates, next to the unstandardized fit
# on the rescaled covariates (b).
round(
  cbind(
    coef(a2, s = "lambda.min"),
    coef(b2, s = "lambda.min"),
    coef(b, s = "lambda.min")
  ),
  4
)
# Selected penalties for the same three fits.
c(a2$lambda.min, b2$lambda.min, b$lambda.min)
# Cross-validation curves for all four fits.
plot(a)
plot(b)
plot(a2)
plot(b2)