# 5.3.1 The Validation Set Approach

In [1]:
library(ISLR2)

In [2]:
# Fix the RNG state so the random split drawn below is reproducible.
set.seed(seed = 1)

In [3]:
# Draw 196 distinct integers from 1:392; they are used as training row indices.
train <- sample(x = 392, size = 196)

In [4]:
# Simple linear regression of mpg on horsepower, fitted on the training rows only.
lm.fit <- lm(formula = mpg ~ horsepower, data = Auto, subset = train)

In [5]:
# `attach(Auto)` is deliberately not called: attaching a data frame puts its
# columns on the search path, where they can be silently masked by (or mask)
# other objects. The response is referenced explicitly as `Auto$mpg` instead.

In [6]:
# Validation-set MSE of the linear fit: squared prediction errors averaged
# over the rows that were NOT used for training (`-train`).
mean((Auto$mpg - predict(lm.fit, Auto))[-train]^2)

In [7]:
# Same split, quadratic polynomial in horsepower.
lm.fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto,
              subset = train)

In [8]:
mean((Auto$mpg - predict(lm.fit2, Auto))[-train]^2)

In [9]:
# Same split, cubic polynomial in horsepower.
lm.fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)

In [10]:
mean((Auto$mpg - predict(lm.fit3, Auto))[-train]^2)

In [11]:
# Repeat the whole comparison with a different seed, i.e. a different
# random training/validation split.
set.seed(2)

In [12]:
train <- sample(392, 196)

In [13]:
lm.fit <- lm(mpg ~ horsepower, data = Auto, subset = train)

In [14]:
mean((Auto$mpg - predict(lm.fit, Auto))[-train]^2)

In [15]:
lm.fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto,
              subset = train)

In [16]:
mean((Auto$mpg - predict(lm.fit2, Auto))[-train]^2)

In [17]:
lm.fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)

In [18]:
mean((Auto$mpg - predict(lm.fit3, Auto))[-train]^2)

# 5.3.2 Leave-One-Out Cross-Validation (LOOCV)

In [19]:
# glm() called without a `family` argument, so its default family is used.
glm.fit <- glm(formula = mpg ~ horsepower, data = Auto)

In [20]:
coef(glm.fit)

In [21]:
# The same model fitted with lm(), for comparison of the coefficients.
lm.fit <- lm(formula = mpg ~ horsepower, data = Auto)

In [22]:
coef(lm.fit)

In [23]:
library(boot)

In [24]:
glm.fit <- glm(formula = mpg ~ horsepower, data = Auto)

In [25]:
# `K` is not supplied, so cv.glm() uses its default number of folds.
cv.err <- cv.glm(data = Auto, glmfit = glm.fit)

In [26]:
# The cross-validation estimates of prediction error stored on the result.
cv.err[["delta"]]

In [27]:
rep(0, 10)

In [29]:
# Preallocate one slot per polynomial degree (ten double zeros, same value
# as rep(0, 10)).
cv.error <- numeric(10)

In [30]:
# For each polynomial degree 1..10, refit on the full data and keep the
# first component of cv.glm()'s `delta`.
for (i in seq_len(10)) {
  glm.fit <- glm(formula = mpg ~ poly(horsepower, i), data = Auto)
  cv.error[i] <- cv.glm(data = Auto, glmfit = glm.fit)$delta[1]
}
cv.error

# 5.3.3 k-Fold Cross-Validation

In [31]:
# Fix the RNG state so the results of the cv.glm() calls below are reproducible.
set.seed(seed = 17)

In [32]:
# Preallocate one slot per polynomial degree.
cv.error.10 <- numeric(10)

In [33]:
# 10-fold cross-validation for polynomial degrees 1..10; the first
# component of `delta` is stored for each degree.
for (i in seq_len(10)) {
  glm.fit <- glm(formula = mpg ~ poly(horsepower, i), data = Auto)
  cv.error.10[i] <- cv.glm(data = Auto, glmfit = glm.fit, K = 10)$delta[1]
}
cv.error.10

# 5.3.4 The Bootstrap

In [36]:
# Statistic function for boot(): computes
#   (var(Y) - cov(X, Y)) / (var(X) + var(Y) - 2 * cov(X, Y))
# from the `X` and `Y` columns of `data`, restricted to the rows in `index`.
#
# data:  object with components `X` and `Y` (accessed via `$`).
# index: vector of row positions selecting the observations to use.
# Returns a single numeric value.
alpha.fn <- function(data, index) {
  x_sel <- data$X[index]
  y_sel <- data$Y[index]
  # The covariance appears in both numerator and denominator; compute it once.
  cov_xy <- cov(x_sel, y_sel)
  var_y <- var(y_sel)
  numerator <- var_y - cov_xy
  denominator <- var(x_sel) + var_y - 2 * cov_xy
  numerator / denominator
}

In [37]:
# Statistic evaluated on rows 1 through 100 in their original order.
alpha.fn(Portfolio, 1:100)

In [38]:
set.seed(7)

In [39]:
# One bootstrap replicate: 100 indices drawn from 1:100 with replacement.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
alpha.fn(Portfolio, sample(100, 100, replace = TRUE))

In [40]:
# Automate the resampling: 1000 bootstrap replicates of alpha.fn.
boot(Portfolio, alpha.fn, R = 1000)


ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Portfolio, statistic = alpha.fn, R = 1000)


Bootstrap Statistics :
     original       bias    std. error
t1* 0.5758321 0.0007959475  0.08969074

In [43]:
# Statistic function for boot(): fits mpg ~ horsepower by least squares on
# the rows of `data` selected by `index` and returns the fitted coefficients.
#
# data:  data frame with `mpg` and `horsepower` columns.
# index: vector of row positions passed to lm() as `subset`.
# Returns the named coefficient vector from coef().
boot.fn <- function(data, index) {
  fit <- lm(mpg ~ horsepower, data = data, subset = index)
  coef(fit)
}
# Coefficients from rows 1 through 392 in their original order.
boot.fn(Auto, 1:392)

In [44]:
set.seed(1)

In [45]:
# One bootstrap replicate: 392 indices drawn from 1:392 with replacement.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
boot.fn(Auto, sample(392, 392, replace = TRUE))

In [48]:
# A second, independent replicate (the RNG state has advanced).
boot.fn(Auto, sample(392, 392, replace = TRUE))

In [49]:
# 1000 bootstrap replicates of the coefficient estimates.
boot(Auto, boot.fn, R = 1000)


ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
      original        bias    std. error
t1* 39.9358610  0.0522971635  0.84001614
t2* -0.1578447 -0.0006027039  0.00733243

In [50]:
# Coefficient table from the model summary. The component is named in full
# (`coefficients`) rather than relying on `$` partial matching of `coef`.
summary(lm(mpg ~ horsepower, data = Auto))$coefficients

Unnamed: 0,Estimate,Std. Error,t value,Pr(>|t|)
(Intercept),39.935861,0.717498656,55.65984,1.2203619999999999e-187
horsepower,-0.1578447,0.006445501,-24.48914,7.031989000000001e-81


In [51]:
# Statistic function for boot(): fits the quadratic model
# mpg ~ horsepower + I(horsepower^2) on the rows of `data` selected by
# `index` and returns the fitted coefficients.
#
# data:  data frame with `mpg` and `horsepower` columns.
# index: vector of row positions passed to lm() as `subset`.
# Returns the named coefficient vector from coef().
boot.fn <- function(data, index) {
  quad_fit <- lm(mpg ~ horsepower + I(horsepower^2), data = data, subset = index)
  coef(quad_fit)
}

In [52]:
# Fix the RNG state so the bootstrap resamples are reproducible.
set.seed(seed = 1)

In [53]:
# 1000 bootstrap replicates of the quadratic-model coefficients.
boot(data = Auto, statistic = boot.fn, R = 1000)


ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
        original        bias     std. error
t1* 56.900099702  3.511640e-02 2.0300222526
t2* -0.466189630 -7.080834e-04 0.0324241984
t3*  0.001230536  2.840324e-06 0.0001172164

In [54]:
# Coefficient table from the quadratic model's summary. The component is
# named in full (`coefficients`) rather than relying on `$` partial matching
# of `coef`.
summary(
  lm(mpg ~ horsepower + I(horsepower^2), data = Auto)
)$coefficients

Unnamed: 0,Estimate,Std. Error,t value,Pr(>|t|)
(Intercept),56.900099702,1.8004268063,31.60367,1.740911e-109
horsepower,-0.46618963,0.0311246171,-14.97816,2.289429e-40
I(horsepower^2),0.001230536,0.0001220759,10.08009,2.19634e-21
