In [None]:
# Input configuration consumed by the read.csv() call below.
file_path  <- "./DatosControlRL2022-1.csv"  # path to the CSV file, relative to the working directory
has_header <- TRUE                           # passed as read.csv(header = ...)
separator  <- ","                            # passed as read.csv(sep = ...)

library(lmtest)
library(car)

# Load the dataset into a data frame using the configured path, header flag
# and field separator.
datos_df <- read.csv(
    file   = file_path,
    header = has_header,
    sep    = separator
)

# Quick inspection: column names (C), row count (F), first rows and a
# per-column summary.
cat("C =", names(datos_df), "\n")
cat("F =", dim(datos_df)[1], "\n")
head(datos_df)
summary(datos_df)

In [None]:
# Regression setup: names of the predictor (x) and response (y) columns and
# the significance level used for critical values and interval widths later
# in the file.
x_label <- "youtube"
y_label <- "nota"
alpha   <- 0.05

# Fail fast with a clear message when a configured column is absent;
# otherwise `[[` silently yields NULL and the problem only surfaces later.
missing_cols <- setdiff(c(x_label, y_label), colnames(datos_df))
if (length(missing_cols) > 0) {
    stop(
        "Column(s) not found in datos_df: ",
        paste(missing_cols, collapse = ", "),
        call. = FALSE
    )
}

# NOTE(review): n counts every row of datos_df. If either column contains
# NA, lm() would typically drop those rows and n - 2 would no longer match
# the model's residual degrees of freedom -- confirm the data has no NAs.
n <- nrow(datos_df)
x <- datos_df[[x_label]]
y <- datos_df[[y_label]]

# The formula is assembled from the labels so the column choice lives in one
# place. The call text is kept as-is because it is echoed in the model's
# printed "Call".
lmodel <- lm(as.formula(paste(y_label, "~", x_label)), data = datos_df)
lmodel

In [None]:
# Simple linear regression of y on x computed directly from sums of squares.
x_mean <- mean(x)
y_mean <- mean(y)
sxx    <- sum((x - x_mean)^2)               # sum of squares of x about its mean
syy    <- sum((y - y_mean)^2)               # sum of squares of y about its mean
sxy    <- sum((x - x_mean) * (y - y_mean))  # cross-product sum
b1     <- sxy / sxx                         # slope
b0     <- y_mean - b1 * x_mean              # intercept
y_hat  <- b0 + b1 * x                       # fitted values
scr    <- sum((y_hat - y_mean)^2)           # regression sum of squares
sce    <- sum((y - y_hat)^2)                # residual (error) sum of squares
sct    <- sum((y - y_mean)^2)               # total sum of squares (same value as syy)
r      <- sxy / sqrt(sxx * syy)             # correlation coefficient (keeps the slope's sign)
r2     <- scr / sct                         # coefficient of determination
# F statistic: regression mean square (1 df) over residual mean square.
# The residual degrees of freedom come from the actual sample size n rather
# than a hard-coded 20, so the value is correct for any dataset.
f      <- (scr / 1) / (sce / (n - 2))

cat(y_label, "v/s", x_label, "\n")
cat("b0 =", b0, "\n")
cat("b1 =", b1, "\n")
cat("r  =", r, "\n")
cat("r2 =", r2, "\n")
cat("f  =", f, "\n")

In [None]:
# Extract the regression quantities from the fitted lm object. The names
# b0, b1, r, r2 and f are also assigned by the manual computation earlier in
# the file; after this cell they hold the lm-derived values.
lmodel_s <- summary(lmodel)
lmodel_r <- lmodel$residuals

b0 <- coef(lmodel)[[1]]   # intercept
b1 <- coef(lmodel)[[2]]   # slope
r2 <- lmodel_s$r.squared
# sqrt(r2) alone is never negative, which would report a positive
# correlation even for a negative slope. In simple linear regression r has
# the same sign as the slope, so restore it here.
r  <- sign(b1) * sqrt(r2)
f  <- lmodel_s$fstatistic["value"]

lmodel_s

In [None]:
# Analysis of variance for the fitted model.
lmodel_av   <- aov(lmodel)
lmodel_av_s <- summary(lmodel_av)

av_s <- lmodel_av_s[[1]][["F value"]][1]  # F statistic from the first row of the table
av_p <- lmodel_av_s[[1]][["Pr(>F)"]][1]   # its p-value
av_f <- qf(1 - alpha, 1, n - 2)           # critical F at level alpha with (1, n - 2) df
# TRUE when the p-value is at least alpha. The configured alpha is used
# (instead of a literal 0.05) so the flag stays consistent with av_f.
av_t <- av_p >= alpha

lmodel_av_s

In [None]:
# Durbin-Watson test on the fitted model (dwtest comes from the lmtest
# package loaded at the top of the file), called with its default arguments.
lmodel_dw <- dwtest(lmodel)

dw_s <- lmodel_dw$statistic
dw_p <- lmodel_dw$p.value
# TRUE when the p-value is at least alpha; uses the configured alpha rather
# than a literal 0.05 so every test in the file shares one threshold.
dw_t <- dw_p >= alpha

lmodel_dw

In [None]:
# One-sample Kolmogorov-Smirnov test of the residuals against a normal
# distribution whose mean and standard deviation are taken from the
# residuals themselves.
# NOTE(review): the ks.test documentation states that the distribution
# parameters must be pre-specified, not estimated from the data -- confirm
# the reported p-value is acceptable for this use.
lmodel_ks <- ks.test(
    lmodel_r,
    "pnorm",
    mean = mean(lmodel_r),
    sd   = sd(lmodel_r)
)

# Look up the critical value that the KS statistic is compared against.
#
# The tabulated constants appear to be the Kolmogorov-Smirnov critical values
# at the 0.05 significance level (assumption -- confirm against the table
# used by the course).
#
# n: a single sample size.
# Returns the critical value for n equal to 10, 15 or 20, the large-sample
# approximation 1.36 / sqrt(n) for n >= 50, and NA (with a warning) for any
# other n. Previously such an n failed with "object 'result' not found".
dn <- function(n) {
    stopifnot(is.numeric(n), length(n) == 1L, !is.na(n))

    if (n == 10) {
        result <- 0.41
    } else if (n == 15) {
        result <- 0.34
    } else if (n == 20) {
        result <- 0.29
    } else if (n >= 50) {
        result <- 1.36 / sqrt(n)
    } else {
        warning(
            "dn(): no tabulated critical value for n = ", n, "; returning NA",
            call. = FALSE
        )
        result <- NA_real_
    }
    return(result)
}

# Collect the KS statistic, its p-value and the tabulated critical value.
ks_s <- lmodel_ks$statistic
ks_p <- lmodel_ks$p.value
# NOTE(review): dn() returns fixed constants that do not depend on alpha, so
# ks_d presumably matches only alpha = 0.05 -- confirm before changing alpha.
ks_d <- dn(n)
# TRUE when the p-value is at least alpha; uses the configured alpha rather
# than a literal 0.05 so every test in the file shares one threshold.
ks_t <- ks_p >= alpha

lmodel_ks

In [None]:
# Text report: model coefficients, correlation, and the statistic / reference
# value / p-value / decision flag for each test.
cat(y_label, "v/s", x_label, "\n")
cat("b0 =", b0, "\n")
cat("b1 =", b1, "\n")
cat("r  =", r, "\n")
cat("r2 =", r2, "\n")
cat("AV =", av_s, "\n")
cat(" F =", av_f, "\n")
cat(" p =", av_p, av_t, "\n")
cat("DW =", dw_s, "\n")
cat(" p =", dw_p, dw_t, "\n")
cat("KS =", ks_s, "\n")
cat(" D =", ks_d, "\n")
cat(" p =", ks_p, ks_t, "\n")

# Scatter plot: observations in blue, fitted values in red, dashed vertical
# segments joining each observation to its fitted value, and the fitted line.
plot(
    x = x, y = y,
    pch = 16, col = "blue",
    xlab = x_label, ylab = y_label
)
points(x = x, y = y_hat, pch = 16, col = "red")
segments(x0 = x, y0 = y_hat, x1 = x, y1 = y, col = "black", lty = 2)
abline(reg = lmodel, col = "red")

In [None]:
# Confidence intervals for the intercept and slope at level 1 - alpha, built
# from the residual variance and the t quantile with n - 2 degrees of freedom.
errs   <- y - y_hat
var    <- sum(errs^2) / (n - 2)            # residual variance estimate
b0_var <- (var * sum(x^2)) / (n * sxx)     # variance of the intercept estimate
b1_var <- var / sxx                        # variance of the slope estimate

# The same t quantile and the two standard errors serve all four bounds.
t_crit <- qt(1 - (alpha / 2), n - 2)
b0_se  <- sqrt(b0_var)
b1_se  <- sqrt(b1_var)

b0_inf <- b0 - t_crit * b0_se
b0_sup <- b0 + t_crit * b0_se
b1_inf <- b1 - t_crit * b1_se
b1_sup <- b1 + t_crit * b1_se

cat("b0_c =", b0, "+-", (b0 - b0_inf), "=> [" , b0_inf, ",", b0_sup, "]\n")
cat("b1_c =", b1, "+-", (b1 - b1_inf), "=> [" , b1_inf, ",", b1_sup, "]\n")

In [None]:
# Point prediction of the response at predictor value 30.
# The new-data column must carry the model's predictor name (x_label); the
# previous hard-coded column name did not match the fitted formula, so
# predict() could not find the predictor.
# NOTE(review): the value 30 is kept from the original code -- confirm it is
# a sensible value for the current predictor.
y_p <- predict(lmodel, setNames(data.frame(30), x_label))

cat("y_p =", y_p, "\n")

In [None]:
# Interval estimate of the response at a single predictor value.
# The predictor column is addressed through x_label (instead of a hard-coded
# column name) so this cell follows the model configured earlier in the file.
x_new <- 1

# Rows of the dataset whose predictor equals x_new; which() drops NA
# comparisons, matching subset()'s behaviour.
found <- datos_df[which(datos_df[[x_label]] == x_new), , drop = FALSE]

# Use a confidence interval when x_new occurs in the data and a prediction
# interval otherwise.
# NOTE(review): this selection rule is kept from the original code -- confirm
# it matches the intended methodology.
conf_mode <- if (nrow(found) == 0) "prediction" else "confidence"
conf_interv <- predict(
    lmodel,
    setNames(data.frame(x_new), x_label),
    interval = conf_mode,
    level = 1 - alpha  # same level as the default 0.95 when alpha = 0.05
)

y_p   <- conf_interv[1]  # fitted value
y_inf <- conf_interv[2]  # lower bound
y_sup <- conf_interv[3]  # upper bound

found
conf_interv
cat("y_c =", y_p, "+-", (y_p - y_inf), "=> [", y_inf, ",", y_sup, "] (", conf_mode, ")\n")