title: "Logistische Regression"
jupyter: ir
---


In [None]:
#| include: false
# Packages this document depends on.
req_pkg <- c("MedDataSets", "data.table", "performance", "tinyplot")
# Look each package up directly instead of scanning installed.packages():
# that call builds a matrix describing every installed package and is
# documented as the wrong tool for checking a single named package.
# system.file() returns "" for a package that cannot be found.
is_missing <- !nzchar(vapply(
  req_pkg,
  function(pkg) system.file(package = pkg),
  character(1)
))
# Install everything that is missing in one call; skip if nothing is.
if (any(is_missing)) {
  install.packages(req_pkg[is_missing])
}


In [None]:
# Attach the package that ships the example data and load the data set
library(MedDataSets)
data("bdims_tbl_df")

# Columns needed for the analysis
use_cols <- c("age", "wgt", "hgt", "sex")

# Convert to a data.table, keep rows with hgt < 187 and wgt < 100
# (outlier removal), restrict to the relevant columns and drop rows
# with missing values
dataset <- data.table::data.table(bdims_tbl_df)
dataset <- na.omit(
  dataset[hgt < 187 & wgt < 100, .SD, .SDcols = use_cols]
)
dataset[, ("sex") := as.factor(sex)] # store sex as a factor

str(dataset) # overview of the data set


In [None]:
# Show the first 10 rows; head() returns fewer rows for a shorter
# table instead of padding the output with NA rows
head(dataset, 10)


In [None]:
#| out-width: 80%
#| fig-align: center
library(tinyplot)
# Scatter plot with hgt on the y-axis and wgt on the x-axis
tinyplot(hgt ~ wgt, data = dataset)
# Correlation of hgt and wgt, kept for reference (not run):
# cor(dataset$hgt, dataset$wgt) |> round(2) # -> 0.69


In [None]:
# Simple linear regression of hgt on wgt
m1 <- lm(hgt ~ wgt, data = dataset)
# Print the fitted model (call and coefficients)
m1


In [None]:
# Detailed summary of m1 (coefficient table and fit statistics)
summary(m1)


In [None]:
#| out-width: 80%
#| fig-align: center
# Scatter plot of the observed data with a title
tinyplot(hgt ~ wgt, data = dataset, palette = "Okabe-Ito",
         main = "Körpergröße in Abhängigkeit des Körpergewichts")
# Overlay a linear-model fit (type = "lm") on the existing plot
# (add = TRUE draws into the current plot instead of starting a new one)
tinyplot(hgt ~ wgt, data = dataset, palette = "dark3",
         type = "lm", add = TRUE)


In [None]:
# RMSE: residual standard error of m1, i.e. the square root of the
# residual sum of squares divided by the residual degrees of freedom
sigma(m1)


In [None]:
# MSE: squared residual standard error of m1, i.e. the estimated
# residual variance
sigma(m1)^2


In [None]:
#| out-width: 80%
#| fig-align: center
# Residuals of m1, taken from the fitted model object
res1 <- residuals(m1)
# Plot the residuals against the observed heights, both sorted by height
hgt_order <- order(dataset$hgt)
tinyplot(
  dataset$hgt[hgt_order], res1[hgt_order],
  xlab = "Height", ylab = "Residuals"
)


In [None]:
#| out-width: 80%
#| fig-align: center
# Residuals of m1 computed by hand: observed minus predicted height
res2 <- dataset$hgt - predict(m1, dataset)
# Plot the residuals against the observed heights, both sorted by height
hgt_order <- order(dataset$hgt)
tinyplot(
  dataset$hgt[hgt_order], res2[hgt_order],
  xlab = "Height", ylab = "Residuals"
)


In [None]:
#| out-width: 80%
#| fig-align: center
# Linear regression of hgt on wgt with sex as an additional predictor
m2 <- lm(hgt ~ wgt + sex, data = dataset)

# Residuals of m2, plotted against the observed heights (sorted by height)
res3 <- residuals(m2)
hgt_order <- order(dataset$hgt)
tinyplot(
  dataset$hgt[hgt_order], res3[hgt_order],
  xlab = "Height", ylab = "Residuals"
)


In [None]:
# Detailed summary of m2 (coefficient table and fit statistics)
summary(m2)


In [None]:
# Residual sum of squares of m2, computed by hand from the predictions
sum((dataset$hgt - predict(m2, dataset))^2)

# The same quantity as reported for the fitted model
deviance(m2)


In [None]:
# Adding sex gave a slight improvement
# over the first model; for comparison,
# the residual sum of squares of m1:
deviance(m1)


In [None]:
#| out-width: 80%
#| fig-align: center
# Scatter plot of hgt against wgt, grouped by sex; facet = "by" puts
# each group into its own facet
tinyplot(hgt ~ wgt | sex, data = dataset,
  facet = "by", palette = "Okabe-Ito",
  main = "Actual and Predicted Height")
# Overlay a linear-model fit (type = "lm") per group on the existing
# plot (add = TRUE)
tinyplot(hgt ~ wgt | sex, data = dataset,
  facet = "by", palette = "dark3", type = "lm", add = TRUE)


In [None]:
# 95% confidence intervals for the coefficients of m1
confint(m1, level = 0.95)


In [None]:
# Linear model with sex as the only predictor of hgt
m3 <- lm(hgt ~ sex, data = dataset)
# Detailed summary of m3 (coefficient table and fit statistics)
summary(m3)


In [None]:
# Analysis-of-variance table for m3
anova(m3)


In [None]:
# Compare the nested models m1 (hgt ~ wgt) and m2 (hgt ~ wgt + sex)
anova(m1, m2)


In [None]:
# Diagnostic plot for m1, restricted to the "ncv" check
# NOTE(review): "ncv" presumably stands for non-constant variance
# (residuals vs. fitted values) - confirm against the
# performance::check_model() documentation.
# panel = FALSE presumably returns single plots rather than a
# combined panel - confirm.
plot(performance::check_model(
  m1, panel = FALSE, check = "ncv"
))


In [None]:
# Diagnostic plot for m1, restricted to the "qq" check
# NOTE(review): "qq" presumably selects a Q-Q plot of the residuals
# (normality check) - confirm against the
# performance::check_model() documentation.
plot(performance::check_model(
  m1, panel = FALSE, check = "qq"
))