From 75b7f3453f4fd92bb0a014252071256d3a9be060 Mon Sep 17 00:00:00 2001
From: Thomas Manke <manke@ie-freiburg.mpg.de>
Date: Fri, 5 Apr 2024 15:29:08 +0200
Subject: [PATCH] 05_DataModels.qmd: added plot

---
 qmd/05_DataModels.qmd | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/qmd/05_DataModels.qmd b/qmd/05_DataModels.qmd
index cdefcc5..d9478fe 100644
--- a/qmd/05_DataModels.qmd
+++ b/qmd/05_DataModels.qmd
@@ -167,17 +167,26 @@ anova(fit)
 Determine residual standard error `sigma` for different fits with various complexity
 
 ```{r model_comp}
-fit=lm(Petal.Width ~ Petal.Length, data=iris)
-paste(sigma(fit), deparse(formula(fit)))
-
-fit=lm(Petal.Width ~ Petal.Length + Sepal.Length, data=iris)  # function of more than one variable
-paste(sigma(fit), deparse(formula(fit)))
-
-fit=lm(Petal.Width ~ Species, data=iris)                      # function of categorical variables
-paste(sigma(fit), deparse(formula(fit)))
-
-fit=lm(Petal.Width ~ . , data=iris)                           # function of all other variable (numerical and categorical)
-paste(sigma(fit), deparse(formula(fit)))
+# A list of model formulae of increasing complexity
+formula_list = list(
+  Petal.Width ~ Petal.Length,                 # as before (single variable)
+  Petal.Width ~ Petal.Length + Sepal.Length,  # function of more than one variable
+  Petal.Width ~ Species,                      # function of a categorical variable
+  Petal.Width ~ .                             # function of all other variables (numerical and categorical)
+)
+
+sig = numeric(length(formula_list))   # preallocate: one sigma per formula
+for (i in seq_along(formula_list)) {
+  fit = lm(formula_list[[i]], data=iris)
+  sig[i] = sigma(fit)                 # residual standard error of this fit
+  print(paste(sig[i], format(formula_list[[i]])))
+}
+
+# more concise alternative using lapply/sapply
+# sig = sapply(lapply(formula_list, lm, data=iris), sigma)
+
+par(mar=c(4,20,2,2))  # wide left margin so the formula labels fit
+barplot(sig ~ format(formula_list), horiz=TRUE, las=2, ylab="", xlab="sigma")
 ```
 
 ... more complex models tend to have smaller residual standard error (overfitting?)