Chapter 2 - Gaussian Bayesian Networks - learning parameters from data

The crop data <br>
G = genetic potential <br>
E = environmental potential <br>
V = vegetative mass <br>
N = number of seeds (@ flowering time) <br>
W = average weight (later in the plant's life) <br>
C = crop <br>

$G\sim N(50, 10^2)$ <br>
$E \sim N(50, 10^2)$ <br>
$V \mid G = g,E=e \sim N(-10.35 + 0.5g+0.70e, 5^2)$ <br>
$N \mid V = v \sim N(45 + 0.1v, 9.95^2)$<br>
$W \mid V = v \sim N(15 + 0.7v, 7.14^2)$<br>
$C \mid N = n, W = w \sim N(0.3n + 0.7w, 6.25^2)$<br>

In [None]:
library(bnlearn)
library(graph)
library(rbmn)  #install.packages("rbmn")

# Hand-specified crop network (parameterization carried over from Lab 7).
# Structure, in bnlearn's model-string notation: G and E have no parents,
# V has parents G and E, N and W each have parent V, C has parents N and W.
dag.bnlearn <- model2network("[G][E][V|G:E][N|V][W|V][C|N:W]")

# One local distribution per node: regression coefficients (intercept plus
# one slope per parent) and the residual standard deviation.
disE <- list(
  coef = c("(Intercept)" = 50),
  sd = 10
)
disG <- list(
  coef = c("(Intercept)" = 50),
  sd = 10
)
disV <- list(
  coef = c("(Intercept)" = -10.35, E = 0.7, G = 0.5),
  sd = 5
)
disN <- list(
  coef = c("(Intercept)" = 45, V = 0.1),
  sd = 9.95
)
disW <- list(
  coef = c("(Intercept)" = 15, V = 0.7),
  sd = 7.14
)
disC <- list(
  coef = c("(Intercept)" = 0, N = 0.3, W = 0.7),
  sd = 6.25
)

# Collect the distributions under their node names and attach them to the DAG.
dis.list <- list(
  E = disE,
  G = disG,
  V = disV,
  N = disN,
  W = disW,
  C = disC
)

gbn.bnlearn <- custom.fit(dag.bnlearn, dist = dis.list)

In [None]:
# Convert the bnlearn object into rbmn's representations, following the
# book. The three conversions are chained: each call consumes the result of
# the previous one.

#class(gbn.bnlearn)
# bnlearn fit -> rbmn "nbn" object
# NOTE(review): "nbn" is presumably rbmn's node-by-node normal BN form -- confirm
gbn.rbmn <- bnfit2nbn(gbn.bnlearn)
# "nbn" -> "gema" object
# NOTE(review): presumably the generating-matrices form -- confirm in rbmn docs
gema.rbmn <- nbn2gema(gbn.rbmn)
# "gema" -> "mn" object
# NOTE(review): presumably the joint multivariate normal (mean and covariance)
mn.rbmn <- gema2mn(gema.rbmn)

In [None]:
# We have a BN that is parameterized -- we should be able to
# simulate data from the distributions.
# Fix the RNG seed so the simulated samples are reproducible. The three draws
# below share one random stream, so their order determines their contents.
set.seed(123)
cropdata0 <- rbn(gbn.bnlearn, n = 10) # very small sample size
cropdata1 <- rbn(gbn.bnlearn, n = 100) # moderate size
cropdata2 <- rbn(gbn.bnlearn, n = 500) # larger (but not large)

In [None]:
# Estimate the parameters of the network from simulated data, keeping the
# DAG fixed. As an aside on the effect of sample size, the same structure is
# fitted to the small, moderate and larger samples.
est.para.small <- bn.fit(dag.bnlearn, data = cropdata0)
est.para <- bn.fit(dag.bnlearn, data = cropdata1)
est.para.large <- bn.fit(dag.bnlearn, data = cropdata2)

# Inspect node C: the "true" parameterization first, then the estimates from
# the small, moderate and larger samples.
# (Print est.para on its own to see the estimates for every node.)
gbn.bnlearn[["C"]]
est.para.small[["C"]]
est.para[["C"]]
est.para.large[["C"]]

In [None]:
# Draw the structure of the crop network held in dag.bnlearn.
plot(dag.bnlearn)

In [None]:
# Estimate the same parameters with ordinary linear regression: regress each
# node on its parents, using the full data frame (the book subsets it to the
# columns N, W and C first).
lmC <- lm(formula = C ~ N + W, data = cropdata1)
lmV <- lm(formula = V ~ E + G, data = cropdata1)

# Show each regression next to the bn.fit estimate for the same node.
lmC
est.para[["C"]]

lmV
est.para[["V"]]


In [None]:
# Pull individual pieces of information out of the fitted linear model for C:
# the coefficient estimates and their confidence intervals (default level).
lmC <- lm(formula = C ~ N + W, data = cropdata1)
coef(object = lmC)
confint(object = lmC)

# An alternative way of estimating C: drop the intercept term from the model.
lm(formula = C ~ N + W - 1, data = cropdata1)