# kolaczyk/sand

Fetching contributors…
Cannot retrieve contributors at this time
316 lines (278 sloc) 7.95 KB
 # SAND with R, chapter6.tex # CHUNK 1 library(sand) data(lazega) A <- get.adjacency(lazega) v.attrs <- get.data.frame(lazega, what="vertices") # CHUNK 2 library(ergm) # Will load package 'network' as well. lazega.s <- network::as.network(as.matrix(A), directed=FALSE) network::set.vertex.attribute(lazega.s, "Office", v.attrs\$Office) network::set.vertex.attribute(lazega.s, "Practice", v.attrs\$Practice) network::set.vertex.attribute(lazega.s, "Gender", v.attrs\$Gender) network::set.vertex.attribute(lazega.s, "Seniority", v.attrs\$Seniority) # CHUNK 3 my.ergm.bern <- formula(lazega.s ~ edges) my.ergm.bern lazega.s ~ edges # CHUNK 4 summary.statistics(my.ergm.bern) # --- ## edges ## 115 # --- # CHUNK 5 my.ergm <- formula(lazega.s ~ edges + kstar(2) + kstar(3) + triangle) summary.statistics(my.ergm) # --- ## edges kstar2 kstar3 triangle ## 115 926 2681 120 # --- # CHUNK 6 my.ergm <- formula(lazega.s ~ edges + gwesp(1, fixed=TRUE)) summary.statistics(my.ergm) # --- ## edges gwesp.fixed.1 ## 115.0000 213.1753 # --- # CHUNK 7 lazega.ergm <- formula(lazega.s ~ edges + gwesp(log(3), fixed=TRUE) + nodemain("Seniority") + nodemain("Practice") + match("Practice") + match("Gender") + match("Office")) # CHUNK 8 set.seed(42) lazega.ergm.fit <- ergm(lazega.ergm) # CHUNK 9 anova.ergm(lazega.ergm.fit) # --- ## Analysis of Variance Table ## ## Model 1: lazega.s ~ edges + gwesp(log(3), fixed = TRUE) + ## nodemain("Seniority") + nodemain("Practice") + ## match("Practice") + match("Gender") + ## match("Office") ## Df Deviance Resid. Df Resid. Dev Pr(>|Chisq|) ## NULL 630 0.00 ## Model 1: 7 -458.86 623 458.86 < 2.2e-16 ## ## NULL ## Model 1: *** ## --- ## Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 # --- # CHUNK 10 summary.ergm(lazega.ergm.fit) # --- ## ========================== ## Summary of model fit ## ========================== ## ## Formula: lazega.s ~ edges + gwesp(log(3), fixed = TRUE) + ## nodemain("Seniority") + nodemain("Practice") + ## match("Practice") + match("Gender") + ## match("Office") ## ## Iterations: 20 ## ## Monte Carlo MLE Results: ## Estimate Std. Error MCMC % ## edges -6.98047 0.72739 0 ## gwesp.fixed.1.09861228866811 0.58967 0.08786 0 ## nodecov.Seniority 0.02442 0.00675 0 ## nodecov.Practice 0.39538 0.11013 0 ## nodematch.Practice 0.76438 0.20055 0 ## nodematch.Gender 0.72110 0.25167 0 ## nodematch.Office 1.16155 0.19498 0 ## p-value ## edges < 1e-04 *** ## gwesp.fixed.1.09861228866811 < 1e-04 *** ## nodecov.Seniority 0.000321 *** ## nodecov.Practice 0.000357 *** ## nodematch.Practice 0.000152 *** ## nodematch.Gender 0.004308 ** ## nodematch.Office < 1e-04 *** ## --- ## Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 ## ## Null Deviance: 873.4 on 630 degrees of freedom ## Residual Deviance: 458.9 on 623 degrees of freedom ## ## AIC: 472.9 BIC: 504 (Smaller is better.) # --- # CHUNK 11 gof.lazega.ergm <- gof(lazega.ergm.fit) # CHUNK 12 par(mfrow=c(1, 3)) plot(gof.lazega.ergm) # CHUNK 13 library(mixer) setSeed(42) fblog.sbm <- mixer(as.matrix(get.adjacency(fblog)), qmin=2, qmax=15) # CHUNK 14 fblog.sbm.output <- getModel(fblog.sbm) names(fblog.sbm.output) # --- ## [1] "q" "criterion" "alphas" "Pis" ## [5] "Taus" # --- # CHUNK 15 fblog.sbm.output\$q # --- ## [1] 12 # --- # CHUNK 16 fblog.sbm.output\$alphas # --- ## [1] 0.15294139 0.13007188 0.12307831 0.05729167 ## [5] 0.13581585 0.03123927 0.09967103 0.09795210 ## [9] 0.01041667 0.02088946 0.12500738 0.01562500 # --- # CHUNK 17 fblog.sbm.output\$Taus[, 1:3] # --- ## [,1] [,2] [,3] ## [1,] 9.999820e-01 9.162358e-04 9.999910e-01 ## [2,] 1.182601e-05 1.000000e-10 5.169635e-07 ## [3,] 4.702876e-06 9.990596e-01 8.427162e-06 ## [4,] 1.000000e-10 1.000000e-10 1.000000e-10 ## [5,] 1.094414e-06 1.000000e-10 5.707788e-09 ## [6,] 1.000000e-10 1.000000e-10 1.000000e-10 ## [7,] 3.451962e-07 2.418009e-05 4.619964e-08 ## [8,] 1.000000e-10 1.000000e-10 1.000000e-10 ## [9,] 1.000000e-10 1.000000e-10 1.000000e-10 ## [10,] 1.000000e-10 1.000000e-10 1.000000e-10 ## [11,] 4.531089e-09 1.000000e-10 1.000000e-10 ## [12,] 1.000000e-10 1.000000e-10 1.000000e-10 # --- # CHUNK 18 my.ent <- function(x) { -sum(x*log(x, 2)) } apply(fblog.sbm.output\$Taus[, 1:3], 2, my.ent) # --- ## [1] 0.0003319527 0.0109735939 0.0001671334 # --- # CHUNK 19 log(fblog.sbm.output\$q, 2) # --- ## [1] 3.584963 # --- # CHUNK 20 summary(apply(fblog.sbm.output\$Taus, 2, my.ent)) # --- ## Min. 1st Qu. Median Mean 3rd Qu. ## 0.0000000 0.0000000 0.0000003 0.0343200 0.0006172 ## Max. ## 1.0100000 # --- # CHUNK 21 plot(fblog.sbm, classes=as.factor(V(fblog)\$PolParty)) # CHUNK 22 summary(lazega) # --- ## IGRAPH UN-- 36 115 -- ## attr: name (v/c), Seniority (v/n), Status (v/n), ## Gender (v/n), Office (v/n), Years (v/n), Age ## (v/n), Practice (v/n), School (v/n) # --- # CHUNK 23 library(eigenmodel) A <- get.adjacency(lazega, sparse=FALSE) lazega.leig.fit1 <- eigenmodel_mcmc(A, R=2, S=11000, burn=10000) # CHUNK 24 same.prac.op <- v.attr.lazega\$Practice %o% v.attr.lazega\$Practice same.prac <- matrix(as.numeric(same.prac.op %in% c(1, 4, 9)), 36, 36) same.prac <- array(same.prac,dim=c(36, 36, 1)) # CHUNK 25 lazega.leig.fit2 <- eigenmodel_mcmc(A, same.prac, R=2, S=11000,burn=10000) # CHUNK 26 same.off.op <- v.attr.lazega\$Office %o% v.attr.lazega\$Office same.off <- matrix(as.numeric(same.off.op %in% c(1, 4, 9)), 36, 36) same.off <- array(same.off,dim=c(36, 36, 1)) lazega.leig.fit3 <- eigenmodel_mcmc(A, same.off, R=2, S=11000, burn=10000) # CHUNK 27 lat.sp.1 <- eigen(lazega.leig.fit1\$ULU_postmean)\$vec[, 1:2] lat.sp.2 <- eigen(lazega.leig.fit2\$ULU_postmean)\$vec[, 1:2] lat.sp.3 <- eigen(lazega.leig.fit3\$ULU_postmean)\$vec[, 1:2] # CHUNK 28 colbar <- c("red", "dodgerblue", "goldenrod") v.colors <- colbar[V(lazega)\$Office] v.shapes <- c("circle", "square")[V(lazega)\$Practice] v.size <- 3.5*sqrt(V(lazega)\$Years) plot(lazega, layout=lat.sp.1, vertex.color=v.colors, vertex.shape=v.shapes, vertex.size=v.size) # CHUNK 29 apply(lazega.leig.fit1\$L_postsamp, 2, mean) # --- ## [1] 0.2603655 1.0384032 # --- apply(lazega.leig.fit2\$L_postsamp, 2, mean) # --- ## [1] 0.8898394 -0.1156671 # --- apply(lazega.leig.fit3\$L_postsamp, 2, mean) # --- ## [1] 0.5970403 0.3112896 # --- # CHUNK 30 perm.index <- sample(1:630) nfolds <- 5 nmiss <- 630/nfolds Avec <- A[lower.tri(A)] Avec.pred1 <- numeric(length(Avec)) # CHUNK 31 for(i in seq(1,nfolds)){ # Index of missing values. miss.index <- seq(((i-1) * nmiss + 1), (i*nmiss), 1) A.miss.index <- perm.index[miss.index] # Fill a new Atemp appropriately with NA's. Avec.temp <- Avec Avec.temp[A.miss.index] <- rep("NA", length(A.miss.index)) Avec.temp <- as.numeric(Avec.temp) Atemp <- matrix(0, 36, 36) Atemp[lower.tri(Atemp)] <- Avec.temp Atemp <- Atemp + t(Atemp) # Now fit model and predict. Y <- Atemp model1.fit <- eigenmodel_mcmc(Y, R=2, S=11000, burn=10000) model1.pred <- model1.fit\$Y_postmean model1.pred.vec <- model1.pred[lower.tri(model1.pred)] Avec.pred1[A.miss.index] <- model1.pred.vec[A.miss.index] } # CHUNK 32 library(ROCR) pred1 <- prediction(Avec.pred1, Avec) perf1 <- performance(pred1, "tpr", "fpr") plot(perf1, col="blue", lwd=3) # CHUNK 33 perf1.auc <- performance(pred1, "auc") slot(perf1.auc, "y.values") # --- ## [[1]] ## [1] 0.8246264 # ---