Add blockmodel vignette once requisite models have been implemented #14

alexpghayes · 2021-02-19T23:20:40Z

---
title: "extended-blockmodel-examples"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{extended-blockmodel-examples}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

library(fastRG)

Some blockmodel examples

WARNING: content below here is rough!

K <- 10
n <- 500
pi <- rexp(K) + 1
pi <- pi / sum(pi) * 3
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
theta <- rexp(n)

dcsbm <- dcsbm(theta = theta, pi = pi, B = B, expected_degree = 50)
A <- sample_sparse(dcsbm)
A
mean(rowSums(A))  # average degree

If we remove multiple edges, the expected_degree parameter is not trustworthy:

bernoulli_dcsbm <- dcsbm(theta = theta, pi = pi, B = B, expected_degree = 50)
A <- sample_sparse(bernoulli_dcsbm, poisson_edges = FALSE)
mean(rowSums(A))

but it is a good upper bound when the graph is sparse:

n <- 10000
A <- dcsbm(theta = rexp(n), pi = pi, B = B, expected_degree = 50, poisson_edges = FALSE)
mean(rowSums(A))

This draws a 100 x 100 adjacency matrix from each model. Each image might take around 5 seconds to render.

# helper to plot adjacency matrices

plot_matrix <- function(A) {
  image(as.matrix(A), col = grey(seq(1, 0, len = 20)))
}

K <- 10
n <- 100
pi <- rexp(K) + 1
pi <- pi / sum(pi) * 3
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
theta <- rexp(n)
A <- sample_sparse(dcsbm(theta = theta, pi = pi, B = B, expected_degree = 50))

plot_matrix(A[, n:1])

K <- 2
n <- 100
alpha <- c(1, 1) / 5
B <- diag(c(1, 1))
theta <- n
A <- dc_mixed(theta, alpha, B, expected_degree = 50, sort_nodes = TRUE)

plot_matrix(A[, theta:1] / max(A))

n <- 100
K <- 2
pi <- c(.7, .7)
B <- diag(c(1, 1))
theta <- n
A <- dc_overlapping(theta, pi, B, expected_degree = 50, sort_nodes = TRUE)

plot_matrix(t(A[, 1:n] / max(A)))

K <- 10
n <- 100
pi <- rexp(K) + 1
pi <- pi / sum(pi)
B <- matrix(rexp(K^2), nrow = K)
B <- B / (3 * max(B))
diag(B) <- diag(B) + mean(B) * K
A <- fastRG::sbm(n, pi, B, sort_nodes = TRUE)

plot_matrix(A)

Next sample a DC-SBM with 10,000 nodes. Then compute and plot the leading eigenspace.

library(RSpectra)

K <- 10
n <- 10000
pi <- rexp(K) + 1
pi <- pi / sum(pi)
pi <- -sort(-pi)
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
A <- dcsbm(rgamma(n, shape = 2, scale = .4), pi, B, expected_degree = 20, simple = TRUE)
mean(rowSums(A))

K <- 10
n <- 10000
pi <- rexp(K) + 1
pi <- pi / sum(pi)
pi <- -sort(-pi)
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
A <- dcsbm(rgamma(n, shape = 2, scale = .4), pi, B, expected_degree = 20, simple = TRUE, sort_nodes = TRUE)
mean(rowSums(A))

# leading eigen of regularized Laplacian with tau = 1
D <- Diagonal(n, 1 / sqrt(rowSums(A) + 1))
ei <- eigs_sym(D %*% A %*% D, 10)

# normalize the rows of X:
X <- t(apply(ei$vectors[, 1:K], 1, function(x) return(x / sqrt(sum(x^2) + 1 / n))))

par(mfrow = c(5, 1), mar = c(1, 2, 2, 2), xaxt = "n", yaxt = "n")

# plot 1000 elements of the leading eigenvectors:
s <- sort(sample(n, 1000))

for (i in 1:5) {
  plot(X[s, i], pch = ".")
}

or

# This samples a 1M node graph.
# Depending on the computer, sampling the graph should take between 10 and 30 seconds
#   Then, taking the eigendecomposition of the regularized graph laplacian should take between 1 and 3 minutes
# The resulting adjacency matrix is a bit larger than 100MB.
# The leading eigenvectors of A are highly localized
K <- 3
n <- 1000000
pi <- rexp(K) + 1
pi <- pi / sum(pi)
pi <- -sort(-pi)
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
A <- dcsbm(rgamma(n, shape = 2, scale = .4), pi, B, expected_degree = 10, sort_nodes = TRUE)
D <- Diagonal(n, 1 / sqrt(rowSums(A) + 10))
L <- D %*% A %*% D
ei <- eigs_sym(L, 4)

s <- sort(sample(n, 10000))
X <- t(apply(ei$vectors[, 1:K], 1, function(x) return(x / sqrt(sum(x^2) + 1 / n))))
plot(X[s, 3]) # highly localized eigenvectors

To sample from a degree corrected and node contextualized graph...

n <- 10000 # number of nodes
d <- 1000 # number of features
K <- 5 # number of blocks

# Here are the parameters for the graph:

pi <- rexp(K) + 1
pi <- pi / sum(pi) * 3
B <- matrix(rexp(K^2) + 1, nrow = K)
diag(B) <- diag(B) + mean(B) * K
theta <- rexp(n)
paraG <- dcsbm_params(theta = theta, pi = pi, B = B)

# Here are the parameters for the features:

thetaY <- rexp(d)
piFeatures <- rexp(K) + 1
piFeatures <- piFeatures / sum(piFeatures) * 3
BFeatures <- matrix(rexp(K^2) + 1, nrow = K)
diag(BFeatures) <- diag(BFeatures) + mean(BFeatures) * K

paraFeat <- dcsbm_params(theta = thetaY, pi = piFeatures, B = BFeatures)

# the node "degrees" in the features, should be related to their degrees in the graph.
X <- paraG$X
X@x <- paraG$X@x + rexp(n) # the degree parameter should be different. X@x + rexp(n) makes feature degrees correlated to degrees in graph.

# generate the graph and features
A <- fastRG(paraG$X, paraG$S, expected_degree = 10)
features <- fastRG(X, paraFeat$S, paraFeat$X, expected_degree = 20)

The text was updated successfully, but these errors were encountered:

alexpghayes changed the title ~~Add blockmodel once requisite models have been implemented~~ Add blockmodel vignette once requisite models have been implemented Feb 22, 2021

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add blockmodel vignette once requisite models have been implemented #14

Add blockmodel vignette once requisite models have been implemented #14

alexpghayes commented Feb 19, 2021

Add blockmodel vignette once requisite models have been implemented #14

Add blockmodel vignette once requisite models have been implemented #14

Comments

alexpghayes commented Feb 19, 2021

Some blockmodel examples