From 5f8879109c440ef54442ef202378de318a5c9815 Mon Sep 17 00:00:00 2001 From: "George G. Vega Yon" Date: Tue, 12 Mar 2019 00:48:12 -0700 Subject: [PATCH] Working on new functions to manipulate network objects better --- .Rbuildignore | 1 + DESCRIPTION | 3 +- NAMESPACE | 9 ++ R/RcppExports.R | 8 ++ R/network.R | 95 ++++++++++++++++ R/sim.R | 44 +++++++- R/utils.R | 40 ++++++- README.Rmd | 8 +- data-raw/fivenets.R | 15 ++- makefile | 2 + man/matrix_to_network.Rd | 68 ++++++++++++ man/new_rergmito.Rd | 14 ++- man/nvertex.Rd | 23 +++- src/RcppExports.cpp | 34 ++++++ src/count_stats.cpp | 129 +++++++++++++++++++++- src/loglik.cpp | 2 +- src/network.cpp | 179 +++++++++++++++++++++++++++++++ tests/testthat/test-new_rlergm.R | 4 +- tests/testthat/test-stat-count.R | 5 + 19 files changed, 659 insertions(+), 24 deletions(-) create mode 100644 R/network.R create mode 100644 makefile create mode 100644 man/matrix_to_network.Rd create mode 100644 src/network.cpp diff --git a/.Rbuildignore b/.Rbuildignore index 3ebe2a3..62f2d62 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,4 @@ ^CODE_OF_CONDUCT\.md$ ^.*\.Rproj$ ^\.Rproj\.user$ +^makefile$ diff --git a/DESCRIPTION b/DESCRIPTION index 4b4d3fe..0e33d2c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,8 +7,7 @@ Description: The package implements simulation and estimation algorithms for statistics. As a difference from the 'ergm' package, 'ergmito' skips the MCMC part of the estimation process and goes straight to MLE. Depends: R (>= 3.3.0) -Authors@R: - c( +Authors@R: c( person(given = "George", family = "Vega Yon", role = c("cre","aut"), email = "g.vegayon@gmail.com", comment = c(ORCID = "0000-0002-3171-0844")), person("Army Research Laboratory and the U.S. Army Research Office", diff --git a/NAMESPACE b/NAMESPACE index 62fcc26..ca4ab2d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,13 @@ S3method(count_stats,list) S3method(ergmito_boot,ergmito) S3method(ergmito_boot,formula) S3method(logLik,ergmito) +S3method(matrix_to_network,list) +S3method(matrix_to_network,matrix) +S3method(nedges,ergmito) +S3method(nedges,formula) +S3method(nedges,list) +S3method(nedges,matrix) +S3method(nedges,network) S3method(nnets,ergmito) S3method(nnets,formula) S3method(nnets,list) @@ -38,6 +45,8 @@ export(ergmito_formulae) export(exact_gradient) export(exact_loglik) export(extract.ergmito) +export(matrix_to_network) +export(nedges) export(new_rergmito) export(nnets) export(nvertex) diff --git a/R/RcppExports.R b/R/RcppExports.R index a006517..86142af 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -31,6 +31,14 @@ exact_gradient. <- function(x, params, weights, statmat) { .Call(`_ergmito_exact_gradient`, x, params, weights, statmat) } +init_network <- function(n, directed = TRUE, hyper = FALSE, loops = FALSE, multiple = FALSE, bipartite = FALSE) { + .Call(`_ergmito_init_network`, n, directed, hyper, loops, multiple, bipartite) +} + +matrix_to_network. <- function(x, directed, hyper, loops, multiple, bipartite) { + .Call(`_ergmito_matrix_to_network`, x, directed, hyper, loops, multiple, bipartite) +} + make_sets <- function(n) { .Call(`_ergmito_make_sets`, n) } diff --git a/R/network.R b/R/network.R new file mode 100644 index 0000000..8fec99e --- /dev/null +++ b/R/network.R @@ -0,0 +1,95 @@ +#' Manipulation of network objects +#' +#' This function implements a vectorized version of [network::network]`.adjmat`. +#' It allows us to turn regular matrices into network objects quickly. +#' +#' @param x Either a single square matrix (adjacency matrix), or a list of these. +#' @param directed Logical scalar, if `FALSE` then the function only checks the +#' upper diagonal of the matrix assuming it is undirected. +#' @param loops Logical scalar. When `FALSE` (default) it will skip the diagonal +#' of the adjacency matrix. +#' @param hyper,multiple,bipartite Currently Ignored. Right now all the network objects +#' created by this function set these parameters as `FALSE`. +#' +#' @details This version does not support adding the name parameter yet. The +#' function in the network package includes the name of the vertices as an +#' attribute. +#' +#' Just like in the network function, `NA` are checked and added accordingly, i.e. +#' if there is an `NA` in the matrix, then the value is recorded as a missing edge. +#' +#' @return An object of class `network`. This is a list with the following elements: +#' - `mel` *Master Edge List*: A named list with length equal to the number of edges in +#' the network. The list itself has 3 elements: `inl` (tail), `outl` (head), and +#' `atl` (attribute). By default `atl`, a list itself, has a single element: `na`. +#' +#' - `gal` *Graph Attributes List*: a named list with the following elements: +#' - `n` Number of nodes +#' - `mnext` Number of edges + 1 +#' - `directed`,`hyper`,`loops`,`multiple`,`bipartite` The arguments passed to +#' the function. +#' +#' - `val` *Vertex Attributes List* +#' +#' - `iel` *In Edgest List* +#' +#' - `oel` *Out Edgest List* +#' +#' @examples +#' set.seed(155) +#' adjmats <- rbernoulli(rep(5, 20)) +#' networks <- matrix_to_network(adjmats) +#' +#' @export +matrix_to_network <- function( + x, + directed = rep(TRUE, length(x)), + hyper = rep(FALSE, length(x)), + loops = rep(FALSE, length(x)), + multiple = rep(FALSE, length(x)), + bipartite = rep(FALSE, length(x)) + ) UseMethod("matrix_to_network") + +#' @export +#' @rdname matrix_to_network +matrix_to_network.matrix <- function( + x, + directed = rep(TRUE, length(x)), + hyper = rep(FALSE, length(x)), + loops = rep(FALSE, length(x)), + multiple = rep(FALSE, length(x)), + bipartite = rep(FALSE, length(x)) + ) { + + matrix_to_network.( + x = list(x), + directed = directed, + hyper = hyper, + loops = loops, + multiple = multiple, + bipartite = bipartite + )[[1L]] + +} + +#' @export +#' @rdname matrix_to_network +matrix_to_network.list <- function( + x, + directed = rep(TRUE, length(x)), + hyper = rep(FALSE, length(x)), + loops = rep(FALSE, length(x)), + multiple = rep(FALSE, length(x)), + bipartite = rep(FALSE, length(x)) + ) { + + matrix_to_network.( + x = x, + directed = directed, + hyper = hyper, + loops = loops, + multiple = multiple, + bipartite = bipartite + ) + +} \ No newline at end of file diff --git a/R/sim.R b/R/sim.R index ed4a09c..fc145db 100644 --- a/R/sim.R +++ b/R/sim.R @@ -7,8 +7,19 @@ #' @param x An object of class `ergmito_sampler`. #' @param sizes Integer vector. Values between 2 to 5 (6 becomes too intensive). #' @param mc.cores Integer. Passed to [parallel::mclapply] +#' @param force Logical. When `FALSE` (default) will try to use `ergmito`'s stat +#' count functions (see [count_stats]). This means that if one of the requested +#' statistics in not avialable in `ergmito`, then we will use `ergm` to compute +#' them, which is significatnly slower (see details). #' @param ... Further arguments passed to [ergm::ergm.allstats]. #' +#' @details +#' While the \CRANpkg{ergm} package is very efficient, it was not built to do some +#' of the computations requiered in the ergmito package. This translates in having +#' some of the functions of the package (ergm) with poor speed performance. This +#' led us to "reinvent the wheel" in some cases to speed things up, this includes +#' calculating observed statistics in a list of networks. +#' #' @return An environment with the following objects: #' #' - `calc_prob` @@ -25,7 +36,8 @@ #' #' @export #' @importFrom parallel mclapply -new_rergmito <- function(model, theta = NULL, sizes = NULL, mc.cores = 2L,...) { +new_rergmito <- function(model, theta = NULL, sizes = NULL, mc.cores = 2L, + force = FALSE, ...) { # environment(model) <- parent.frame() @@ -58,6 +70,7 @@ new_rergmito <- function(model, theta = NULL, sizes = NULL, mc.cores = 2L,...) { ergm_model_attrs <- which(sapply(ergm_model$attrnames, length) > 0) # Capturing attributes (if any) + sampler_w_attributes <- FALSE if (length(ergm_model_attrs) && nnets(net) != 1L) { stop( @@ -67,6 +80,8 @@ new_rergmito <- function(model, theta = NULL, sizes = NULL, mc.cores = 2L,...) { } else if (length(ergm_model_attrs) && nnets(net) == 1L) { + sampler_w_attributes <- TRUE + for (a in ergm_model_attrs) { ergm_model$attrs[[a]] <- if (is.null(ergm_model$attrnames[[a]])) @@ -134,6 +149,11 @@ new_rergmito <- function(model, theta = NULL, sizes = NULL, mc.cores = 2L,...) { } else { # THE ERGM WAY ------------------------------------------------------------- + if (!force) + stop("To generate this sampler we need to use statnet's ergm functions since", + " not all the requested statistics are available in ergmito. If you", + " would like to procede, use the option `force = TRUE`", call. = FALSE) + # Are we addinig attributes? if (nnets(net) == 1 && network::is.network(net)) { attrs <- list() @@ -263,7 +283,7 @@ new_rergmito <- function(model, theta = NULL, sizes = NULL, mc.cores = 2L,...) { } if (!as_indexes) { - ans$networks[[s]][ + nets <- ans$networks[[s]][ sample.int( n = length(ans$prob[[s]]), size = n, @@ -272,6 +292,26 @@ new_rergmito <- function(model, theta = NULL, sizes = NULL, mc.cores = 2L,...) { useHash = FALSE ) ] + + # If this is a sampler with attributes, then we need to add the attributes + # to the sampled graphs + if (sampler_w_attributes) { + + nets <- lapply(nets, function(n) { + + # New baseline, we remove all edges + net0 <- network::network.copy(net) + net0[,] <- 0 + + edgelist <- which(n != 0, arr.ind = TRUE) + network::add.edges(net0, edgelist[, 2], edgelist[, 1]) + + }) + + } + + nets + } else { sample.int( n = length(ans$prob[[s]]), diff --git a/R/utils.R b/R/utils.R index 5c94fac..c17d1c0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,14 +1,50 @@ #' Utility functions to query network dimensions -#' @param x Either an object of class [ergmito], [network], or [matrix]. +#' @param x Either an object of class [ergmito], [network], [formula], or [matrix]. +#' @param ... Further arguments passed to the method. Currently only `nedges.network` +#' receives arguments (see [network::network.edgecount]). #' @export nvertex <- function(x) UseMethod("nvertex") +#' @export +#' @rdname nvertex +nedges <- function(x, ...) UseMethod("nedges") + +#' @export +#' @rdname nvertex +nedges.network <- function(x, ...) { + network::network.edgecount(x, ...) +} + +#' @export +#' @rdname nvertex +nedges.list <- function(x, ...) { + sapply(x, nedges, ...) +} + +#' @export +#' @rdname nvertex +nedges.matrix <- function(x, ...) { + sum(x != 0) +} + +#' @export +#' @rdname nvertex +nedges.ergmito <- function(x, ...) { + nedges(x$network, ...) +} + +#' @export +#' @rdname nvertex +nedges.formula <- function(x, ...) { + nedges(eval(x[[2]]), envir = environment(x)) +} + #' @export #' @rdname nvertex nvertex.network <- function(x) { - x$gal$n + network::network.size(x) } diff --git a/README.Rmd b/README.Rmd index edb2673..29e2a7c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -68,15 +68,15 @@ gplot(net) ``` ```{r comparing-w-ergm} -model <- net ~ edges + mutual + balance +model <- net ~ edges + mutual + ctriad library(ergm) ans_ergmito <- ergmito(model) ans_ergm <- ergm(model) -# The ergmito should have a larger value -ergm.exact(ans_ergmito$coef, model) -ergm.exact(ans_ergm$coef, model) +# The ergmito should have a larger value when computing exact loglikelihood +ergm.exact(ans_ergmito$coef, model) > + ergm.exact(ans_ergm$coef, model) summary(ans_ergmito) summary(ans_ergm) diff --git a/data-raw/fivenets.R b/data-raw/fivenets.R index f5da663..568c78c 100644 --- a/data-raw/fivenets.R +++ b/data-raw/fivenets.R @@ -6,22 +6,26 @@ set.seed(12312) nets <- replicate(5, { network( - rbernoulli(4, .5), - vertex.attr = list(rpois(4, 20)), + rbernoulli(5, .5), + vertex.attr = list(floor(rnorm(5, 30, 10))), vertex.attrnames = "age") }, simplify = FALSE) # Generating the samplers examplers <- lapply(nets, function(net) { - new_rergmito(net ~ edges + nodeicov("age"), theta = c(-4, .2)) + new_rergmito(net ~ edges + balance, theta = c(-2, 2)) }) # Generating a random sample of these networks (same size) -fivenets <- lapply(examplers, function(i) i$sample(1L, nvertex(i$network0))[[1L]]) -ans <- ergmito(fivenets ~ edges + nodeicov("age")) +fivenets2 <- lapply(examplers, function(i) { + # debug(i$sample) + i$sample(1L, nvertex(i$network0))[[1L]] +}) +ans <- ergmito(fivenets2 ~ edges + balance) confint(ans) +nedges(ans) usethis::use_data(fivenets) @@ -29,3 +33,4 @@ fivesamplers <- examplers usethis::use_data(fivesamplers) +\frac{\exp{\transpose{\theta}\stats{y,x}}}{\transpose{\Mat{W}}\left[\exp{\transpose{\theta}\Mat{W}}\right]} \ No newline at end of file diff --git a/makefile b/makefile new file mode 100644 index 0000000..3bf7364 --- /dev/null +++ b/makefile @@ -0,0 +1,2 @@ +all: + cd ../ && R CMD build ergmito/ && R CMD check --as-cran --use-valgrind ergmito_*.tar.gz diff --git a/man/matrix_to_network.Rd b/man/matrix_to_network.Rd new file mode 100644 index 0000000..c7f7d15 --- /dev/null +++ b/man/matrix_to_network.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/network.R +\name{matrix_to_network} +\alias{matrix_to_network} +\alias{matrix_to_network.matrix} +\alias{matrix_to_network.list} +\title{Manipulation of network objects} +\usage{ +matrix_to_network(x, directed = rep(TRUE, length(x)), + hyper = rep(FALSE, length(x)), loops = rep(FALSE, length(x)), + multiple = rep(FALSE, length(x)), bipartite = rep(FALSE, length(x))) + +\method{matrix_to_network}{matrix}(x, directed = rep(TRUE, length(x)), + hyper = rep(FALSE, length(x)), loops = rep(FALSE, length(x)), + multiple = rep(FALSE, length(x)), bipartite = rep(FALSE, length(x))) + +\method{matrix_to_network}{list}(x, directed = rep(TRUE, length(x)), + hyper = rep(FALSE, length(x)), loops = rep(FALSE, length(x)), + multiple = rep(FALSE, length(x)), bipartite = rep(FALSE, length(x))) +} +\arguments{ +\item{x}{Either a single square matrix (adjacency matrix), or a list of these.} + +\item{directed}{Logical scalar, if \code{FALSE} then the function only checks the +upper diagonal of the matrix assuming it is undirected.} + +\item{hyper, multiple, bipartite}{Currently Ignored. Right now all the network objects +created by this function set these parameters as \code{FALSE}.} + +\item{loops}{Logical scalar. When \code{FALSE} (default) it will skip the diagonal +of the adjacency matrix.} +} +\value{ +An object of class \code{network}. This is a list with the following elements: +\itemize{ +\item \code{mel} \emph{Master Edge List}: A named list with length equal to the number of edges in +the network. The list itself has 3 elements: \code{inl} (tail), \code{outl} (head), and +\code{atl} (attribute). By default \code{atl}, a list itself, has a single element: \code{na}. +\item \code{gal} \emph{Graph Attributes List}: a named list with the following elements: +\itemize{ +\item \code{n} Number of nodes +\item \code{mnext} Number of edges + 1 +\item \code{directed},\code{hyper},\code{loops},\code{multiple},\code{bipartite} The arguments passed to +the function. +} +\item \code{val} \emph{Vertex Attributes List} +\item \code{iel} \emph{In Edgest List} +\item \code{oel} \emph{Out Edgest List} +} +} +\description{ +This function implements a vectorized version of \link[network:network]{network::network}\code{.adjmat}. +It allows us to turn regular matrices into network objects quickly. +} +\details{ +This version does not support adding the name parameter yet. The +function in the network package includes the name of the vertices as an +attribute. + +Just like in the network function, \code{NA} are checked and added accordingly, i.e. +if there is an \code{NA} in the matrix, then the value is recorded as a missing edge. +} +\examples{ +set.seed(155) +adjmats <- rbernoulli(rep(5, 20)) +networks <- matrix_to_network(adjmats) + +} diff --git a/man/new_rergmito.Rd b/man/new_rergmito.Rd index 1b2f2d6..f020e74 100644 --- a/man/new_rergmito.Rd +++ b/man/new_rergmito.Rd @@ -6,7 +6,8 @@ \alias{print.ergmito_sampler} \title{ERGMito sampler} \usage{ -new_rergmito(model, theta = NULL, sizes = NULL, mc.cores = 2L, ...) +new_rergmito(model, theta = NULL, sizes = NULL, mc.cores = 2L, + force = FALSE, ...) \method{[}{ergmito_sampler}(x, i, j, ...) @@ -21,6 +22,11 @@ new_rergmito(model, theta = NULL, sizes = NULL, mc.cores = 2L, ...) \item{mc.cores}{Integer. Passed to \link[parallel:mclapply]{parallel::mclapply}} +\item{force}{Logical. When \code{FALSE} (default) will try to use \code{ergmito}'s stat +count functions (see \link{count_stats}). This means that if one of the requested +statistics in not avialable in \code{ergmito}, then we will use \code{ergm} to compute +them, which is significatnly slower (see details).} + \item{...}{Further arguments passed to \link[ergm:ergm.allstats]{ergm::ergm.allstats}.} \item{x}{An object of class \code{ergmito_sampler}.} @@ -49,6 +55,12 @@ The indexing method \code{[.ergmito_sampler} returns a named list of length Using } \details{ +While the \CRANpkg{ergm} package is very efficient, it was not built to do some +of the computations requiered in the ergmito package. This translates in having +some of the functions of the package (ergm) with poor speed performance. This +led us to "reinvent the wheel" in some cases to speed things up, this includes +calculating observed statistics in a list of networks. + The indexing method, \code{[.ergmito_sampler}, allows extracting networks directly by passing indexes. \code{i} indicates the index of the networks to draw, which go from 1 through \code{2^(n*(n-1))}, and \code{j} indicates the requested diff --git a/man/nvertex.Rd b/man/nvertex.Rd index ed36353..477dc0c 100644 --- a/man/nvertex.Rd +++ b/man/nvertex.Rd @@ -2,6 +2,12 @@ % Please edit documentation in R/utils.R \name{nvertex} \alias{nvertex} +\alias{nedges} +\alias{nedges.network} +\alias{nedges.list} +\alias{nedges.matrix} +\alias{nedges.ergmito} +\alias{nedges.formula} \alias{nvertex.network} \alias{nvertex.matrix} \alias{nvertex.list} @@ -17,6 +23,18 @@ \usage{ nvertex(x) +nedges(x, ...) + +\method{nedges}{network}(x, ...) + +\method{nedges}{list}(x, ...) + +\method{nedges}{matrix}(x, ...) + +\method{nedges}{ergmito}(x, ...) + +\method{nedges}{formula}(x, ...) + \method{nvertex}{network}(x) \method{nvertex}{matrix}(x) @@ -40,7 +58,10 @@ nnets(x) \method{nnets}{formula}(x) } \arguments{ -\item{x}{Either an object of class \link{ergmito}, \link{network}, or \link{matrix}.} +\item{x}{Either an object of class \link{ergmito}, \link{network}, \link{formula}, or \link{matrix}.} + +\item{...}{Further arguments passed to the method. Currently only \code{nedges.network} +receives arguments (see \link[network:network.edgecount]{network::network.edgecount}).} } \description{ Utility functions to query network dimensions diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 55d87f6..38bb57b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -60,6 +60,38 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// init_network +List init_network(int n, bool directed, bool hyper, bool loops, bool multiple, bool bipartite); +RcppExport SEXP _ergmito_init_network(SEXP nSEXP, SEXP directedSEXP, SEXP hyperSEXP, SEXP loopsSEXP, SEXP multipleSEXP, SEXP bipartiteSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< int >::type n(nSEXP); + Rcpp::traits::input_parameter< bool >::type directed(directedSEXP); + Rcpp::traits::input_parameter< bool >::type hyper(hyperSEXP); + Rcpp::traits::input_parameter< bool >::type loops(loopsSEXP); + Rcpp::traits::input_parameter< bool >::type multiple(multipleSEXP); + Rcpp::traits::input_parameter< bool >::type bipartite(bipartiteSEXP); + rcpp_result_gen = Rcpp::wrap(init_network(n, directed, hyper, loops, multiple, bipartite)); + return rcpp_result_gen; +END_RCPP +} +// matrix_to_network +ListOf< List > matrix_to_network(const ListOf< IntegerMatrix >& x, const LogicalVector& directed, const LogicalVector& hyper, const LogicalVector& loops, const LogicalVector& multiple, const LogicalVector& bipartite); +RcppExport SEXP _ergmito_matrix_to_network(SEXP xSEXP, SEXP directedSEXP, SEXP hyperSEXP, SEXP loopsSEXP, SEXP multipleSEXP, SEXP bipartiteSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const ListOf< IntegerMatrix >& >::type x(xSEXP); + Rcpp::traits::input_parameter< const LogicalVector& >::type directed(directedSEXP); + Rcpp::traits::input_parameter< const LogicalVector& >::type hyper(hyperSEXP); + Rcpp::traits::input_parameter< const LogicalVector& >::type loops(loopsSEXP); + Rcpp::traits::input_parameter< const LogicalVector& >::type multiple(multipleSEXP); + Rcpp::traits::input_parameter< const LogicalVector& >::type bipartite(bipartiteSEXP); + rcpp_result_gen = Rcpp::wrap(matrix_to_network(x, directed, hyper, loops, multiple, bipartite)); + return rcpp_result_gen; +END_RCPP +} // make_sets vecint make_sets(int n); RcppExport SEXP _ergmito_make_sets(SEXP nSEXP) { @@ -114,6 +146,8 @@ static const R_CallMethodDef CallEntries[] = { {"_ergmito_count_stats", (DL_FUNC) &_ergmito_count_stats, 3}, {"_ergmito_exact_loglik", (DL_FUNC) &_ergmito_exact_loglik, 5}, {"_ergmito_exact_gradient", (DL_FUNC) &_ergmito_exact_gradient, 4}, + {"_ergmito_init_network", (DL_FUNC) &_ergmito_init_network, 6}, + {"_ergmito_matrix_to_network", (DL_FUNC) &_ergmito_matrix_to_network, 6}, {"_ergmito_make_sets", (DL_FUNC) &_ergmito_make_sets, 1}, {"_ergmito_powerset", (DL_FUNC) &_ergmito_powerset, 2}, {"_ergmito_print_powerset", (DL_FUNC) &_ergmito_print_powerset, 1}, diff --git a/src/count_stats.cpp b/src/count_stats.cpp index 14fdb81..ea7399e 100644 --- a/src/count_stats.cpp +++ b/src/count_stats.cpp @@ -51,7 +51,6 @@ inline double count_ttriad(const IntegerMatrix & x, const NumericVector & A) { // Label 1 if (x(i,j) == 1 && x(i,k) == 1 && x(j,k) == 1) - // if (x(j, i) == 0 && x(k,i) == 0 && x(k,j) == 0) ++count; } @@ -75,8 +74,7 @@ inline double count_ctriad(const IntegerMatrix & x, const NumericVector & A) { // Label 1 if (x(i, j) == 1 && x(j, k) == 1 && x(k, i) == 1) - // if (x(j, i) == 0 && x(k, j) == 0 && x(i, k) == 0) - ++count; + ++count; } } @@ -124,6 +122,121 @@ inline double count_triangle(const IntegerMatrix & x, const NumericVector & A) { } +inline double count_balance(const IntegerMatrix & x, const NumericVector & A) { + + int count = 0, n = x.nrow(); + int s; + + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) { + + if (i == j) + continue; + + // We compute this statistic for selecting two possible cases: disconnected + // and mutual ties. The two relevant cases for balanced triads. + s = x(i, j) + x(j, i); + + if (s == 1) + continue; + + // Case in which i and j are disconnected. If these two are disconnected, + // then the loop through k should be truncated as a function of i since + // otherwise we will be double counting. + else if (s == 0) { // Triad 102 + + for (int k = 0; k < i; ++k) { + + if (k == j) + continue; + + if (x(i, k) == 0 || x(k, i) == 0 || x(j, k) == 1 || x(k, j) == 1) + continue; + + ++count; + + + } + + + // Case in which they have a mutual tie, then we also have to truncate the + // loop over j, otherwise triple counting. + } else if (s == 2) { // Triad 300 + + if (j >= i) + break; + + for (int k = 0; k < j; ++k) { + + if (x(i, k) == 0 || x(k, i) == 0 || x(j, k) == 0 || x(k, j) == 0) + continue; + + ++count; + } + + } + + } + + return (double) count; + +} + +// Triadic census -------------------------------------------------------------- +inline double count_t300(const IntegerMatrix & x, const NumericVector & A) { + + int count = 0, n = x.nrow(); + + for (int i = 0; i < n; ++i) { + + for (int j = 0; j < i; ++j) { + + if (x(i, j) == 0 || x(j, i) == 0) + continue; + + for (int k = 0; k < j; ++k) { + + if (x(i, k) == 0 || x(k, i) == 0 || x(j, k) == 0 || x(k, j) == 0) + continue; + + ++count; + + } + + } + + } + + return (double) count; +} + +inline double count_t102(const IntegerMatrix & x, const NumericVector & A) { + int count = 0, n = x.nrow(); + + for (int i = 0; i < n; ++i) { + + for (int j = 0; j < n; ++j) { + + if (x(i, j) == 1 || x(j, i) == 1) + continue; + + for (int k = 0; k < i; ++k) { + + if (x(i, k) == 0 || x(k, i) == 0 || x(j, k) == 1 || x(k, j) == 1) + continue; + + ++count; + + } + + } + + } + + return (double) count; +} + +// Callers --------------------------------------------------------------------- typedef double (*ergm_term_fun)(const IntegerMatrix & x, const NumericVector & A); void get_ergm_term(std::string term, ergm_term_fun & fun) { @@ -136,6 +249,9 @@ void get_ergm_term(std::string term, ergm_term_fun & fun) { else if (term == "nodeocov") fun = &count_nodeocov; else if (term == "nodematch") fun = &count_nodematch; else if (term == "triangle") fun = &count_triangle; + else if (term == "balance") fun = &count_balance; + else if (term == "t300") fun = &count_t300; + else if (term == "t102") fun = &count_t102; else stop("The term %s is not available.", term); @@ -153,7 +269,10 @@ CharacterVector count_available(int i = 0) { "nodeicov", "nodeocov", "nodematch", - "triangle" + "triangle", + "balance", + "t300", + "t102" ); } @@ -165,6 +284,8 @@ NumericMatrix count_stats( const ListOf< NumericVector > & A ) { + // List b; + // b.containsElementNamed("parameter1"); int n = X.size(); int k = terms.size(); diff --git a/src/loglik.cpp b/src/loglik.cpp index d79e3f7..4215f7d 100644 --- a/src/loglik.cpp +++ b/src/loglik.cpp @@ -78,7 +78,7 @@ arma::vec exact_loglik( } -// Calculates the likelihood for a given network individually. +// Calculates the gradient for a given network individually. inline arma::colvec exact_gradienti( const arma::rowvec & x, const arma::colvec & params, diff --git a/src/network.cpp b/src/network.cpp new file mode 100644 index 0000000..a4070ec --- /dev/null +++ b/src/network.cpp @@ -0,0 +1,179 @@ +#include +using namespace Rcpp; + +// [[Rcpp::export]] +List init_network( + int n, + bool directed = true, + bool hyper = false, + bool loops = false, + bool multiple = false, + bool bipartite = false +) { + + if (n < 0) + stop("`n` cannot be less than 0."); + + List emptylist; + + List g = List::create( + /* Master edgelist: + * This is in itself a list of length (mnext - 1) which, if different from + * an empty list, has the following elements: inl (tail), outl (head), alt (attributes) + * + * See https://github.com/statnet/network/blob/525286ff257745011f35a58c193c68745f78e853/vignettes/networkVignette.Rnw#L331 + */ + _["mel"] = List(0), + + _["gal"] = List::create( // Graph attribute list + _["n"] = n, + _["mnext"] = 1, // Next number of edges (next m) + _["directed"] = directed, + _["hyper"] = hyper, + _["loops"] = loops, + _["multiple"] = multiple, + _["bipartite"] = bipartite + ), + _["val"] = R_NilValue, + _["iel"] = R_NilValue, + _["oel"] = R_NilValue + ); + + // Depending if there are nodes, then + if (n > 0) { + + std::vector< List > listoflists(n); + std::vector< IntegerVector > listofInt(n); + + // n lists + g["val"] = listoflists; + g["iel"] = listofInt; + g["oel"] = listofInt; + + + } else { + + // n lists + g["val"] = emptylist; + g["iel"] = emptylist; + g["oel"] = emptylist; + + } + + g.attr("class") = "network"; + + return g; + +} + +inline ListOf< List > matrix_to_networki( + const IntegerMatrix & x, + bool directed = true, + bool hyper = false, + bool loops = false, + bool multiple = false, + bool bipartite = false + ) { + + int n = x.nrow(), m = x.ncol(); + + if (n != m) + stop("Non-square matrix."); + + std::vector< List > ans; + ans.reserve(x.size()); + + std::vector< std::vector > iel(n), oel(n); + + // Default lists. We do this so that we don't make that many calls during the + // for-loop. + List NA_trueList = List::create(_["na"] = true); + List NA_falseList = List::create(_["na"] = false); + + + int nedge = 0; + for (int j = 0; j < m; ++j) { + + int ni = n; + if (!directed) + ni = j; + + for (int i = 0; i < ni; ++i) { + + if (!loops && (i == j)) + continue; + + if (x(i, j) != 0) { + + List edge = List::create( + _["inl"] = i + 1, // + _["outl"] = j + 1, // While we see from 0 to (n-1), R lives in 1 to n. + _["atl"] = ( x(i, j) == R_NaInt ) ? NA_trueList : NA_falseList + ); + + ans.push_back(edge); + iel.at(j).push_back(++nedge); + oel.at(i).push_back(nedge); + +#ifdef ERGMITO_DEBUG_NETWORK + Rprintf("x[%d, %d]: edge %d\n", i, j, nedge); +#endif + + } + } + } + + // typedef std::vector< std::vector< int > > vecvecint; + // for (vecvecint::iterator iter = iel.begin(); iter != iel.end(); ++iter) { + // std::reverse(iter->begin(), iter->end()); + // } + // for (vecvecint::iterator iter = oel.begin(); iter != oel.end(); ++iter) { + // std::reverse(iter->begin(), iter->end()); + // } + + // Empty list of attributes + ListOf< List > listoflist(n); + + List network = List::create( + _["mel"] = wrap(ans), + _["gal"] = List::create( + _["n"] = n, + _["mnext"] = ++nedge, // Next number of edges (next m) + _["directed"] = directed, + _["hyper"] = hyper, + _["loops"] = loops, + _["multiple"] = multiple, + _["bipartite"] = bipartite + ), + _["val"] = listoflist, + _["iel"] = wrap(iel), + _["oel"] = wrap(oel) + ); + + network.attr("class") = "network"; + + return network; + +} + +// [[Rcpp::export(name="matrix_to_network.")]] +ListOf< List > matrix_to_network( + const ListOf< IntegerMatrix > & x, + const LogicalVector & directed , + const LogicalVector & hyper , + const LogicalVector & loops , + const LogicalVector & multiple , + const LogicalVector & bipartite + ) { + + int n = x.size(); + ListOf< List > out(n); + + for (int i = 0; i < n; ++i) + out[i] = matrix_to_networki( + x[i], directed[i], hyper[i], loops[i], multiple[i], bipartite[i] + ); + + return out; + +} diff --git a/tests/testthat/test-new_rlergm.R b/tests/testthat/test-new_rlergm.R index 7893375..385d924 100644 --- a/tests/testthat/test-new_rlergm.R +++ b/tests/testthat/test-new_rlergm.R @@ -8,7 +8,7 @@ test_that("Using the count_stats vs ergm::summary_formula is equal", { Sys.setenv(ERGMITO_TEST = "") set.seed(1);ans0 <- new_rergmito(nets ~ edges + mutual, sizes = 3, mc.cores = 1L) Sys.setenv(ERGMITO_TEST = 1) - set.seed(1);ans1 <- new_rergmito(nets ~ edges + mutual, sizes = 3, mc.cores = 1L) + set.seed(1);ans1 <- new_rergmito(nets ~ edges + mutual, sizes = 3, mc.cores = 1L, force = TRUE) expect_equal(ans0$prob, ans1$prob) expect_equal(sum(ans0$prob$`3`), 1) @@ -21,7 +21,7 @@ test_that("Using the count_stats vs ergm::summary_formula is equal", { Sys.setenv(ERGMITO_TEST = "") ans0 <- new_rergmito(fivenets[[1]] ~ edges + nodeicov("age"), theta = coef(mod), mc.cores = 1L) Sys.setenv(ERGMITO_TEST = 1) - ans1 <- new_rergmito(fivenets[[1]] ~ edges + nodeicov("age"), theta = coef(mod), mc.cores = 1L) + ans1 <- new_rergmito(fivenets[[1]] ~ edges + nodeicov("age"), theta = coef(mod), mc.cores = 1L, force = TRUE) expect_equal(ans0$prob, ans1$prob) diff --git a/tests/testthat/test-stat-count.R b/tests/testthat/test-stat-count.R index 311a3df..672927f 100644 --- a/tests/testthat/test-stat-count.R +++ b/tests/testthat/test-stat-count.R @@ -48,6 +48,11 @@ test_that("Sufficient statistics", { expect_equivalent(ans0, ans1) + ans0 <- count_stats(pset3, c("balance"))[,1] + ans1 <- unname(sapply(pset3, function(p) summary(p ~ balance))) + + expect_equivalent(ans0, ans1) + # Nodecovar set.seed(44) age <- lapply(nvertex(pset3), rpois, lambda=4)