diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index da0ff34..c51f801 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -14,67 +14,33 @@ jobs: config: - {os: macOS-latest, r: 'release'} - {os: windows-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.0.0 (ubuntu-16.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-22.04, r: 'devel'} + - {os: ubuntu-22.04, r: 'release'} + - {os: ubuntu-22.04, r: 'oldrel'} env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true - - uses: r-lib/actions/setup-pandoc@master - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "16.04"))') - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - shell: Rscript {0} + extra-packages: any::rcmdcheck, any::remotes + needs: check - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - name: Check - env: - _R_CHECK_CRAN_INCOMING_: false - run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") - shell: Rscript {0} - # run: cd .. && R CMD build RcppThread && R CMD check RcppThread_1.1.0.tar.gz - # shell: bash + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true - name: Show testthat output if: always() @@ -83,13 +49,13 @@ jobs: - name: Upload check results if: failure() - uses: actions/upload-artifact@main + uses: actions/upload-artifact@v2 with: name: ${{ runner.os }}-r${{ matrix.config.r }}-results path: check - name: Test coverage - if: matrix.config.os == 'macOS-latest' && matrix.config.r == '3.6' + if: matrix.config.os == 'macOS-latest' run: | Rscript -e 'remotes::install_github("r-lib/covr@gh-actions")' Rscript -e 'covr::codecov(token = "${{secrets.CODECOV_TOKEN}}")' diff --git a/DESCRIPTION b/DESCRIPTION index 78ccafd..76ed4f1 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: vinereg Type: Package Title: D-Vine Quantile Regression -Version: 0.8.3 +Version: 0.9.0 Authors@R: c( person("Thomas", "Nagler",, "mail@tnagler.com", role = c("aut", "cre")), person("Dani", "Kraus",,, role = c("ctb")) diff --git a/NAMESPACE b/NAMESPACE index 3aa35af..bc1b4ab 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(fitted,vinereg) S3method(predict,vinereg) S3method(print,vinereg) S3method(summary,vinereg) +export(cll) export(cpit) export(plot_effects) export(vinereg) diff --git a/NEWS.md b/NEWS.md index 6257537..de8d813 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# vinereg 0.9.0 + +NEW FEATURE + +* New function `cll()` to compute the conditional log-likelihood. + + # vinereg 0.8.3 BUG FIX diff --git a/R/RcppExports.R b/R/RcppExports.R index 133cf6e..50ca71d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -17,3 +17,11 @@ cond_dist_cpp <- function(u, vinecop_r, num_threads) { .Call(`_vinereg_cond_dist_cpp`, u, vinecop_r, num_threads) } +cond_loglik_cpp <- function(u, vinecop_r, num_threads) { + .Call(`_vinereg_cond_loglik_cpp`, u, vinecop_r, num_threads) +} + +with_parameters_cop_cpp <- function(vinecop_r, parameters) { + .Call(`_vinereg_with_parameters_cop_cpp`, vinecop_r, parameters) +} + diff --git a/R/cpit.R b/R/cpit.R index 34a5a50..27d2fee 100644 --- a/R/cpit.R +++ b/R/cpit.R @@ -27,3 +27,40 @@ cpit <- function(object, newdata, cores = 1) { newdata <- to_uscale(newdata, object$margins) cond_dist_cpp(newdata, object$vine, cores) } + +#' Conditional log-likelihood +#' +#' Calculates the conditional log-likelihood of the response given the covariates. +#' +#' @param object an object of class \code{vinereg}. +#' @param newdata matrix of response and covariate values for which to compute +#' the conditional distribution. +#' @param cores integer; the number of cores to use for computations. +#' +#' @export +#' +#' @examples +#' \dontshow{ +#' set.seed(1) +#' } +#' # simulate data +#' x <- matrix(rnorm(500), 250, 2) +#' y <- x %*% c(1, -2) +#' dat <- data.frame(y = y, x = x, z = as.factor(rbinom(250, 2, 0.5))) +#' +#' # fit vine regression model +#' fit <- vinereg(y ~ ., dat) +#' +#' cll(fit, dat) +#' fit$stats$cll +cll <- function(object, newdata, cores = 1) { + newdata <- prepare_newdata(newdata, object, use_response = TRUE) + ll_marg <- if (inherits(object$margins[[1]], "kde1d")) { + sum(log(kde1d::dkde1d(newdata[, 1], object$margins[[1]]))) + } else { + 0 + } + newdata <- to_uscale(newdata, object$margins) + ll_cop <- cond_loglik_cpp(newdata, object$vine, cores) + ll_cop + ll_marg +} diff --git a/R/vinereg.R b/R/vinereg.R index e792338..72fc8ba 100755 --- a/R/vinereg.R +++ b/R/vinereg.R @@ -223,10 +223,10 @@ finalize_vinereg_object <- function(formula, selcrit, model_frame, margins, vine pchisq(2 * var_cll, var_edf, lower.tail = FALSE) ) var_p_value[1] <- NA - cll <- sum(var_cll) - edf <- sum(var_edf) - caic <- sum(var_caic) - cbic <- sum(var_cbic) + cll <- sum(var_cll, na.rm = TRUE) + edf <- sum(var_edf, na.rm = TRUE) + caic <- sum(var_caic, na.rm = TRUE) + cbic <- sum(var_cbic, na.rm = TRUE) stats <- list( nobs = nobs, diff --git a/man/cll.Rd b/man/cll.Rd new file mode 100644 index 0000000..b842258 --- /dev/null +++ b/man/cll.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cpit.R +\name{cll} +\alias{cll} +\title{Conditional log-likelihood} +\usage{ +cll(object, newdata, cores = 1) +} +\arguments{ +\item{object}{an object of class \code{vinereg}.} + +\item{newdata}{matrix of response and covariate values for which to compute +the conditional distribution.} + +\item{cores}{integer; the number of cores to use for computations.} +} +\description{ +Calculates the conditional log-likelihood of the response given the covariates. +} +\examples{ +\dontshow{ +set.seed(1) +} +# simulate data +x <- matrix(rnorm(500), 250, 2) +y <- x \%*\% c(1, -2) +dat <- data.frame(y = y, x = x, z = as.factor(rbinom(250, 2, 0.5))) + +# fit vine regression model +fit <- vinereg(y ~ ., dat) + +cll(fit, dat) +fit$stats$cll +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 5db1eda..87fa3dd 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -79,12 +79,39 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// cond_loglik_cpp +double cond_loglik_cpp(const Eigen::MatrixXd& u, const Rcpp::List& vinecop_r, size_t num_threads); +RcppExport SEXP _vinereg_cond_loglik_cpp(SEXP uSEXP, SEXP vinecop_rSEXP, SEXP num_threadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type u(uSEXP); + Rcpp::traits::input_parameter< const Rcpp::List& >::type vinecop_r(vinecop_rSEXP); + Rcpp::traits::input_parameter< size_t >::type num_threads(num_threadsSEXP); + rcpp_result_gen = Rcpp::wrap(cond_loglik_cpp(u, vinecop_r, num_threads)); + return rcpp_result_gen; +END_RCPP +} +// with_parameters_cop_cpp +Rcpp::List with_parameters_cop_cpp(const Rcpp::List& vinecop_r, const Eigen::VectorXd parameters); +RcppExport SEXP _vinereg_with_parameters_cop_cpp(SEXP vinecop_rSEXP, SEXP parametersSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::List& >::type vinecop_r(vinecop_rSEXP); + Rcpp::traits::input_parameter< const Eigen::VectorXd >::type parameters(parametersSEXP); + rcpp_result_gen = Rcpp::wrap(with_parameters_cop_cpp(vinecop_r, parameters)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_vinereg_fit_margins_cpp", (DL_FUNC) &_vinereg_fit_margins_cpp, 9}, {"_vinereg_select_dvine_cpp", (DL_FUNC) &_vinereg_select_dvine_cpp, 11}, {"_vinereg_cond_quantile_cpp", (DL_FUNC) &_vinereg_cond_quantile_cpp, 4}, {"_vinereg_cond_dist_cpp", (DL_FUNC) &_vinereg_cond_dist_cpp, 3}, + {"_vinereg_cond_loglik_cpp", (DL_FUNC) &_vinereg_cond_loglik_cpp, 3}, + {"_vinereg_with_parameters_cop_cpp", (DL_FUNC) &_vinereg_with_parameters_cop_cpp, 2}, {NULL, NULL, 0} }; diff --git a/src/vinereg.cpp b/src/vinereg.cpp index c8362e4..32f5846 100644 --- a/src/vinereg.cpp +++ b/src/vinereg.cpp @@ -33,30 +33,30 @@ fit_margins_cpp(const Eigen::MatrixXd& data, const Eigen::VectorXd& weights, size_t num_threads) { - size_t d = data.cols(); - std::vector fits_cpp(d); - num_threads = (num_threads > 1) ? num_threads : 0; - RcppThread::parallelFor( - 0, - d, - [&](const size_t& k) { - fits_cpp[k] = kde1d::Kde1d(data.col(k), - nlevels(k), - bw(k), - mult(k), - xmin(k), - xmax(k), - deg(k), - weights); - }, - num_threads); - - // we can't do the following in parallel because it calls R API - std::vector fits_r(d); - for (size_t k = 0; k < d; ++k) { - fits_r[k] = kde1d::kde1d_wrap(fits_cpp[k]); - } - return fits_r; + size_t d = data.cols(); + std::vector fits_cpp(d); + num_threads = (num_threads > 1) ? num_threads : 0; + RcppThread::parallelFor( + 0, + d, + [&](const size_t& k) { + fits_cpp[k] = kde1d::Kde1d(data.col(k), + nlevels(k), + bw(k), + mult(k), + xmin(k), + xmax(k), + deg(k), + weights); + }, + num_threads); + + // we can't do the following in parallel because it calls R API + std::vector fits_r(d); + for (size_t k = 0; k < d; ++k) { + fits_r[k] = kde1d::kde1d_wrap(fits_cpp[k]); + } + return fits_r; } // [[Rcpp::export]] @@ -73,53 +73,54 @@ select_dvine_cpp(const Eigen::MatrixXd& data, size_t cores, const std::vector& var_types) { - // set up the cpp fit controls from all the arguments ------ - std::vector fam_set(family_set.size()); - for (unsigned int fam = 0; fam < fam_set.size(); ++fam) { - fam_set[fam] = to_cpp_family(family_set[fam]); - } - FitControlsBicop controls(fam_set, - par_method, - nonpar_method, - mult, - selcrit, - weights, - psi0, - preselect_families, - cores); - - // select the model ----------------------------------------- - vinereg::DVineRegSelector selector(data, var_types, controls); - selector.select_model(); - auto selected_vars = selector.get_selected_vars(); - auto pcs = selector.get_pcs(); - - // make results R-compatible ------------------------------- - Rcpp::List vinecop_r; - // rank ensures that vars are 1, ..., p_sel - auto order = tools_stl::cat(static_cast(0), selected_vars); - auto new_struct = DVineStructure(tools_stl::rank(order)); - - auto sv = selected_vars; - std::sort(sv.begin(), sv.end()); - auto vt = std::vector(); - vt.push_back(var_types[0]); - for (auto v : sv) - vt.push_back(var_types[v]); - - vinecop_r = Rcpp::List::create( - Rcpp::Named("pair_copulas") = pair_copulas_wrap(pcs, order.size(), true), - Rcpp::Named("structure") = rvine_structure_wrap(new_struct), - Rcpp::Named("var_types") = vt, - Rcpp::Named("npars") = Vinecop(new_struct, pcs).get_npars(), - Rcpp::Named("nobs") = data.rows(), - Rcpp::Named("loglik") = NAN, - Rcpp::Named("threshold") = 0); - vinecop_r.attr("class") = Rcpp::CharacterVector{ "vinecop", "vinecop_dist" }; - for (auto& v : selected_vars) // R indexing starts at 1 - v++; - return Rcpp::List::create(Rcpp::Named("vine") = vinecop_r, - Rcpp::Named("selected_vars") = selected_vars); + // set up the cpp fit controls from all the arguments ------ + std::vector fam_set(family_set.size()); + for (unsigned int fam = 0; fam < fam_set.size(); ++fam) { + fam_set[fam] = to_cpp_family(family_set[fam]); + } + FitControlsBicop controls(fam_set, + par_method, + nonpar_method, + mult, + selcrit, + weights, + psi0, + preselect_families, + cores); + + // select the model ----------------------------------------- + vinereg::DVineRegSelector selector(data, var_types, controls); + selector.select_model(); + auto selected_vars = selector.get_selected_vars(); + auto pcs = selector.get_pcs(); + + // make results R-compatible ------------------------------- + Rcpp::List vinecop_r; + // rank ensures that vars are 1, ..., p_sel + auto order = tools_stl::cat(static_cast(0), selected_vars); + auto new_struct = DVineStructure(tools_stl::rank(order)); + + auto sv = selected_vars; + std::sort(sv.begin(), sv.end()); + auto vt = std::vector(); + vt.push_back(var_types[0]); + for (auto v : sv) + vt.push_back(var_types[v]); + + vinecop_r = Rcpp::List::create( + Rcpp::Named("pair_copulas") = pair_copulas_wrap(pcs, order.size(), true), + Rcpp::Named("structure") = rvine_structure_wrap(new_struct), + Rcpp::Named("var_types") = vt, + Rcpp::Named("npars") = Vinecop(new_struct, pcs).get_npars(), + Rcpp::Named("nobs") = data.rows(), + Rcpp::Named("loglik") = NAN, + Rcpp::Named("threshold") = 0); + vinecop_r.attr("class") = + Rcpp::CharacterVector{ "vinecop", "vinecop_dist" }; + for (auto& v : selected_vars) // R indexing starts at 1 + v++; + return Rcpp::List::create(Rcpp::Named("vine") = vinecop_r, + Rcpp::Named("selected_vars") = selected_vars); } // [[Rcpp::export]] @@ -129,99 +130,102 @@ cond_quantile_cpp(const Eigen::VectorXd& alpha, const Rcpp::List& vinecop_r, size_t num_threads) { - tools_eigen::check_if_in_unit_cube(u); - auto vinecop_cpp = vinecop_wrap(vinecop_r); - auto vine_struct_ = vinecop_cpp.get_rvine_structure(); - auto d = vine_struct_.get_dim(); - auto var_types_ = vinecop_cpp.get_var_types(); - if ((static_cast(u.cols()) != d) && - (static_cast(u.cols()) != 2 * d)) - throw std::runtime_error("data dimension is incompatible with model."); - - auto trunc_lvl = vine_struct_.get_trunc_lvl(); - auto order = vine_struct_.get_order(); - auto inverse_order = tools_stl::invert_permutation(order); - std::vector q(alpha.size()); - for (auto& qq : q) - qq.resize(u.rows()); - - auto do_batch = [&](const tools_batch::Batch& b) { - TriangularArray hfunc2(d + 1, trunc_lvl + 1); - TriangularArray hfunc1(d + 1, trunc_lvl + 1); - TriangularArray hfunc2_sub(d + 1, trunc_lvl + 1); - TriangularArray hfunc1_sub(d + 1, trunc_lvl + 1); - - // data have to be reordered to correspond to natural order - for (size_t j = 0; j < d; ++j) { - hfunc2(0, j) = u.col(order[j] - 1).segment(b.begin, b.size); - hfunc1(0, j) = u.col(order[j] - 1).segment(b.begin, b.size); - if (var_types_[order[j] - 1] == "d") { - hfunc2_sub(0, j) = u.col(d + order[j] - 1).segment(b.begin, b.size); - hfunc1_sub(0, j) = u.col(d + order[j] - 1).segment(b.begin, b.size); - } - } - - Eigen::MatrixXd u_e, u_e_sub; - for (size_t tree = 0; tree < trunc_lvl; ++tree) { - tools_interface::check_user_interrupt(u.rows() * d > 1e5); - for (size_t edge = 1; edge < d - tree - 1; ++edge) { - tools_interface::check_user_interrupt(d * u.rows() > 1e5); - auto edge_copula = vinecop_cpp.get_pair_copula(tree, edge); - auto var_types = edge_copula.get_var_types(); - size_t m = vine_struct_.min_array(tree, edge); - - u_e = Eigen::MatrixXd(b.size, 2); - u_e.col(0) = hfunc2(tree, edge); - u_e.col(1) = hfunc1(tree, m - 1); - if ((var_types[0] == "d") || (var_types[1] == "d")) { - u_e.conservativeResize(b.size, 4); - u_e.col(2) = - (var_types[0] == "d") ? hfunc2_sub(tree, edge) : hfunc2(tree, edge); - u_e.col(3) = (var_types[1] == "d") ? hfunc1_sub(tree, m - 1) - : hfunc1(tree, m - 1); + tools_eigen::check_if_in_unit_cube(u); + auto vinecop_cpp = vinecop_wrap(vinecop_r); + auto vine_struct_ = vinecop_cpp.get_rvine_structure(); + auto d = vine_struct_.get_dim(); + auto var_types_ = vinecop_cpp.get_var_types(); + if ((static_cast(u.cols()) != d) && + (static_cast(u.cols()) != 2 * d)) + throw std::runtime_error("data dimension is incompatible with model."); + + auto trunc_lvl = vine_struct_.get_trunc_lvl(); + auto order = vine_struct_.get_order(); + auto inverse_order = tools_stl::invert_permutation(order); + std::vector q(alpha.size()); + for (auto& qq : q) + qq.resize(u.rows()); + + auto do_batch = [&](const tools_batch::Batch& b) { + TriangularArray hfunc2(d + 1, trunc_lvl + 1); + TriangularArray hfunc1(d + 1, trunc_lvl + 1); + TriangularArray hfunc2_sub(d + 1, trunc_lvl + 1); + TriangularArray hfunc1_sub(d + 1, trunc_lvl + 1); + + // data have to be reordered to correspond to natural order + for (size_t j = 0; j < d; ++j) { + hfunc2(0, j) = u.col(order[j] - 1).segment(b.begin, b.size); + hfunc1(0, j) = u.col(order[j] - 1).segment(b.begin, b.size); + if (var_types_[order[j] - 1] == "d") { + hfunc2_sub(0, j) = + u.col(d + order[j] - 1).segment(b.begin, b.size); + hfunc1_sub(0, j) = + u.col(d + order[j] - 1).segment(b.begin, b.size); + } } - if (vine_struct_.needed_hfunc1(tree, edge)) { - hfunc1(tree + 1, edge) = edge_copula.hfunc1(u_e); - if (var_types[1] == "d") { - u_e_sub = u_e; - u_e_sub.col(1) = u_e.col(3); - hfunc1_sub(tree + 1, edge) = edge_copula.hfunc1(u_e_sub); - } - } - hfunc2(tree + 1, edge) = edge_copula.hfunc2(u_e); - if (var_types[0] == "d") { - u_e_sub = u_e; - u_e_sub.col(0) = u_e.col(2); - hfunc2_sub(tree + 1, edge) = edge_copula.hfunc2(u_e_sub); + Eigen::MatrixXd u_e, u_e_sub; + for (size_t tree = 0; tree < trunc_lvl; ++tree) { + tools_interface::check_user_interrupt(u.rows() * d > 1e5); + for (size_t edge = 1; edge < d - tree - 1; ++edge) { + tools_interface::check_user_interrupt(d * u.rows() > 1e5); + auto edge_copula = vinecop_cpp.get_pair_copula(tree, edge); + auto var_types = edge_copula.get_var_types(); + size_t m = vine_struct_.min_array(tree, edge); + + u_e = Eigen::MatrixXd(b.size, 2); + u_e.col(0) = hfunc2(tree, edge); + u_e.col(1) = hfunc1(tree, m - 1); + if ((var_types[0] == "d") || (var_types[1] == "d")) { + u_e.conservativeResize(b.size, 4); + u_e.col(2) = (var_types[0] == "d") ? hfunc2_sub(tree, edge) + : hfunc2(tree, edge); + u_e.col(3) = (var_types[1] == "d") ? hfunc1_sub(tree, m - 1) + : hfunc1(tree, m - 1); + } + + if (vine_struct_.needed_hfunc1(tree, edge)) { + hfunc1(tree + 1, edge) = edge_copula.hfunc1(u_e); + if (var_types[1] == "d") { + u_e_sub = u_e; + u_e_sub.col(1) = u_e.col(3); + hfunc1_sub(tree + 1, edge) = + edge_copula.hfunc1(u_e_sub); + } + } + hfunc2(tree + 1, edge) = edge_copula.hfunc2(u_e); + if (var_types[0] == "d") { + u_e_sub = u_e; + u_e_sub.col(0) = u_e.col(2); + hfunc2_sub(tree + 1, edge) = edge_copula.hfunc2(u_e_sub); + } + } } - } - } - for (size_t a = 0; a < static_cast(alpha.size()); a++) { - hfunc2(trunc_lvl, 0) = Eigen::VectorXd::Constant(b.size, alpha[a]); - for (ptrdiff_t tree = trunc_lvl - 1; tree >= 0; --tree) { - tools_interface::check_user_interrupt(d * u.rows() > 1e3); - Bicop edge_copula = vinecop_cpp.get_pair_copula(tree, 0); - Eigen::MatrixXd U_e(b.size, 2); - U_e.col(0) = hfunc2(tree + 1, 0); - U_e.col(1) = hfunc1(tree, 1); - if (edge_copula.get_var_types()[1] == "d") { - U_e.conservativeResize(b.size, 4); - U_e.col(2) = U_e.col(0); - U_e.col(3) = hfunc1_sub(tree, 1); + for (size_t a = 0; a < static_cast(alpha.size()); a++) { + hfunc2(trunc_lvl, 0) = Eigen::VectorXd::Constant(b.size, alpha[a]); + for (ptrdiff_t tree = trunc_lvl - 1; tree >= 0; --tree) { + tools_interface::check_user_interrupt(d * u.rows() > 1e3); + Bicop edge_copula = vinecop_cpp.get_pair_copula(tree, 0); + Eigen::MatrixXd U_e(b.size, 2); + U_e.col(0) = hfunc2(tree + 1, 0); + U_e.col(1) = hfunc1(tree, 1); + if (edge_copula.get_var_types()[1] == "d") { + U_e.conservativeResize(b.size, 4); + U_e.col(2) = U_e.col(0); + U_e.col(3) = hfunc1_sub(tree, 1); + } + hfunc2(tree, 0) = edge_copula.hinv2(U_e); + } + q[a].segment(b.begin, b.size) = hfunc2(0, 0); } - hfunc2(tree, 0) = edge_copula.hinv2(U_e); - } - q[a].segment(b.begin, b.size) = hfunc2(0, 0); - } - }; + }; - RcppThread::ThreadPool pool((num_threads == 1) ? 0 : num_threads); - pool.map(do_batch, tools_batch::create_batches(u.rows(), num_threads)); - pool.join(); + RcppThread::ThreadPool pool((num_threads == 1) ? 0 : num_threads); + pool.map(do_batch, tools_batch::create_batches(u.rows(), num_threads)); + pool.join(); - return q; + return q; } // [[Rcpp::export]] @@ -230,83 +234,227 @@ cond_dist_cpp(const Eigen::MatrixXd& u, const Rcpp::List& vinecop_r, size_t num_threads) { - tools_eigen::check_if_in_unit_cube(u); - auto vinecop_cpp = vinecop_wrap(vinecop_r); - auto vine_struct_ = vinecop_cpp.get_rvine_structure(); - auto d = vine_struct_.get_dim(); - auto var_types_ = vinecop_cpp.get_var_types(); - if ((static_cast(u.cols()) != d) && - (static_cast(u.cols()) != 2 * d)) - throw std::runtime_error("data dimension is incompatible with model."); - - auto trunc_lvl = vine_struct_.get_trunc_lvl(); - auto order = vine_struct_.get_order(); - auto inverse_order = tools_stl::invert_permutation(order); - - Eigen::VectorXd p(u.rows()); - auto do_batch = [&](const tools_batch::Batch& b) { - Eigen::MatrixXd hfunc1, hfunc2, u_e, hfunc1_sub, hfunc2_sub, u_e_sub; - hfunc1 = Eigen::MatrixXd::Zero(b.size, d); - hfunc2 = Eigen::MatrixXd::Zero(b.size, d); - hfunc1_sub = hfunc1; - hfunc2_sub = hfunc2; - - // data have to be reordered to correspond to natural order - for (size_t j = 0; j < d; ++j) { - hfunc2.col(j) = u.block(b.begin, order[j] - 1, b.size, 1); - if (var_types_[order[j] - 1] == "d") { - hfunc2_sub.col(j) = u.block(b.begin, d + order[j] - 1, b.size, 1); - } - } + tools_eigen::check_if_in_unit_cube(u); + auto vinecop_cpp = vinecop_wrap(vinecop_r); + auto vine_struct_ = vinecop_cpp.get_rvine_structure(); + auto d = vine_struct_.get_dim(); + auto var_types_ = vinecop_cpp.get_var_types(); + if ((static_cast(u.cols()) != d) && + (static_cast(u.cols()) != 2 * d)) + throw std::runtime_error("data dimension is incompatible with model."); + + auto trunc_lvl = vine_struct_.get_trunc_lvl(); + auto order = vine_struct_.get_order(); + auto inverse_order = tools_stl::invert_permutation(order); + + Eigen::VectorXd p(u.rows()); + auto do_batch = [&](const tools_batch::Batch& b) { + Eigen::MatrixXd hfunc1, hfunc2, u_e, hfunc1_sub, hfunc2_sub, u_e_sub; + hfunc1 = Eigen::MatrixXd::Zero(b.size, d); + hfunc2 = Eigen::MatrixXd::Zero(b.size, d); + hfunc1_sub = hfunc1; + hfunc2_sub = hfunc2; + + // data have to be reordered to correspond to natural order + for (size_t j = 0; j < d; ++j) { + hfunc2.col(j) = u.block(b.begin, order[j] - 1, b.size, 1); + if (var_types_[order[j] - 1] == "d") { + hfunc2_sub.col(j) = + u.block(b.begin, d + order[j] - 1, b.size, 1); + } + } - for (size_t tree = 0; tree < trunc_lvl; ++tree) { - for (size_t edge = 0; edge < d - tree - 1; ++edge) { - tools_interface::check_user_interrupt(); - Bicop edge_copula = vinecop_cpp.get_pair_copula(tree, edge); - auto var_types = edge_copula.get_var_types(); - size_t m = vine_struct_.min_array(tree, edge); - - u_e = Eigen::MatrixXd(b.size, 2); - u_e.col(0) = hfunc2.col(edge); - if (m == vine_struct_.struct_array(tree, edge, true)) { - u_e.col(1) = hfunc2.col(m - 1); - } else { - u_e.col(1) = hfunc1.col(m - 1); + for (size_t tree = 0; tree < trunc_lvl; ++tree) { + for (size_t edge = 0; edge < d - tree - 1; ++edge) { + tools_interface::check_user_interrupt(); + Bicop edge_copula = vinecop_cpp.get_pair_copula(tree, edge); + auto var_types = edge_copula.get_var_types(); + size_t m = vine_struct_.min_array(tree, edge); + + u_e = Eigen::MatrixXd(b.size, 2); + u_e.col(0) = hfunc2.col(edge); + if (m == vine_struct_.struct_array(tree, edge, true)) { + u_e.col(1) = hfunc2.col(m - 1); + } else { + u_e.col(1) = hfunc1.col(m - 1); + } + + if ((var_types[0] == "d") || (var_types[1] == "d")) { + u_e.conservativeResize(b.size, 4); + u_e.col(2) = hfunc2_sub.col(edge); + if (m == vine_struct_.struct_array(tree, edge, true)) { + u_e.col(3) = hfunc2_sub.col(m - 1); + } else { + u_e.col(3) = hfunc1_sub.col(m - 1); + } + } + + if (vine_struct_.needed_hfunc1(tree, edge)) { + hfunc1.col(edge) = edge_copula.hfunc1(u_e); + if (var_types[1] == "d") { + u_e_sub = u_e; + u_e_sub.col(1) = u_e.col(3); + hfunc1_sub.col(edge) = edge_copula.hfunc1(u_e_sub); + } + } + + hfunc2.col(edge) = edge_copula.hfunc2(u_e); + if (var_types[0] == "d" && + vine_struct_.needed_hfunc2(tree, edge)) { + u_e_sub = u_e; + u_e_sub.col(0) = u_e.col(2); + hfunc2_sub.col(edge) = edge_copula.hfunc2(u_e_sub); + } + } } + p.segment(b.begin, b.size) = hfunc2.col(0); + }; + + RcppThread::ThreadPool pool((num_threads == 1) ? 0 : num_threads); + pool.map(do_batch, tools_batch::create_batches(u.rows(), num_threads)); + pool.join(); + + return p; +} - if ((var_types[0] == "d") || (var_types[1] == "d")) { - u_e.conservativeResize(b.size, 4); - u_e.col(2) = hfunc2_sub.col(edge); - if (m == vine_struct_.struct_array(tree, edge, true)) { - u_e.col(3) = hfunc2_sub.col(m - 1); - } else { - u_e.col(3) = hfunc1_sub.col(m - 1); - } +// [[Rcpp::export]] +double +cond_loglik_cpp(const Eigen::MatrixXd& u, + const Rcpp::List& vinecop_r, + size_t num_threads) +{ + tools_eigen::check_if_in_unit_cube(u); + auto vinecop_cpp = vinecop_wrap(vinecop_r); + auto rvine_structure_ = vinecop_cpp.get_rvine_structure(); + auto d_ = rvine_structure_.get_dim(); + auto var_types_ = vinecop_cpp.get_var_types(); + auto pair_copulas_ = vinecop_cpp.get_all_pair_copulas(); + if ((static_cast(u.cols()) != d_) && + (static_cast(u.cols()) != 2 * d_)) + throw std::runtime_error("data dimension is incompatible with model."); + + int n_discrete = 0; + for (auto t : var_types_) { + n_discrete += (t == "d"); + } + + // info about the vine structure (reverse rows (!) for more natural + // indexing) + size_t trunc_lvl = rvine_structure_.get_trunc_lvl(); + auto order = rvine_structure_.get_order(); + auto disc_cols = tools_select::get_disc_cols(var_types_); + + // initial value must be 1.0 for multiplication + Eigen::VectorXd pdf = Eigen::VectorXd::Constant(u.rows(), 1.0); + + auto do_batch = [&](const tools_batch::Batch& b) { + // temporary storage objects (all data must be in (0, 1)) + Eigen::MatrixXd hfunc1, hfunc2, u_e, hfunc1_sub, hfunc2_sub, u_e_sub; + hfunc1 = Eigen::MatrixXd::Zero(b.size, d_); + hfunc2 = Eigen::MatrixXd::Zero(b.size, d_); + if (n_discrete > 0) { + hfunc1_sub = hfunc1; + hfunc2_sub = hfunc2; } - if (vine_struct_.needed_hfunc1(tree, edge)) { - hfunc1.col(edge) = edge_copula.hfunc1(u_e); - if (var_types[1] == "d") { - u_e_sub = u_e; - u_e_sub.col(1) = u_e.col(3); - hfunc1_sub.col(edge) = edge_copula.hfunc1(u_e_sub); - } + // fill first row of hfunc2 matrix with evaluation points; + // points have to be reordered to correspond to natural order + for (size_t j = 0; j < d_; ++j) { + hfunc2.col(j) = u.block(b.begin, order[j] - 1, b.size, 1); + if (var_types_[order[j] - 1] == "d") { + hfunc2_sub.col(j) = + u.block(b.begin, d_ + disc_cols[order[j] - 1], b.size, 1); + } } - hfunc2.col(edge) = edge_copula.hfunc2(u_e); - if (var_types[0] == "d" && vine_struct_.needed_hfunc2(tree, edge)) { - u_e_sub = u_e; - u_e_sub.col(0) = u_e.col(2); - hfunc2_sub.col(edge) = edge_copula.hfunc2(u_e_sub); + for (size_t tree = 0; tree < trunc_lvl; ++tree) { + tools_interface::check_user_interrupt( + static_cast(u.rows()) * static_cast(d_) > 1e5); + for (size_t edge = 0; edge < d_ - tree - 1; ++edge) { + tools_interface::check_user_interrupt(edge % 100 == 0); + // extract evaluation point from hfunction matrices (have been + // computed in previous tree level) + Bicop* edge_copula = &pair_copulas_[tree][edge]; + auto var_types = edge_copula->get_var_types(); + size_t m = rvine_structure_.min_array(tree, edge); + + u_e = Eigen::MatrixXd(b.size, 2); + u_e.col(0) = hfunc2.col(edge); + if (m == rvine_structure_.struct_array(tree, edge, true)) { + u_e.col(1) = hfunc2.col(m - 1); + } else { + u_e.col(1) = hfunc1.col(m - 1); + } + + if ((var_types[0] == "d") || (var_types[1] == "d")) { + u_e.conservativeResize(b.size, 4); + u_e.col(2) = hfunc2_sub.col(edge); + if (m == rvine_structure_.struct_array(tree, edge, true)) { + u_e.col(3) = hfunc2_sub.col(m - 1); + } else { + u_e.col(3) = hfunc1_sub.col(m - 1); + } + } + + if (edge == 0) { + pdf.segment(b.begin, b.size) = + pdf.segment(b.begin, b.size) + .cwiseProduct(edge_copula->pdf(u_e)); + } + + // h-functions are only evaluated if needed in next step + if (rvine_structure_.needed_hfunc1(tree, edge)) { + hfunc1.col(edge) = edge_copula->hfunc1(u_e); + if (var_types[1] == "d") { + u_e_sub = u_e; + u_e_sub.col(1) = u_e.col(3); + hfunc1_sub.col(edge) = edge_copula->hfunc1(u_e_sub); + } + } + if (rvine_structure_.needed_hfunc2(tree, edge)) { + hfunc2.col(edge) = edge_copula->hfunc2(u_e); + if (var_types[0] == "d") { + u_e_sub = u_e; + u_e_sub.col(0) = u_e.col(2); + hfunc2_sub.col(edge) = edge_copula->hfunc2(u_e_sub); + } + } + } } - } + }; + + if (trunc_lvl > 0) { + tools_thread::ThreadPool pool((num_threads == 1) ? 0 : num_threads); + pool.map(do_batch, tools_batch::create_batches(u.rows(), num_threads)); + pool.join(); } - p.segment(b.begin, b.size) = hfunc2.col(0); - }; - RcppThread::ThreadPool pool((num_threads == 1) ? 0 : num_threads); - pool.map(do_batch, tools_batch::create_batches(u.rows(), num_threads)); - pool.join(); + return pdf.array().log().sum(); +} + +// [[Rcpp::export()]] +Rcpp::List +with_parameters_cop_cpp(const Rcpp::List& vinecop_r, + const Eigen::VectorXd parameters) +{ + auto vc = vinecop_wrap(vinecop_r); + auto d = vc.get_dim(); + auto pcs = vc.get_all_pair_copulas(); + + size_t i = 0; + for (size_t t = 0; t < d - 1; t++) { + for (size_t e = 0; e < std::min(d, d - t - 1); e++) { + if (pcs[t][e].get_family() == BicopFamily::indep) + continue; + auto lb = pcs[t][e].get_parameters_lower_bounds(); + auto ub = pcs[t][e].get_parameters_upper_bounds(); + auto new_pars = + parameters.segment(i, lb.size()).cwiseMax(lb).cwiseMin(ub); + pcs[t][e].set_parameters(new_pars); + i += lb.size(); + } + } - return p; + vc.set_all_pair_copulas(pcs); + return vinecop_wrap(vc, false); } diff --git a/tests/testthat/test-test-cpit.R b/tests/testthat/test-test-cpit.R index a8dac86..71b3014 100644 --- a/tests/testthat/test-test-cpit.R +++ b/tests/testthat/test-test-cpit.R @@ -42,3 +42,48 @@ test_that("works on uscale", { p <- cpit(fit, dat) expect_true(all(p >= 0 & p <= 1)) }) + + +context("cll()") + +# simulate data +set.seed(3) +x <- matrix(rnorm(30), 10, 3) +y <- x %*% c(1, -1, 2) +dat <- data.frame(y = y, x = x, z = as.factor(rbinom(10, 3, 0.5))) +fit <- vinereg(y ~ ., family = "gauss", dat) + +test_that("catches missing variables", { + expect_error(cll(fit, dat[1:2])) +}) + +test_that("catches incorrect type", { + dat[2] <- ordered(1:10) + expect_error(cll(fit, dat)) +}) + +test_that("catches incorrect levels", { + levels(dat[[5]]) <- 1:50 + expect_error(cll(fit, dat)) +}) + +test_that("works in bivariate case", { + fit <- vinereg(y ~ ., dat[1:2]) + expect_equal(cll(fit, dat[1:2]), fit$stats$cll) +}) + +test_that("works with continuous response", { + expect_equal(cll(fit, dat), fit$stats$cll) +}) + +test_that("works with discrete response", { + dat$y <- as.ordered(dat$y) + fit <- vinereg(y ~ ., dat, fam = "tll") + expect_equal(cll(fit, dat), fit$stats$cll, 0.1) +}) + +test_that("works on uscale", { + dat[] <- runif(nrow(dat) * ncol(dat)) + fit <- vinereg(y ~ ., dat, uscale = TRUE) + expect_equal(cll(fit, dat), fit$stats$cll) +})