Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
* `ragnar_inspector()` now renders all urls as clickable links in the chunk markdown
viewer, even if url is not a formal markdown link (#82).

* Before running examples and tests, we now check whether ragnar can load DuckDB extensions.
This fixes issues in environments where the pre-built DuckDB extension binaries are not
compatible with the installed DuckDB version (#94).

# ragnar 0.2.0

* `ragnar_store_create()` gains a new argument: `version`, with default `2`.
Expand Down
39 changes: 39 additions & 0 deletions R/aaa-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,42 @@ prop_string <- function(
)
)
}

can_load_duckdb_extensions <- local({
  # Why this probe exists:
  # DuckDB extensions are shared libraries that get downloaded when
  # `INSTALL <name>` is run. The DuckDB team ships them pre-built
  # (https://github.com/duckdb/extension-ci-tools) for the major platforms,
  # using the standard compilers.
  # One of the CRAN test servers is a Linux machine where R is compiled with
  # clang rather than GCC. Because the two compilers have different ABIs,
  # executing an extension that was pre-built with GCC crashes there.
  # To avoid that crash on CRAN machines, we first try loading the extensions
  # in a separate process, and only proceed when that succeeds.

  # Memoized answer; stays NULL until the first call has run the probe.
  cached <- NULL

  # R code handed to the child Rscript process on its standard input.
  probe_code <- c(
    "con <- DBI::dbConnect(duckdb::duckdb())",
    "DBI::dbExecute(con, 'INSTALL fts; LOAD fts;')",
    "DBI::dbExecute(con, 'INSTALL vss; LOAD vss;')"
  )

  # Returns TRUE when the child process exits with status 0, FALSE otherwise.
  # The probe runs at most once per session; later calls reuse the result.
  function() {
    if (!is.null(cached)) {
      return(cached)
    }
    exit_status <- system2(
      rscript_exe(),
      "-",
      input = probe_code,
      stdout = FALSE,
      stderr = FALSE
    )
    cached <<- exit_status == 0
    cached
  }
})

# Full path to the Rscript executable that belongs to the running R
# installation (the file name carries an `.exe` suffix on Windows).
rscript_exe <- function() {
  exe_name <- if (is_windows()) "Rscript.exe" else "Rscript"
  file.path(R.home("bin"), exe_name)
}

2 changes: 1 addition & 1 deletion R/retrieve.R
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ ragnar_retrieve_vss_and_bm25 <- function(store, text, top_k = 3, ...) {
#' represents a chunk and always contains a `text` column.
#'
#' @family ragnar_retrieve
#' @examplesIf (rlang::is_installed("dbplyr") && nzchar(Sys.getenv("OPENAI_API_KEY")))
#' @examplesIf (rlang::is_installed("dbplyr") && nzchar(Sys.getenv("OPENAI_API_KEY")) && ragnar:::can_load_duckdb_extensions())
#' ## Build a small store with categories
#' store <- ragnar_store_create(
#' embed = \(x) ragnar::embed_openai(x, model = "text-embedding-3-small"),
Expand Down
2 changes: 1 addition & 1 deletion R/store.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
#'
#' @returns a `RagnarStore` object
#' @export
#' @examples
#' @examplesIf ragnar:::can_load_duckdb_extensions()
#' # A store with a dummy embedding
#' store <- ragnar_store_create(
#' embed = \(x) matrix(stats::runif(10), nrow = length(x), ncol = 10),
Expand Down
2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/sh
"${R_HOME}/bin/Rscript" tools/configure_reticulate.R
"${R_HOME}/bin/Rscript" tools/configure_deps.R
2 changes: 1 addition & 1 deletion configure.win
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/bin/sh
"${R_HOME}/bin/Rscript" tools/configure_reticulate.R
"${R_HOME}/bin/Rscript" tools/configure_deps.R
2 changes: 1 addition & 1 deletion man/ragnar_retrieve.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/ragnar_retrieve_vss.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/ragnar_retrieve_vss_and_bm25.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions man/ragnar_store_create.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions tests/testthat/helper-doc.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,14 @@ skip_on_cran <- function() {
skip_if(maybe_on_cran(), "Maybe On CRAN")
}

# Test helper: skip the current test when the DuckDB extensions cannot be
# loaded, as reported by `can_load_duckdb_extensions()`.
skip_if_cant_load_duckdb_extensions <- function() {
  if (can_load_duckdb_extensions()) {
    # Extensions load fine: nothing to skip.
    return(invisible())
  }
  testthat::skip("DuckDB extensions cannot be loaded")
}

skip_if_cant_use_motherduck <- function() {
skip_if_cant_load_duckdb_extensions()
if (Sys.getenv("motherduck_token") == "") {
testthat::skip("motherduck_token not set")
}
Expand Down
1 change: 1 addition & 0 deletions tests/testthat/test-extra-cols.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
test_that("extra cols works", {
skip_on_cran() # See comment in test-retrieve.R and test-read-markdown.R
skip_if_cant_load_duckdb_extensions()
store <- ragnar_store_create(
version = 2,
embed = \(x) matrix(nrow = length(x), ncol = 100, stats::runif(100)),
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-retrieve.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ system.time(test_that("retrieving works as expected, v1", {
# > Running R code in 'testthat.R' had CPU time 2.6 times elapsed time
# Unfortunately, this means we can't test properly on CRAN.
skip_on_cran()
skip_if_cant_load_duckdb_extensions()

# Create a simple store and insert some chunks
store <- ragnar_store_create(
Expand Down Expand Up @@ -57,6 +58,7 @@ system.time(test_that("retrieving works as expected, v1", {

test_that("retrieving works as expected", {
skip_on_cran() # See comment (above) in test-retrieve.R
skip_if_cant_load_duckdb_extensions()
# Create a simple store and insert some chunks
store <- ragnar_store_create(
embed = \(x) matrix(nrow = length(x), ncol = 100, stats::runif(100))
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-store.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ test_that("additional columns", {

test_that("Allow a NULL embedding function", {
skip_on_cran() # See comment in test-retrieve.R
skip_if_cant_load_duckdb_extensions()
store <- ragnar_store_create(embed = NULL, version = 1)
maybe_set_threads(store)
chunks <- data.frame(
Expand Down Expand Up @@ -302,6 +303,7 @@ test_that("embed functions get the defaults stored", {

test_that("store v1 accepts markdown chunks (from v2)", {
skip_on_cran() # See comment in test-retrieve.R
skip_if_cant_load_duckdb_extensions()
store <- ragnar_store_create(
version = 1,
embed = \(x) matrix(nrow = length(x), ncol = 100, stats::runif(100))
Expand Down
48 changes: 48 additions & 0 deletions tools/configure_deps.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# tools/configure_deps.R

# Make reticulate set up the ephemeral venv in advance,
# primarily so CRAN examples run quickly and don't trigger a warning

Sys.setenv("RETICULATE_PYTHON" = "managed")

library(reticulate)

# Declare all Python requirements in a single call. On Windows, onnxruntime
# is additionally capped at 1.20.1; on other platforms the `if` yields NULL,
# which `c()` drops.
py_require(c(
  "markitdown[all]",
  if (identical(.Platform$OS.type, "windows")) {
    "onnxruntime<=1.20.1"
  }
))

# Best effort: wrapped in try() so a failure here does not abort the script.
try({
  print(py_config())
  import("markitdown")
})



# Full path to the Rscript executable that belongs to the running R
# installation.
#
# The platform check uses `.Platform$OS.type` directly: this file runs as a
# standalone script, so it cannot rely on helper functions that are defined
# inside the package.
rscript_exe <- function() {
  on_windows <- identical(.Platform$OS.type, "windows")
  file.path(
    R.home("bin"),
    if (on_windows) "Rscript.exe" else "Rscript"
  )
}

load_duckdb_extensions_in_subprocess <- function() {
  # Download the DuckDB extensions ahead of time (DuckDB caches them too).
  # Same motivation as the reticulate setup: avoid the NOTE caused by a
  # first-run download:
  #   'Examples with CPU (user + system) or elapsed time > 5s'
  # This happens in a subprocess in case of segfaults with a mismatched ABI;
  # see the comments in the package code.
  extension_setup_code <- c(
    "con <- DBI::dbConnect(duckdb::duckdb())",
    "DBI::dbExecute(con, 'INSTALL fts; INSTALL vss;')",
    "DBI::dbExecute(con, 'LOAD fts; LOAD vss;')"
  )

  # `rscript_exe()` is evaluated inside try() on purpose, so that an error
  # raised while resolving the executable is caught as well.
  try(system2(
    command = rscript_exe(),
    args = "-",
    input = extension_setup_code
  ))
}

# Run the DuckDB extension pre-download now, as part of this script.
load_duckdb_extensions_in_subprocess()

# NOTE(review): presumably a deliberate neutral final value for the script —
# confirm whether anything depends on it.
NULL
18 changes: 0 additions & 18 deletions tools/configure_reticulate.R

This file was deleted.

Loading