diff --git a/.github/shiny-workflows/package-install.sh b/.github/shiny-workflows/package-install.sh new file mode 100755 index 000000000..e717ce520 --- /dev/null +++ b/.github/shiny-workflows/package-install.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# ggsql (Suggests) requires unixODBC to compile from source on macOS +if [ "$RUNNER_OS" == "macOS" ]; then + brew install unixodbc +fi diff --git a/js/build.mjs b/js/build.mjs index a07f83844..6b95400d0 100644 --- a/js/build.mjs +++ b/js/build.mjs @@ -20,6 +20,10 @@ const jsTargets = [ source: "src/viz.ts", output: "../pkg-py/src/querychat/static/js/viz.js", }, + { + source: "src/viz.ts", + output: "../pkg-r/inst/htmldep/viz.js", + }, ]; const cssTargets = [ @@ -27,6 +31,10 @@ const cssTargets = [ source: "src/viz.css", output: "../pkg-py/src/querychat/static/css/viz.css", }, + { + source: "src/viz.css", + output: "../pkg-r/inst/htmldep/viz.css", + }, ]; const ensureParentDir = async (relativePath) => { @@ -81,10 +89,9 @@ const reportMissingSources = async () => { }; export const stageBuildOutputs = async (stageDir) => { - const cssSourcePath = path.resolve(rootDir, "src/viz.css"); - const cssSource = await readFile(cssSourcePath, "utf8"); - for (const target of cssTargets) { + const cssSourcePath = path.resolve(rootDir, target.source); + const cssSource = await readFile(cssSourcePath, "utf8"); const outputPath = resolveOutputPath(stageDir, target.output); await mkdir(path.dirname(outputPath), { recursive: true }); await writeFile(outputPath, `${banner(target.source)}${cssSource}`, "utf8"); diff --git a/pkg-r/DESCRIPTION b/pkg-r/DESCRIPTION index 6cda087e6..487a8c331 100644 --- a/pkg-r/DESCRIPTION +++ b/pkg-r/DESCRIPTION @@ -22,7 +22,8 @@ BugReports: https://github.com/posit-dev/querychat/issues Depends: R (>= 4.1.0) Imports: - bslib, + bsicons, + bslib (>= 0.11.0), cli, DBI, ellmer (>= 0.4.1), @@ -37,17 +38,19 @@ Imports: utils, whisker Suggests: - bsicons, dbplyr, dplyr, DT, duckdb, + ggsql, knitr, palmerpenguins, 
rmarkdown, RSQLite, + rsvg, shinytest2, testthat (>= 3.0.0), + V8, withr VignetteBuilder: knitr diff --git a/pkg-r/R/QueryChat.R b/pkg-r/R/QueryChat.R index 1d1b4f91d..f08e5fabe 100644 --- a/pkg-r/R/QueryChat.R +++ b/pkg-r/R/QueryChat.R @@ -137,8 +137,10 @@ QueryChat <- R6::R6Class( create_session_client = function( client_spec = NULL, tools = NA, + session = NULL, update_dashboard = function(query, title) {}, - reset_dashboard = function() {} + reset_dashboard = function() {}, + visualize = function(data) {} ) { spec <- client_spec %||% private$.client_spec chat <- as_querychat_client(spec) @@ -169,6 +171,21 @@ QueryChat <- R6::R6Class( chat$register_tool(tool_query(private$.data_source)) } + if ("visualize" %in% tools) { + rlang::check_installed( + "ggsql", + reason = "for visualization support." + ) + chat$register_tool( + tool_visualize_dashboard( + private$.data_source, + session = session, + update_fn = visualize, + has_tool_query = "query" %in% tools + ) + ) + } + chat } ), @@ -250,7 +267,11 @@ QueryChat <- R6::R6Class( # Validate arguments check_string(id, allow_null = TRUE) check_string(greeting, allow_null = TRUE) - arg_match(tools, values = c("filter", "update", "query"), multiple = TRUE) + arg_match( + tools, + values = c("filter", "update", "query", "visualize"), + multiple = TRUE + ) tools <- normalize_tools(tools) check_string(data_description, allow_null = TRUE) check_number_whole(categorical_threshold, min = 1) @@ -321,17 +342,25 @@ QueryChat <- R6::R6Class( #' `title` generated by the LLM for the `update_dashboard` tool. #' @param reset_dashboard Optional function to call when the #' `reset_dashboard` tool is called. + #' @param visualize Optional function to call with a list containing + #' `ggsql`, `title`, and `widget_id` when a visualization succeeds. + #' @param session A Shiny session object. Required when `"visualize"` is + #' in `tools` and you want interactive chart rendering. 
When `NULL` + #' (the default), visualizations still execute but are not rendered + #' as Shiny outputs. client = function( tools = NA, update_dashboard = function(query, title) {}, - reset_dashboard = function() {} + reset_dashboard = function() {}, + visualize = function(data) {}, + session = NULL ) { private$require_data_source("$client") if (!is_na(tools) && !is.null(tools)) { tools <- arg_match( tools, - values = c("filter", "update", "query"), + values = c("filter", "update", "query", "visualize"), multiple = TRUE ) tools <- normalize_tools(tools) @@ -339,8 +368,10 @@ QueryChat <- R6::R6Class( private$create_session_client( tools = tools, + session = session, update_dashboard = update_dashboard, - reset_dashboard = reset_dashboard + reset_dashboard = reset_dashboard, + visualize = visualize ) }, @@ -421,7 +452,6 @@ QueryChat <- R6::R6Class( app_obj = function(..., bookmark_store = "url") { private$require_data_source("$app_obj") check_installed("DT") - check_installed("bsicons") check_dots_empty() table_name <- private$.data_source$table_name @@ -709,6 +739,7 @@ QueryChat <- R6::R6Class( data_source = private$.data_source, greeting = self$greeting, client = create_session_client, + tools = self$tools, enable_bookmarking = enable_bookmarking ) }, diff --git a/pkg-r/R/QueryChatSystemPrompt.R b/pkg-r/R/QueryChatSystemPrompt.R index 694355075..ba11d454b 100644 --- a/pkg-r/R/QueryChatSystemPrompt.R +++ b/pkg-r/R/QueryChatSystemPrompt.R @@ -101,10 +101,24 @@ QueryChatSystemPrompt <- R6::R6Class( extra_instructions = self$extra_instructions, has_tool_update = if ("update" %in% tools) "true", has_tool_query = if ("query" %in% tools) "true", + has_tool_visualize = if ("visualize" %in% tools) "true", include_query_guidelines = if (length(tools) > 0) "true" ) - whisker::whisker.render(self$template, context) + partials <- list() + syntax_path <- system.file( + "prompts", + "ggsql-syntax.md", + package = "querychat" + ) + if (nzchar(syntax_path)) { + 
partials[["ggsql-syntax"]] <- paste( + readLines(syntax_path), + collapse = "\n" + ) + } + + whisker::whisker.render(self$template, context, partials = partials) } ) ) diff --git a/pkg-r/R/querychat_module.R b/pkg-r/R/querychat_module.R index 3182ced59..3af8a2746 100644 --- a/pkg-r/R/querychat_module.R +++ b/pkg-r/R/querychat_module.R @@ -26,6 +26,7 @@ mod_server <- function( data_source, greeting, client, + tools, enable_bookmarking = FALSE ) { shiny::moduleServer(id, function(input, output, session) { @@ -62,11 +63,24 @@ mod_server <- function( querychat_tool_result(action = "reset") } + # Non-reactive bookkeeping for bookmark save/restore of viz widgets + viz_widgets <- list() + + on_visualize <- function(data) { + viz_widgets[[length(viz_widgets) + 1L]] <<- list( + widget_id = data$widget_id, + ggsql = data$ggsql + ) + } + # Set up the chat object for this session check_function(client) chat <- client( update_dashboard = update_dashboard, - reset_dashboard = reset_query + reset_dashboard = reset_query, + visualize = on_visualize, + tools = tools, + session = session ) # Prepopulate the chat UI with a welcome message that appears to be from the @@ -129,6 +143,9 @@ mod_server <- function( state$values$querychat_sql <- current_query() state$values$querychat_title <- current_title() state$values$querychat_has_greeted <- has_greeted() + if (length(viz_widgets) > 0) { + state$values$querychat_viz_widgets <- viz_widgets + } }) shiny::onRestore(function(state) { @@ -141,6 +158,14 @@ mod_server <- function( if (!is.null(state$values$querychat_has_greeted)) { has_greeted(state$values$querychat_has_greeted) } + if (!is.null(state$values$querychat_viz_widgets)) { + restored <- restore_viz_widgets( + data_source, + state$values$querychat_viz_widgets, + session + ) + viz_widgets <<- restored + } }) } @@ -159,3 +184,35 @@ GREETING_PROMPT <- paste( "Include a few sample suggestions grouped under ##### headings,", "using the suggestion card format from your instructions." 
) + +restore_viz_widgets <- function(data_source, saved_widgets, session) { + if (!rlang::is_installed("ggsql")) { + warning( + "ggsql is not installed; skipping restoration of visualization widgets.", + call. = FALSE + ) + return(list()) + } + + restored <- list() + for (entry in saved_widgets) { + tryCatch( + { + validated <- ggsql::ggsql_validate(entry$ggsql) + spec <- execute_ggsql(data_source, validated) + session$output[[entry$widget_id]] <- ggsql::renderGgsql(spec) + restored <- c(restored, list(entry)) + }, + error = function(e) { + warning( + sprintf( + "Failed to restore visualization widget '%s' on bookmark restore.", + entry$widget_id + ), + call. = FALSE + ) + } + ) + } + restored +} diff --git a/pkg-r/R/querychat_tools.R b/pkg-r/R/querychat_tools.R index 60e99af9c..03f33216a 100644 --- a/pkg-r/R/querychat_tools.R +++ b/pkg-r/R/querychat_tools.R @@ -92,8 +92,13 @@ tool_query <- function(data_source) { db_type <- data_source$get_db_type() ellmer::tool( - function(query, `_intent` = "") { - querychat_tool_result(data_source, query, action = "query") + function(query, `_intent` = "", collapsed = FALSE) { + querychat_tool_result( + data_source, + query, + action = "query", + collapsed = collapsed + ) }, name = "querychat_query", description = interpolate_package("tool-query.md", db_type = db_type), @@ -106,6 +111,10 @@ tool_query <- function(data_source) { ), `_intent` = ellmer::type_string( "A brief, user-friendly description of what this query calculates or retrieves." + ), + collapsed = ellmer::type_boolean( + "Optional (default: false). Set to true for exploratory or preparatory queries whose results aren't the primary answer. 
When true, the result card starts collapsed.", + required = FALSE ) ), annotations = ellmer::tool_annotations( @@ -161,7 +170,8 @@ querychat_tool_result <- function( data_source, query, title = NULL, - action = "update" + action = "update", + collapsed = NULL ) { action <- arg_match(action, c("update", "query", "reset")) @@ -231,7 +241,11 @@ querychat_tool_result <- function( title = if (action == "update" && !is.null(title)) title, show_request = is_error, markdown = display_md, - open = querychat_tool_starts_open(action) + open = if (!is.null(collapsed)) { + !collapsed + } else { + querychat_tool_starts_open(action) + } ) ) ) diff --git a/pkg-r/R/querychat_viz.R b/pkg-r/R/querychat_viz.R new file mode 100644 index 000000000..55b8cab2a --- /dev/null +++ b/pkg-r/R/querychat_viz.R @@ -0,0 +1,389 @@ +tool_visualize_dashboard <- function( + data_source, + session, + update_fn = function(data) {}, + has_tool_query = FALSE +) { + check_data_source(data_source) + check_function(update_fn) + + db_type <- data_source$get_db_type() + + ellmer::tool( + tool_visualize_impl(data_source, session, update_fn), + name = "querychat_visualize", + description = render_viz_tool_description( + db_type = db_type, + has_tool_query = has_tool_query + ), + arguments = list( + ggsql = ellmer::type_string( + ellmer::interpolate( + "A full ggsql query. Must include a VISUALISE clause and at least one DRAW clause. The SELECT portion uses {{db_type}} SQL; VISUALISE and MAPPING accept column names only, not expressions. Do NOT include `LABEL title => ...` in the query -- use the `title` parameter instead.", + db_type = db_type + ) + ), + title = ellmer::type_string( + "A brief, user-friendly title for this visualization. This is displayed as the card header above the chart." 
+ ) + ), + annotations = ellmer::tool_annotations( + title = "Query Visualization", + icon = viz_icon() + ) + ) +} + +tool_visualize_impl <- function(data_source, session, update_fn) { + force(data_source) + force(session) + force(update_fn) + + function(ggsql, title) { + visualize_result(data_source, session, update_fn, ggsql, title) + } +} + +random_hex <- function(n_bytes = 8) { + paste0( + format(as.hexmode(sample(256, n_bytes, replace = TRUE) - 1L), width = 2), + collapse = "" + ) +} + +visualize_result <- function( + data_source, + session, + update_fn, + ggsql_str, + title +) { + rlang::check_installed("ggsql", reason = "for visualization support.") + + validated <- ggsql::ggsql_validate(ggsql_str) + has_visual <- ggsql::ggsql_has_visual(validated) + + if (!has_visual) { + rlang::abort( + "Query must include a VISUALISE clause. Use querychat_query for queries without visualization." + ) + } + + if (!isTRUE(validated$valid)) { + rlang::abort(collapse_validation_errors(validated)) + } + + spec <- execute_ggsql(data_source, validated) + + widget_id <- paste0("querychat_viz_", random_hex()) + + viz_container <- NULL + if (!is.null(session)) { + session$output[[widget_id]] <- ggsql::renderGgsql(spec) + viz_container <- htmltools::div( + class = "querychat-viz-container", + bslib::as_fill_carrier(), + ggsql::ggsqlOutput(session$ns(widget_id)), + viz_dep() + ) + } else { + # Non-Shiny usage: print the Spec to display via the interactive viewer + print(spec) + update_fn(list(ggsql = ggsql_str, title = title, widget_id = widget_id)) + return(ellmer::ContentToolResult( + value = sprintf( + "Chart displayed%s.", + if (nzchar(title)) sprintf(" with title '%s'", title) else "" + ) + )) + } + + # PNG snapshot for LLM feedback (best-effort; requires V8 + rsvg) + png_file <- tempfile(fileext = ".png") + on.exit(unlink(png_file), add = TRUE) + png_content <- tryCatch( + { + ggsql::ggsql_save(spec, png_file, width = 500, height = 300) + ellmer::content_image_file(png_file) + 
}, + error = function(e) { + cli::cli_warn(c( + "Unable to render PNG preview for the visualization card.", + "i" = "The interactive chart will still render, but the LLM will not receive the static image preview for this visualization.", + "i" = "PNG preview generation requires optional dependencies used by {.fn ggsql::ggsql_save}, typically {.pkg V8} and {.pkg rsvg}.", + "x" = "Underlying error: {.msg {conditionMessage(e)}}" + )) + NULL + } + ) + + title_display <- if (nzchar(title)) { + sprintf(" with title '%s'", title) + } else { + "" + } + text <- sprintf("Chart displayed%s.", title_display) + + # All list elements must be Content S7 objects for ellmer's + # expand_content_if_needed() to handle mixed text+image results. + value <- if (!is.null(png_content)) { + list(ellmer::ContentText(text), png_content) + } else { + text + } + + update_fn(list(ggsql = ggsql_str, title = title, widget_id = widget_id)) + + footer <- build_viz_footer( + ggsql_str, + title, + widget_id, + dom_widget_id = session$ns(widget_id) + ) + extra <- list( + display = list( + html = viz_container, + title = if (nzchar(title)) title else "Query Visualization", + show_request = FALSE, + open = querychat_tool_starts_open("visualize"), + full_screen = TRUE, + icon = viz_icon(), + footer = footer + ) + ) + + ellmer::ContentToolResult(value = value, extra = extra) +} + +collapse_validation_errors <- function(validated) { + errors <- validated$errors + if (is.null(errors) || !nrow(errors)) { + return("Invalid ggsql query.") + } + + messages <- errors$message + messages <- messages[!is.na(messages) & nzchar(messages)] + if (!length(messages)) { + return("Invalid ggsql query.") + } + + paste(messages, collapse = "\n") +} + +build_viz_footer <- function( + ggsql_str, + title, + widget_id, + dom_widget_id +) { + footer_id <- paste0("querychat_footer_", random_hex()) + query_section_id <- paste0(footer_id, "_query") + code_editor_id <- paste0(footer_id, "_code") + + code_editor <- 
bslib::input_code_editor( + id = code_editor_id, + value = ggsql_str, + language = "ggsql", + read_only = TRUE, + line_numbers = FALSE, + height = "auto", + theme_dark = "github-dark" + ) + + query_section <- shiny::tags$div( + class = "querychat-query-section", + id = query_section_id, + code_editor + ) + + buttons_row <- shiny::tags$div( + class = "querychat-footer-buttons", + # Left: Show Query toggle + shiny::tags$div( + class = "querychat-footer-left", + shiny::tags$button( + class = "querychat-show-query-btn", + `data-querychat-action` = "show-query", + `data-target` = query_section_id, + shiny::tags$span(class = "querychat-query-chevron", "\u25b6"), + shiny::tags$span(class = "querychat-query-label", "Show Query") + ) + ), + # Right: Save dropdown + shiny::tags$div( + class = "querychat-footer-right", + shiny::tags$div( + class = "querychat-save-dropdown", + shiny::tags$button( + class = "querychat-save-btn", + `data-querychat-action` = "save-toggle", + `data-widget-id` = dom_widget_id, + bsicons::bs_icon("download", class = "querychat-icon"), + "Save", + bsicons::bs_icon("chevron-down", class = "querychat-dropdown-chevron") + ), + shiny::tags$div( + class = "querychat-save-menu", + shiny::tags$button( + class = "querychat-save-png-btn", + `data-querychat-action` = "save-png", + `data-widget-id` = dom_widget_id, + `data-title` = title, + "Save as PNG" + ), + shiny::tags$button( + class = "querychat-save-svg-btn", + `data-querychat-action` = "save-svg", + `data-widget-id` = dom_widget_id, + `data-title` = title, + "Save as SVG" + ) + ) + ) + ) + ) + + htmltools::tagList(buttons_row, query_section) +} + +viz_icon <- function() { + '' +} + +viz_dep <- function() { + htmltools::htmlDependency( + name = "querychat-viz", + version = utils::packageVersion("querychat"), + package = "querychat", + src = "htmldep", + stylesheet = "viz.css", + script = "viz.js" + ) +} + +render_viz_tool_description <- function(db_type, has_tool_query = FALSE) { + path <- 
system.file("prompts", "tool-visualize.md", package = "querychat") + stopifnot(nzchar(path), file.exists(path)) + template <- paste(readLines(path, warn = FALSE), collapse = "\n") + whisker::whisker.render( + template, + list( + db_type = db_type, + has_tool_query = if (isTRUE(has_tool_query)) "true" + ) + ) +} + +#' Execute a pre-validated ggsql query against a DataSource +#' +#' Executes the SQL portion through a DataSource (preserving database pushdown), +#' then feeds the result into a ggsql DuckDB reader to produce a Spec. +#' +#' @param data_source A querychat DataSource R6 object. +#' @param validated A pre-validated ggsql query (from `ggsql::ggsql_validate()`). +#' Must be a list with `$sql` and `$visual` fields. +#' +#' @return A `ggsql::Spec` R6 object (the writer-independent plot specification). +#' +#' @keywords internal +execute_ggsql <- function(data_source, validated) { + rlang::check_installed("ggsql", reason = "for visualization support.") + + visual <- validated$visual + + if (has_layer_level_source(visual)) { + cli::cli_abort( + "Layer-specific sources are not currently supported in querychat visual queries. Rewrite the query so that all layers come from the final SQL result." + ) + } + + df <- data_source$execute_query(validated$sql) + + if (inherits(df, "tbl_sql")) { + # Materialize the query for ggsql, {dplyr} guaranteed by TblSqlSource + df <- dplyr::collect(df) + } + + reader <- ggsql::duckdb_reader() + table <- extract_visualise_table(visual) + + if (!is.null(table)) { + # VISUALISE [mappings] FROM — register data under the + # referenced table name and execute the visual part directly. + name <- if (startsWith(table, '"') && endsWith(table, '"')) { + substr(table, 2, nchar(table) - 1) + } else { + table + } + ggsql::ggsql_register(reader, df, name) + ggsql::ggsql_execute(reader, visual) + } else { + # SELECT ... VISUALISE — no FROM in VISUALISE clause, so register + # under a synthetic name and prepend a SELECT. 
+ ggsql::ggsql_register(reader, df, "_data") + ggsql::ggsql_execute(reader, paste("SELECT * FROM _data", visual)) + } +} + +#' Extract the table name from a VISUALISE clause's FROM, if present +#' +#' Looks only in the portion of the visual string before the first DRAW keyword, +#' so FROM clauses inside DRAW (e.g., MAPPING x FROM other) are ignored. +#' +#' @param visual A ggsql VISUALISE string. +#' @return The table name string (possibly quoted), or `NULL` if not present. +#' +#' @keywords internal +extract_visualise_table <- function(visual) { + draw_pos <- regexpr("\\bDRAW\\b", visual, ignore.case = TRUE, perl = TRUE) + vis_clause <- if (draw_pos > 0) substr(visual, 1, draw_pos - 1L) else visual + m <- regmatches( + vis_clause, + regexpr( + '\\bFROM\\s+("[^"]+?"|\\S+)', + vis_clause, + ignore.case = TRUE, + perl = TRUE + ) + ) + if (length(m) == 0 || !nzchar(m)) { + return(NULL) + } + sub("^(?i)FROM\\s+", "", m, perl = TRUE) +} + +#' Detect whether a VISUALISE string has a layer-level FROM source +#' +#' Returns `TRUE` when a DRAW clause defines its own `FROM ` via a +#' MAPPING sub-clause. Querychat replays VISUALISE against a single local +#' relation, so layer-specific sources cannot be preserved reliably. +#' +#' @param visual A ggsql VISUALISE string. +#' @return `TRUE` if any DRAW clause contains a MAPPING ... FROM source. +#' +#' @keywords internal +has_layer_level_source <- function(visual) { + # Split at clause boundaries (DRAW, SCALE, etc.) using a lookbehind for + # whitespace rather than \b, which can split mid-word in R's PCRE engine. 
+ clauses <- strsplit( + visual, + "(?i)(?<=\\s)(?=DRAW|SCALE|PROJECT|FACET|PLACE|LABEL|THEME)", + perl = TRUE + )[[1]] + for (clause in clauses) { + if (!grepl("^\\s*DRAW\\b", clause, ignore.case = TRUE, perl = TRUE)) { + next + } + if ( + grepl( + "\\bMAPPING\\b[\\s\\S]*?\\bFROM\\s+(\"[^\"]+?\"|\\S+)", + clause, + ignore.case = TRUE, + perl = TRUE + ) + ) { + return(TRUE) + } + } + FALSE +} diff --git a/pkg-r/README.md b/pkg-r/README.md index d6f70fa0a..ca99f97fb 100644 --- a/pkg-r/README.md +++ b/pkg-r/README.md @@ -52,6 +52,11 @@ querychat can also handle more general questions about the data that require cal ![](man/figures/quickstart-summary.png){alt="Screenshot of the querychat's app with a summary statistic inlined in the chat." class="rounded shadow"} +querychat can also create visualizations, powered by [ggsql](https://ggsql.org/). +With the [visualization tool](https://posit-dev.github.io/querychat/r/articles/visualize.html) enabled, ask for a chart and it appears inline in the conversation: + +![](man/figures/viz-bar-chart.png){alt="Bar chart showing average body mass by penguin species." class="rounded shadow"} + ## Custom apps querychat is designed to be highly extensible -- it provides programmatic access to the chat interface, the filtered/sorted data frame, SQL queries, and more. @@ -93,6 +98,7 @@ From here, you might want to learn more about: - [Models](https://posit-dev.github.io/querychat/r/articles/models.html): customize the LLM behind querychat. - [Data sources](https://posit-dev.github.io/querychat/r/articles/data-sources.html): different data sources you can use with querychat. - [Provide context](https://posit-dev.github.io/querychat/r/articles/context.html): provide the LLM with the context it needs to work well. +- [Visualizations](https://posit-dev.github.io/querychat/r/articles/visualize.html): create inline charts powered by ggsql. 
- [Build an app](https://posit-dev.github.io/querychat/r/articles/build.html): design a custom Shiny app around querychat. - [Greet users](https://posit-dev.github.io/querychat/r/articles/greet.html): create welcoming onboarding experiences. - [Tools](https://posit-dev.github.io/querychat/r/articles/tools.html): understand what querychat can do under the hood. diff --git a/pkg-r/inst/examples-shiny/10-viz-app/app.R b/pkg-r/inst/examples-shiny/10-viz-app/app.R new file mode 100644 index 000000000..fbed89e73 --- /dev/null +++ b/pkg-r/inst/examples-shiny/10-viz-app/app.R @@ -0,0 +1,32 @@ +library(shiny) +library(bslib) +library(querychat) +library(palmerpenguins) + +qc <- QueryChat$new( + penguins, + tools = c("update", "query", "visualize"), + data_description = paste( + "The Palmer Penguins dataset contains measurements of bill", + "dimensions, flipper length, body mass, sex, and species", + "(Adelie, Chinstrap, and Gentoo) collected from three islands in", + "the Palmer Archipelago, Antarctica." + ) +) + +ui <- page_sidebar( + title = "querychat viz demo", + sidebar = qc$sidebar(width = 400, open = TRUE, position = "right"), + card( + full_screen = TRUE, + card_header("Data"), + DT::DTOutput("dt") + ) +) + +server <- function(input, output, session) { + qc_vals <- qc$server() + output$dt <- DT::renderDT(qc_vals$df(), fillContainer = TRUE) +} + +shinyApp(ui, server) diff --git a/pkg-r/inst/htmldep/viz.css b/pkg-r/inst/htmldep/viz.css new file mode 100644 index 000000000..bbf54e6e6 --- /dev/null +++ b/pkg-r/inst/htmldep/viz.css @@ -0,0 +1,150 @@ +/* Generated file. Source: js/src/viz.css. Do not edit directly. 
*/ +/* Hide Vega's built-in action dropdown (we have our own save button) */ +.querychat-viz-container details:has(> .vega-actions) { + display: none !important; +} + +/* ---- Visualization container ---- */ + +.querychat-viz-container { + aspect-ratio: 4 / 2; + width: 100%; +} + +/* In full-screen mode, let the chart fill the available space */ +.shiny-tool-card[fullscreen] .querychat-viz-container { + aspect-ratio: unset; +} + +/* ---- Visualization footer ---- */ + +.querychat-footer-buttons { + display: flex; + justify-content: space-between; + align-items: center; +} + +.querychat-footer-left, +.querychat-footer-right { + display: flex; + align-items: center; + gap: 4px; +} + +.querychat-show-query-btn, +.querychat-save-btn { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 2px 8px; + height: 28px; + border: none; + border-radius: var(--bs-border-radius, 4px); + background: transparent; + color: var(--bs-secondary-color, #6c757d); + font-size: 0.75rem; + cursor: pointer; + white-space: nowrap; +} + +.querychat-show-query-btn:hover, +.querychat-save-btn:hover { + color: var(--bs-body-color, #212529); + background-color: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.05); +} + +.querychat-query-chevron { + font-size: 0.625rem; + transition: transform 150ms; + display: inline-block; +} + +.querychat-query-chevron--expanded { + transform: rotate(90deg); +} + +.querychat-icon { + width: 14px; + height: 14px; +} + +.querychat-dropdown-chevron { + width: 12px; + height: 12px; + margin-left: 2px; +} + +.querychat-save-dropdown { + position: relative; +} + +.querychat-save-menu { + display: none; + position: absolute; + right: 0; + bottom: 100%; + margin-bottom: 4px; + z-index: 20; + background: var(--bs-body-bg, #fff); + border: 1px solid var(--bs-border-color, #dee2e6); + border-radius: var(--bs-border-radius, 4px); + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); + padding: 4px 0; + min-width: 120px; +} + +.querychat-save-menu--visible { + display: 
block; +} + +.querychat-save-menu button { + display: block; + width: 100%; + padding: 6px 12px; + border: none; + background: transparent; + color: var(--bs-body-color, #212529); + font-size: 0.75rem; + text-align: left; + cursor: pointer; +} + +.querychat-save-menu button:hover { + background-color: rgba(var(--bs-emphasis-color-rgb, 0, 0, 0), 0.05); +} + +.querychat-query-section { + display: none; + position: relative; + border-top: 1px solid var(--bs-border-color, #dee2e6); + margin: 8px -16px -8px; +} + +.querychat-query-section--visible { + display: block; +} + + +/* shinychat sets max-height:500px on all cards, which is too small for viz+editor */ +.shiny-tool-card:has(.querychat-viz-container) { + max-height: 700px; + overflow: hidden; +} + +.shiny-tool-card:has(.querychat-viz-container) > .card-footer { + flex: 0 0 auto; +} + +.shiny-tool-card[fullscreen]:has(.querychat-viz-container) { + max-height: none; +} + +.querychat-query-section bslib-code-editor .code-editor { + margin: 1em; +} + +.querychat-query-section bslib-code-editor .prism-code-editor { + background-color: var(--bs-light, #f8f8f8); + max-height: 200px; + overflow-y: auto; +} diff --git a/pkg-r/inst/htmldep/viz.js b/pkg-r/inst/htmldep/viz.js new file mode 100644 index 000000000..d6b4d2038 --- /dev/null +++ b/pkg-r/inst/htmldep/viz.js @@ -0,0 +1,169 @@ +/* Generated file. Source: js/src/viz.ts. Do not edit directly. 
*/ + +"use strict"; +(() => { + // src/viz-core.ts + function findWidgetContainer(widgetId) { + return document.getElementById(widgetId); + } + function findVegaAction(container, format) { + return container.querySelector( + `.vega-actions a[download$=".${format}"]` + ); + } + function triggerVegaAction(link, filename) { + link.download = filename; + if (link.href && link.href !== "#" && !link.href.endsWith("#")) { + link.click(); + return; + } + const observer = new MutationObserver(() => { + if (link.href && link.href !== "#" && !link.href.endsWith("#")) { + observer.disconnect(); + clearTimeout(timeoutId); + link.click(); + } + }); + observer.observe(link, { + attributes: true, + attributeFilter: ["href"] + }); + const timeoutId = window.setTimeout(() => { + observer.disconnect(); + console.error("Timed out waiting for vega-embed to generate image"); + }, 5e3); + link.dispatchEvent(new MouseEvent("mousedown", { bubbles: true })); + } + var openSaveMenu = null; + function closeSaveMenu(menu) { + menu.classList.remove("querychat-save-menu--visible"); + if (openSaveMenu === menu) { + openSaveMenu = null; + } + } + function closeOpenSaveMenu() { + if (openSaveMenu) { + closeSaveMenu(openSaveMenu); + } + } + function handleShowQuery(event, button) { + event.stopPropagation(); + const targetId = button.dataset.target; + if (!targetId) { + return; + } + const section = document.getElementById(targetId); + if (!section) { + return; + } + const isVisible = section.classList.toggle("querychat-query-section--visible"); + const label = button.querySelector(".querychat-query-label"); + const chevron = button.querySelector(".querychat-query-chevron"); + if (label) { + label.textContent = isVisible ? 
"Hide Query" : "Show Query"; + } + if (chevron) { + chevron.classList.toggle("querychat-query-chevron--expanded", isVisible); + } + } + function handleSaveToggle(event, button) { + event.stopPropagation(); + const menu = button.parentElement?.querySelector( + ".querychat-save-menu" + ); + if (!menu) { + return; + } + if (openSaveMenu && openSaveMenu !== menu) { + closeSaveMenu(openSaveMenu); + } + if (menu.classList.contains("querychat-save-menu--visible")) { + closeSaveMenu(menu); + } else { + menu.classList.add("querychat-save-menu--visible"); + openSaveMenu = menu; + } + } + function handleSaveExport(event, button, format, adapter) { + event.stopPropagation(); + const widgetId = button.dataset.widgetId; + if (!widgetId) { + return; + } + const filename = button.dataset.title || "chart"; + const menu = button.closest(".querychat-save-menu"); + if (menu) { + closeSaveMenu(menu); + } + adapter.exportPlot(widgetId, format, filename); + } + function handleCopy(event, button) { + event.stopPropagation(); + const query = button.dataset.query; + if (!query) { + return; + } + navigator.clipboard.writeText(query).then(() => { + const original = button.textContent; + button.textContent = "Copied!"; + setTimeout(() => { + button.textContent = original; + }, 2e3); + }).catch((error) => { + console.error("Failed to copy:", error); + }); + } + var installed = false; + function installVizFooter(adapter) { + if (installed) return; + installed = true; + window.addEventListener("click", (event) => { + const target = event.target; + if (!(target instanceof Element)) { + closeOpenSaveMenu(); + return; + } + const actionElement = target.closest("[data-querychat-action]"); + const action = actionElement?.dataset.querychatAction; + if (!action || !actionElement) { + closeOpenSaveMenu(); + return; + } + switch (action) { + case "show-query": + handleShowQuery(event, actionElement); + return; + case "save-toggle": + handleSaveToggle(event, actionElement); + return; + case "save-png": + 
handleSaveExport(event, actionElement, "png", adapter); + return; + case "save-svg": + handleSaveExport(event, actionElement, "svg", adapter); + return; + case "copy": + handleCopy(event, actionElement); + return; + } + }); + } + function createVegaActionAdapter() { + return { + exportPlot(widgetId, format, filename) { + const container = findWidgetContainer(widgetId); + if (!container) { + return; + } + const link = findVegaAction(container, format); + if (!link) { + return; + } + triggerVegaAction(link, `${filename}.${format}`); + } + }; + } + + // src/viz.ts + installVizFooter(createVegaActionAdapter()); +})(); diff --git a/pkg-r/inst/prompts/ggsql-syntax.md b/pkg-r/inst/prompts/ggsql-syntax.md new file mode 100644 index 000000000..868d7c334 --- /dev/null +++ b/pkg-r/inst/prompts/ggsql-syntax.md @@ -0,0 +1,553 @@ +## ggsql Syntax Reference + +### Quick Reference + +```sql +[WITH cte AS (...), ...] +[SELECT columns FROM table WHERE conditions] +VISUALISE [mappings] [FROM source] +DRAW geom_type + [MAPPING col AS aesthetic, ... FROM source] + [REMAPPING stat AS aesthetic, ...] + [SETTING param => value, ...] + [FILTER sql_condition] + [PARTITION BY col, ...] + [ORDER BY col [ASC|DESC], ...] +[SCALE [TYPE] aesthetic [FROM ...] [TO ...] [VIA ...] [SETTING ...] [RENAMING ...]] +[PROJECT [aesthetics] TO coord_system [SETTING ...]] +[FACET var | row_var BY col_var [SETTING free => 'x'|'y'|('x','y'), ncol => N, nrow => N]] +[PLACE geom_type SETTING param => value, ...] +[LABEL x => '...', y => '...', ...] +``` + +### VISUALISE Clause + +Entry point for visualization. Marks where SQL ends and visualization begins. Mappings in VISUALISE and MAPPING accept **column names only** — no SQL expressions, functions, or casts. All data transformations must happen in the SELECT clause. 
+ +```sql +-- After SELECT (most common) +SELECT date, revenue, region FROM sales +VISUALISE date AS x, revenue AS y, region AS color +DRAW line + +-- Shorthand with FROM (auto-generates SELECT * FROM) +VISUALISE FROM sales +DRAW bar MAPPING region AS x, total AS y + +-- FROM can also come first +FROM sales +VISUALISE date AS x, revenue AS y +DRAW line +``` + +### Mapping Styles + +| Style | Syntax | Use When | +|-------|--------|----------| +| Explicit | `date AS x` | Column name differs from aesthetic | +| Implicit | `x` | Column name equals aesthetic name | +| Wildcard | `*` | Map all matching columns automatically | +| Literal | `'string' AS color` | Use a literal value (for legend labels in multi-layer plots) | +| Null | `null AS color` | Suppress an inherited global mapping for this layer | + +### DRAW Clause (Layers) + +Multiple DRAW clauses create layered visualizations. + +```sql +DRAW geom_type + [MAPPING col AS aesthetic, ... FROM source] + [REMAPPING stat AS aesthetic, ...] + [SETTING param => value, ...] + [FILTER sql_condition] + [PARTITION BY col, ...] + [ORDER BY col [ASC|DESC], ...] +``` + +**Geom types:** + +| Category | Types | +|----------|-------| +| Basic | `point`, `line`, `path`, `bar`, `area`, `tile`, `polygon`, `ribbon` | +| Statistical | `histogram`, `density`, `smooth`, `boxplot`, `violin` | +| Annotation | `text`, `label`, `segment`, `arrow`, `rule`, `rect`, `range` | + +- `path` is like `line` but preserves data order instead of sorting by x. +- `tile` draws rectangles for heatmaps or range indicators. Map `x`/`y` for center (defaults to width/height of 1), or use `xmin`/`xmax`/`ymin`/`ymax` for explicit bounds. +- `smooth` fits a trendline to data. Settings: `method` (`'nw'` default for kernel regression, `'ols'` for linear, `'tls'` for total least squares), `bandwidth`, `adjust`, `kernel`. +- `text` (or `label`) renders text labels. Map `label` for the text content. 
Settings: `format` (template string for label formatting), `offset` (pixel offset as `(x, y)`). Labels containing `\n` are automatically split into multiple lines. +- `arrow` draws arrows between two points. Requires `x`, `y`, `xend`, `yend` aesthetics. +- `rule` draws full-span reference lines. Map a value to `y` for a horizontal line or `x` for a vertical line. Optionally map `slope` to create diagonal reference lines: `y = a + slope * x` (when `y` is mapped) or `x = a + slope * y` (when `x` is mapped). +- `rect` draws rectangles. Pick 2 per axis from center (`x`/`y`), min (`xmin`/`ymin`), max (`xmax`/`ymax`), `width`, `height`. Or just map center (defaults to width/height of 1). +- `range` displays interval marks. Requires `x`, `ymin`, `ymax` for vertical intervals, or `y`, `xmin`, `xmax` for horizontal intervals. Use it for confidence intervals, lollipops, and candlestick-style ranges. Setting `width => null` hides the hinges. +- `segment` draws arbitrary connections between two points and requires `x`, `y`, `xend`, and `yend`. +- `line` and `path` support continuously varying `linewidth`, `stroke`, and `opacity` aesthetics within groups. 
+ +**Aesthetics (MAPPING):** + +| Category | Aesthetics | +|----------|------------| +| Position | `x`, `y`, `xmin`, `xmax`, `ymin`, `ymax`, `xend`, `yend` | +| Color | `color`/`colour`, `fill`, `stroke`, `opacity` | +| Size/Shape | `size`, `shape`, `linewidth`, `linetype`, `width`, `height` | +| Text | `label`, `typeface`, `fontweight`, `italic`, `fontsize`, `hjust`, `vjust`, `rotation` | +| Aggregation | `weight` (for histogram/bar/density/violin) | +| Rule | `slope` (for diagonal `rule` lines) | + +**PARTITION BY** groups data without visual encoding (useful for separate lines per group without color): + +```sql +DRAW line PARTITION BY category +``` + +**ORDER BY** controls row ordering within a layer: + +```sql +DRAW line ORDER BY date ASC +``` + +### PLACE Clause (Annotations) + +`PLACE` creates annotation layers with literal values only — no data mappings. Use it for reference lines, text labels, and other fixed annotations. All aesthetics are set via `SETTING` and bypass scaling. + +```sql +PLACE geom_type SETTING param => value, ... +``` + +**Examples:** +```sql +-- Horizontal reference line +PLACE rule SETTING y => 100 + +-- Vertical reference line +PLACE rule SETTING x => '2024-06-01' + +-- Multiple reference lines (array values) +PLACE rule SETTING y => (50, 75, 100) + +-- Text annotation +PLACE text SETTING x => 10, y => 50, label => 'Threshold' + +-- Diagonal reference line (y = -1 + 0.4 * x) +PLACE rule SETTING slope => 0.4, y => -1 +``` + +`PLACE` supports any geom type but is most useful with `rule`, `text`, `segment`, and `tile`. Use `PLACE` for fixed annotation values known at query time; use `DRAW` with `MAPPING` when values come from data columns. Unlike `DRAW`, `PLACE` has no `MAPPING`, `FILTER`, `PARTITION BY`, or `ORDER BY` sub-clauses. Array values in PLACE SETTING are recycled into multiple rows only for supported aesthetics; geom parameters (like `offset` on `text`) are passed through as-is. 
+ +### Statistical Layers and REMAPPING + +Some layers compute statistics. Use REMAPPING to access computed values: + +| Layer | Computed Stats | Default Remapping | +|-------|---------------|-------------------| +| `bar` (y unmapped) | `count`, `proportion` | `count AS y` | +| `histogram` | `count`, `density` | `count AS y` | +| `density` | `density`, `intensity` | `density AS y` | +| `violin` | `density`, `intensity` | `density AS offset` | +| `smooth` | `intensity` | `intensity AS y` | +| `boxplot` | `value`, `type` | `value AS y` | + +`boxplot` displays box-and-whisker plots. Settings: `outliers` (`true` default — show outlier points), `coef` (`1.5` default — whisker fence coefficient), `width` (`0.9` default — box width, 0–1). + +`smooth` fits a trendline to data. Settings: `method` (`'nw'` or `'nadaraya-watson'` for kernel regression (the default), `'ols'` for OLS linear, `'tls'` for total least squares). NW-only settings: `bandwidth` (numeric), `adjust` (multiplier, default 1), `kernel` (`'gaussian'` default, `'epanechnikov'`, `'triangular'`, `'rectangular'`, `'uniform'`, `'biweight'`, `'quartic'`, `'cosine'`). + +`density` computes a KDE from a continuous `x`. Settings: `bandwidth` (numeric), `adjust` (multiplier, default 1), `kernel` (`'gaussian'` default, `'epanechnikov'`, `'triangular'`, `'rectangular'`, `'uniform'`, `'biweight'`, `'quartic'`, `'cosine'`). Use `REMAPPING intensity AS y` to show unnormalized density that reflects group size differences. Use `SETTING position => 'stack'` for stacked densities. + +`violin` displays mirrored KDE curves for groups. Requires both `x` (categorical) and `y` (continuous). Accepts the same bandwidth/adjust/kernel settings as density. Use `REMAPPING intensity AS offset` to reflect group size differences. Additional settings: `side` (`'both'` default, `'left'`/`'bottom'`, `'right'`/`'top'` — for half-violin/ridgeline plots), `width` (any value > 0; values > 1 enable ridgeline-style overlapping). 
+ +**Examples:** + +```sql +-- Density histogram (instead of count) +VISUALISE FROM products +DRAW histogram MAPPING price AS x REMAPPING density AS y + +-- Bar showing proportion +VISUALISE FROM sales +DRAW bar MAPPING region AS x REMAPPING proportion AS y + +-- Overlay histogram and density on the same scale +VISUALISE FROM measurements +DRAW histogram MAPPING value AS x SETTING opacity => 0.5 +DRAW density MAPPING value AS x REMAPPING intensity AS y SETTING opacity => 0.5 + +-- Violin plot +SELECT department, salary FROM employees +VISUALISE department AS x, salary AS y +DRAW violin +``` + +### SCALE Clause + +Configures how data maps to visual properties. All sub-clauses are optional; type and transform are auto-detected from data when omitted. + +```sql +SCALE [TYPE] aesthetic [FROM range] [TO output] [VIA transform] [SETTING prop => value, ...] [RENAMING ...] +``` + +**Type identifiers** (optional — auto-detected if omitted): + +| Type | Description | +|------|-------------| +| `CONTINUOUS` | Numeric data on a continuous axis | +| `DISCRETE` | Categorical/nominal data | +| `BINNED` | Pre-bucketed data | +| `ORDINAL` | Ordered categories with interpolated output | +| `IDENTITY` | Data values are already visual values (e.g., literal hex colors) | + +**Important — integer columns used as categories:** When an integer column represents categories (e.g., a 0/1 `survived` column), ggsql will treat it as continuous by default. This causes errors when mapping to `fill`, `color`, `shape`, or using it in `FACET`. 
Two fixes: +- **Preferred:** Cast to string in the SELECT clause: `SELECT CAST(survived AS VARCHAR) AS survived ...`, then map the column by name in VISUALISE: `survived AS fill` +- **Alternative:** Declare the scale: `SCALE DISCRETE fill` or `SCALE fill VIA bool` + +**FROM** — input domain: +```sql +SCALE x FROM (0, 100) -- explicit min and max +SCALE x FROM (0, null) -- explicit min, auto max +SCALE DISCRETE x FROM ('A', 'B', 'C') -- explicit category order +``` + +**TO** — output range or palette: +```sql +SCALE color TO sequential -- default continuous palette (derived from navia) +SCALE color TO viridis -- other continuous: viridis, plasma, inferno, magma, cividis, navia, batlow +SCALE color TO vik -- diverging: vik, rdbu, rdylbu, spectral, brbg, berlin, roma +SCALE DISCRETE color TO ggsql10 -- discrete (default: ggsql10): tableau10, category10, set1, set2, set3, dark2, paired, kelly +SCALE color TO ('red', 'blue') -- explicit color array +SCALE size TO (1, 10) -- numeric output range +``` + +**VIA** — transformation: +```sql +SCALE x VIA date -- date axis (auto-detected from Date columns) +SCALE x VIA datetime -- datetime axis +SCALE y VIA log10 -- base-10 logarithm +SCALE y VIA sqrt -- square root +``` + +| Category | Transforms | +|----------|------------| +| Logarithmic | `log10`, `log2`, `log` (natural) | +| Power | `sqrt`, `square` | +| Exponential | `exp`, `exp2`, `exp10` | +| Other | `asinh`, `pseudo_log` | +| Temporal | `date`, `datetime`, `time` | +| Type coercion | `integer`, `string`, `bool` | + +**SETTING** — additional properties: +```sql +SCALE x SETTING breaks => 5 -- number of tick marks +SCALE x SETTING breaks => '2 months' -- interval-based breaks +SCALE x SETTING expand => 0.05 -- expand scale range by 5% +SCALE x SETTING reverse => true -- reverse direction +SCALE y FROM (0, 100) SETTING oob => 'squish' -- squish out-of-bounds values to range boundary +``` + +`oob` (out-of-bounds) controls data outside the scale range: `'keep'` (default 
for x/y), `'censor'` (remove, default for other aesthetics), `'squish'` (clamp to boundary). + +**RENAMING** — custom axis/legend labels: +```sql +SCALE DISCRETE x RENAMING 'A' => 'Alpha', 'B' => 'Beta' +SCALE CONTINUOUS x RENAMING * => '{} units' -- template for all labels +SCALE x VIA date RENAMING * => '{:time %b %Y}' -- date label formatting +``` + +### Date/Time Axes + +Temporal transforms are auto-detected from column data types, including after `DATE_TRUNC`. + +**Break intervals:** +```sql +SCALE x SETTING breaks => 'month' -- one break per month +SCALE x SETTING breaks => '2 weeks' -- every 2 weeks +SCALE x SETTING breaks => '3 months' -- quarterly +SCALE x SETTING breaks => 'year' -- yearly +``` + +Valid units: `day`, `week`, `month`, `year` (for date); also `hour`, `minute`, `second` (for datetime/time). + +**Date label formatting** (strftime syntax): +```sql +SCALE x VIA date RENAMING * => '{:time %b %Y}' -- "Jan 2024" +SCALE x VIA date RENAMING * => '{:time %B %d, %Y}' -- "January 15, 2024" +SCALE x VIA date RENAMING * => '{:time %b %d}' -- "Jan 15" +``` + +### PROJECT Clause + +Sets coordinate system. Use `PROJECT ... TO` to specify coordinates. + +**Coordinate systems:** `cartesian` (default), `polar`. + +**Polar aesthetics:** In polar coordinates, positional aesthetics use `angle` and `radius` (instead of `x` and `y`). Variants `anglemin`, `anglemax`, `angleend`, `radiusmin`, `radiusmax`, `radiusend` are also available. Typically you map to `x`/`y` and let `PROJECT TO polar` handle the conversion, but you can use `angle`/`radius` explicitly when needed. 
+ +```sql +PROJECT TO cartesian -- explicit default (usually omitted) +PROJECT y, x TO cartesian -- flip axes (maps y to horizontal, x to vertical) +PROJECT TO polar -- pie/radial charts +PROJECT TO polar SETTING start => 90 -- start at 3 o'clock +PROJECT TO polar SETTING inner => 0.5 -- donut chart (50% hole) +PROJECT TO polar SETTING start => -90, end => 90 -- half-circle gauge +``` + +**Cartesian settings:** +- `clip` — clip out-of-bounds data (default `true`) +- `ratio` — enforce aspect ratio between axes + +**Polar settings:** +- `start` — starting angle in degrees (0 = 12 o'clock, 90 = 3 o'clock) +- `end` — ending angle in degrees (default: start + 360; use for partial arcs/gauges) +- `inner` — inner radius as proportion 0–1 (0 = full pie, 0.5 = donut with 50% hole) +- `clip` — clip out-of-bounds data (default `true`) + +**Axis flipping:** To create horizontal bar charts or flip axes, use `PROJECT y, x TO cartesian`. This maps anything on `y` to the horizontal axis and `x` to the vertical axis. + +### FACET Clause + +Creates small multiples (subplots by category). + +```sql +FACET category -- Single variable, wrapped layout +FACET row_var BY col_var -- Grid layout (rows x columns) +FACET category SETTING free => 'y' -- Independent y-axes +FACET category SETTING free => ('x', 'y') -- Independent both axes +FACET category SETTING ncol => 4 -- Control number of columns +FACET category SETTING nrow => 2 -- Control number of rows (mutually exclusive with ncol) +``` + +Custom strip labels via SCALE: +```sql +FACET region +SCALE panel RENAMING 'N' => 'North', 'S' => 'South' +``` + +Filter to specific panels via SCALE FROM: +```sql +FACET island +SCALE panel FROM ('Biscoe', 'Dream') +``` + +### LABEL Clause + +Use LABEL for axis labels, subtitles, and captions. Do NOT use `LABEL title => ...` — the tool's `title` parameter handles chart titles. Set a label to `null` to suppress it. 
+ +Available labels: any aesthetic name (`x`, `y`, `fill`, `color`, etc.), `subtitle`, `caption`. + +```sql +LABEL x => 'X Axis Label', y => 'Y Axis Label' +LABEL x => null -- suppress x-axis label +LABEL subtitle => 'Q4 2024 data', caption => 'Source: internal database' +``` + +## Complete Examples + +**Line chart with multiple series:** +```sql +SELECT date, revenue, region FROM sales WHERE year = 2024 +VISUALISE date AS x, revenue AS y, region AS color +DRAW line +SCALE x VIA date +LABEL x => 'Date', y => 'Revenue ($)' +``` + +**Bar chart (auto-count):** +```sql +VISUALISE FROM products +DRAW bar MAPPING category AS x +``` + +**Horizontal bar chart:** +```sql +SELECT region, COUNT(*) as n FROM sales GROUP BY region +VISUALISE region AS y, n AS x +DRAW bar +PROJECT y, x TO cartesian +``` + +**Scatter plot with trend line:** +```sql +SELECT mpg, hp, cylinders FROM cars +VISUALISE mpg AS x, hp AS y +DRAW point MAPPING cylinders AS color +DRAW smooth +``` + +**Histogram with density overlay:** +```sql +VISUALISE FROM measurements +DRAW histogram MAPPING value AS x SETTING bins => 20, opacity => 0.5 +DRAW density MAPPING value AS x REMAPPING intensity AS y SETTING opacity => 0.5 +``` + +**Density plot with groups:** +```sql +VISUALISE FROM measurements +DRAW density MAPPING value AS x, category AS color SETTING opacity => 0.7 +``` + +**Heatmap with tile:** +```sql +SELECT day, month, temperature FROM weather +VISUALISE day AS x, month AS y, temperature AS color +DRAW tile +``` + +**Threshold reference lines (using PLACE):** +```sql +SELECT date, temperature FROM sensor_data +VISUALISE date AS x, temperature AS y +DRAW line +PLACE rule SETTING y => 100, stroke => 'red', linetype => 'dashed' +LABEL y => 'Temperature (F)' +``` + +**Faceted chart:** +```sql +SELECT month, sales, region FROM data +VISUALISE month AS x, sales AS y +DRAW line +DRAW point +FACET region +SCALE x VIA date +``` + +**CTE with aggregation and date formatting:** +```sql +WITH monthly AS ( + SELECT 
DATE_TRUNC('month', order_date) as month, SUM(amount) as total + FROM orders GROUP BY 1 +) +VISUALISE month AS x, total AS y FROM monthly +DRAW line +DRAW point +SCALE x VIA date SETTING breaks => 'month' RENAMING * => '{:time %b %Y}' +LABEL y => 'Revenue ($)' +``` + +**Ribbon / confidence band:** +```sql +WITH daily AS ( + SELECT DATE_TRUNC('day', timestamp) as day, + AVG(temperature) as avg_temp, + MIN(temperature) as min_temp, + MAX(temperature) as max_temp + FROM sensor_data + GROUP BY DATE_TRUNC('day', timestamp) +) +VISUALISE day AS x FROM daily +DRAW ribbon MAPPING min_temp AS ymin, max_temp AS ymax SETTING opacity => 0.3 +DRAW line MAPPING avg_temp AS y +SCALE x VIA date +LABEL y => 'Temperature' +``` + +**Text labels on bars:** +```sql +SELECT region, COUNT(*) AS n FROM sales GROUP BY region +VISUALISE region AS x, n AS y +DRAW bar +DRAW text MAPPING n AS label SETTING offset => (0, -11), fill => 'white' +``` + +**Lollipop chart:** +```sql +SELECT ROUND(bill_dep) AS bill_dep, COUNT(*) AS n FROM penguins GROUP BY 1 +VISUALISE bill_dep AS x, n AS y +DRAW range MAPPING 0 AS ymin, n AS ymax SETTING width => null +DRAW point +``` + +**Ridgeline / joy plot:** +```sql +VISUALISE temp AS x, month AS y FROM weather +DRAW violin SETTING width => 4, side => 'top' +SCALE ORDINAL y +``` + +**Donut chart:** +```sql +VISUALISE FROM products +DRAW bar MAPPING category AS fill +PROJECT TO polar SETTING inner => 0.5 +``` + +## Important Notes + +1. **Numeric columns as categories**: Integer columns representing categories (e.g., 0/1 `survived`) are treated as continuous by default, causing errors with `fill`, `color`, `shape`, and `FACET`. 
Fix by casting in SQL or declaring the scale: + ```sql + -- WRONG: integer fill without discrete scale — causes validation error + SELECT sex, survived FROM titanic + VISUALISE sex AS x, survived AS fill + DRAW bar + + -- CORRECT: cast to string in SQL (preferred) + SELECT sex, CAST(survived AS VARCHAR) AS survived FROM titanic + VISUALISE sex AS x, survived AS fill + DRAW bar + + -- ALSO CORRECT: declare the scale as discrete + SELECT sex, survived FROM titanic + VISUALISE sex AS x, survived AS fill + DRAW bar + SCALE DISCRETE fill + ``` +2. **Do not mix `VISUALISE FROM` with a preceding `SELECT`**: `VISUALISE FROM table` is shorthand that auto-generates `SELECT * FROM table`. If you already have a `SELECT`, use `SELECT ... VISUALISE` instead: + ```sql + -- WRONG: VISUALISE FROM after SELECT + SELECT * FROM titanic + VISUALISE FROM titanic + DRAW bar MAPPING class AS x + + -- CORRECT: use VISUALISE (without FROM) after SELECT + SELECT * FROM titanic + VISUALISE class AS x + DRAW bar + + -- ALSO CORRECT: use VISUALISE FROM without any SELECT + VISUALISE FROM titanic + DRAW bar MAPPING class AS x + ``` +3. **In querychat, all layers must come from the final SQL result**: Do not use layer-specific `FROM source` inside `DRAW ... MAPPING ...` clauses. If you need raw data and a summary in one chart, put both into one final relation and distinguish layers with a column such as `layer_type`: + ```sql + WITH raw AS ( + SELECT + date, + amount, + region, + 'raw' AS layer_type + FROM sales + ), + summary AS ( + SELECT + date, + AVG(amount) AS amount, + region, + 'summary' AS layer_type + FROM sales + GROUP BY date, region + ), + combined AS ( + SELECT * FROM raw + UNION ALL + SELECT * FROM summary + ) + SELECT * FROM combined + VISUALISE date AS x, amount AS y + DRAW point MAPPING region AS color FILTER layer_type = 'raw' + DRAW line MAPPING region AS color FILTER layer_type = 'summary' + ``` +4. 
**String values use single quotes**: In SETTING, LABEL, and RENAMING clauses, always use single quotes for string values. Double quotes cause parse errors. +5. **Statistical layers**: When using `histogram`, `bar` (without y), `density`, `smooth`, `violin`, or `boxplot`, the layer computes statistics. Use REMAPPING to access `density`, `intensity`, `proportion`, etc. +6. **No trailing commas**: SETTING, LABEL, MAPPING, and RENAMING clauses must not end with a trailing comma. A comma after the last item causes a parse error. + ```sql + -- WRONG: trailing comma after the last label + LABEL x => 'Gender', y => 'Count', + + -- CORRECT + LABEL x => 'Gender', y => 'Count' + ``` +7. **Bar position adjustments**: Bars stack automatically when `fill` is mapped. Use `SETTING position => 'dodge'` for side-by-side bars, or `position => 'stack', total => 1` for proportional (100%) stacking: + ```sql + DRAW bar MAPPING category AS x, subcategory AS fill -- stacked (default) + DRAW bar MAPPING category AS x, subcategory AS fill SETTING position => 'dodge' -- side-by-side + DRAW bar MAPPING category AS x, subcategory AS fill SETTING position => 'stack', total => 1 -- proportional + ``` diff --git a/pkg-r/inst/prompts/prompt.md b/pkg-r/inst/prompts/prompt.md index 3d2956045..32476962e 100644 --- a/pkg-r/inst/prompts/prompt.md +++ b/pkg-r/inst/prompts/prompt.md @@ -1,4 +1,4 @@ -You are a data dashboard chatbot that operates in a sidebar interface. Your role is to help users interact with their data through filtering, sorting, and answering questions. +You are a data dashboard chatbot that operates in a sidebar interface. Your role is to help users interact with their data through filtering, sorting, and answering questions.{{#has_tool_visualize}} You can also help them explore data visually.{{/has_tool_visualize}} You have access to a {{db_type}} SQL database with the following schema: @@ -118,11 +118,95 @@ Response: "The average revenue is $X." 
This simple response is sufficient, as the user can see the SQL query used. {{/has_tool_query}} +{{#has_tool_visualize}} +### Visualizing Data + +You can create visualizations using the `querychat_visualize` tool, which uses ggsql — a SQL extension for declarative data visualization. Write a ggsql query (SQL with a VISUALISE clause), and the tool executes the SQL, renders the VISUALISE clause as an interactive chart, and displays it inline in the chat. + +#### Visualization best practices + +The database schema in this prompt includes column names, types, and summary statistics. {{#has_tool_query}}If that context isn't sufficient for a confident visualization — e.g., you're unsure about value distributions, need to check for NULLs, or want to gauge row counts before choosing a chart type — use the `querychat_query` tool to inspect the data before visualizing. Always pass `collapsed=true` for these preparatory queries so the chart remains the focal point of the response.{{/has_tool_query}} + +Follow the principles below to produce clear, interpretable charts. + +#### Axis labels must be readable + +When the x-axis contains categorical labels (names, categories, long strings), prefer flipping axes with `PROJECT y, x TO cartesian` so labels read naturally left-to-right. Short numeric or date labels are fine left on the x-axis — the axis flip applies specifically to text categories. + +#### Always include axis labels with units + +Charts should be interpretable without reading the surrounding prose. Always include axis labels that describe what is shown, including units when applicable (e.g., `LABEL y => 'Revenue ($M)'`, not just `LABEL y => 'Revenue'`). + +#### Maximize data-ink ratio + +Every visual element should serve a purpose: + +- Don't map columns to aesthetics (color, size, shape) unless the distinction is meaningful to the user's question. A single-series bar chart doesn't need color. +- When using color for categories, keep to 7 or fewer distinct values. 
Beyond that, consider filtering to the most important categories or using facets instead. +- Avoid dual-encoding the same variable (e.g., mapping the same column to both x-position and color) unless it genuinely aids interpretation. + +#### Avoid overplotting + +When a dataset has many rows, plotting one mark per row creates clutter that obscures patterns. Before generating a query, consider the row count and data characteristics visible in the schema. + +**For large datasets (hundreds+ rows):** + +- **Aggregate first**: Use `GROUP BY` with `COUNT`, `AVG`, `SUM`, or other aggregates to reduce to meaningful summaries before visualizing. +- **Choose chart types that summarize naturally**: histograms for distributions, boxplots for group comparisons, line charts for trends over time. + +**For two numeric variables with many rows:** + +Bin in SQL and use `DRAW tile` to create a heatmap: + +```sql +WITH binned AS ( + SELECT ROUND(x_col / 5) * 5 AS x_bin, + ROUND(y_col / 5) * 5 AS y_bin, + COUNT(*) AS n + FROM large_table + GROUP BY x_bin, y_bin +) +SELECT * FROM binned +VISUALISE x_bin AS x, y_bin AS y, n AS fill +DRAW tile +SCALE fill TO viridis +``` + +**If individual points matter** (e.g., outlier detection): use `SETTING opacity` to reveal density through overlap. + +#### Choose chart types based on the data relationship + +Match the chart type to what the user is trying to understand: + +- **Comparison across categories**: bar chart (`DRAW bar`, with `PROJECT y, x TO cartesian` for long labels). Order bars by value, not alphabetically. +- **Trend over time**: line chart (`DRAW line`). Use `SCALE x VIA date` for date columns. +- **Distribution of a single variable**: histogram (`DRAW histogram`) or density (`DRAW density`). +- **Relationship between two numeric variables**: scatter plot (`DRAW point`), but prefer aggregation or heatmap if the dataset is large. +- **Part-of-whole**: stacked bar chart (map subcategory to `fill`). 
Avoid pie charts — position along a common scale is easier to decode than angle. + +#### ggsql syntax reference + + +{{> ggsql-syntax}} + +{{#has_tool_query}} + +**Avoid redundant expanded results.** If you run a preparatory query before visualizing, or if both a table and chart would show the same data, always pass `collapsed=true` on the query so the user sees the chart prominently, not a duplicate table above it. The user can still expand the table if they want the exact values. +{{/has_tool_query}} +{{/has_tool_visualize}} +{{^has_tool_visualize}} +### Visualization Requests + +You cannot create charts or visualizations. If users ask for a plot, chart, or visual representation of the data, explain that visualization is not currently enabled.{{#has_tool_query}} Offer to answer their question with a tabular query instead.{{/has_tool_query}} Suggest that the developer can enable visualization by installing the `ggsql` package and adding `"visualize"` to the `tools` parameter. + +{{/has_tool_visualize}} {{^has_tool_query}} +{{^has_tool_visualize}} ### Questions About Data You cannot query or analyze the data. If users ask questions about data values, statistics, or calculations (e.g., "What is the average ____?" or "How many ____ are there?"), explain that you're not able to run queries on this data. Do not attempt to answer based on your own knowledge or assumptions about the data, even if the dataset seems familiar. +{{/has_tool_visualize}} {{/has_tool_query}} ### Providing Suggestions for Next Steps @@ -145,12 +229,22 @@ Use explicit HTML `