# RSnowflake -- Feature Demo

An interactive walkthrough of **RSnowflake** (pure R DBI connector for
Snowflake) running inside a Snowflake Workspace Notebook.

**Sections:**
1. Setup (R environment + PAT auth)
2. Connect
3. Simple queries & type mapping
4. Table operations (write, read, append)
5. Identifier case handling
6. Parameterized queries
7. Transactions (not yet supported)
8. dbplyr / dplyr integration
9. Arrow fast path (optional)
10. Connections-pane browsing (dbListObjects)
11. Cleanup

## 1. Setup

Install the R environment (skip if already done in this session),
register the `%%R` magic, create a Programmatic Access Token (PAT), and
export the session context as environment variables.

In [None]:
# Install R + rpy2 via setup script (included in this directory).
# The leading `!` runs the command in a shell from the notebook.
# NOTE(review): `--basic` presumably selects a minimal install -- confirm in the script.
!bash setup_r_environment.sh --basic

In [None]:
# Prepare the notebook for R cells using the project-local helper.
# NOTE(review): per the section intro this is the step that registers the
# `%%R` cell magic -- confirm against r_helpers.
from r_helpers import setup_r_environment
setup_r_environment()

In [None]:
%%R
# Install RSnowflake dependencies (no-op if already present).
pkgs <- c("DBI", "httr2", "jsonlite", "rlang", "cli",
          "dbplyr", "dplyr", "nanoarrow")

# TRUE when the package's namespace can be loaded.
is_installed <- function(p) requireNamespace(p, quietly = TRUE)

missing_pkgs <- pkgs[!vapply(pkgs, is_installed, logical(1))]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = "https://cloud.r-project.org")
}

# install.packages() only warns when an install fails, so verify the result
# before reporting success.
still_missing <- missing_pkgs[!vapply(missing_pkgs, is_installed, logical(1))]
if (length(still_missing) > 0) {
  stop(
    "Failed to install: ", paste(still_missing, collapse = ", "),
    call. = FALSE
  )
}
cat("Dependencies OK.\n")

In [None]:
%%R
# Reinstall RSnowflake from the source tree two directories above the
# working directory. A copy that is already loaded is unloaded first.
if (isNamespaceLoaded("RSnowflake")) {
  # detach() errors when the package is loaded but not attached; ignore that.
  try(detach("package:RSnowflake", unload = TRUE), silent = TRUE)
  unloadNamespace("RSnowflake")
  cat("Unloaded previous RSnowflake namespace.\n")
}

repo_root <- file.path(getwd(), "..", "..")
pkg_dir <- normalizePath(repo_root)
cat("Installing RSnowflake from:", pkg_dir, "\n")
# repos = NULL + type = "source" installs from the local directory.
install.packages(pkg_dir, repos = NULL, type = "source")

In [None]:
from snowflake.snowpark.context import get_active_session
from r_helpers import PATManager

# Use the notebook's active Snowpark session to create a token that expires
# after one day, replacing any existing one (force_recreate=True).
session = get_active_session()
pat_mgr = PATManager(session)
result = pat_mgr.create_pat(days_to_expiry=1, force_recreate=True)

# Report the outcome; a failure is printed rather than raised.
if not result['success']:
    print(f"PAT creation failed: {result['error']}")
else:
    print(f"PAT created for {result['user']} (role: {result['role_restriction']})")
    print(f"Expires: {result['expires_at']}")

In [None]:
import os


def _unquote(identifier):
    """Return ``identifier`` with double quotes removed; ``None`` becomes ``""``."""
    return (identifier or "").replace('"', '')


# Export the active session's context as SNOWFLAKE_* environment variables.
# Every getter goes through _unquote() so a missing value becomes an empty
# string instead of raising AttributeError on None.
# NOTE(review): presumably read by dbConnect(Snowflake()) in the R cells -- confirm.
os.environ["SNOWFLAKE_ACCOUNT"]   = _unquote(session.get_current_account())
os.environ["SNOWFLAKE_USER"]      = session.sql("SELECT CURRENT_USER()").collect()[0][0]
os.environ["SNOWFLAKE_DATABASE"]  = _unquote(session.get_current_database())
os.environ["SNOWFLAKE_SCHEMA"]    = _unquote(session.get_current_schema())
os.environ["SNOWFLAKE_WAREHOUSE"] = _unquote(session.get_current_warehouse())
os.environ["SNOWFLAKE_ROLE"]      = _unquote(session.get_current_role())

# Echo a subset of the exported values for a quick visual check.
for k in ["SNOWFLAKE_ACCOUNT", "SNOWFLAKE_DATABASE", "SNOWFLAKE_WAREHOUSE", "SNOWFLAKE_ROLE"]:
    print(f"{k}: {os.environ[k]}")

## 2. Connect

In [None]:
%%R
# Fall back to UTC when the environment does not define a time zone.
if (!nzchar(Sys.getenv("TZ", ""))) {
  Sys.setenv(TZ = "UTC")
}
# Wide console so printed data frames are not wrapped.
options(width = 200)

library(DBI)
library(RSnowflake)

# NOTE(review): with no arguments the connection settings presumably come
# from the SNOWFLAKE_* environment variables set earlier -- confirm.
con <- dbConnect(Snowflake())

# The %%R magic only auto-prints the value of a cell's last expression, so
# both summaries are printed explicitly.
print(con)
print(dbGetInfo(con))

## 3. Simple Queries & Type Mapping

In [None]:
%%R
# Simple round trip: return the server version in a column aliased `version`.
dbGetQuery(con, "SELECT CURRENT_VERSION() AS version")

In [None]:
%%R
# One literal per Snowflake type family (integer, double, string, boolean,
# date, timestamp) so the resulting R column types can be inspected.
type_probe_sql <- "
  SELECT
    42            AS int_val,
    3.14::DOUBLE  AS dbl_val,
    'hello'       AS str_val,
    TRUE          AS bool_val,
    CURRENT_DATE()          AS date_val,
    CURRENT_TIMESTAMP()     AS ts_val
"
dbGetQuery(con, type_probe_sql)

## 4. Table Operations

Write a demo data.frame, read it back, and append more rows.
Column names are uppercased by default (standard Snowflake behaviour).

In [None]:
%%R
# Ten sample rows: integer id, character city, double temperature, logical flag.
demo <- data.frame(
  id = 1:10,
  city = c(
    "London", "Paris", "Tokyo", "Sydney", "NYC",
    "Berlin", "Toronto", "Mumbai", "Seoul", "Dubai"
  ),
  temp_c = c(
    12.5, 15.2, 22.3, 25.1, 18.7,
    10.3, 8.9, 33.2, 19.8, 38.5
  ),
  rainy = c(
    TRUE, TRUE, FALSE, FALSE, TRUE,
    TRUE, TRUE, FALSE, FALSE, FALSE
  ),
  stringsAsFactors = FALSE
)

# overwrite = TRUE replaces an existing DEMO_CITIES, so the cell can be re-run.
dbWriteTable(con, "DEMO_CITIES", demo, overwrite = TRUE)
cat("Table created.\n")

# List the stored column names (uppercased by default).
dbListFields(con, "DEMO_CITIES")

In [None]:
%%R
# Read the whole DEMO_CITIES table back into R.
dbReadTable(con, "DEMO_CITIES")

In [None]:
%%R
# Two more rows with the same columns as the initial demo data.
# stringsAsFactors = FALSE matches how `demo` was built and keeps `city` a
# character column on R versions older than 4.0.
extra <- data.frame(
  id = 11:12,
  city = c("Rome", "Cairo"),
  temp_c = c(20.1, 35.0),
  rainy = c(FALSE, FALSE),
  stringsAsFactors = FALSE
)
dbAppendTable(con, "DEMO_CITIES", extra)

# Confirm the new row count.
dbGetQuery(con, "SELECT COUNT(*) AS n FROM DEMO_CITIES")

## 5. Identifier Case Handling

By default, RSnowflake uppercases table and column names to match
Snowflake convention. In raw SQL you can reference them unquoted
(Snowflake auto-uppercases) or with uppercase quoted identifiers.

In [None]:
%%R
# The stored column names are uppercase, so unquoted references resolve;
# uppercase quoted identifiers would work as well.
dbGetQuery(
  con,
  "SELECT CITY, TEMP_C FROM DEMO_CITIES WHERE TEMP_C > 25"
)

In [None]:
%%R
# The %%R magic only auto-prints the value of a cell's last expression, so
# each result is printed explicitly.

# dbQuoteIdentifier wraps names in double-quotes
print(dbQuoteIdentifier(con, "myColumn"))

# dbUnquoteIdentifier parses a quoted, dot-separated name back
print(dbUnquoteIdentifier(con, SQL('"mydb"."myschema"."mytable"')))

## 6. Parameterized Queries

Use `?` placeholders with `params` or `dbBind`.

In [None]:
%%R
# The single `?` placeholder is filled from the `params` list.
dbGetQuery(con, "SELECT * FROM DEMO_CITIES WHERE TEMP_C > ?", params = list(30))

In [None]:
%%R
# Prepared-statement flow: send the query, bind the placeholder, fetch rows.
res <- dbSendQuery(con, "SELECT * FROM DEMO_CITIES WHERE CITY = ?")

# `finally` clears the result set even when binding or fetching fails.
tokyo_rows <- tryCatch(
  {
    dbBind(res, list("Tokyo"))
    dbFetch(res)
  },
  finally = dbClearResult(res)
)

# Print explicitly: the %%R magic only shows the value of the last expression,
# which would otherwise be the invisible return of dbClearResult().
print(tokyo_rows)

## 7. Transactions (not yet supported)

The Snowflake SQL API v2 is stateless per-request, so session-based
transactions (`dbBegin`/`dbCommit`/`dbRollback`) are not yet supported.
This section demonstrates that RSnowflake reports a clear error.

In [None]:
%%R
# Starting a transaction by hand is expected to raise an error (SQL API v2
# is stateless); the handler prints the message instead of aborting the cell.
tryCatch(
  dbBegin(con),
  error = function(err) {
    cat("Expected:", conditionMessage(err), "\n")
  }
)

In [None]:
%%R
# The dbWithTransaction() wrapper is expected to fail in the same way; the
# handler prints the error message.
tryCatch(
  dbWithTransaction(con, {
    dbExecute(con, "SELECT 1")
  }),
  error = function(err) {
    cat("Expected:", conditionMessage(err), "\n")
  }
)

## 8. dbplyr / dplyr Integration

If `dbplyr` and `dplyr` are available, queries can be composed with
familiar tidyverse verbs and translated to Snowflake SQL lazily.

In [None]:
%%R
# Both packages are optional; the section is skipped when either is missing.
dplyr_ready <- requireNamespace("dbplyr", quietly = TRUE) &&
  requireNamespace("dplyr", quietly = TRUE)

if (!dplyr_ready) {
  cat("Skipped: install dbplyr and dplyr for this section.\n")
} else {

  library(dplyr)

  cities_tbl <- tbl(con, "DEMO_CITIES")

  # Lazy pipeline: translated to Snowflake SQL, executed only by collect().
  hot_cities <- cities_tbl |>
    filter(temp_c > 20) |>
    select(city, temp_c) |>
    arrange(desc(temp_c))

  cat("== Generated SQL ==\n")
  show_query(hot_cities)

  cat("\n== Results ==\n")
  hot_rows <- collect(hot_cities)
  print(hot_rows)

  # One-row summary across the whole table.
  cat("\n== Aggregation ==\n")
  city_summary <- cities_tbl |>
    summarise(
      avg_temp = mean(temp_c, na.rm = TRUE),
      n_rainy  = sum(as.integer(rainy), na.rm = TRUE),
      n_cities = n()
    ) |>
    collect()
  print(city_summary)

}

## 9. Arrow Fast Path (optional)

If `nanoarrow` is installed, RSnowflake can stream results in Arrow format
for lower overhead on large result sets.

In [None]:
%%R
# nanoarrow is optional; without it the Arrow demo is skipped.
if (!requireNamespace("nanoarrow", quietly = TRUE)) {
  cat("Skipped: install nanoarrow for Arrow fast path.\n")
} else {
  # Fetch the table via the Arrow interface, then convert to a data.frame.
  stream <- dbGetQueryArrow(con, "SELECT * FROM DEMO_CITIES")
  arrow_df <- as.data.frame(stream)
  str(arrow_df)
  dims <- dim(arrow_df)
  cat("Arrow result:", dims[1], "rows,", dims[2], "columns\n")
}

## 10. Connections-Pane Browsing (dbListObjects)

The `dbListObjects` method powers IDE connection panes. It works the same
in a notebook -- pass a prefix to drill into databases, schemas, and tables.

In [None]:
%%R
# Top level: databases
cat("== Databases (first 5) ==\n")
head(dbListObjects(con), 5)

In [None]:
%%R
# Drill into the current database -> schemas
db <- Sys.getenv("SNOWFLAKE_DATABASE", "")
if (nzchar(db)) {
  cat("== Schemas in", db, "==\n")
  print(dbListObjects(con, prefix = Id(catalog = db)))
}

In [None]:
%%R
# Drill into PUBLIC schema -> tables
if (nzchar(db)) {
  cat("== Tables in", db, ".PUBLIC ==\n")
  print(dbListObjects(con, prefix = Id(catalog = db, schema = "PUBLIC")))
}

## 11. Cleanup

In [None]:
%%R
# Drop the demo table, then close the connection. The disconnect sits in
# `finally` so the connection is released even if removing the table fails;
# in that case the error still propagates and the success message is skipped.
tryCatch(
  dbRemoveTable(con, "DEMO_CITIES"),
  finally = dbDisconnect(con)
)
cat("Done! Table removed and connection closed.\n")