### Python Packages

In [None]:
# Install notebook dependencies with pip through the shell escape.
# tabulate is imported by the imports cell that follows; -U upgrades ipywidgets
# if a version is already installed.
# NOTE(review): tabulate is presumably here for DataFrame.to_markdown(), which is
# called later in the notebook - confirm.
!pip install tabulate
!pip install -U ipywidgets

In [None]:
### Packages
## Python packages
import os
from os import listdir
from os.path import isfile, join
from datetime import date, datetime
import time 
from decimal import Decimal
import numpy as np
import pandas as pd
pd.set_option('display.max_colwidth', 500)
import tabulate
from pathlib import Path
import json

## SNOWFLAKE

## Snowpark
from snowflake.snowpark.version import VERSION
from snowflake.snowpark import functions as F, types as T
from snowflake.snowpark.types import StringType
# Snowpark functions representing some SQL functions we need
# tryparsejson = F.builtin('TRY_PARSE_JSON')
# timestampadd = F.builtin('TIMESTAMPADD')
# 
# from snowflake.ml.utils import connection_params
# 
# # Snowflake ML preprocessing
# from snowflake.ml.modeling.preprocessing import OrdinalEncoder
# 
# # Model Registry
# from snowflake.ml.registry import registry
# from snowflake.ml.model import custom_model
# from typing import Optional
# from snowflake.ml.model.model_signature import FeatureSpec, DataType, ModelSignature

### Python Database Session

In [None]:
# Source Data Database and Schema
# Both values are applied to the session further down through
# session.use_database(src_database) and session.use_schema(src_schema).
src_database = 'SIMON'
src_schema = 'RPY2' # <-- Modify this if you want to test with one of the larger data scale-factors. e.g. TPCH_SF1, TPCH_SF10, TPCH_SF100, TPCH_SF1000

# Database to use to create Schemas
# NOTE(review): sess_db is not referenced by any other cell visible in this
# notebook - confirm it is still needed.
sess_db = 'SIMON' # The database within which we will create our Feature Store (schema), and data-source schema.

In [None]:
# CREATE SESSION
# ## Using Snowflake Notebook
# No connection parameters are passed here: the session object is obtained from
# the active Snowpark context rather than being built from credentials.
from snowflake.snowpark.context import get_active_session

# `session` is used by the later cells for every SQL call made from Python.
session = get_active_session()

In [None]:
session.sql_simplifier_enabled = True

# Capture and Print the Current Environment Details
snowflake_environment = session.sql('SELECT current_user(), current_version()').collect()
snowflake_account = session.sql("select current_account()").collect()[0][0]
snowpark_version = VERSION
session_role = session.get_current_role().replace('"', "")

# Switch to the source database/schema first, then read the context back so the
# values reported below are the ones actually in effect.
session.use_database(src_database)
session.use_schema(src_schema)

# The get_current_*() results are stripped of double quotes before being
# displayed or embedded in the SHOW WAREHOUSES pattern.
session_database = session.get_current_database().replace('"', "")
session_schema = session.get_current_schema().replace('"', "")
session_vw = session.get_current_warehouse().replace('"', "")

# SHOW WAREHOUSES returns one row per matching warehouse; take the first match
# for the current warehouse name.
vw_status = session.sql(f"""show warehouses like '{session_vw}' """).collect()[0]
vw_type = vw_status['type']
vw_state = vw_status['state']
vw_size = vw_status['size'].upper()
vw_available = vw_status['available']

# The report reuses the variables captured above. The original nested single
# quotes inside single-quoted f-strings for Database/Schema, which is a
# SyntaxError on Python versions before 3.12.
print('================================================================================')
print('\nConnection Established with the following parameters:')
print(f'Account                      : {snowflake_account}')
print(f'User                         : {snowflake_environment[0][0]}')
print(f'Role                         : {session_role}')
print(f'Database                     : {session_database}')
print(f'Schema                       : {session_schema}')
print(f'Warehouse Name               : {session_vw}')
print(f'Warehouse Type               : {vw_type}')
print(f'Warehouse State              : {vw_state}')
print(f'Warehouse Size               : {vw_size}')
print(f'Warehouse Available Resource : {vw_available}')
print(f'Snowflake version            : {snowflake_environment[0][1]}')
print(f'Snowpark for Python version  : {snowpark_version[0]}.{snowpark_version[1]}.{snowpark_version[2]} \n')

### Check that the OAuth token exists in the Notebook file system

In [None]:

# Shell test: prints the first message when /snowflake/session/token is a
# regular file, otherwise the fallback message.
! [ -f /snowflake/session/token ] && echo "SPCS OAuth token file is present" || echo "No token file"


In [None]:
# List the contents of /snowflake/session. Errors from ls are discarded and a
# fallback message is printed instead when the listing fails.
! ls -l /snowflake/session 2>/dev/null || echo "No /snowflake/session directory"

In [None]:
import os

# Same check as the shell cells above, done from Python: report whether the
# token path is an existing regular file.
token_path = "/snowflake/session/token"
print("Token file exists:", os.path.isfile(token_path))

### Install R, ADBC & requested R Packages

In [None]:
# Run the setup script with bash; the path is relative, so the script has to be
# present in the notebook's working directory.
# NOTE(review): per the section heading this installs R, ADBC and the requested
# R packages - the script itself is not part of this notebook, so confirm there.
!bash setup_r_micromamba_adbc.sh

### Configure the R Environment & iPython magic

In [None]:
import os, sys, subprocess

# Point kernel Python to the R env we created
ENV_PREFIX = "/root/.local/share/mamba/envs/r_env"  # from script output

# Put the R env's bin directory at the front of PATH. Existing occurrences are
# removed first, so re-running this cell leaves PATH unchanged instead of
# prepending the same directory again on every run. A missing PATH is treated
# as empty rather than raising KeyError.
env_bin = f"{ENV_PREFIX}/bin"
current_path = os.environ.get("PATH", "")
other_entries = [
    entry
    for entry in (current_path.split(os.pathsep) if current_path else [])
    if entry != env_bin
]
os.environ["PATH"] = os.pathsep.join([env_bin, *other_entries])
os.environ["R_HOME"] = f"{ENV_PREFIX}/lib/R"

print("Kernel Python:", sys.executable)
print("R_HOME:", os.environ["R_HOME"])

# Install rpy2 into THIS Python (the notebook venv), no --user.
# This runs after R_HOME/PATH are set; check=True makes a failed install raise
# instead of continuing to the import below.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "rpy2"],
    check=True,
)

# Add %%R magic so we can use R from within Python Notebook cells
from rpy2.ipython import rmagic
ip = get_ipython()
ip.register_magics(rmagic.RMagics)

In [None]:
%%R
# Smoke test for the %%R magic registered in the previous cell: evaluate
# R.version.string in the R session and show the result.
R.version.string

### Working with Dataframes

We show how we can inter-operate with R dataframes and Pandas Dataframes.

#### Using Python

In [None]:
from functools import partial
from rpy2.ipython import html
from rpy2.robjects.packages import importr
# Import R package - Utils
utils = importr('utils')

# Pre-bind table_class="docutils" on the function used to render R data frames
# as HTML.
html.html_rdataframe=partial(html.html_rdataframe, table_class="docutils")

# Read the CSV with R's utils::read.csv; `dataf` is an R data.frame held by rpy2.
dataf = utils.read_csv('https://raw.githubusercontent.com/jakevdp/PythonDataScienceHandbook/refs/heads/master/notebooks_v1/data/california_cities.csv')

# Call init_printing() through the `html` module imported above. The original
# spelled it `rpy2.ipython.html.init_printing()`, but the bare name `rpy2` is
# never bound in this notebook (only `from rpy2... import ...` forms are used),
# so that line raised NameError on a fresh kernel.
html.init_printing()

dataf

In [None]:
# R Dataframe column names
# `dataf` is the object returned by utils.read_csv(...) in the cell above.
dataf.colnames

In [None]:
# Fit a linear model in R from Python.
# `base` is imported here as well because a later cell calls base.summary().
base = importr('base')
stats = importr('stats')

# Elevation modelled from latitude and longitude, using the R data frame `dataf`.
elevation_formula = 'elevation_m ~ latd + longd'
stats.lm(elevation_formula, data=dataf)

In [None]:
%%R -i dataf
# Pass the Python object into R (%%R) using -i (above), and make use of it in an R cell.
# dplyr is loaded because glimpse() is called on the data frame below.
require(dplyr)
glimpse(dataf)

Create and print a Pandas Dataframe

In [None]:
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

# Small demo frame with one integer column and one string column; the following
# cells print it and convert it to R.
sample_columns = {
    'int_values': [1, 2, 3],
    'str_values': ['abc', 'def', 'ghi'],
}
pd_df = pd.DataFrame(sample_columns)

pd_df

In [None]:
# Plain-text repr first, then the Markdown rendering, separated by three blank lines.
print(pd_df, end='\n' * 4)
print(pd_df.to_markdown())

In [None]:
# Convert the pandas DataFrame into an R data frame.
# The conversion rules are the rpy2 defaults plus the pandas-specific ones, and
# they are only active inside the `with` block.
pandas_rules = ro.default_converter + pandas2ri.converter
with pandas_rules.context():
    active_conversion = ro.conversion.get_conversion()
    r_from_pd_df = active_conversion.py2rpy(pd_df)

r_from_pd_df

Convert Pandas dataframe to R dataframe, and print it.

In [None]:
# Call R's summary() directly on the pandas DataFrame: the argument is converted
# on the fly because the call happens inside the pandas-aware converter context.
# NOTE(review): the original comment says the result comes back as a pandas
# DataFrame - confirm the actual type of df_summary.
summary_rules = ro.default_converter + pandas2ri.converter
with summary_rules.context():
    df_summary = base.summary(pd_df)

print(df_summary)

In [None]:
from rpy2 import robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

# Make the iris dataset available in the R session, then fetch the R data.frame
# by name into a Python variable.
ro.r("data(iris)")
iris_r = ro.r["iris"]

# Convert R -> pandas inside a local converter so the notebook can display the
# result as a regular DataFrame.
r_to_pandas_rules = ro.default_converter + pandas2ri.converter
with localconverter(r_to_pandas_rules):
    iris_df = pandas2ri.rpy2py(iris_r)

iris_df.head()

In [None]:
from rpy2.robjects import r
# Get the installed packages in our R environment
# Step 1: create `ip` inside the R session from columns 1, 3 and 4 of
# installed.packages(). This R-side `ip` is unrelated to the Python variable
# `ip` (the IPython shell) assigned in the rpy2 setup cell.
ro.r("ip =  as.data.frame(installed.packages()[,c(1,3:4)])")
# Step 2: keep the rows whose Priority is NA and the first two columns, and
# bring the result back to Python.
ip_pyt = ro.r("ip[is.na(ip$Priority),1:2,drop=FALSE]")
print(ip_pyt)

In [None]:
%%R
# Get the installed packages in an R cell (%%R)
# Create an R dataframe from columns 1, 3 and 4 of installed.packages()
ipr <- as.data.frame(installed.packages()[,c(1,3:4)])
# Filter `ipr` itself: keep rows whose Priority is NA and the first two columns.
# The original filtered `ip`, a variable created by a different (Python) cell,
# so this cell only worked when that cell had been run first.
ipr <- ipr[is.na(ipr$Priority),1:2,drop=FALSE]
# Doesn't print very nicely!!
print(ipr)

In [None]:
# But we can grab the R dataframe (ipr) from the R environment in a Python cell, and print it from Python.
# `ipr` is the variable assigned in the preceding %%R cell; it is reached here
# by attribute access on ro.r.
print(ro.r.ipr)

In [None]:
%%bash
# Make sure micromamba is on PATH
# NOTE(review): assumes the micromamba binary lives in $HOME/micromamba/bin -
# confirm against the setup script.
export PATH="$HOME/micromamba/bin:$PATH"

# Install Go into the existing r_env
# (-y: no confirmation prompt, -n r_env: target environment, -c conda-forge: channel)
micromamba install -y -n r_env -c conda-forge go

## Install ADBC
We installed Go with micromamba during the R installation script. We need it to install the Snowflake ADBC package (`adbcsnowflake`), which has to be compiled with Go during installation.

In [None]:
%%R
# Point GO_BIN at the go binary inside r_env, built from the HOME directory.
go_binary <- file.path(Sys.getenv("HOME"), ".local/share/mamba/envs/r_env/bin/go")
Sys.setenv(GO_BIN = go_binary)

# Echo the value back from the environment to confirm it was set.
cat("GO_BIN =", Sys.getenv("GO_BIN"), "\n")

# Install the Snowflake ADBC driver package from the R-multiverse repository.
install.packages("adbcsnowflake", repos = "https://community.r-multiverse.org")

#### Using OAuth Token stored in Container

```
%%R
library(adbcdrivermanager)
library(adbcsnowflake)

token <- readLines("/snowflake/session/token", warn = FALSE)

db <- adbc_database_init(
  adbcsnowflake::adbcsnowflake(),
  `adbc.snowflake.sql.account`                  = Sys.getenv("SNOWFLAKE_ACCOUNT"),
  `adbc.snowflake.sql.client_option.auth_token` = token,
  `adbc.snowflake.sql.auth_type`                = "auth_oauth",
  `adbc.snowflake.sql.db`                       = Sys.getenv("SNOWFLAKE_DATABASE"),
  `adbc.snowflake.sql.schema`                   = Sys.getenv("SNOWFLAKE_SCHEMA"),
  `adbc.snowflake.sql.warehouse`                = "YOUR_WH"
)
con <- adbc_connection_init(db)
stmt <- adbc_statement_init(con)
adbc_statement_set_sql_query(stmt, "SELECT CURRENT_USER(), CURRENT_ROLE(), CURRENT_WAREHOUSE()")
res <- adbc_statement_execute_query(stmt)
res
```
This doesn't work. You can't make use of the OAuth token that Snowflake injects into the container at start-up for this purpose.


```
IO: [Snowflake] 395092 (08004): Error connecting to Snowflake via Snowpark Container Services. Client is unauthorized to use Snowpark Container Services OAuth token.
```

#### Using conventional username and password.

__Python Cell__
```
import os

# TEMP for testing – don't check this into git
os.environ["SNOWFLAKE_PASSWORD"] = "yourpasswordhere"
```

__R Cell__
```
%%R
library(adbcdrivermanager)
library(adbcsnowflake)

# Sanity check on env
Sys.getenv(c(
  "SNOWFLAKE_ACCOUNT",
  "SNOWFLAKE_HOST",
  "SNOWFLAKE_DATABASE",
  "SNOWFLAKE_SCHEMA",
  "SNOWFLAKE_USER",
  "SNOWFLAKE_PASSWORD"
))

db <- adbc_database_init(
  adbcsnowflake::adbcsnowflake(),
  # core auth
  username                          = Sys.getenv("SNOWFLAKE_USER"),
  password                          = Sys.getenv("SNOWFLAKE_PASSWORD"),

  # connection context
  `adbc.snowflake.sql.account`      = Sys.getenv("SNOWFLAKE_ACCOUNT"),
  `adbc.snowflake.sql.uri.host`     = Sys.getenv("SNOWFLAKE_HOST"),

  # session defaults
  `adbc.snowflake.sql.db`           = Sys.getenv("SNOWFLAKE_DATABASE"),
  `adbc.snowflake.sql.schema`       = Sys.getenv("SNOWFLAKE_SCHEMA"),
  `adbc.snowflake.sql.warehouse`    = "COMPUTE_WH"   # <- change to a real warehouse
)

con  <- adbc_connection_init(db)
stmt <- adbc_statement_init(con)

adbc_statement_set_sql_query(
  stmt,
  "SELECT CURRENT_USER(), CURRENT_ROLE(), CURRENT_WAREHOUSE()"
)

res <- adbc_statement_execute_query(stmt)
res
```
This doesn't work either.

```
[Snowflake] 395090 (08004): Error connecting to Snowflake via Snowpark Container Services. Please use OAuth when connecting to Snowflake. For more information please refer to https://docs.snowflake.com/en/developer-guide/snowpark-container-services/additional-considerations-services-jobs#connecting-to-snowflake-from-inside-a-container.'
```

#### Using Programmatic Access Token (PAT)

In [None]:
# Remove the PAT left over from a previous run, but only if it exists.
# An unconditional ALTER USER ... REMOVE fails when the token is missing, which
# breaks the very first "Run All" of this notebook before the token is created.
existing_pats = session.sql('''
SHOW USER PROGRAMMATIC ACCESS TOKENS FOR USER SIMON
''').collect()

# Compare case-insensitively: the token name is written unquoted in the SQL.
if any(row['name'].upper() == 'R_NOTEBOOK_PAT' for row in existing_pats):
    session.sql('''
ALTER USER SIMON REMOVE PROGRAMMATIC ACCESS TOKEN r_notebook_pat;
''').collect()

In [None]:
# Create the programmatic access token `r_notebook_pat` for user SIMON.
# The statement restricts it to the SYSADMIN role, sets a one-day expiry and a
# 240-minute network-policy bypass window.
# Despite its name, `pat_df` holds the list returned by collect(); the next cell
# reads the token secret from its first row (pat_df[0]['token_secret']).
pat_df = session.sql('''
ALTER USER SIMON
ADD PROGRAMMATIC ACCESS TOKEN r_notebook_pat
  ROLE_RESTRICTION = 'SYSADMIN'
  DAYS_TO_EXPIRY   = 1              -- short-lived for testing
  MINS_TO_BYPASS_NETWORK_POLICY_REQUIREMENT = 240  -- 4 hours
  COMMENT = 'PAT for R/ADBC test from Notebook';
  ''').collect()

In [None]:
import os

# Hand the token secret to R through the process environment: the %%R cell that
# follows reads it back with Sys.getenv("SNOWFLAKE_PAT").
pat_secret = pat_df[0]['token_secret']
os.environ["SNOWFLAKE_PAT"] = pat_secret

In [None]:
%%R
library(adbcdrivermanager)
library(adbcsnowflake)

# Connection context is taken from environment variables; Sys.getenv() yields
# "" for anything that is not set.
account     <- Sys.getenv("SNOWFLAKE_ACCOUNT")
user        <- Sys.getenv("SNOWFLAKE_USER")
database    <- Sys.getenv("SNOWFLAKE_DATABASE")
schema      <- Sys.getenv("SNOWFLAKE_SCHEMA")
warehouse   <- Sys.getenv("SNOWFLAKE_WAREHOUSE")
role        <- Sys.getenv("SNOWFLAKE_ROLE")          # may be empty, that's fine
pat         <- Sys.getenv("SNOWFLAKE_PAT")
public_host <- Sys.getenv("SNOWFLAKE_PUBLIC_HOST")   # optional

# Without a token there is nothing to authenticate with, so fail early.
if (!nzchar(pat)) {
  stop("SNOWFLAKE_PAT is not set; cannot authenticate with PAT.")
}

has_public_host <- nzchar(public_host)

# Sanity check (optional) - the token itself is deliberately not printed.
cat("Account  :", account,  "\n")
cat("User     :", user,     "\n")
cat("Database :", database, "\n")
cat("Schema   :", schema,   "\n")
cat("Warehouse:", warehouse, "\n")
cat("Role     :", role,     "\n")
cat("Public host:", if (has_public_host) public_host else "<driver default>", "\n")

# Assemble the driver options once, in the same order for both cases.
# Core identity first (user may be required even with PAT), then the account.
db_options <- list(
  username                     = user,
  `adbc.snowflake.sql.account` = account
)

# An explicit host is only passed when one was provided; otherwise the driver
# is left to infer the host from account/region.
if (has_public_host) {
  db_options[["adbc.snowflake.sql.uri.host"]] <- public_host
}

db_options <- c(
  db_options,
  list(
    # Session defaults
    `adbc.snowflake.sql.db`        = database,
    `adbc.snowflake.sql.schema`    = schema,
    `adbc.snowflake.sql.warehouse` = warehouse,
    `adbc.snowflake.sql.role`      = role,

    # Authentication: Programmatic Access Token
    `adbc.snowflake.sql.auth_type`                = "auth_pat",
    `adbc.snowflake.sql.client_option.auth_token` = pat
  )
)

# Build the ADBC Snowflake database handle: driver first, then the options.
db <- do.call(
  adbc_database_init,
  c(list(adbcsnowflake::adbcsnowflake()), db_options)
)

# Open a connection and run a test query
con  <- adbc_connection_init(db)
stmt <- adbc_statement_init(con)

test_query <- "SELECT 'THIS_COLUMN' as THIS_COLUMN "
adbc_statement_set_sql_query(stmt, test_query)

res <- adbc_statement_execute_query(stmt)
res

In [None]:
%%R
# Run the test query on the open connection `con` and show the result as a tibble.
literal_result <- read_adbc(con, "SELECT 'THIS_COLUMN' as THIS_COLUMN ")
tibble::as_tibble(literal_result)

In [None]:
%%R
# Query ten rows of the NATION sample table over the same connection and show
# them as a tibble.
nation_result <- read_adbc(con, "SELECT N_NATIONKEY, N_NAME FROM SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.NATION LIMIT 10")
tibble::as_tibble(nation_result)