# R on Vertex AI Pipelines

Run an R script in a Vertex AI Pipeline using KFP components.

In [4]:
r_file = '../../08 - R/code/train.R'

In [5]:
with open(r_file, 'r') as file:
    r_script = file.read()

In [6]:
print(r_script)

# library import
library(bigrquery)
library(dplyr)

# inputs
args <- commandArgs(trailingOnly = TRUE)
bq_project <- args[1]
bq_dataset <- args[2]
bq_table <- args[3]
var_target <- args[4]
var_omit <- args[5]

# data source
# Fetch the rows of one data split from the BigQuery table.
#
# s: value compared against the `splits` column (the script calls this
#    with "TRAIN" and "TEST").
# Uses the script-level bq_project, bq_dataset, bq_table and var_omit;
# the column(s) named by var_omit are excluded via SELECT * EXCEPT(...).
# Returns whatever bq_table_download() yields for the query result.
get_data <- function(s){
    # SQL text for the requested split
    sql <- sprintf('
        SELECT * EXCEPT(%s)
        FROM `%s.%s.%s`
        WHERE splits = "%s"
    ', var_omit, bq_project, bq_dataset, bq_table, s)

    # run the query, then download every row of its result (no row cap)
    result_tbl <- bq_project_query(bq_project, sql)
    bq_table_download(result_tbl, n_max = Inf)
}
train <- get_data("TRAIN")
test <- get_data("TEST")

# logistic regression model
model_exp = paste0(var_target, "~ .")

model <- glm(
    as.formula(model_exp),
    data = train,
    family = binomial)

# predictions for evaluation
preds <- predict(model, test, type = "response")

# evaluate
actual <- test[, var_target]
names(actual) <- 'actual'
pred <- tibble(round(preds))
names(pred) <- 'pred'
resu

Add some more output to the script:
writeLines(paste('project:', bq_project, 'dataset:', bq_dataset))

In [9]:
project = !gcloud config get-value project
PROJECT_ID = project[0]
PROJECT_ID

'statmike-mlops-349915'

In [10]:
REGION = 'us-central1'
EXPERIMENT = 'frameworks'
SERIES = 'r-pipeline'

# gcs bucket
GCS_BUCKET = PROJECT_ID

In [25]:
import os, typing
from google.cloud import aiplatform
import kfp

In [12]:
# vertex ai clients
aiplatform.init(project = PROJECT_ID, location = REGION)

In [15]:
DIR = f"temp/{SERIES}-{EXPERIMENT}"

In [16]:
SERVICE_ACCOUNT = !gcloud config list --format='value(core.account)' 
SERVICE_ACCOUNT = SERVICE_ACCOUNT[0]
SERVICE_ACCOUNT

'1026793852137-compute@developer.gserviceaccount.com'

environment:
- make a local folder for temporary storage

In [19]:
# Create the local working folder for temporary pipeline files.
# exist_ok=True makes this a no-op when the folder is already there and
# avoids the gap between a separate existence check and the create call.
os.makedirs(DIR, exist_ok=True)

In [20]:
kfp.local.init(
    runner = kfp.local.DockerRunner(),
    pipeline_root = DIR
)

In [56]:
# Container component that runs an R script inside the r-base image.
#
#   r_script: full text of the R program to execute
#   r_args:   space-separated positional arguments for the script
#             (read in R with commandArgs(trailingOnly = TRUE))
#   r_libs:   quoted, comma-separated package names spliced into
#             install.packages(c(...)); an empty string skips installation
#
# Inside a container component the parameters are placeholders that are only
# resolved when the task runs, so Python-level `if` tests on them cannot see
# the real values. The values are therefore handed to the shell as positional
# arguments ($0, $1, $2) and all branching happens in the shell program.
# Passing the R source as an argument (instead of pasting it into the command
# string) also keeps quotes and backticks in the script from breaking the
# shell quoting.
@kfp.dsl.container_component
def r_container(
    r_script: str,
    r_args: str,
    r_libs: str
):
    # With `sh -c PROGRAM a b c` the shell sees $0=a, $1=b, $2=c.
    #   $0 -> r_script, $1 -> r_args, $2 -> r_libs
    shell_program = """
        set -e
        if [ -n "$2" ]; then
            R -e "install.packages(c($2), repos='http://cran.us.r-project.org')"
        fi
        printf '%s\\n' "$0" > /tmp/r_script.R
        Rscript /tmp/r_script.R $1
    """
    # $1 is intentionally unquoted so the space-separated r_args string is
    # split into individual command-line arguments for the R script.

    return kfp.dsl.ContainerSpec(
        image='r-base:latest',
        command=['sh', '-c', shell_program],
        args=[r_script, r_args, r_libs]
    )

In [57]:
# Pipeline with a single task: run the supplied R script through r_container.
# The pipeline name is built from the SERIES and EXPERIMENT notebook variables.
@kfp.dsl.pipeline(
    name = f'{SERIES}-{EXPERIMENT}',
)
def r_pipeline(
    bq_project: str,
    bq_dataset: str,
    bq_table: str,
    var_target: str,
    var_omit: str,
    r_script: str
):
    
    # The five string parameters are joined, space-separated and in this
    # order, into the single r_args value passed to the component; r_script
    # is forwarded unchanged.
    r_job = r_container(
        r_script = r_script,
        r_args = f"{bq_project} {bq_dataset} {bq_table} {var_target} {var_omit}",
        # quoted, comma-separated package names; r_container places this
        # text inside install.packages(c(...))
        r_libs = "'dplyr', 'bigrquery'"
    )

In [58]:
kfp.compiler.Compiler().compile(
    pipeline_func = r_pipeline,
    package_path = f'{DIR}/{SERIES}-{EXPERIMENT}.yaml'
)

In [59]:
test_pipeline = r_pipeline(
    bq_project = PROJECT_ID,
    bq_dataset = SERIES,
    bq_table = EXPERIMENT,
    var_target = 'Class',
    var_omit = 'transaction_id',
    r_script = r_script
)

02:13:09.124 - INFO - Running pipeline: 'r-pipeline-frameworks'
--------------------------------------------------------------------------------
02:13:09.128 - INFO - Executing task 'r-container'
02:13:09.129 - INFO - Streamed logs:

    Found image 'r-base:latest'

    
    R version 4.4.3 (2025-02-28) -- "Trophy Case"
    Copyright (C) 2025 The R Foundation for Statistical Computing
    Platform: x86_64-pc-linux-gnu
    
    R is free software and comes with ABSOLUTELY NO WARRANTY.
    You are welcome to redistribute it under certain conditions.
    Type 'license()' or 'licence()' for distribution details.
    
      Natural language support but running in an English locale
    
    R is a collaborative project with many contributors.
    Type 'contributors()' for more information and
    'citation()' on how to cite R or R packages in publications.
    
    Type 'demo()' for some demos, 'help()' for on-line help, or
    'help.start()' for an HTML browser interface t

RuntimeError: Pipeline 'r-pipeline-frameworks' finished with status FAILURE. Inner task failed: 'r-container'.