# In [None]:
# function to set up environment variables
# Copies the JupyterLab-provided S3 settings into the AWS_* environment
# variables that the S3 helpers in this file read.
#
# Reads:  JUPYTERLAB_S3_ACCESS_KEY_ID, JUPYTERLAB_S3_SECRET_ACCESS_KEY,
#         JUPYTERLAB_S3_ENDPOINT
# Sets:   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION (empty)
#         and AWS_S3_ENDPOINT (host:port, without the "http://" scheme)
# Returns the value of Sys.setenv().
setup_env <- function() {
    key <- Sys.getenv("JUPYTERLAB_S3_ACCESS_KEY_ID")
    secret <- Sys.getenv("JUPYTERLAB_S3_SECRET_ACCESS_KEY")
    endpoint <- Sys.getenv("JUPYTERLAB_S3_ENDPOINT")

    # Remove only a leading scheme, and drop trailing slashes so the port is
    # not glued onto a path separator ("host/:80").
    endpoint <- sub("^http://", "", endpoint)
    endpoint <- sub("/+$", "", endpoint)

    # Port 80 is only a default: an endpoint that already names a port
    # (e.g. "host:9000") must not become "host:9000:80".
    if (!grepl(":[0-9]+$", endpoint)) {
        endpoint <- paste0(endpoint, ":80")
    }

    Sys.setenv(
        "AWS_ACCESS_KEY_ID" = key,
        "AWS_SECRET_ACCESS_KEY" = secret,
        "AWS_DEFAULT_REGION" = "",
        "AWS_S3_ENDPOINT" = endpoint)
}

# function to get object metadata
# Returns the user-defined metadata of an object as a named list.
#
# path   - passed to head_object() as the object to inspect
# bucket - bucket that holds the object
#
# The response of head_object() carries the response headers as attributes;
# only those whose name starts with "x-amz-meta-" are kept. An empty list()
# is returned when there are no attributes or none of them match.
get_meta <- function(path, bucket) {
    # Spell the argument out in full (use_https, as in the get_object() call
    # of this file) instead of relying on partial argument matching.
    headers <- head_object(path, bucket, use_https = FALSE)
    fields <- attributes(headers)

    # attributes() is NULL for a bare value; there is nothing to filter then.
    if (length(fields) == 0) {
        return(list())
    }

    is_meta <- startsWith(names(fields), "x-amz-meta-")
    if (!any(is_meta)) {
        return(list())
    }
    fields[is_meta]
}

# function to get object tags
# Issues a GET request for the "tagging" sub-resource of an object and
# returns the result of s3HTTP() (called with parse_response = TRUE).
#
# path   - forwarded as the request path
# bucket - forwarded as the bucket
#
# Endpoint, region and credentials are taken from the AWS_* environment
# variables at call time.
get_tags <- function(path, bucket) {
    s3HTTP(
        # --- request ---
        verb = "GET",
        bucket = bucket,
        path = path,
        query = list(tagging = ""),
        headers = list(),
        request_body = "",

        # --- endpoint and credentials (from the environment) ---
        base_url = Sys.getenv("AWS_S3_ENDPOINT"),
        region = Sys.getenv("AWS_DEFAULT_REGION"),
        key = Sys.getenv("AWS_ACCESS_KEY_ID"),
        secret = Sys.getenv("AWS_SECRET_ACCESS_KEY"),
        session_token = NULL,

        # --- transport options ---
        use_https = FALSE,
        url_style = c("path", "virtual"),
        accelerate = FALSE,
        dualstack = FALSE,
        check_region = FALSE,

        # --- response handling ---
        parse_response = TRUE,
        write_disk = NULL,
        verbose = FALSE
    )
}

# ------------------------------------------------------------------------------

# Export the AWS_* environment variables, then attach the packages.
# NOTE(review): setup_env() runs before the packages are attached; presumably
# so the variables are already in place when minio.s3 loads - confirm before
# reordering.
setup_env()
library("minio.s3")
library(arrow)

bucketName <- "XXXX" # Update to match local environment
remoteFileName <- "pipeline-tx.parquet"
localFileName <- "pipeline-rx.parquet"

# get object tags and metadata
get_tags(remoteFileName, bucketName)
get_meta(remoteFileName, bucketName)

# get an object from S3 storage and write it in local storage
# (FALSE is spelled out: the alias F is an ordinary variable that can be
# reassigned)
raw <- get_object(remoteFileName, bucketName, use_https = FALSE)
write_parquet(read_parquet(raw), localFileName)