In [1]:
# Silence all warnings for the rest of this session (warn = -1 suppresses them)
options(warn = -1)

In [2]:
# Load reticulate and readr, then import the sagemaker and boto3 Python modules
library(reticulate)
library(readr)

sagemaker <- import('sagemaker')
boto3 <- import('boto3')

In [3]:
# Create a SageMaker session object and ask it for its default bucket name;
# `bucket` is reused below for building S3 paths and downloading results.
session <- sagemaker$Session()
bucket <- session$default_bucket()

In [4]:
# boto3 S3 client, used further down to download query result files
s3 <- boto3$client("s3")

In [5]:
# S3 prefix (inside the default bucket) intended for Athena query results,
# and the SQL text of the first query to run.
s3_dest <- paste0("s3://", bucket, "/athena/results/")
athena_query <- "SELECT * FROM directmarketing"

print(s3_dest)

[1] "s3://sagemaker-ap-southeast-2-153805901131/athena/results/"


In [6]:
# Load the Python helpers from utils.py into the R session, then run the query.
# NOTE(review): execute_athena_query is presumably defined in utils.py (not
# visible here); its return value is printed in the next cell as an S3 URI of
# a .csv file -- confirm the helper waits for the query to finish.
source_python("utils.py")
results_uri <- execute_athena_query(athena_query, bucket)

In [7]:
# Show where the query result was written
print(results_uri)

[1] "s3://sagemaker-ap-southeast-2-153805901131/athena/results/4cdd3fd9-5ab5-4557-b581-92bfe9445cd6.csv"


In [8]:
# Derive the object key from the URI returned by the query instead of
# hard-coding the id of one past execution: every run of the query gets a new
# id, so a literal key goes stale as soon as the notebook is re-run.
# Stripping the "s3://<bucket>/" prefix leaves the key inside the bucket.
key <- sub(paste0("s3://", bucket, "/"), "", results_uri, fixed = TRUE)
s3$download_file(bucket, key, "./result.csv")

In [9]:
# Parse the downloaded CSV (first row holds the column names) and preview it
directmarketing <- read_csv("./result.csv", col_names = TRUE)
head(directmarketing)


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  .default = col_character(),
  age = [32mcol_double()[39m,
  duration = [32mcol_double()[39m,
  campaign = [32mcol_double()[39m,
  pdays = [32mcol_double()[39m,
  previous = [32mcol_double()[39m,
  emp.var.rate = [32mcol_double()[39m,
  cons.price.idx = [32mcol_double()[39m,
  cons.conf.idx = [32mcol_double()[39m,
  euribor3m = [32mcol_double()[39m,
  nr.employed = [32mcol_double()[39m
)
[36mℹ[39m Use [30m[47m[30m[47m`spec()`[47m[30m[49m[39m for the full column specifications.




age,job,marital,education,default,housing,loan,contact,month,day_of_week,⋯,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,⋯,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
57,services,married,high.school,unknown,no,no,telephone,may,mon,⋯,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
37,services,married,high.school,no,yes,no,telephone,may,mon,⋯,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
40,admin.,married,basic.6y,no,no,no,telephone,may,mon,⋯,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
56,services,married,high.school,no,no,yes,telephone,may,mon,⋯,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no
45,services,married,basic.9y,unknown,no,no,telephone,may,mon,⋯,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191,no


## using boto3 directly

In [10]:
# boto3 Athena client, used to submit queries without the utils.py helper
athena <- boto3$client("athena")

In [11]:
# Result configuration passed to Athena: where query output should be written
output_location <- list(OutputLocation = s3_dest)

In [12]:
# Second query: only a subset of the columns
athena_query <- "select age, job, education, y from directmarketing"

In [13]:
# Submit the query. The boto3 response arrives in R as a named list, so the id
# must be extracted with `[[`: single-bracket `[` would return a one-element
# list rather than the id string itself, which only works downstream by
# accident of paste() coercion and breaks if the id is passed back to boto3.
query_id <- athena$start_query_execution(
  QueryString = athena_query,
  ResultConfiguration = output_location
)[["QueryExecutionId"]]

In [14]:
# Build the S3 URI of this execution's result file: <prefix><id>.csv
result_uri <- paste0(s3_dest, query_id, ".csv")
result_uri

In [15]:
# start_query_execution only submits the query, so poll until Athena reports a
# terminal state before trying to download the output file.
# `[[1]]` yields the id string whether query_id is a string or a 1-element list.
execution_id <- query_id[[1]]
repeat {
  state <- athena$get_query_execution(
    QueryExecutionId = execution_id
  )$QueryExecution$Status$State
  if (!state %in% c("QUEUED", "RUNNING")) {
    break
  }
  Sys.sleep(1)
}
if (state != "SUCCEEDED") {
  stop(
    "Athena query ", execution_id, " ended in state ", state,
    call. = FALSE
  )
}

# Build the key from this execution's id instead of a hard-coded id from an
# earlier run, so the file downloaded is the one this query produced.
key <- paste0("athena/results/", execution_id, ".csv")
s3$download_file(bucket, key, "./result3.csv")

In [16]:
# Parse the second downloaded CSV (header row present) and preview it
directmarketing <- read_csv("./result3.csv", col_names = TRUE)
head(directmarketing)


[36m──[39m [1m[1mColumn specification[1m[22m [36m────────────────────────────────────────────────────────[39m
cols(
  age = [32mcol_double()[39m,
  job = [31mcol_character()[39m,
  education = [31mcol_character()[39m,
  y = [31mcol_character()[39m
)




age,job,education,y
<dbl>,<chr>,<chr>,<chr>
56,housemaid,basic.4y,no
57,services,high.school,no
37,services,high.school,no
40,admin.,basic.6y,no
56,services,high.school,no
45,services,basic.9y,no
