In [1]:
import boto3
import sagemaker

In [2]:
region = boto3.session.Session().region_name
role = !aws configure get role_arn --profile sagemaker
role = role[0]

In [18]:
import subprocess

In [22]:
'aws configure get role_arn --profile sagemaker'.split(' ')

['aws', 'configure', 'get', 'role_arn', '--profile', 'sagemaker']

In [37]:
p = subprocess.run('aws configure get role_arn --profile sagemaker'.split(' '), stdout=subprocess.PIPE) \
    .stdout \
    .decode('utf-8') \
    .strip()

In [38]:
p

'arn:aws:iam::495577990003:role/service-role/AmazonSageMaker-ExecutionRole-20191110T141611'

In [3]:
import json

In [4]:
image_json = !aws ecr describe-repositories --repository-names "sagemaker-tidymodels"

In [5]:
image_uri = json.loads("".join(image_json))['repositories'][0]['repositoryArn']

In [6]:
from sagemaker.estimator import Framework
from sagemaker.model import FrameworkModel
from sagemaker.predictor import RealTimePredictor

In [7]:
class TidymodelsPredictor(RealTimePredictor):
    def __init__(self, endpoint_name, sagemaker_session=None, **kwargs):
        super(TidymodelsPredictor, self).__init__(
            endpoint_name, 
            sagemaker_session=sagemaker_session, 
            **kwargs
        )

In [8]:
from sagemaker.fw_utils import model_code_key_prefix
from sagemaker.model import MODEL_SERVER_WORKERS_PARAM_NAME

class TidymodelsModel(FrameworkModel):
    
    # `FrameworkModel` accepts a `dependencies` argument to make more code availabe
    # in `/opt/ml/code`: https://github.com/aws/sagemaker-python-sdk/blob/8b2d5c8d73236b59bca6fdcaf96f227a01488288/src/sagemaker/model.py#L704-L712
    
    __framework_name__ = "tidymodels"
    
    def __init__(
        self, 
        model_data, 
        image, role, 
        entry_point, 
        predictor_cls=TidymodelsPredictor, 
        **kwargs
    ):
        super(TidymodelsModel, self).__init__(
            model_data,
            image,
            role,
            entry_point,
            predictor_cls=predictor_cls,
            **kwargs
        )

        
class Tidymodels(Framework):
    def __init__(self, **kwargs):
        super(Tidymodels, self).__init__(**kwargs)
        
    def create_model(self, **kwargs):
        # todo: entry_point=None, replace to use a specific serve script
        # uses self or supplied entry_point: https://github.com/aws/sagemaker-python-sdk/blob/1872483681a6647bdad126b8214fb6cc35e164fd/src/sagemaker/sklearn/estimator.py#L204
        return TidymodelsModel(
            entry_point=self.entry_point,
            image=self.image_name,
            role=self.role,
            model_data=self.model_data,
            **kwargs
        )

In [9]:
tidymodels = Tidymodels(
    entry_point="R/train.R",
    train_instance_count=1,
    train_instance_type="local",
    role=role,
    image_name='sagemaker-tidymodels'
)



In [10]:
logs = tidymodels.fit({'train': 's3://sagemaker-sample-data-us-east-2/processing/census/census-income.csv'})



Creating tmpla9mqln0_algo-1-2h15l_1 ... 
[1BAttaching to tmpla9mqln0_algo-1-2h15l_12mdone[0m
[36malgo-1-2h15l_1  |[0m s3://sagemaker-us-east-2-495577990003/sagemaker-tidymodels-2020-07-26-00-57-04-788/source/sourcedir.tar.gz
[36malgo-1-2h15l_1  |[0m train.R
[36malgo-1-2h15l_1  |[0m []
[36malgo-1-2h15l_1  |[0m Loading required package: dplyr
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m Attaching package: ‘dplyr’
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m The following objects are masked from ‘package:stats’:
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m     filter, lag
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m The following objects are masked from ‘package:base’:
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m     intersect, setdiff, setequal, union
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m Attaching package: ‘recipes’
[36malgo-1-2h15l_1  |[0m 
[36malgo-1-2h15l_1  |[0m The following

In [11]:
tidymodels.model_data

's3://sagemaker-us-east-2-495577990003/sagemaker-tidymodels-2020-07-26-00-57-04-788/model.tar.gz'

In [13]:
tidymodels.uploaded_code

UserCode(s3_prefix='s3://sagemaker-us-east-2-495577990003/sagemaker-tidymodels-2020-07-26-00-57-04-788/source/sourcedir.tar.gz', script_name='train.R')

In [14]:
# recipes is out of date on docker??
# couldn't figure out why, but I fixed it by changing some of the name vectors

# instance_type="ml.t2.medium"

In [15]:
predictor = tidymodels.deploy(initial_instance_count=1, instance_type='local')



Attaching to tmpf97ht9xe_algo-1-cjiwd_1
[36malgo-1-cjiwd_1  |[0m Loading required package: dplyr
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m Attaching package: ‘dplyr’
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m The following objects are masked from ‘package:stats’:
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m     filter, lag
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m The following objects are masked from ‘package:base’:
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m     intersect, setdiff, setequal, union
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m Attaching package: ‘recipes’
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m The following object is masked from ‘package:stats’:
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m     step
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m 
[36malgo-1-cjiwd_1  |[0m Attaching package: ‘janitor’
[36malgo-1-cjiwd_1  |[0m 
[36malgo-

In [16]:
example_data = r'73,Not in universe,0,0,High school graduate,0,Not in universe,Widowed,Not in universe or children,Not in universe,White,All other,Female,Not in universe,Not in universe,Not in labor force,0,0,0,Nonfiler,Not in universe,Not in universe,Other Rel 18+ ever marr not in subfamily,Other relative of householder,1700.09,?,?,?,Not in universe under 1 year old,?,0,Not in universe,United-States,United-States,United-States,Native- Born in the United States,0,Not in universe,2,0,95,- 50000.\n'

In [17]:
# string output without deserializer
predictor.predict(example_data)

[36malgo-1-cjiwd_1  |[0m   prediction from a rank-deficient fit may be misleading


b'["- 50000.\\n"]'

In [28]:
import numpy as np

In [29]:
sagemaker.predictor.csv_serializer(np.array([0, 1]))

'0,1'

In [30]:
sagemaker.predictor.csv_deserializer(sagemaker.predictor.csv_serializer(np.array([0, 1])), content_type='text/csv')

AttributeError: 'str' object has no attribute 'close'

In [31]:
predictor.content_type = 'application/json'
predictor.deserializer = sagemaker.predictor.json_deserializer

In [32]:
# list of values with json_deserializer
pred2 = predictor.predict(example_data)

[36malgo-1-9j842_1  |[0m   prediction from a rank-deficient fit may be misleading


In [33]:
print(pred2)
pred2[0]

['- 50000.\n']


'- 50000.\n'

In [48]:
# {sagemaker} always return a string, and then has a separate R process
# to format the string json or csv into an R object. 
# This is probably (??) more efficient than letting python deserialize into a
# numpy array and then copy that into an R array. See here: https://rstudio.github.io/reticulate/articles/arrays.html