In [1]:
import boto3
import sagemaker

In [2]:
region = boto3.session.Session().region_name
role = !aws configure get role_arn --profile sagemaker
role = role[0]

In [3]:
import json

In [4]:
# Describe the "sagemaker-tidymodels" ECR repository. The `!` shell capture
# stores the CLI output as a list of lines; it is parsed as JSON in the next cell.
image_json = !aws ecr describe-repositories --repository-names "sagemaker-tidymodels"

In [5]:
# The CLI output was captured as a list of lines; re-join them before parsing.
# Read `repositoryUri` (the registry address images are pulled from) rather
# than `repositoryArn`, which is the resource ARN and not an image URI.
image_uri = json.loads("".join(image_json))['repositories'][0]['repositoryUri']

In [6]:
from sagemaker.estimator import Framework
from sagemaker.model import FrameworkModel
from sagemaker.predictor import RealTimePredictor

In [7]:
class TidymodelsPredictor(RealTimePredictor):
    """Predictor for endpoints created from the tidymodels model class.

    Adds no behaviour of its own: construction is delegated unchanged to
    ``RealTimePredictor``.
    """

    def __init__(self, endpoint_name, sagemaker_session=None, **kwargs):
        """Forward every argument to ``RealTimePredictor.__init__``.

        Args:
            endpoint_name: Name of the endpoint this predictor talks to.
            sagemaker_session: Optional session object passed through as-is.
            **kwargs: Any further keyword arguments accepted by the base class.
        """
        super().__init__(endpoint_name, sagemaker_session=sagemaker_session, **kwargs)

In [8]:
from sagemaker.fw_utils import model_code_key_prefix
from sagemaker.model import MODEL_SERVER_WORKERS_PARAM_NAME

class TidymodelsModel(FrameworkModel):
    """Model definition used to deploy a tidymodels container image."""

    # `FrameworkModel` accepts a `dependencies` argument to make more code available
    # in `/opt/ml/code`: https://github.com/aws/sagemaker-python-sdk/blob/8b2d5c8d73236b59bca6fdcaf96f227a01488288/src/sagemaker/model.py#L704-L712

    __framework_name__ = "tidymodels"

    def __init__(
        self,
        model_data,
        image,
        role,
        entry_point,
        predictor_cls=TidymodelsPredictor,
        model_server_workers=None,
        **kwargs
    ):
        """Initialise the model and remember the requested worker count.

        Args:
            model_data: Forwarded to ``FrameworkModel`` (first positional argument).
            image: Forwarded to ``FrameworkModel``; the container image to deploy.
            role: Forwarded to ``FrameworkModel``.
            entry_point: Forwarded to ``FrameworkModel``.
            predictor_cls: Predictor class handed to ``FrameworkModel``;
                defaults to ``TidymodelsPredictor``.
            model_server_workers: Optional worker count. When truthy it is
                exported to the container environment by
                ``prepare_container_def``.
            **kwargs: Any further keyword arguments for ``FrameworkModel``.
        """
        super(TidymodelsModel, self).__init__(
            model_data,
            image,
            role,
            entry_point,
            predictor_cls=predictor_cls,
            **kwargs
        )

        self.model_server_workers = model_server_workers

    def prepare_container_def(self, instance_type=None, accelerator_type=None):
        """Return a container definition with framework configuration set in
        model environment variables.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model to.
                Only consulted when no image was supplied and a default image
                has to be resolved.
            accelerator_type (str): The Elastic Inference accelerator type.
                Accelerators are not supported for Tidymodels, so any truthy
                value is rejected.

        Returns:
            dict[str, str]: A container definition object usable with the
            CreateModel API.

        Raises:
            ValueError: If ``accelerator_type`` is given, or if no image was
                supplied and this class has no ``serving_image_uri`` method to
                derive one.
        """
        if accelerator_type:
            raise ValueError("Accelerator types are not supported for Tidymodels.")

        deploy_image = self.image
        if not deploy_image:
            # Unlike the SDK's built-in framework models, this class does not
            # define `serving_image_uri`. Look it up defensively so a missing
            # image produces a clear error instead of an AttributeError, while
            # subclasses that do provide the method keep working.
            resolve_image = getattr(self, "serving_image_uri", None)
            if resolve_image is None:
                raise ValueError(
                    "No container image was provided and Tidymodels has no default "
                    "serving image; pass `image` explicitly."
                )
            deploy_image = resolve_image(
                self.sagemaker_session.boto_region_name, instance_type
            )

        # Upload the entry-point code; repack it into the model archive when
        # network isolation is enabled.
        deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
        self._upload_code(key_prefix=deploy_key_prefix, repack=self.enable_network_isolation())

        # Start from the user-supplied environment and layer the framework
        # variables on top of a copy, leaving `self.env` untouched.
        deploy_env = dict(self.env)
        deploy_env.update(self._framework_env_vars())

        if self.model_server_workers:
            deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)

        # With network isolation the container gets the repacked archive
        # (model + code); otherwise it gets the original model data.
        model_data_uri = (
            self.repacked_model_data if self.enable_network_isolation() else self.model_data
        )
        return sagemaker.container_def(deploy_image, model_data_uri, deploy_env)
        
        
class Tidymodels(Framework):
    """Estimator that creates ``TidymodelsModel`` objects for deployment."""

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``Framework.__init__``."""
        super(Tidymodels, self).__init__(**kwargs)

    def create_model(self, **kwargs):
        """Build a ``TidymodelsModel`` from this estimator's settings.

        ``entry_point``, ``image``, ``role`` and ``model_data`` default to the
        estimator's own values. A caller may override any of them through
        ``kwargs``; an explicit ``None`` falls back to the estimator's value.
        Previously, passing one of these names raised ``TypeError`` because
        the keyword was supplied twice.

        Args:
            **kwargs: Extra keyword arguments passed on to ``TidymodelsModel``.

        Returns:
            TidymodelsModel: The model object built from the merged arguments.
        """
        # todo: entry_point=None, replace to use a specific serve script
        # uses self or supplied entry_point: https://github.com/aws/sagemaker-python-sdk/blob/1872483681a6647bdad126b8214fb6cc35e164fd/src/sagemaker/sklearn/estimator.py#L204
        estimator_defaults = {
            "entry_point": self.entry_point,
            "image": self.image_name,
            "role": self.role,
            "model_data": self.model_data,
        }
        model_kwargs = dict(kwargs)
        for key, default in estimator_defaults.items():
            # Only fill in what the caller left unset (or explicitly None).
            if model_kwargs.get(key) is None:
                model_kwargs[key] = default
        return TidymodelsModel(**model_kwargs)

In [11]:
# Configure the estimator: one "local" training instance running the
# "sagemaker-tidymodels" image with R/train.R as the entry point.
tidymodels = Tidymodels(
    image_name="sagemaker-tidymodels",
    role=role,
    entry_point="R/train.R",
    train_instance_type="local",
    train_instance_count=1,
)



In [12]:
# Run training with the census income CSV supplied under the 'train' key.
# NOTE(review): `fit` may return None, in which case `logs` holds nothing useful — confirm.
logs = tidymodels.fit({'train': 's3://sagemaker-sample-data-us-east-2/processing/census/census-income.csv'})



Creating tmptry7iu84_algo-1-nvi4p_1 ... 
[1BAttaching to tmptry7iu84_algo-1-nvi4p_12mdone[0m
[36malgo-1-nvi4p_1  |[0m s3://sagemaker-us-east-2-495577990003/sagemaker-tidymodels-2020-07-26-00-05-32-843/source/sourcedir.tar.gz
[36malgo-1-nvi4p_1  |[0m train.R
[36malgo-1-nvi4p_1  |[0m []
[36malgo-1-nvi4p_1  |[0m Loading required package: dplyr
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m Attaching package: ‘dplyr’
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m The following objects are masked from ‘package:stats’:
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m     filter, lag
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m The following objects are masked from ‘package:base’:
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m     intersect, setdiff, setequal, union
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m Attaching package: ‘recipes’
[36malgo-1-nvi4p_1  |[0m 
[36malgo-1-nvi4p_1  |[0m The following

In [13]:
# Inspect where the trained model artifact (model.tar.gz) was written.
tidymodels.model_data

's3://sagemaker-us-east-2-495577990003/sagemaker-tidymodels-2020-07-26-00-05-32-843/model.tar.gz'

In [22]:
# Inspect the uploaded training code (S3 prefix and script name).
tidymodels.uploaded_code

UserCode(s3_prefix='s3://sagemaker-us-east-2-495577990003/sagemaker-tidymodels-2020-07-25-22-17-53-014/source/sourcedir.tar.gz', script_name='train.R')

In [12]:
# The `recipes` package in the Docker image seems to be out of date (??).
# I couldn't figure out why, but I worked around it by changing some of the name vectors.

# instance_type="ml.t2.medium"

In [15]:
# Deploy the trained model with instance_type='local' and keep the returned
# predictor for the requests below.
predictor = tidymodels.deploy(initial_instance_count=1, instance_type='local')



Attaching to tmp1r9hknhw_algo-1-9p3j3_1
[36malgo-1-9p3j3_1  |[0m [1] "train.R"
[36malgo-1-9p3j3_1  |[0m [1] "/"
[36malgo-1-9p3j3_1  |[0m [1] "SAGEMAKER_SUBMIT_DIRECTORY"
[36malgo-1-9p3j3_1  |[0m [1] "s3://sagemaker-us-east-2-495577990003/sagemaker-tidymodels-2020-07-26-00-05-32-843/sourcedir.tar.gz"
[36malgo-1-9p3j3_1  |[0m character(0)
[36malgo-1-9p3j3_1  |[0m character(0)
[36malgo-1-9p3j3_1  |[0m /bin/tar: /user_module/: Cannot open: No such file or directory
[36malgo-1-9p3j3_1  |[0m /bin/tar: Error is not recoverable: exiting now
[36malgo-1-9p3j3_1  |[0m In untar("/user_module/") :
[36malgo-1-9p3j3_1  |[0m   ‘/bin/tar -xf '/user_module/'’ returned error code 2
[36malgo-1-9p3j3_1  |[0m character(0)
[36malgo-1-9p3j3_1  |[0m  [1] "bin"            "boot"           "dev"            "entry"         
[36malgo-1-9p3j3_1  |[0m  [5] "etc"            "home"           "lib"            "lib64"         
[36malgo-1-9p3j3_1  |[0m  [9] "media"          "mnt"            "o



[36malgo-1-9p3j3_1  |[0m 
[36malgo-1-9p3j3_1  |[0m Attaching package: ‘janitor’
[36malgo-1-9p3j3_1  |[0m 
[36malgo-1-9p3j3_1  |[0m The following objects are masked from ‘package:stats’:
[36malgo-1-9p3j3_1  |[0m 
[36malgo-1-9p3j3_1  |[0m     chisq.test, fisher.test
[36malgo-1-9p3j3_1  |[0m 
[36malgo-1-9p3j3_1  |[0m [1] "hello starting plumber..."
[36malgo-1-9p3j3_1  |[0m Starting server to listen on port 8080
[36malgo-1-9p3j3_1  |[0m [1] "hello"
[36malgo-1-9p3j3_1  |[0m [1] "hello from other func!!!"
!

In [None]:
# A single comma-separated record used as the request payload below.
# NOTE(review): this is a raw string, so the trailing `\n` is a literal
# backslash followed by 'n', not a newline character — confirm that is intended.
example_data = r'73,Not in universe,0,0,High school graduate,0,Not in universe,Widowed,Not in universe or children,Not in universe,White,All other,Female,Not in universe,Not in universe,Not in labor force,0,0,0,Nonfiler,Not in universe,Not in universe,Other Rel 18+ ever marr not in subfamily,Other relative of householder,1700.09,?,?,?,Not in universe under 1 year old,?,0,Not in universe,United-States,United-States,United-States,Native- Born in the United States,0,Not in universe,2,0,95,- 50000.\n'

In [None]:
# Without a deserializer set on the predictor, the output is a plain string.
predictor.predict(example_data)

In [44]:
import numpy as np

In [28]:
# Check what the SDK's CSV serializer produces for a small numpy array.
sagemaker.predictor.csv_serializer(np.array([0, 1]))

'0,1'

In [30]:
# Round-trip check: feeding the serialized `str` straight into the CSV
# deserializer fails, because the deserializer calls `.close()` on its input
# (it is written for a file-like response body, not a plain string).
# The failure is the point of this cell, so catch and display it rather than
# letting it abort a "Restart & Run All".
serialized = sagemaker.predictor.csv_serializer(np.array([0, 1]))
try:
    roundtrip = sagemaker.predictor.csv_deserializer(serialized, content_type='text/csv')
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")
else:
    print(roundtrip)

AttributeError: 'str' object has no attribute 'close'

In [63]:
# Reconfigure the existing predictor so responses go through the SDK's JSON deserializer.
# NOTE(review): `content_type` is set to JSON although `example_data` is a
# CSV string — confirm the endpoint accepts or ignores this.
predictor.content_type = 'application/json'
predictor.deserializer = sagemaker.predictor.json_deserializer

In [64]:
# With json_deserializer set, the same request comes back as a list of values.
pred2 = predictor.predict(example_data)

[36malgo-1-5pmix_1  |[0m   prediction from a rank-deficient fit may be misleading


In [65]:
# Show the whole deserialized response, then just its first element.
print(pred2)
pred2[0]

['- 50000.\n']


'- 50000.\n'

[36mtmp80wdc_om_algo-1-5pmix_1 exited with code 137
[0mAborting on container exit...


Exception in thread Thread-10:
Traceback (most recent call last):
  File "/Users/Tim/anaconda/envs/jupyter/lib/python3.7/site-packages/sagemaker/local/image.py", line 618, in run
    _stream_output(self.process)
  File "/Users/Tim/anaconda/envs/jupyter/lib/python3.7/site-packages/sagemaker/local/image.py", line 677, in _stream_output
    raise RuntimeError("Process exited with code: %s" % exit_code)
RuntimeError: Process exited with code: 137

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/Tim/anaconda/envs/jupyter/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Users/Tim/anaconda/envs/jupyter/lib/python3.7/site-packages/sagemaker/local/image.py", line 623, in run
    raise RuntimeError(msg)
RuntimeError: Failed to run: ['docker-compose', '-f', '/private/var/folders/tg/y_4wm8y935sbct1kwymzgnpr0000gn/T/tmp80wdc_om/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'],

In [48]:
# The {sagemaker} R package always returns a string, and then has a separate R process
# to format the JSON or CSV string into an R object.
# This is probably (??) more efficient than letting Python deserialize into a
# numpy array and then copying that into an R array. See here: https://rstudio.github.io/reticulate/articles/arrays.html