In [1]:
import sagemaker
from sagemaker import get_execution_role

# Session object through which this notebook talks to SageMaker and S3.
sagemaker_session = sagemaker.Session()

# Execution role returned by get_execution_role(); handed to the estimator later.
role = get_execution_role()

# S3 destination: the session's default bucket, plus a key prefix that groups
# everything this notebook uploads.
bucket = sagemaker_session.default_bucket()
prefix = "SkLinearLearner-pipeline"

In [2]:

# Local directory that contains housing.csv.
WORK_DIRECTORY = "."

# Upload the CSV under "<prefix>/train" in the bucket and keep the resulting
# S3 URI for the training and transform steps.
train_input = sagemaker_session.upload_data(
    path=f"{WORK_DIRECTORY}/housing.csv",
    bucket=bucket,
    key_prefix=f"{prefix}/train",
)

# Create SageMaker Scikit Estimator 


In [3]:
from sagemaker.sklearn.estimator import SKLearn

FRAMEWORK_VERSION = "0.23-1"
script_path = "./sklearn_pipe.py"

# Settings for the SKLearn estimator: sklearn_pipe.py is the entry point and
# training runs on an ml.m4.xlarge instance.
# (instance_type="local" is the alternative the author had commented out.)
estimator_settings = dict(
    entry_point=script_path,
    role=role,
    framework_version=FRAMEWORK_VERSION,
    instance_type="ml.m4.xlarge",
    sagemaker_session=sagemaker_session,
)

sklearn_preprocessor = SKLearn(**estimator_settings)


In [4]:
# Show the S3 URI that upload_data returned for the training CSV.
train_input

's3://sagemaker-ap-south-1-961558279552/SkLinearLearner-pipeline/train/housing.csv'

In [5]:
# Run the training job, supplying the uploaded CSV as the "train" channel.
training_channels = {"train": train_input}
sklearn_preprocessor.fit(training_channels)

2021-06-15 16:06:55 Starting - Starting the training job...
2021-06-15 16:07:17 Starting - Launching requested ML instancesProfilerReport-1623773214: InProgress
......
2021-06-15 16:08:17 Starting - Preparing the instances for training......
2021-06-15 16:09:20 Downloading - Downloading input data...
2021-06-15 16:09:38 Training - Downloading the training image...
2021-06-15 16:10:22 Uploading - Uploading generated training model
2021-06-15 16:10:22 Completed - Training job completed
[34m2021-06-15 16:10:10,176 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-06-15 16:10:10,178 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-15 16:10:10,189 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-06-15 16:10:10,507 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-15 16:10:10,522 sagemaker-training-to

# Batch transform our training data 

In [6]:
# Build a batch Transformer from the fitted estimator: one ml.m4.xlarge
# instance, "text/csv" as the accept type and "Line" output assembly.
transformer = sklearn_preprocessor.transformer(
    instance_type="ml.m4.xlarge",
    instance_count=1,
    accept="text/csv",
    assemble_with="Line",
)

In [7]:

# Run the batch transform over the uploaded training CSV, announce the job
# name, then block until the job has finished.
transformer.transform(train_input, content_type="text/csv")
print(f"Waiting for transform job: {transformer.latest_transform_job.job_name}")
transformer.wait()

# Output location reported by the transformer for the preprocessed data.
preprocessed_train = transformer.output_path

................................[34m2021-06-15 16:15:47,309 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-15 16:15:47,312 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2021-06-15 16:15:47,313 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;
[0m
[34mworker_rlimit_nofile 4096;
[0m
[34mevents {
  worker_connections 2048;[0m
[34m}
[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;

  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }

  server {
    listen 8080 deferred;
    client_max_body_size 0;

    keepalive_timeout 3;

    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redir

# Serial Inference Pipeline with Scikit preprocessor and Linear Learner (only the Scikit preprocessor model is deployed in the cell below)


In [8]:
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
import boto3
from time import gmtime, strftime

# UTC timestamp appended to the model and endpoint names.
timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

model_name = f"inference-pipeline-{timestamp_prefix}"
endpoint_name = f"inference-pipeline-ep-{timestamp_prefix}"

# Wrap the fitted estimator as a deployable model.
scikit_learn_inference_model = sklearn_preprocessor.create_model()

# Deploy the model behind an endpoint. The call is the cell's last expression,
# so the predictor it returns is displayed as the cell output.
scikit_learn_inference_model.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

---------------!

<sagemaker.sklearn.model.SKLearnPredictor at 0x7f69b1eefa90>

# Make a request to our pipeline endpoint


In [9]:

from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer

# Sample CSV record sent to the endpoint as a smoke test.
# NOTE(review): the final field (452600.0) is returned as the first feature in
# the response, so it looks like the label column -- confirm against
# sklearn_pipe.py whether inference payloads are expected to include it.
payload = '-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,NEAR BAY,452600.0'

# Attach to the already-deployed endpoint by name; CSVSerializer handles
# serialising the payload for the request.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=CSVSerializer())

# Print the raw response returned by the endpoint.
print(predictor.predict(payload))

# NOTE(review): the endpoint stays up after this cell; call
# predictor.delete_endpoint() once it is no longer needed.

b'{"instances": [{"features": [452600.0, -1.3278352216308462, 1.0525482830366848, 0.9821426581785077, -0.8048190966246049, -0.9724764790070289, -0.9744285971768408, -0.9770328537634586, 2.3447657583017163, 0.0, 0.0, 0.0, 1.0, 0.0]}]}'
