In [1]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn

In [None]:
# Upgrade boto3 to avoid an UnknownServiceError when the execution role is fetched.
# Run this cell once, then restart the kernel before running the rest of the notebook.
%pip install --upgrade boto3

In [2]:
# Look up the IAM execution role attached to this notebook instance and
# open the SageMaker session that the following cells reuse.
role = get_execution_role()
sagemaker_session = sagemaker.Session()

# Show both values, one per line, as a quick sanity check.
print(role, sagemaker_session, sep="\n")

arn:aws:iam::914211408554:role/service-role/AmazonSageMaker-ExecutionRole-20230121T161181
<sagemaker.session.Session object at 0x7fd9f83ef6d0>


In [3]:
# Upload the local CSV through the session; the S3 bucket is created automatically
# (https://s3.console.aws.amazon.com/s3/buckets/sagemaker-ap-southeast-2-914211408554?region=ap-southeast-2&tab=objects)
s3_uploaded_data_path = sagemaker_session.upload_data(path="50_Startups.csv")

# Last expression of the cell: display the S3 URI the file was uploaded to.
s3_uploaded_data_path

's3://sagemaker-ap-southeast-2-914211408554/data/50_Startups.csv'

In [4]:
# Build the SageMaker SKLearn estimator: the training script, the container
# (framework / Python version), the training instance type, and the role and
# session obtained earlier in the notebook.
sklearn = SKLearn(
    role=role,
    sagemaker_session=sagemaker_session,
    entry_point="startup_prediction.py",
    framework_version="0.20.0",
    py_version="py3",
    instance_type="ml.m4.xlarge",
)

In [5]:
# Launch the training job, feeding the uploaded CSV as the 'train' channel.
# Job status: https://ap-southeast-2.console.aws.amazon.com/sagemaker/home?region=ap-southeast-2#/jobs
training_channels = {"train": s3_uploaded_data_path}
sklearn.fit(inputs=training_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2023-01-21-06-18-57-115


2023-01-21 06:18:57 Starting - Starting the training job...
2023-01-21 06:19:23 Starting - Preparing the instances for training.........
2023-01-21 06:20:34 Downloading - Downloading input data...
2023-01-21 06:20:58 Training - Downloading the training image...
2023-01-21 06:21:54 Uploading - Uploading generated training model
2023-01-21 06:21:45,949 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2023-01-21 06:21:45,952 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-01-21 06:21:45,963 sagemaker_sklearn_container.training INFO     Invoking user training script.
2023-01-21 06:21:46,173 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-01-21 06:21:46,186 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-01-21 06:21:46,200 sagemaker-training-toolkit INFO     No GPUs detected (normal if no


2023-01-21 06:22:05 Completed - Training job completed
Training seconds: 92
Billable seconds: 92


In [6]:
# Deploy the trained estimator behind a real-time endpoint (wait until you see '!' in the output).
# - endpoint: https://ap-southeast-2.console.aws.amazon.com/sagemaker/home?region=ap-southeast-2#/endpoints
# - model that's used in endpoint: https://ap-southeast-2.console.aws.amazon.com/sagemaker/home?region=ap-southeast-2#/models
deployment = sklearn.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")

# Endpoint name - use it to integrate the model with other services, e.g. when creating a Lambda function.
# `endpoint_name` replaces the `endpoint` attribute, which is deprecated in SageMaker SDK v2
# and emits a "See: https://sagemaker.readthedocs.io/en/stable/v2.html" notice when read.
print(deployment.endpoint_name)

# NOTE: the endpoint stays up until it is deleted; call deployment.delete_endpoint() once you are done with it.

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2023-01-21-06-23-51-611
INFO:sagemaker:Creating endpoint-config with name sagemaker-scikit-learn-2023-01-21-06-23-51-611
INFO:sagemaker:Creating endpoint with name sagemaker-scikit-learn-2023-01-21-06-23-51-611


------!

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


sagemaker-scikit-learn-2023-01-21-06-23-51-611


In [14]:
# One sample row for the endpoint. Values are given in the same order as the
# dataset's columns (assuming preprocessing did not reorder them).
sample_row = [165349.2, 136897.8, 471784.1, 1]

# predict() is sent a list of rows, so the single sample is wrapped in an outer list.
deployment.predict([sample_row])

array([192100.60897406])