In [1]:
import boto3
import itertools
import numpy as np
import pandas as pd
import os
import sagemaker
from sagemaker.sklearn.estimator import SKLearn

In [2]:
# S3 key prefix under which this notebook stores its objects
prefix = "smu-iot"

# Establish a SageMaker session
sagemaker_session = sagemaker.Session()

# IAM role passed to the estimator below. The default is the role this
# notebook was written against; set the SAGEMAKER_ROLE_ARN environment
# variable to run the notebook with a different role or in another account
# without editing the code.
role = os.environ.get(
    "SAGEMAKER_ROLE_ARN", "arn:aws:iam::371380984152:role/SageMaker-SMU-IOT"
)

In [3]:
# Download the Iris dataset and rewrite it in place as a label-first numeric CSV
os.makedirs("./data", exist_ok=True)
s3_client = boto3.client("s3")
# Plain string literals: the bucket name has no placeholders, so no f-prefix.
s3_client.download_file(
    "sagemaker-sample-files", "datasets/tabular/iris/iris.data", "./data/iris.csv"
)
df_iris = pd.read_csv("./data/iris.csv", header=None)

# Encode the species names held in column 4 as integer class ids.
species_to_id = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}
species_ids = df_iris[4].map(species_to_id)
# Series.map yields NaN for any value missing from the mapping. Fail here with
# a clear message instead of silently writing "nan" labels into the CSV.
if species_ids.isna().any():
    unknown = sorted(df_iris.loc[species_ids.isna(), 4].astype(str).unique())
    raise ValueError(f"Unrecognised species label(s) in iris data: {unknown}")
df_iris[4] = species_ids

# Put the label first, then the four feature columns, and overwrite the
# downloaded file rather than writing a second one. Because `fmt` is a full
# multi-column format string, it (not `delimiter`) determines the ", " separator.
iris = df_iris[[4, 0, 1, 2, 3]].to_numpy()
np.savetxt("./data/iris.csv", iris, delimiter=",", fmt="%1.1f, %1.3f, %1.3f, %1.3f, %1.3f")

In [4]:
# Push the local data directory to S3 under "<prefix>/<directory>"
WORK_DIRECTORY = "data"
s3_key_prefix = f"{prefix}/{WORK_DIRECTORY}"
# The value returned by upload_data is kept as the training input for fit().
train_input = sagemaker_session.upload_data(WORK_DIRECTORY, key_prefix=s3_key_prefix)

In [5]:
# Configure the scikit-learn estimator that will run the training script
FRAMEWORK_VERSION = "1.0-1"
# NOTE(review): an entry point named "sklearn.py" may shadow the scikit-learn
# package inside the training container if the script itself does
# `import sklearn`. That could explain the ExecuteUserScriptError in the
# training output below. Confirm, and consider renaming the script file.
script_path = "sklearn.py"

# Collect the constructor arguments first, then build the estimator from them.
estimator_settings = dict(
    entry_point=script_path,
    framework_version=FRAMEWORK_VERSION,
    instance_type="ml.m5.large",
    role=role,
    sagemaker_session=sagemaker_session,
    hyperparameters={"max_leaf_nodes": 30},
)
sklearn = SKLearn(**estimator_settings)

In [6]:
# Launch the training job, passing the uploaded data as the "train" channel
training_channels = {"train": train_input}
sklearn.fit(training_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2023-03-16-08-23-11-995


2023-03-16 08:23:13 Starting - Starting the training job...
2023-03-16 08:23:28 Starting - Preparing the instances for training...
2023-03-16 08:24:17 Downloading - Downloading input data...
2023-03-16 08:24:42 Training - Downloading the training image...
2023-03-16 08:25:22 Training - Training image download completed. Training in progress..2023-03-16 08:25:24,826 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2023-03-16 08:25:24,829 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-03-16 08:25:24,837 sagemaker_sklearn_container.training INFO     Invoking user training script.
2023-03-16 08:25:25,024 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-03-16 08:25:25,035 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-03-16 08:25:25,048 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-03-16 08:25:

UnexpectedStatusException: Error for Training job sagemaker-scikit-learn-2023-03-16-08-23-11-995: Failed. Reason: AlgorithmError: framework error: 
Traceback (most recent call last):
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_containers/_trainer.py", line 84, in train
    entrypoint()
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_sklearn_container/training.py", line 39, in main
    train(environment.Environment())
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_sklearn_container/training.py", line 31, in train
    entry_point.run(uri=training_environment.module_dir,
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_training/entry_point.py", line 99, in run
    return runner.get(runner_type, user_entry_point, args, env_vars, extra_opts).run(
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_training/process.py", line 286, in run
    process = check_error(
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_training/process.py", line 204, in check_error
    raise error_class(
sagemaker_training.errors.ExecuteUserScriptError: ExecuteUserScriptError:
ExitCode 1
Error

In [None]:
# Host the trained estimator behind an endpoint and keep the returned
# predictor for the inference cells that follow
predictor = sklearn.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

In [None]:
# Build a small evaluation slice from the prepared CSV
iris_frame = pd.read_csv("data/iris.csv", header=None)

# Row positions 40-49, 90-99 and 140-149, generated block by block
# (presumably the last ten rows of each class; confirm against the data layout).
block_starts = range(0, 150, 50)
rows_in_block = range(40, 50)
row_positions = [start + offset for start in block_starts for offset in rows_in_block]

# The final position is dropped by the [:-1] slice, leaving 29 rows.
test_data = iris_frame.iloc[row_positions[:-1]]

# Column 0 holds the label; the remaining columns are the features.
test_X, test_y = test_data.iloc[:, 1:], test_data.iloc[:, 0]

In [None]:
# Send the feature rows to the endpoint, then show its response followed by
# the true labels for comparison
endpoint_response = predictor.predict(test_X.values)
print(endpoint_response)
true_labels = test_y.values
print(true_labels)

In [None]:
# Delete the endpoint
# Tear down the endpoint behind `predictor` once testing is finished.
# NOTE(review): presumably the endpoint keeps incurring charges until it is
# deleted, so this cell should always be run. Confirm.
predictor.delete_endpoint()