## Import necessary libraries

In [1]:
import os

import boto3
import sagemaker
from sagemaker.sklearn import SKLearn
from sagemaker.sklearn.model import SKLearnModel

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\tochi\AppData\Local\sagemaker\sagemaker\config.yaml


## Train the model using SageMaker

In [2]:
# SageMaker session shared by the estimator and the model defined in later cells.
sagemaker_session = sagemaker.Session()

# Resolve the caller's AWS account ID through STS so the role ARN is not hard-coded.
account = boto3.client('sts').get_caller_identity().get('Account')

# Execution role ARN built from the account ID.
# The `f` prefix must sit outside the quotes; the earlier form (`"f"arn:...""`)
# was a SyntaxError and never interpolated `{account}`.
role = f"arn:aws:iam::{account}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"

In [3]:
# S3 locations: where the training CSV is read from, and where the trained
# model artifact is written.
training_input_path = "s3://modeldata1234567/train/train.csv"
output_path = "s3://sagemaker-us-east-2-930627915954/modeldata1234567/model_artifacts/"

# Hyperparameters handed to the training script (train.py).
training_hyperparameters = {"n_estimators": 4}

# Configure the scikit-learn training job. Development is local, but the job
# itself runs on the AWS instance type requested below.
sk_estimator = SKLearn(
    entry_point="train.py",
    framework_version="1.2-1",
    py_version="py3",
    role=role,
    sagemaker_session=sagemaker_session,
    instance_type="ml.c5.xlarge",
    instance_count=1,
    hyperparameters=training_hyperparameters,
    output_path=output_path,
)

# Launch training; the "train" channel points at the CSV in S3.
training_channels = {"train": training_input_path}
sk_estimator.fit(training_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2024-08-29-21-24-39-885


2024-08-29 21:24:40 Starting - Starting the training job...
2024-08-29 21:25:04 Starting - Preparing the instances for training...
2024-08-29 21:25:26 Downloading - Downloading input data...
2024-08-29 21:25:56 Downloading - Downloading the training image...
2024-08-29 21:26:42 Training - Training image download completed. Training in progress.
2024-08-29 21:26:42 Uploading - Uploading generated training model[34m2024-08-29 21:26:36,256 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-08-29 21:26:36,258 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-08-29 21:26:36,261 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2024-08-29 21:26:36,277 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-08-29 21:26:36,508 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34

## Deploy the model

In [5]:
# Deploy the artifact produced by the training job above.
# The S3 URI is read from the fitted estimator so this cell always serves the
# model that `sk_estimator.fit(...)` just produced. The previous hard-coded path
# pinned one specific 2024-08-29 training job, so re-running the notebook would
# train a new model but deploy the old one. To serve an older artifact instead,
# pass its s3://.../output/model.tar.gz URI as `model_data`.
sk_model = SKLearnModel(
    model_data=sk_estimator.model_data,
    role=role,
    framework_version='1.2-1',  # same framework version as the training job
    # NOTE(review): train.py is reused as the inference entry point, so it
    # presumably defines the model-loading hook as well — confirm.
    entry_point='train.py',
    sagemaker_session=sagemaker_session,  # reuse the session created during setup
)

# Create a real-time endpoint backed by a single instance.
predictor = sk_model.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-08-29-21-31-05-394
INFO:sagemaker:Creating endpoint-config with name sagemaker-scikit-learn-2024-08-29-21-31-06-448
INFO:sagemaker:Creating endpoint with name sagemaker-scikit-learn-2024-08-29-21-31-06-448


-----!

## Test the deployed endpoint

In [10]:
# Read the local test CSV, then separate the target column from the inputs.
test_data = pd.read_csv("data/iris_test.csv")
label = test_data['label']                  # the 'label' column, kept as the actual values
test_df = test_data.drop(columns='label')   # every remaining column is sent to the endpoint


In [12]:
# Show the actual labels, then send the inputs to the deployed endpoint and
# show what it predicts for the same rows.
actual_values = label.values
print('actual label values are: ', actual_values)

endpoint_predictions = predictor.predict(test_df.values)
print('Predicted values are: ', endpoint_predictions)

# NOTE(review): no endpoint teardown is visible in this notebook view; consider
# calling predictor.delete_endpoint() once testing is finished.

actual label values are:  [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Predicted values are:  [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 1 0 2 2 2 2 2 0 0]
