### Load the Libraries

In [96]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn
# SageMaker session; used further down for uploading the CSV files to S3.
sagemaker_session = sagemaker.Session()
# Execution role that is handed to the SKLearn estimator when it is created.
role = get_execution_role()
# Region name read from the session's underlying boto session.
region = sagemaker_session.boto_session.region_name

### Load the data and do all the needed preprocessing

In [97]:
from sklearn import datasets

# Fetch the Iris dataset that ships with scikit-learn
iris = datasets.load_iris()

# Build the feature table and attach the class labels as a 'target' column
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Persist the table as CSV, leaving out the index column
df.to_csv('iris.csv', index=False)

In [98]:
df=pd.read_csv("iris.csv")

In [99]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [100]:
df.columns=["sepal_length","sepal_width","petal_length","petal_width","target"]

In [101]:
df.shape

(150, 5)

In [102]:
df.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
target          0
dtype: int64

In [103]:
y=df["target"]

In [104]:
# Stratified 80/20 split on the target column. The seed is fixed so that the
# train/test CSVs (and therefore the trained model) are reproducible on
# Restart & Run All.
train_data,test_data=train_test_split(df,test_size=0.2,stratify=y,random_state=42)

In [105]:
train_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
18,5.7,3.8,1.7,0.3,0
40,5.0,3.5,1.3,0.3,0
148,6.2,3.4,5.4,2.3,2
36,5.5,3.5,1.3,0.2,0
105,7.6,3.0,6.6,2.1,2
...,...,...,...,...,...
1,4.9,3.0,1.4,0.2,0
100,6.3,3.3,6.0,2.5,2
121,5.6,2.8,4.9,2.0,2
108,6.7,2.5,5.8,1.8,2


In [106]:
test_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
109,7.2,3.6,6.1,2.5,2
28,5.2,3.4,1.4,0.2,0
82,5.8,2.7,3.9,1.2,1
96,5.7,2.9,4.2,1.3,1
84,5.4,3.0,4.5,1.5,1
35,5.0,3.2,1.2,0.2,0
117,7.7,3.8,6.7,2.2,2
138,6.0,3.0,4.8,1.8,2
134,6.1,2.6,5.6,1.4,2
21,5.1,3.7,1.5,0.4,0


In [107]:
train_data.to_csv('iris_train.csv',index=False)

In [108]:
test_data.to_csv('iris_test.csv',index=False)

### Upload the train and test data to the S3 bucket and specify the model directory

In [109]:
train_file = 'iris_train.csv'
test_file = 'iris_test.csv'

In [110]:
# Target bucket and the key prefixes for each kind of artifact
bucket_name = 'ml-exp-storage'

training_folder = 'ml-iris/train'
test_folder = 'ml-iris/test'
model_folder = 'ml-iris/model/'

# Full s3:// URIs assembled from the bucket name and the prefixes above
training_data_loc = f's3://{bucket_name}/{training_folder}'
testing_data_loc = f's3://{bucket_name}/{test_folder}'
model_data_loc = f's3://{bucket_name}/{model_folder}'

In [111]:
sagemaker_session.upload_data(train_file,
                              bucket=bucket_name, 
                              key_prefix=training_folder)

's3://ml-exp-storage/ml-iris/train/iris_train.csv'

In [112]:
sagemaker_session.upload_data(test_file, 
                              bucket=bucket_name, 
                              key_prefix=test_folder)

's3://ml-exp-storage/ml-iris/test/iris_test.csv'

### Start the training

In [116]:
instance_type='ml.m5.xlarge'

In [117]:
# Configure the scikit-learn training job:
#   - entry_point: the training script (train.py is not part of this notebook)
#   - output_path: S3 location under which the model artifact is written
#   - base_job_name: prefix of the generated training-job name
#   - hyperparameters: handed to the training script; presumably the
#     n_estimators / max_depth of a tree ensemble -- TODO confirm in train.py
estimator = SKLearn(entry_point='train.py',
                    framework_version = "0.23-1",
                    py_version = 'py3',
                    instance_type= instance_type,                     
                    role=role, 
                    output_path=model_data_loc,
                    base_job_name='sklearn-iris',
                    hyperparameters={'n_estimators':50,'max_depth':5})



In [118]:
estimator.fit({'training':training_data_loc,'testing':testing_data_loc})

Using provided s3_resource


INFO:sagemaker:Creating training-job with name: sklearn-iris-2023-10-08-12-42-27-423


2023-10-08 12:42:28 Starting - Starting the training job...
2023-10-08 12:42:43 Starting - Preparing the instances for training......
2023-10-08 12:43:29 Downloading - Downloading input data...
2023-10-08 12:44:25 Training - Training image download completed. Training in progress.
2023-10-08 12:44:25 Uploading - Uploading generated training model.[34m2023-10-08 12:44:19,247 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2023-10-08 12:44:19,251 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-08 12:44:19,298 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2023-10-08 12:44:19,481 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-08 12:44:19,494 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-08 12:44:19,507 sagemaker-training-toolkit INFO     No GPUs d

### Deploy the model as an endpoint

In [119]:
predictor = estimator.deploy(initial_instance_count=1, 
                           instance_type=instance_type,endpoint_name="checkIrisV3")

INFO:sagemaker:Creating model with name: sklearn-iris-2023-10-08-12-45-10-263
INFO:sagemaker:Creating endpoint-config with name checkIrisV3
INFO:sagemaker:Creating endpoint with name checkIrisV3


----!

In [120]:
estimator.model_data

's3://ml-exp-storage/ml-iris/model/sklearn-iris-2023-10-08-12-42-27-423/output/model.tar.gz'

### Endpoint testing 

In [121]:
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer,CSVDeserializer

In [122]:
predictor.serializer = CSVSerializer()

In [123]:
predictor.deserializer=CSVDeserializer()

In [124]:
test_data=[{'sepal_length': 6.7,
  'sepal_width': 3.1,
  'petal_length': 5.6,
  'petal_width': 2.4},
 {'sepal_length': 5.5,
  'sepal_width': 2.3,
  'petal_length': 4.0,
  'petal_width': 1.3},
 {'sepal_length': 5.1,
  'sepal_width': 3.3,
  'petal_length': 1.7,
  'petal_width': 0.5},
 {'sepal_length': 5.7,
  'sepal_width': 2.5,
  'petal_length': 5.0,
  'petal_width': 2.0},
 {'sepal_length': 5.0,
  'sepal_width': 2.0,
  'petal_length': 3.5,
  'petal_width': 1.0}]

In [125]:
df1=pd.DataFrame(test_data)
df1

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,6.7,3.1,5.6,2.4
1,5.5,2.3,4.0,1.3
2,5.1,3.3,1.7,0.5
3,5.7,2.5,5.0,2.0
4,5.0,2.0,3.5,1.0


In [126]:
csv_body=df1.to_csv(index=False)
csv_body

'sepal_length,sepal_width,petal_length,petal_width\n6.7,3.1,5.6,2.4\n5.5,2.3,4.0,1.3\n5.1,3.3,1.7,0.5\n5.7,2.5,5.0,2.0\n5.0,2.0,3.5,1.0\n'

In [127]:
df1=pd.DataFrame(test_data)
csv_body=df1.to_csv(index=False)

In [128]:
predictor.predict(csv_body)

[['2', '1', '0', '2', '1']]

### Endpoint testing using boto3

In [129]:
import os
import io
import boto3
import json
import csv
import pandas as pd
client = boto3.client(service_name='sagemaker-runtime')


def transform_data(values):
    """Replace numeric class labels in a response string with Iris species names.

    Parameters
    ----------
    values : str
        Raw prediction text, e.g. ``"2,1,0,2,1"``.

    Returns
    -------
    str
        ``values`` with every stand-alone label ``0`` / ``1`` / ``2`` replaced by
        ``Iris-setosa`` / ``Iris-versicolor`` / ``Iris-virginica``. Separators and
        all other text are returned unchanged.

    Notes
    -----
    Only whole tokens are substituted. Digits that are part of a longer number
    (``"10"``, ``"2.0"``, ``"-1"``) are left alone; a plain character-wise
    ``str.replace`` would rewrite the digits inside them.
    """
    import re  # local import: the notebook's import cells do not bring in `re`

    species_by_label = {
        "0": "Iris-setosa",
        "1": "Iris-versicolor",
        "2": "Iris-virginica",
    }

    # Echo the raw response so it shows up in the cell output.
    print(values)

    # The lookarounds reject a digit that touches a word character, '.' or '-',
    # i.e. a digit that is only a fragment of a larger token.
    return re.sub(
        r"(?<![\w.\-])[012](?![\w.])",
        lambda match: species_by_label[match.group()],
        values,
    )
        

def give_output(event, endpoint_name='checkIrisV3'):
    """Send records to a SageMaker endpoint and return the predicted species names.

    Parameters
    ----------
    event : list of dict
        Records to score; anything ``pd.DataFrame`` accepts. The notebook passes
        dicts keyed by ``sepal_length``, ``sepal_width``, ``petal_length`` and
        ``petal_width``.
    endpoint_name : str, optional
        Name of the endpoint to invoke. Defaults to ``'checkIrisV3'``, the
        endpoint deployed earlier in this notebook.

    Returns
    -------
    str
        The endpoint's response with numeric labels replaced by species names
        (see ``transform_data``).
    """
    # Serialise the records as CSV text; the header row is included and the
    # index column is omitted.
    payload = pd.DataFrame(event).to_csv(index=False)

    response = client.invoke_endpoint(
        EndpointName=endpoint_name,
        Body=payload,
        ContentType='text/csv',
    )

    # The response body is a byte stream; read it fully and decode as UTF-8.
    raw_predictions = response['Body'].read().decode('utf-8')
    return transform_data(raw_predictions)

In [130]:
test_data=[{'sepal_length': 6.7,
  'sepal_width': 3.1,
  'petal_length': 5.6,
  'petal_width': 2.4},
 {'sepal_length': 5.5,
  'sepal_width': 2.3,
  'petal_length': 4.0,
  'petal_width': 1.3},
 {'sepal_length': 5.1,
  'sepal_width': 3.3,
  'petal_length': 1.7,
  'petal_width': 0.5},
 {'sepal_length': 5.7,
  'sepal_width': 2.5,
  'petal_length': 5.0,
  'petal_width': 2.0},
 {'sepal_length': 5.0,
  'sepal_width': 2.0,
  'petal_length': 3.5,
  'petal_width': 1.0}]

In [131]:
give_output(test_data)

2,1,0,2,1


'Iris-virginica,Iris-versicolor,Iris-setosa,Iris-virginica,Iris-versicolor'

### Create endpoint from endpoint configuration 

In [None]:
import boto3

# Keep the boto3 client under its own name: binding it to `sagemaker` would
# shadow the `sagemaker` SDK module imported in the first cell and break any
# earlier cell (e.g. `sagemaker.Session()`) that is re-run in the same kernel.
sagemaker_client = boto3.client('sagemaker')
endpoint_name = "irisv4"

# Create a second endpoint from the existing endpoint configuration "checkIrisV3".
endpoint = sagemaker_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName="checkIrisV3",
)

print(f'Endpoint ARN: {endpoint["EndpointArn"]}')
