### Load the Libraries

In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn
# Session object used further down for the S3 uploads.
sagemaker_session = sagemaker.Session()
# Execution role handed to the SKLearn estimator further down.
role = get_execution_role()
# Region name taken from the session's boto session (not referenced again in the cells shown here).
region = sagemaker_session.boto_session.region_name

### Load the data and do all the needed preprocessing

In [3]:
from sklearn import datasets

# Fetch the Iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# Assemble the feature matrix and the class label into one DataFrame
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Persist to disk; the index is left out so it is not written as an extra column
df.to_csv('iris.csv', index=False)

In [17]:
# Reload the dataset from the CSV file written by the previous cell.
df=pd.read_csv("iris.csv")

In [18]:
# Preview the first rows to check that the file was read as expected.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [19]:
# Replace the original headers (which contain spaces and units) with snake_case names.
# The assignment is positional, so the order here must match the column order in the CSV.
df.columns=["sepal_length","sepal_width","petal_length","petal_width","target"]

In [20]:
# Sanity check on the dimensions: 150 rows, 4 features plus the target column.
df.shape

(150, 5)

In [21]:
# Count missing values per column.
df.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
target          0
dtype: int64

In [10]:
# Class-label column of the dataset.
y=df["target"]

In [22]:
# Stratified 80/20 split. The label column is read from `df` directly so this cell
# does not depend on a variable created in another (possibly stale) cell, and the
# seed is fixed so the split -- and the train/test files derived from it -- is the
# same on every run.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    stratify=df["target"],
    random_state=42,
)

In [23]:
# Inspect the training split (pandas abbreviates the middle rows in the rendered output).
train_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
65,6.7,3.1,4.4,1.4,1
51,6.4,3.2,4.5,1.5,1
26,5.0,3.4,1.6,0.4,0
74,6.4,2.9,4.3,1.3,1
77,6.7,3.0,5.0,1.7,1
...,...,...,...,...,...
81,5.5,2.4,3.7,1.0,1
61,5.9,3.0,4.2,1.5,1
69,5.6,2.5,3.9,1.1,1
54,6.5,2.8,4.6,1.5,1


In [24]:
# Inspect the held-out test split.
test_data

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
56,6.3,3.3,4.7,1.6,1
97,6.2,2.9,4.3,1.3,1
125,7.2,3.2,6.0,1.8,2
35,5.0,3.2,1.2,0.2,0
23,5.1,3.3,1.7,0.5,0
88,5.6,3.0,4.1,1.3,1
45,4.8,3.0,1.4,0.3,0
108,6.7,2.5,5.8,1.8,2
111,6.4,2.7,5.3,1.9,2
115,6.4,3.2,5.3,2.3,2


In [25]:
# Write the training split to a local CSV file, leaving out the index column.
train_data.to_csv('iris_train.csv',index=False)

In [26]:
# Write the test split to a local CSV file, leaving out the index column.
test_data.to_csv('iris_test.csv',index=False)

### Upload the train and test data to the S3 bucket and specify the model directory

In [31]:
# Local file names of the two splits; these are the files uploaded to S3 below.
train_file = 'iris_train.csv'
test_file = 'iris_test.csv'

In [32]:
# Target bucket and the key prefixes used for each kind of artifact.
bucket_name = 'ml-exp-storage'

training_folder = 'ml-iris/train'
test_folder = 'ml-iris/test'
model_folder = 'ml-iris/model/'

# Full S3 URIs derived from the bucket name and the prefixes above.
training_data_loc = f's3://{bucket_name}/{training_folder}'
testing_data_loc = f's3://{bucket_name}/{test_folder}'
model_data_loc = f's3://{bucket_name}/{model_folder}'

In [33]:
# Upload the training CSV under the training prefix; the cell displays the returned S3 URI.
sagemaker_session.upload_data(
    train_file,
    key_prefix=training_folder,
    bucket=bucket_name,
)

's3://ml-exp-storage/ml-iris/train/iris_train.csv'

In [34]:
# Upload the test CSV under the test prefix; the cell displays the returned S3 URI.
sagemaker_session.upload_data(
    test_file,
    key_prefix=test_folder,
    bucket=bucket_name,
)

's3://ml-exp-storage/ml-iris/test/iris_test.csv'

### Start the training

In [36]:
# Instance type passed to both the training estimator and the endpoint deployment below.
instance_type='ml.m5.xlarge'

In [37]:
# Configure the scikit-learn training job: train.py is the entry script,
# output_path points at the model folder in S3, and the two hyperparameters
# are supplied alongside the script.
estimator = SKLearn(
    entry_point='train.py',
    role=role,
    instance_type=instance_type,
    framework_version="0.23-1",
    py_version='py3',
    base_job_name='sklearn-iris',
    output_path=model_data_loc,
    hyperparameters={'n_estimators': 50, 'max_depth': 5},
)

NOTEBOOK_METADATA_FILE detected but failed to get valid domain and user from it.


In [38]:
# Start the training job with two input channels, 'training' and 'testing',
# each mapped to its S3 prefix.
estimator.fit({'training':training_data_loc,'testing':testing_data_loc})

INFO:sagemaker:Creating training-job with name: sklearn-iris-2023-10-07-13-15-31-385


Using provided s3_resource
2023-10-07 13:15:31 Starting - Starting the training job...
2023-10-07 13:15:47 Starting - Preparing the instances for training......
2023-10-07 13:16:53 Downloading - Downloading input data......
2023-10-07 13:17:59 Training - Training image download completed. Training in progress.
2023-10-07 13:17:59 Uploading - Uploading generated training model[34m2023-10-07 13:17:49,775 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2023-10-07 13:17:49,777 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-07 13:17:49,817 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2023-10-07 13:17:49,973 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-07 13:17:49,985 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-10-07 13:17:49,997 sagemaker-traini

### Deploy the model as an endpoint

In [39]:
# Host the trained model on a single instance behind the named endpoint.
predictor = estimator.deploy(
    endpoint_name="checkIrisV3",
    instance_type=instance_type,
    initial_instance_count=1,
)

INFO:sagemaker:Creating model with name: sklearn-iris-2023-10-07-13-21-07-325
INFO:sagemaker:Creating endpoint-config with name checkIrisV3
INFO:sagemaker:Creating endpoint with name checkIrisV3


----!

### Endpoint testing 

In [113]:
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer,CSVDeserializer

In [114]:
# Send request payloads to the endpoint as CSV.
predictor.serializer = CSVSerializer()

In [116]:
# Parse endpoint responses as CSV.
predictor.deserializer=CSVDeserializer()

In [117]:
# Define the request payload in this cell so the call works on a fresh kernel
# (Restart & Run All); otherwise `data_str` exists only if a cell further down
# has already been executed. The payload is a CSV header row followed by
# three samples.
data_str = 'sepal_length,sepal_width,petal_length,petal_width\n4.9,2.5,4.5,1.7\n6.8,3.0,5.5,2.1\n6.7,3.3,5.7,2.5\n'
predictor.predict(data_str)

[['2', '2', '2']]

In [105]:
# CSV request payload: a header row followed by three samples, one per line,
# in the order sepal_length, sepal_width, petal_length, petal_width.
data_str = 'sepal_length,sepal_width,petal_length,petal_width\n4.9,2.5,4.5,1.7\n6.8,3.0,5.5,2.1\n6.7,3.3,5.7,2.5\n'


In [118]:
import csv
import io
import json
import os
import re

import boto3
import pandas as pd
# SageMaker runtime client used by lambda_handler to invoke the endpoint.
client = boto3.client(service_name='sagemaker-runtime')


def transform_data(values):
        """Translate numeric class ids in an endpoint response into Iris species names.

        Args:
            values: Response text containing the class ids "0", "1" and "2",
                for example ``"2,0,1"``.

        Returns:
            The same text with every stand-alone "0", "1" or "2" replaced by
            "Iris-setosa", "Iris-versicolor" or "Iris-virginica". Separators,
            whitespace and every other token are returned unchanged.
        """
        label_names = {
                "0": "Iris-setosa",
                "1": "Iris-versicolor",
                "2": "Iris-virginica",
        }
        # Echo the raw response so the untranslated ids stay visible in the output.
        print(values)
        # Only a digit standing on its own is a class id. Character-level
        # str.replace also rewrote digits inside longer tokens such as "10" or
        # "1.0", and rescanned the whole string once per character.
        return re.sub(r"(?<![\w.+-])[012](?![\w.])",
                      lambda match: label_names[match.group(0)],
                      values)
        

def lambda_handler(event, endpoint_name='checkIrisV3'):
        """Send a CSV payload to the deployed endpoint and return species names.

        Args:
            event: CSV text, passed unchanged as the request body.
            endpoint_name: Name of the endpoint to invoke. Defaults to
                'checkIrisV3', the endpoint created by ``estimator.deploy`` in
                this notebook; the earlier hard-coded 'checkIrisV2' is not
                created anywhere in the notebook.

        Returns:
            The UTF-8 decoded response after ``transform_data`` has replaced
            the numeric class ids with species names.
        """
        response = client.invoke_endpoint(EndpointName=endpoint_name,
                                          Body=event,
                                          ContentType='text/csv')
        payload = response['Body'].read().decode('utf-8')
        return transform_data(payload)

In [119]:
# Invoke the handler with the CSV payload: the printed line is the raw endpoint
# response, the displayed value is the translated result.
lambda_handler(data_str)

2,2,2


'Iris-virginica,Iris-virginica,Iris-virginica'