## Download the data

In [None]:

# The import must be a real statement: it previously sat at the end of a
# commented-out line, so `urllib` was undefined on a fresh kernel.
import urllib.request

# Fetch the Iris dataset archive from the UCI repository and save it locally as data.zip.
urllib.request.urlretrieve('https://archive.ics.uci.edu/static/public/53/iris.zip', 'data.zip')

In [None]:
!mkdir data
!unzip data.zip -d data/

## Data Preparation

In [None]:
import pandas as pd

# Seed for the shuffle so the train/validation split is reproducible across runs.
SEED = 42
# Fraction of rows used for training; the remainder is the validation set.
TRAIN_FRACTION = 0.8
# Species name -> integer class id.
LABEL_IDS = {'Iris-setosa': 0, 'Iris-virginica': 1, 'Iris-versicolor': 2}

# Read the raw file. It has no header row, so columns are numbered 0-4;
# column 4 holds the species name.
data = pd.read_csv('data/iris.data', header=None)

# Convert the labels to numerical values.
# .map(...).astype(int) produces a true integer column on any pandas version
# (no dependence on the 'future.no_silent_downcasting' option) and raises on an
# unexpected label instead of silently leaving a string in the column.
data[4] = data[4].map(LABEL_IDS).astype(int)

# Shuffle the rows (seeded) and renumber the index.
data = data.sample(frac=1, random_state=SEED).reset_index(drop=True)

# Move the label column to the front. SageMaker's built-in XGBoost expects the
# target in the first column of header-less CSV input.
data = data[[4, 0, 1, 2, 3]]
print(data.head())

# Split {80% training dataset, 20% validation dataset}; the boundary is derived
# from the row count rather than hard-coded.
n_train = int(len(data) * TRAIN_FRACTION)
train_data = data.iloc[:n_train]
val_data = data.iloc[n_train:]




## Move data into S3 bucket

In [None]:
import boto3

bucket_name = 'paravx1-sagemaker-demo-bucket'

# Write each split to the same local scratch file (no header, no index column)
# and upload it to its own object key in the bucket.
for key, split in (('data/train/data', train_data), ('data/val/data', val_data)):
    split.to_csv('data.csv', header=False, index=False)
    url = 's3://{}/{}'.format(bucket_name, key)
    s3_object = boto3.Session().resource('s3').Bucket(bucket_name).Object(key)
    s3_object.upload_file('data.csv')


## Create ML model

In [None]:
import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker import get_execution_role

# S3 location passed to the estimator as its output path.
key = 'model/xgb_model'
s3_output_location = url = 's3://{}/{}'.format(bucket_name, key)

# Estimator built on the XGBoost container image for the current region,
# running under this notebook's execution role.
xgb_model = sagemaker.estimator.Estimator(
    get_image_uri(boto3.Session().region_name, 'xgboost'),
    get_execution_role(),
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    train_volume_size=5,
    output_path=s3_output_location,
    # Fixed keyword: this was misspelled as `sagemaker_sesson`, so the session
    # was never passed under the name the Estimator expects.
    sagemaker_session=sagemaker.Session()
)

# Multi-class objective with three classes, matching the labels 0, 1 and 2
# assigned to the Iris species during data preparation.
xgb_model.set_hyperparameters(max_depth=5,
                             eta=0.2,
                             gamma=4,
                             min_child_weight=6,
                             silent=0,
                             objective='multi:softmax',
                             num_class=3,
                             num_round=10
)

## Train Model 

In [None]:
# S3 prefixes that hold the uploaded CSV files. They get their own names so the
# `train_data` / `val_data` DataFrames are not overwritten with strings, which
# would break re-running the upload cell.
train_s3_uri = 's3://{}/{}'.format(bucket_name, 'data/train')
val_s3_uri = 's3://{}/{}'.format(bucket_name, 'data/val')

# Describe both inputs as CSV and map them to the 'train' and 'validation' channels.
train_channel = sagemaker.session.s3_input(train_s3_uri, content_type='text/csv')
val_channel = sagemaker.session.s3_input(val_s3_uri, content_type='text/csv')
data_channels = {'train': train_channel, 'validation': val_channel}

# Start the training job with both channels.
xgb_model.fit(inputs=data_channels)

## Deploy the ML model

In [None]:
# Deploy the trained estimator on a single ml.m4.xlarge instance and keep the
# returned predictor object for later use.
xgb_predictor = xgb_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

## 1. Test the deployed model using AWS Lambda
## 2. Test the deployed model using API Gateway and Postman