Data Preparation

In [24]:
import urllib.request

urllib.request.urlretrieve('https://archive.ics.uci.edu/static/public/53/iris.zip','data.zip')

('data.zip', <http.client.HTTPMessage at 0x7fd6d7092050>)

In [25]:
!mkdir data

!unzip data.zip -d data/

mkdir: cannot create directory ‘data’: File exists
Archive:  data.zip
replace data/Index? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [26]:
#read data

import pandas as pd
data = pd.read_csv('data/iris.data', header = None)

print(data.head())

     0    1    2    3            4
0  5.1  3.5  1.4  0.2  Iris-setosa
1  4.9  3.0  1.4  0.2  Iris-setosa
2  4.7  3.2  1.3  0.2  Iris-setosa
3  4.6  3.1  1.5  0.2  Iris-setosa
4  5.0  3.6  1.4  0.2  Iris-setosa


In [27]:
data[4] = data[4].replace('Iris-setosa',0)
data[4] = data[4].replace('Iris-virginica',1)
data[4] = data[4].replace('Iris-versicolor',2)

  data[4] = data[4].replace('Iris-versicolor',2)


In [28]:
print(data.head())

     0    1    2    3  4
0  5.1  3.5  1.4  0.2  0
1  4.9  3.0  1.4  0.2  0
2  4.7  3.2  1.3  0.2  0
3  4.6  3.1  1.5  0.2  0
4  5.0  3.6  1.4  0.2  0


In [29]:
data = data.sample(frac=1).reset_index(drop=True)

print(data.tail())

       0    1    2    3  4
145  5.6  2.7  4.2  1.3  2
146  6.0  2.9  4.5  1.5  2
147  5.7  4.4  1.5  0.4  0
148  4.8  3.0  1.4  0.3  0
149  6.8  3.0  5.5  2.1  1


In [30]:
data = data[[4,0,1,2,3]]

In [31]:
train_data = data[:120]
val_data = data[120:]

In [32]:
import boto3

bucket_name = 'sagemaker-deploy-test'

train_data.to_csv('data.csv',header=False, index = False)
key = 'data/train/data'

url = 's3://{}/{}'.format(bucket_name,key)

In [33]:
train_data.to_csv('data.csv',header=False, index = False)
key = 'data/train/data'

url = 's3://{}/{}'.format(bucket_name,key)
boto3.Session().resource('s3').Bucket(bucket_name).Object(key).upload_file('data.csv')

val_data.to_csv('data.csv',header=False, index = False)
key = 'data/val/data'

url = 's3://{}/{}'.format(bucket_name,key)
boto3.Session().resource('s3').Bucket(bucket_name).Object(key).upload_file('data.csv')

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


Model

In [37]:
import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker import get_execution_role

bucket_name = 'sagemaker-deploy-test'
key = 'model/xgb_model'
s3_output_location = 's3://{}/{}'.format(bucket_name,key)

print(s3_output_location)

xgb_model = sagemaker.estimator.Estimator(
    get_image_uri(boto3.Session().region_name, 'xgboost'),
    get_execution_role(),
    train_instance_count =1,
    train_instance_type='ml.m4.xlarge',
    train_volume_size = 5,
    output_path = s3_output_location,
    sagemaker_session = sagemaker.Session()
)

xgb_model.set_hyperparameters(
    max_depth = 5,
    eta = 0.2,
    gamma = 4,
    min_child_weight=6,
    silent=0,
    objective = 'multi:softmax',
    num_class=3,
    num_round=10
)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


s3://sagemaker-deploy-test/model/xgb_model


### Training

In [38]:
train_data = 's3://{}/{}'.format(bucket_name,'data/train')
val_data = 's3://{}/{}'.format(bucket_name,'data/val')

train_channel = sagemaker.session.s3_input(train_data,content_type='text/csv')
val_channel = sagemaker.session.s3_input(val_data,content_type='text/csv')


data_channels = {'train':train_channel, 'validation':val_channel}
xgb_model.fit(inputs=data_channels)

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker:Creating training-job with name: xgboost-2024-07-01-14-57-51-736


2024-07-01 14:57:51 Starting - Starting the training job...
2024-07-01 14:58:05 Starting - Preparing the instances for training...
2024-07-01 14:58:36 Downloading - Downloading input data...
2024-07-01 14:59:11 Downloading - Downloading the training image......
2024-07-01 15:00:17 Training - Training image download completed. Training in progress.
2024-07-01 15:00:17 Uploading - Uploading generated training model[34mArguments: train[0m
[34m[2024-07-01:15:00:12:INFO] Running standalone xgboost training.[0m
[34m[2024-07-01:15:00:12:INFO] File size need to be processed in the node: 0.0mb. Available memory size in the node: 8450.2mb[0m
[34m[2024-07-01:15:00:12:INFO] Determined delimiter of CSV input is ','[0m
[34m[15:00:12] S3DistributionType set as FullyReplicated[0m
[34m[15:00:12] 120x4 matrix with 480 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,[0m
[34m[2024-07-01:15:00:12:INFO] Determined delimiter of CSV input is ','[0m
[34m[15:00:1

### Deploy

In [39]:
xgb_predictor = xgb_model.deploy(initial_instance_count=1,
                                instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: xgboost-2024-07-01-15-15-04-363
INFO:sagemaker:Creating endpoint-config with name xgboost-2024-07-01-15-15-04-363
INFO:sagemaker:Creating endpoint with name xgboost-2024-07-01-15-15-04-363


------!