In [1]:
import sagemaker

In [2]:
# Look up the execution role and open the SageMaker session used by the
# rest of the notebook; the two calls do not depend on each other.
role = sagemaker.get_execution_role()
session = sagemaker.Session()

# Push data to S3 bucket

In [3]:
# Local dataset and the S3 locations used for the upload and the model output.
DATA_FILE = './data/cancer.csv'
bucket_name = 'rsjainaimlmodels'
prefix = 'cancer/data'
s3_output_path = 's3://' + bucket_name

# Copy the local CSV into the bucket under `prefix`; the returned S3 URI is
# passed to the training job further down.
print('------------>uploading data to s3')
s3_data_path = session.upload_data(DATA_FILE, bucket_name, prefix)
print('------------>data uploaded to -', s3_data_path)
print('------------>model would be uploaded to -', s3_output_path)

------------>uploading data to s3
------------>data uploaded to - s3://rsjainaimlmodels/cancer/data/cancer.csv
------------>model would be uploaded to - s3://rsjainaimlmodels


# Create the scikit-learn Estimator (executor)

In [4]:
from sagemaker.sklearn import SKLearn

# Configure the SageMaker scikit-learn estimator: `train.py` is the entry
# point, and the trained model artifact is written under `s3_output_path`.
executor = SKLearn(
    entry_point='train.py',
    role=role,
    sagemaker_session=session,
    train_instance_type='ml.c4.xlarge',
    output_path=s3_output_path,
    hyperparameters={'max_depth': 15, 'max_features': 20},
)

In [5]:
# Launch the training job with the uploaded CSV as its input.
executor.fit(inputs=s3_data_path)

2020-05-09 18:58:48 Starting - Starting the training job...
2020-05-09 18:58:50 Starting - Launching requested ML instances......
2020-05-09 18:59:55 Starting - Preparing the instances for training...
2020-05-09 19:00:42 Downloading - Downloading input data...
2020-05-09 19:01:13 Training - Downloading the training image...
2020-05-09 19:01:45 Uploading - Uploading generated training model
2020-05-09 19:01:45 Completed - Training job completed
[34m2020-05-09 19:01:33,238 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-05-09 19:01:33,240 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-05-09 19:01:33,250 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-05-09 19:01:33,519 sagemaker-containers INFO     Module train does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-05-09 19:01:33,519 sagemaker-containers INFO     Generating setup.cf

# Deploy Model

In [6]:
# Stand up a single-instance endpoint for the trained model, then report
# the endpoint name so it can be located (and deleted) later.
predictor = executor.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)
print('\nModel Deployed!')
print(predictor.endpoint)

---------------!
Model Deployed!
sagemaker-scikit-learn-2020-05-09-18-58-47-922


# Predict

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Score the deployed endpoint on a held-out slice of the local CSV.
data = pd.read_csv(DATA_FILE, engine='python')
X = data.iloc[:, 1:31]  # columns 1..30 are used as the features
y = data.iloc[:, 31]    # column 31 is used as the label

# Fix the seed so the split -- and therefore the reported F1 -- is the same on
# every Restart & Run All; without it each run scores a different random slice.
# NOTE(review): train.py is not visible here. If it fits on the whole uploaded
# file, these test rows were seen during training and the score is optimistic.
train_x, test_x, train_y, test_y = train_test_split(X, y, random_state=42)

# The endpoint is called with the raw NumPy array of the test features.
pred_y = predictor.predict(test_x.values)
print(f1_score(test_y, pred_y))

0.9891304347826086


# Delete Model Endpoint

In [9]:
# Tear down the endpoint created by the deploy step.
session.delete_endpoint(endpoint_name=predictor.endpoint)