In [126]:
# Import libraries
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker import get_execution_role
import numpy as np                                
import pandas as pd                               
import matplotlib.pyplot as plt                   
from IPython.display import Image                 
from IPython.display import display               
from time import gmtime, strftime                 
from sagemaker.predictor import csv_serializer   
import io
import time
import json
import sagemaker.amazon.common as smac
%matplotlib inline
# XGBoost container image for each supported AWS region.
containers = {
    'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',
    'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',
    'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
    'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',
}

# Region of the current boto3 session.
my_region = boto3.session.Session().region_name

# A region that is not listed in `containers` raises KeyError on the lookup below.
print(
    "Success - the MySageMakerInstance is in the " + my_region
    + " region. You will use the " + containers[my_region]
    + " container for your SageMaker endpoint."
)

Success - the MySageMakerInstance is in the us-east-1 region. You will use the 811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest container for your SageMaker endpoint.


In [127]:
# Re-imported here so this configuration cell can be run on its own.
import sagemaker
from sagemaker import get_execution_role

# S3 bucket and key prefix under which this notebook reads and writes its data.
bucket_name = 'demo-saeed'
prefix = 'fraudcredit-pipeline-boto3'

# Session object used below for S3 uploads.
sagemaker_session = sagemaker.Session()

# SageMaker-compatible role used by this notebook instance; passed to every
# job and model created later in the notebook.
role = get_execution_role()

In [128]:
!tar -czvf sklearn_fd_featurizer.tar.gz sklearn_fd_featurizer.py

# Upload the packaged featurizer script to S3; `src_path` is the value returned
# by upload_data and is later passed as 'sagemaker_submit_directory'.
featurizer_archive = 'sklearn_fd_featurizer.tar.gz'
featurizer_key_prefix = '{}/{}'.format(prefix, 'src_path')
src_path = sagemaker_session.upload_data(
    path=featurizer_archive,
    bucket=bucket_name,
    key_prefix=featurizer_key_prefix,
)
print(src_path)

sklearn_fd_featurizer.py
s3://demo-saeed/fraudcredit-pipeline-boto3/src_path/sklearn_fd_featurizer.tar.gz


In [129]:
import datetime

# Second-resolution timestamp (YYYY-MM-DD-HH-MM-SS) used as a suffix for every
# resource name created in this notebook.
# NOTE: the name `time` shadows the `time` module imported in the first cell.
# It is kept because later cells build their resource names from this variable.
time = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')
training_job_name = 'fd-preprocess-model-job-{}'.format(time)

sm = boto3.client('sagemaker')

# Launch the scikit-learn featurizer as a SageMaker training job. The job reads
# the raw data under <prefix>/raw and writes its model artifact under
# <prefix>/preprocessed-model/<job name>/output/.
resp = sm.create_training_job(
    TrainingJobName=training_job_name,
    AlgorithmSpecification={
        'TrainingInputMode': 'File',
        'TrainingImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3',
    },
    RoleArn=role,
    InputDataConfig=[
        {
            'ChannelName': 'train',
            'DataSource': {
                'S3DataSource': {
                    'S3DataType': 'S3Prefix',
                    'S3Uri': 's3://{}/{}/raw'.format(bucket_name, prefix),
                    'S3DataDistributionType': 'FullyReplicated',
                }
            },
        },
    ],
    OutputDataConfig={
        'S3OutputPath': 's3://{}/{}/preprocessed-model'.format(bucket_name, prefix)
    },
    ResourceConfig={
        'InstanceType': 'ml.m4.xlarge',
        'InstanceCount': 1,
        'VolumeSizeInGB': 30,
    },
    StoppingCondition={
        'MaxRuntimeInSeconds': 600
    },
    HyperParameters={
        'sagemaker_program': "sklearn_fd_featurizer.py",
        'sagemaker_region': "us-east-1",
        'sagemaker_job_name': training_job_name,
        'sagemaker_submit_directory': src_path
    },
    Tags=[]
)

# create_training_job returns as soon as the job is accepted. Block until it
# finishes so that the cells below, which reference the job's model.tar.gz,
# also work under "Restart Kernel -> Run All".
sm.get_waiter('training_job_completed_or_stopped').wait(TrainingJobName=training_job_name)

# The waiter also returns for a stopped job, so verify the final status.
training_description = sm.describe_training_job(TrainingJobName=training_job_name)
if training_description['TrainingJobStatus'] != 'Completed':
    raise RuntimeError('Training job {} ended with status {}: {}'.format(
        training_job_name,
        training_description['TrainingJobStatus'],
        training_description.get('FailureReason', 'no failure reason reported')))

In [130]:
# Display the generated training job name so it is recorded in the notebook output.
training_job_name

'fd-preprocess-model-job-2019-05-29-17-51-58'

In [131]:
# s3_model_dir = sagemaker.s3_input(s3_data='s3://{}/{}/preprocessed-output/'.format(bucket_name, prefix))
# s3_model_dir.config

In [132]:
# Register the trained featurizer as a SageMaker model that runs the
# scikit-learn container with the uploaded featurizer script.
sm = boto3.client('sagemaker')
container = '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3'
myModelName = 'fd-preprocessing-model-{}'.format(time)
try:
    # Artifact written by the preprocessing training job.
    featurizer_artifact = 's3://{}/{}/preprocessed-model/{}/output/model.tar.gz'.format(bucket_name, prefix,training_job_name)
    # Script name, region and source archive that the container is given.
    featurizer_environment = {
        'SAGEMAKER_PROGRAM': 'sklearn_fd_featurizer.py',
        'SAGEMAKER_REGION':'us-east-1',
        'SAGEMAKER_SUBMIT_DIRECTORY': 's3://{}/{}/src_path/sklearn_fd_featurizer.tar.gz'.format(bucket_name, prefix),
    }
    sm.create_model(
        ModelName=myModelName,
        PrimaryContainer={
            'Image': container,
            'ModelDataUrl': featurizer_artifact,
            'Environment': featurizer_environment,
        },
        ExecutionRoleArn=role,
    )
except Exception as e:
    # Print the failure for the notebook reader, then propagate it.
    print(e)
    print('Unable to create model.')
    raise e

In [133]:
# Display the name of the preprocessing model created above.
myModelName

'fd-preprocessing-model-2019-05-29-17-51-58'

In [134]:
# Run the preprocessing model as a batch transform over the raw CSV data and
# write the featurized rows under <prefix>/transformer_output/.
sm = boto3.client('sagemaker')
transform_job_name = 'fd-TransformJobName2-{}'.format(time)
response = sm.create_transform_job(
    TransformJobName=transform_job_name,
    ModelName=myModelName,
    MaxConcurrentTransforms=1,
    MaxPayloadInMB=6,
    BatchStrategy='MultiRecord',
    TransformInput={
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': 's3://{}/{}/raw'.format(bucket_name, prefix)
            }
        },
        'ContentType': 'text/csv',
        'CompressionType': 'None',
        'SplitType': 'Line'
    },
    TransformOutput={
        'S3OutputPath': 's3://{}/{}/transformer_output/'.format(bucket_name, prefix),
        'Accept': 'text/csv',
        'AssembleWith': 'Line'
    },
    TransformResources={
        'InstanceType': 'ml.m5.4xlarge',
        'InstanceCount': 1
    },
    Tags=[]
)

# create_transform_job is asynchronous. Wait for the job to finish before the
# next cells try to read its output file from S3.
sm.get_waiter('transform_job_completed_or_stopped').wait(TransformJobName=transform_job_name)

# The waiter also returns for a stopped job, so verify the final status.
transform_description = sm.describe_transform_job(TransformJobName=transform_job_name)
if transform_description['TransformJobStatus'] != 'Completed':
    raise RuntimeError('Transform job {} ended with status {}: {}'.format(
        transform_job_name,
        transform_description['TransformJobStatus'],
        transform_description.get('FailureReason', 'no failure reason reported')))

In [137]:
# S3 URI of the file written by the batch transform job.
transform_output_name = 'creditcard_noheader.csv.out'
data_location = 's3://{}/{}/transformer_output/{}'.format(bucket_name, prefix, transform_output_name)
data_location

's3://demo-saeed/fraudcredit-pipeline-boto3/transformer_output/creditcard_noheader.csv.out'

In [138]:
# Load the transformed data straight from S3. The file has no header row, so
# pandas assigns integer column labels starting at 0.
data = pd.read_csv(data_location, header=None)
# Preview the first rows.
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,0.0,-1.996583,-0.694242,-0.044075,1.672773,0.973366,-0.245117,0.347068,0.193679,0.082637,...,0.326118,-0.024923,0.382854,-0.176911,0.110507,0.246585,-0.39217,0.330892,-0.063781,0.244964
1,0.0,-1.996583,0.608496,0.161176,0.109797,0.316523,0.043483,-0.06182,-0.0637,0.071253,...,-0.089611,-0.307377,-0.880077,0.162201,-0.561131,0.320694,0.261069,-0.022256,0.044608,-0.342475
2,0.0,-1.996562,-0.6935,-0.811578,1.169468,0.268231,-0.364572,1.351454,0.639776,0.207373,...,0.680975,0.337632,1.063358,1.45632,-1.138092,-0.628537,-0.288447,-0.137137,-0.181021,1.160686
3,0.0,-1.996562,-0.493325,-0.112169,1.182516,-0.609727,-0.007469,0.93615,0.192071,0.316018,...,-0.269855,-0.147443,0.007267,-0.304777,-1.941027,1.241904,-0.460217,0.155396,0.186189,0.140534
4,0.0,-1.996541,-0.59133,0.531541,1.021412,0.284655,-0.295015,0.071999,0.479302,-0.22651,...,0.529939,-0.012839,1.100011,-0.220123,0.23325,-0.395202,1.041611,0.54362,0.651816,-0.073403


In [139]:
# Column 0 is used as the target; every remaining column is a feature.
y = data[0]
X = data.iloc[:, 1:]

In [140]:
# Check the dimensions (rows, feature columns) of the feature matrix.
X.shape

(284807, 30)

In [141]:
from sklearn.model_selection import train_test_split

# Two-stage split with a fixed seed: first hold out 20% of all rows as the test
# set, then hold out 20% of the remainder as the validation set.
X_remaining, X_test, y_remaining, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(
    X_remaining, y_remaining, test_size=0.2, random_state=0)

# Report the size of each split and confirm that they add up to the full dataset.
n_train, n_val, n_test = len(X_train), len(X_val), len(X_test)
print("Number transactions train dataset: ", n_train)
print("Number transactions Validation dataset: ", n_val)
print("Number transactions test dataset: ", n_test)
print("Total number of transactions: ", n_train + n_val + n_test)




Number transactions train dataset:  182276
Number transactions Validation dataset:  45569
Number transactions test dataset:  56962
Total number of transactions:  284807


### Upload training set

In [147]:
import sagemaker

In [148]:
# Serialize the training split into an in-memory buffer using the dense-tensor
# writer from sagemaker.amazon.common (features and labels as float32).
f = io.BytesIO()
smac.write_numpy_to_dense_tensor(f, X_train.values.astype('float32'), y_train.values.reshape(-1).astype('float32'))
f.seek(0)  # rewind so the upload starts at the beginning of the buffer

# S3 object keys always use '/', so build the key explicitly instead of with
# os.path.join, which uses the operating system's path separator.
train_key = '{}/{}/{}'.format(prefix, 'train', 'linear_train.data')
boto3.Session().resource('s3').Bucket(bucket_name).Object(train_key).upload_fileobj(f)

# Training channel pointing at the uploaded prefix.
s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket_name, prefix))
s3_input_train.config

{'DataSource': {'S3DataSource': {'S3DataDistributionType': 'FullyReplicated',
   'S3DataType': 'S3Prefix',
   'S3Uri': 's3://demo-saeed/fraudcredit-pipeline-boto3/train'}}}

### Upload validation set

In [149]:
# Serialize the validation split into an in-memory buffer using the dense-tensor
# writer from sagemaker.amazon.common (features and labels as float32).
f = io.BytesIO()
smac.write_numpy_to_dense_tensor(f, X_val.values.astype('float32'), y_val.values.reshape(-1).astype('float32'))
f.seek(0)  # rewind so the upload starts at the beginning of the buffer

# S3 object keys always use '/', so build the key explicitly instead of with
# os.path.join, which uses the operating system's path separator.
validation_key = '{}/{}/{}'.format(prefix, 'validation', 'linear_val.data')
boto3.Session().resource('s3').Bucket(bucket_name).Object(validation_key).upload_fileobj(f)

# Validation channel pointing at the uploaded prefix.
s3_input_validation = sagemaker.s3_input(s3_data='s3://{}/{}/validation'.format(bucket_name, prefix))
s3_input_validation.config


{'DataSource': {'S3DataSource': {'S3DataDistributionType': 'FullyReplicated',
   'S3DataType': 'S3Prefix',
   'S3Uri': 's3://demo-saeed/fraudcredit-pipeline-boto3/validation'}}}

### Upload test set


In [150]:
test_file = 'linear_test.data'

# Serialize the test split into an in-memory buffer using the dense-tensor
# writer from sagemaker.amazon.common (features and labels as float32).
f = io.BytesIO()
smac.write_numpy_to_dense_tensor(f, X_test.values.astype('float32'), y_test.values.reshape(-1).astype('float32'))
f.seek(0)  # rewind so the upload starts at the beginning of the buffer

# S3 object keys always use '/', so build the key explicitly instead of with
# os.path.join, which uses the operating system's path separator.
test_key = '{}/{}/{}'.format(prefix, 'test', test_file)
boto3.Session().resource('s3').Bucket(bucket_name).Object(test_key).upload_fileobj(f)

# Test channel pointing at the uploaded prefix.
s3_input_test = sagemaker.s3_input(s3_data='s3://{}/{}/test'.format(bucket_name, prefix))
s3_input_test.config

{'DataSource': {'S3DataSource': {'S3DataDistributionType': 'FullyReplicated',
   'S3DataType': 'S3Prefix',
   'S3Uri': 's3://demo-saeed/fraudcredit-pipeline-boto3/test'}}}

# Training the Linear Learner model

In [151]:
from sagemaker.amazon.amazon_estimator import get_image_uri

# Look up the linear-learner image for the session's region.
# From here on `container` holds this image rather than the scikit-learn one
# assigned to the same name earlier.
session_region = boto3.Session().region_name
container = get_image_uri(session_region, 'linear-learner')
container

'382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1'

In [152]:
# S3 location passed to the estimator as its output path.
output_location = 's3://{}/{}/output'.format(bucket_name, prefix)
artifact_notice = 'training artifacts will be uploaded to: {}'.format(output_location)
print(artifact_notice)

training artifacts will be uploaded to: s3://demo-saeed/fraudcredit-pipeline-boto3/output


In [153]:
# Check the training matrix dimensions; the column count is the feature dimension.
X_train.shape

(182276, 30)

In [154]:
import boto3
import sagemaker

sess = sagemaker.Session()

# Estimator that runs the linear-learner image on a single ml.m4.2xlarge
# instance and writes its artifacts to `output_location`.
linear = sagemaker.estimator.Estimator(container,
                                       role,
                                       train_instance_count=1,
                                       train_instance_type='ml.m4.2xlarge',
                                       output_path=output_location,
                                       sagemaker_session=sess)

# Take the feature dimension from the training matrix instead of hard-coding it,
# so the hyperparameter cannot drift from the data that was uploaded.
feature_count = int(X_train.shape[1])
linear.set_hyperparameters(feature_dim=feature_count,
                           predictor_type='binary_classifier',
                           epochs=1,
                           mini_batch_size=200)

# Train on the three channels uploaded above; fit() streams the job log into the
# notebook output.
linear.fit({'train': s3_input_train, 'validation': s3_input_validation, 'test': s3_input_test})

2019-05-29 18:11:43 Starting - Starting the training job...
2019-05-29 18:11:45 Starting - Launching requested ML instances......
2019-05-29 18:12:55 Starting - Preparing the instances for training...
2019-05-29 18:13:39 Downloading - Downloading input data...
2019-05-29 18:13:58 Training - Downloading the training image..
[31mDocker entrypoint called with argument(s): train[0m
[31m[05/29/2019 18:14:20 INFO 140035592775488] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma': u'0.01', u'lr_scheduler_minimum_lr': u'auto', u'target_recall

In [155]:
# Display the name of the training job launched by fit() (read from a private attribute).
linear._current_job_name

'linear-learner-2019-05-29-18-11-43-043'

In [159]:
# Display the S3 URI of the trained Linear Learner model artifact.
linear.model_data

's3://demo-saeed/fraudcredit-pipeline-boto3/output/linear-learner-2019-05-29-18-11-43-043/output/model.tar.gz'

In [161]:
# Register the trained Linear Learner artifact as a standalone SageMaker model.
sm = boto3.client('sagemaker')
try:
    linear_model_request = dict(
        ModelName='fd-linear-model-{}'.format(time),
        PrimaryContainer=dict(Image=container, ModelDataUrl=linear.model_data),
        ExecutionRoleArn=role,
    )
    sm.create_model(**linear_model_request)
except Exception as e:
    # Print the failure for the notebook reader, then propagate it.
    print(e)
    print('Unable to create model.')
    raise e

In [162]:
# scikit-learn image for the preprocessing step. It needs its own name because
# `container` now holds the linear-learner image.
container_preprocess = '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3'


In [163]:
# Confirm which image `container` currently refers to.
container

'382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:1'

In [164]:
# Confirm the name of the preprocessing model that the pipeline will reuse.
myModelName

'fd-preprocessing-model-2019-05-29-17-51-58'

In [168]:
# Inspect the registered preprocessing model (image, model data URL, environment).
sm.describe_model(ModelName=myModelName)

{'ModelName': 'fd-preprocessing-model-2019-05-29-17-51-58',
 'PrimaryContainer': {'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.20.0-cpu-py3',
  'ModelDataUrl': 's3://demo-saeed/fraudcredit-pipeline-boto3/preprocessed-model/fd-preprocess-model-job-2019-05-29-17-51-58/output/model.tar.gz',
  'Environment': {'SAGEMAKER_PROGRAM': 'sklearn_fd_featurizer.py',
   'SAGEMAKER_REGION': 'us-east-1',
   'SAGEMAKER_SUBMIT_DIRECTORY': 's3://demo-saeed/fraudcredit-pipeline-boto3/src_path/sklearn_fd_featurizer.tar.gz'}},
 'ExecutionRoleArn': 'arn:aws:iam::079329190341:role/service-role/AmazonSageMaker-ExecutionRole-20190404T141667',
 'CreationTime': datetime.datetime(2019, 5, 29, 17, 57, 14, 965000, tzinfo=tzlocal()),
 'ModelArn': 'arn:aws:sagemaker:us-east-1:079329190341:model/fd-preprocessing-model-2019-05-29-17-51-58',
 'EnableNetworkIsolation': False,
 'ResponseMetadata': {'RequestId': '98ba1b27-9868-4df4-934b-806e7d1f6bad',
  'HTTPStatusCode': 200,
  'HTTPHeader

In [173]:
# Extract only the source-archive S3 URI from the model description.
sm.describe_model(ModelName=myModelName)['PrimaryContainer']['Environment']['SAGEMAKER_SUBMIT_DIRECTORY']

's3://demo-saeed/fraudcredit-pipeline-boto3/src_path/sklearn_fd_featurizer.tar.gz'

In [174]:
# Create a two-container model: the scikit-learn featurizer followed by the
# trained Linear Learner model.
sm = boto3.client('sagemaker')
pipline_model_name = 'fd-pipline-model-{}'.format(time)

# Fetch the preprocessing model's container definition once and reuse it,
# instead of issuing the same describe_model call for every field.
preprocess_definition = sm.describe_model(ModelName=myModelName)['PrimaryContainer']

response = sm.create_model(
    ModelName=pipline_model_name,
    Containers=[
        {
            # Container 1: featurizer, configured like the standalone preprocessing model.
            'Image': container_preprocess,
            'ModelDataUrl': preprocess_definition['ModelDataUrl'],
            'Environment': {
                'SAGEMAKER_PROGRAM': 'sklearn_fd_featurizer.py',
                'SAGEMAKER_REGION': 'us-east-1',
                'SAGEMAKER_SUBMIT_DIRECTORY': preprocess_definition['Environment']['SAGEMAKER_SUBMIT_DIRECTORY'],
            },
        },
        {
            # Container 2: the trained Linear Learner model.
            'Image': container,
            'ModelDataUrl': linear.model_data,
        },
    ],
    ExecutionRoleArn=role,
)

In [175]:
# Display the create_model response for the pipeline model.
response

{'ModelArn': 'arn:aws:sagemaker:us-east-1:079329190341:model/fd-pipline-model-2019-05-29-17-51-58',
 'ResponseMetadata': {'RequestId': 'daa28d96-d795-4408-88a8-ff6c02890d20',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'daa28d96-d795-4408-88a8-ff6c02890d20',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '98',
   'date': 'Wed, 29 May 2019 18:33:59 GMT'},
  'RetryAttempts': 0}}

In [178]:
# Display the pipeline model name.
pipline_model_name

'fd-pipline-model-2019-05-29-17-51-58'

In [180]:
# Endpoint configuration that serves the pipeline model from one ml.c4.xlarge
# instance under a single variant named 'prod'.
myEndpointConfigName = 'fd-endpoint-conf-pipline-{}'.format(time)
try:
    prod_variant = {
        'VariantName': 'prod',
        'ModelName': pipline_model_name,
        'InitialInstanceCount': 1,
        'InstanceType': 'ml.c4.xlarge',
    }
    sm.create_endpoint_config(
        EndpointConfigName=myEndpointConfigName,
        ProductionVariants=[prod_variant],
    )
except Exception as e:
    # Print the failure for the notebook reader, then propagate it.
    print(e)
    print('Unable to create endpoint configuration.')
    raise e

In [181]:
# Display the endpoint configuration name.
myEndpointConfigName

'fd-endpoint-conf-pipline-2019-05-29-17-51-58'

In [185]:
# Create the real-time endpoint for the inference pipeline.
myEndpointName = 'fd-scikit-est-model-pipe-inference'
try:
    sm.create_endpoint(
        EndpointName=myEndpointName,
        EndpointConfigName=myEndpointConfigName
    )
except Exception as e:
    # Print the failure for the notebook reader, then propagate it.
    print(e)
    print('Unable to create endpoint.')
    raise

# create_endpoint returns while the endpoint is still being provisioned. Block
# until it is in service so the prediction cells below do not hit an endpoint
# that is not ready. The waiter raises if the endpoint ends up in a failed state.
sm.get_waiter('endpoint_in_service').wait(EndpointName=myEndpointName)

In [186]:
# Build a one-line CSV payload from the first row of the test features.
input_Data = ",".join(str(feature_value) for feature_value in X_test.iloc[0])
input_Data

'0.6529459981462263,-0.16507624070578195,0.6403752242707034,-0.03188199643678793,-0.4288571589285194,0.9127522858844566,-0.06887553039697115,0.9369570220144032,-0.1041022496739883,-0.1589611706518778,-1.5102207708622541,-1.096160189043005,0.20280963151820516,1.1514082506299608,-1.8802117384099803,-0.2700470249973237,-0.06955236253966818,0.9967859173885858,0.4527151313181526,1.0408127499936592,0.2418001005150753,-0.2819494345602899,-0.5978918301090319,-0.4189437568262021,-0.07702619337285886,0.4057578551212853,0.017206059542163483,0.2687955551390033,0.4881781880588871,-0.1933059452093052'

In [189]:
from sagemaker.predictor import json_serializer, csv_serializer, json_deserializer, RealTimePredictor
from sagemaker.content_types import CONTENT_TYPE_CSV, CONTENT_TYPE_JSON

sess = sagemaker.Session()

payload = input_Data
actual_rings = 10  # NOTE(review): not used anywhere in this cell; confirm whether it can be dropped

# Predictor bound to the pipeline endpoint: it sends CSV and asks for JSON back.
predictor_options = dict(
    endpoint=myEndpointName,
    sagemaker_session=sess,
    serializer=csv_serializer,
    content_type=CONTENT_TYPE_CSV,
    accept=CONTENT_TYPE_JSON,
)
predictor = RealTimePredictor(**predictor_options)

# No deserializer is configured, so the response is printed as returned.
pipeline_prediction = predictor.predict(payload)
print(pipeline_prediction)


b'{"predictions": [{"score": 0.00483148917555809, "predicted_label": 0.0}]}'


In [201]:
# Display the S3 output path configured on the estimator.
linear.output_path

's3://demo-saeed/fraudcredit-local/output'

In [203]:
# Deploy the Linear Learner model on its own to the 'aws-linear-learner' endpoint.
# An earlier variant of this call used ml.m4.xlarge without an explicit endpoint name.
# NOTE(review): update_endpoint=True presumably expects that endpoint to exist
# already - confirm against the installed SDK version.
linear_predictor = linear.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name='aws-linear-learner',
    update_endpoint=True,
)




In [9]:
# Select the test row with index label 83053. `xx` is not used by any later cell shown here.
# NOTE(review): the recorded output is a NameError, so this cell was run in a
# kernel where X_test was not defined; it needs the split cell to run first.
xx =X_test.loc[83053]

NameError: name 'X_test' is not defined

In [8]:
# Display the test feature frame.
# NOTE(review): the recorded output is a NameError, so this cell was run in a
# kernel where X_test was not defined.
X_test

NameError: name 'X_test' is not defined

In [10]:
# Display the raw feature values of the test row with index label 83053.
# NOTE(review): the recorded output is a NameError, so this cell was run in a
# kernel where X_test was not defined.
X_test.loc[83053].values

NameError: name 'X_test' is not defined

In [231]:
from sagemaker.predictor import csv_serializer, json_deserializer

# Configure the standalone Linear Learner predictor to send CSV requests and
# parse JSON responses.
linear_predictor.content_type = 'text/csv'
linear_predictor.serializer = csv_serializer
linear_predictor.deserializer = json_deserializer

# Score the test row with index label 83053.
selected_transaction = X_test.loc[83053]
linear_predictor.predict(selected_transaction)

{'predictions': [{'score': 0.9563339948654175, 'predicted_label': 1.0}]}

'aws-linear-learner'

In [209]:
import io
import boto3
import json
import csv

# Name of the endpoint deployed from the Linear Learner estimator.
ENDPOINT_NAME = 'aws-linear-learner'
runtime = boto3.client('runtime.sagemaker')

# One-line CSV payload built from the first row of the test features. It is
# computed once; the original cell built it twice and had a no-op expression
# in between.
input_Data = ",".join(map(str, X_test.iloc[0]))

# Call the endpoint through the low-level runtime client instead of the SDK predictor.
response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                   ContentType='text/csv',
                                   Body=input_Data)
print(response)

# The response body is a stream; read and decode it, then parse the JSON payload.
result = json.loads(response['Body'].read().decode())
print(result)


{'ResponseMetadata': {'RequestId': 'c4f995d6-db76-4473-bbb3-02254f7a4491', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'c4f995d6-db76-4473-bbb3-02254f7a4491', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Wed, 22 May 2019 13:47:03 GMT', 'content-type': 'application/json', 'content-length': '73'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7ff5d6227908>}
{'predictions': [{'score': 0.11748139560222626, 'predicted_label': 0.0}]}
