In [9]:
import numpy as np
import os
import boto3
import tensorflow as tf

In [10]:
import sagemaker
from sagemaker import get_execution_role

# IAM execution role attached to this notebook instance; handed to the
# estimator further down so training jobs can run under it.
role = get_execution_role()

# Session object wrapping the SageMaker API for the rest of the notebook.
sagemaker_session = sagemaker.Session()

In [11]:
# S3 bucket and key prefix shared by the upload, training and transform cells.
bucket, prefix = 'demo-saeed', 'fraudcredit-keras1_8m_9'

# File names of the train / test CSVs (read from the local data/ folder).
dataset_train_name, dataset_test_name = (
    'creditcard1_8m_train.csv',
    'creditcard1_8m_test.csv',
)

In [12]:
# Upload the local training CSV to s3://<bucket>/<prefix>/data/train/<file>.
# The object key is assembled with '/' explicitly: S3 keys always use forward
# slashes, whereas os.path.join follows the separator of the local OS.
train_key = '{}/data/train/{}'.format(prefix, dataset_train_name)
train_local_path = 'data/{}'.format(dataset_train_name)
boto3.Session().resource('s3').Bucket(bucket).Object(train_key).upload_file(train_local_path)


In [13]:
# Upload the local test CSV to s3://<bucket>/<prefix>/data/test/<file>.
# The object key is assembled with '/' explicitly: S3 keys always use forward
# slashes, whereas os.path.join follows the separator of the local OS.
test_key = '{}/data/test/{}'.format(prefix, dataset_test_name)
test_local_path = 'data/{}'.format(dataset_test_name)
boto3.Session().resource('s3').Bucket(bucket).Object(test_key).upload_file(test_local_path)


In [14]:
%%bash

# Fan the single test CSV out into nine numbered copies (suffixes _0 .. _8)
# that live next to the original under the same S3 prefix.
test_dir="s3://demo-saeed/fraudcredit-keras1_8m_9/data/test"
for idx in $(seq 0 8); do
    aws s3 cp "${test_dir}/creditcard1_8m_test.csv" "${test_dir}/creditcard1_8m_test_${idx}.csv"
done

copy: s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test.csv to s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test_0.csv
copy: s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test.csv to s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test_1.csv
copy: s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test.csv to s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test_2.csv
copy: s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test.csv to s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test_3.csv
copy: s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test.csv to s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test_4.csv
copy: s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test.csv to s3://demo-saeed/fraudcredit-keras1_8m_9/data/test/creditcard1_8m_test_5.csv
copy: s3://demo-saeed/fraudcredit-keras1_8m_9/

In [None]:
# train_input = sagemaker_session.upload_data('data', key_prefix="{}/{}/{}".format(bucket_name, prefix, 'data') )
# train_input

In [15]:
# Training channel definition. The URI deliberately names the data/train
# *prefix*, not a single file. The template has three placeholders, so the
# file name that used to be passed as a fourth argument was silently ignored
# by str.format; it is dropped here to keep the code honest about that.
data_location = 's3://{}/{}/{}'.format(bucket, prefix, 'data/train')

# NOTE(review): content_type is the bare string 'csv' rather than the MIME type
# 'text/csv' used for the transform job later on -- confirm which one the
# training script expects before changing it.
s3_input_train = sagemaker.s3_input(s3_data=data_location, content_type='csv')

# Display the resulting channel configuration for inspection.
s3_input_train.config

{'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
   'S3Uri': 's3://demo-saeed/fraudcredit-keras1_8m_9/data/train',
   'S3DataDistributionType': 'FullyReplicated'}},
 'ContentType': 'csv'}

In [16]:
# Full S3 URI of the training CSV itself (bucket / prefix / data/train / file).
train_input = 's3://%s/%s/data/train/%s' % (bucket, prefix, dataset_train_name)
train_input

's3://demo-saeed/fraudcredit-keras1_8m_9/data/train/creditcard1_8m_train.csv'

In [17]:
# import pandas as pd
# df = pd.read_csv(train_input, sep=',',  nrows=1)
# df.head()

In [18]:
from sagemaker.tensorflow import TensorFlow

# Script-mode TensorFlow estimator: trains with main.py as the entry point on
# a single ml.p3.2xlarge instance, TensorFlow 1.12 / Python 3, for one epoch.
tf_estimator = TensorFlow(
    entry_point='main.py',
    role=role,
    script_mode=True,
    framework_version='1.12',
    py_version='py3',
    train_instance_type='ml.p3.2xlarge',
    train_instance_count=1,
    hyperparameters={'epochs': 1},
)


In [19]:
# Launch the training job with one input channel named 'train'.
tf_estimator.fit(inputs={'train': s3_input_train})

2020-03-08 01:37:15 Starting - Starting the training job...
2020-03-08 01:37:17 Starting - Launching requested ML instances......
2020-03-08 01:38:21 Starting - Preparing the instances for training...
2020-03-08 01:39:16 Downloading - Downloading input data.........
2020-03-08 01:40:42 Training - Downloading the training image...
2020-03-08 01:41:03 Training - Training image download completed. Training in progress.[34m2020-03-08 01:41:07,149 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-03-08 01:41:07,594 sagemaker-containers INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "train": "/opt/ml/input/data/train"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {
        "model_dir": "s3://sagemaker-us-east-1-07932919

In [20]:
# Show the S3 model directory recorded on the estimator after training.
tf_estimator.model_dir

's3://sagemaker-us-east-1-079329190341/sagemaker-tensorflow-scriptmode-2020-03-08-01-37-15-402/model'

# Prediction using an endpoint (high-level SDK)

In [21]:
# import time

# tf_endpoint_name = 'keras-tf-fmnist-'+time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
# tf_predictor = tf_estimator.deploy(initial_instance_count=1,
#                          instance_type='local',        # $0.134/hour in eu-west-1
#                          endpoint_name=tf_endpoint_name)     # = 80% discount!

In [22]:
# from sklearn.preprocessing import StandardScaler
# from sklearn.model_selection import train_test_split
# RANDOM_SEED = 42
# LABELS = ["Normal", "Fraud"]
# data = df.drop(['Time'], axis=1)

# data['Amount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1, 1))
# X_train, X_test = train_test_split(data, test_size=0.2, random_state=RANDOM_SEED)
# X_train = X_train[X_train.Class == 0]
# X_train = X_train.drop(['Class'], axis=1)

# y_test = X_test['Class']
# X_test = X_test.drop(['Class'], axis=1)

# batch_file = 'batch_data.csv'
# X_test.to_csv(batch_file,index=False,header=False)
# #sess.upload_data(batch_file, key_prefix='{}/batch'.format(prefix))
# boto3.Session().resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'batch/batch_data.csv')).upload_file('batch_data.csv')

# X_train = X_train.values
# X_test = X_test.values
# print(X_train.shape)

In [23]:
# predictions = tf_predictor.predict(X_test)['predictions']
# mse = np.mean(np.power(X_test - predictions, 2), axis=1)
# error_df = pd.DataFrame({'reconstruction_error': mse,
#                         'true_class': y_test})
# error_df.head(20)

# Prediction using TensorFlow Serving

In [24]:
# !aws s3 cp s3://sagemaker-us-east-1-079329190341/sagemaker-tensorflow-scriptmode-2020-03-06-16-23-30-671/model.tar.gz model.tar.gz
# !tar -xf model.tar.gz

In [25]:
# sess = tf.compat.v1.Session()
# loaded = tf.saved_model.load(export_dir='SavedModels/1/', tags={'serve'},sess=sess )
# loaded.signature_def

In [26]:
# !saved_model_cli show --dir SavedModels/1/ --tag_set serve --all

In [27]:
# %%bash
# mkdir -p tmp/tfserving
# cd tmp/tfserving
# git clone --depth=1 https://github.com/tensorflow/serving
# cd tmp/tfserving
# docker pull tensorflow/serving:latest

In [28]:
# !docker run --rm -p 8501:8501 \
#     --mount type=bind,source=$(pwd),target=$(pwd) \
#     -e MODEL_BASE_PATH=$(pwd)/SavedModels \
#     -e MODEL_NAME=autoencodermodel -t tensorflow/serving:latest

In [29]:
# %%bash
# cd /tmp/tfserving/
# curl http://localhost:8501/v1/models/autoencodermodel

In [30]:
# !curl -d '{"instances": [[1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0], \
# [1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0,0.0, 1.0,2.0,5.0,2.0]]}' -X POST http://localhost:8501/v1/models/autoencodermodel:predict

# Prediction using Batch Transform

In [31]:
# S3 folder under the shared prefix where batch results are written.
output_path = 's3://%s/%s/batch_output/' % (bucket, prefix)
output_path

's3://demo-saeed/fraudcredit-keras1_8m_9/batch_output/'

In [35]:
# Define the value before echoing it. This cell previously only displayed
# `input_location`, which is first assigned in a later cell, so a fresh
# Restart & Run All failed here with NameError.
input_location = 's3://{}/{}/data/test'.format(bucket, prefix)
input_location

's3://demo-saeed/fraudcredit-keras1_8m_9/data/test'

In [41]:
# Upload the local file data/df1_5m.ext to s3://<bucket>/<prefix>/data/test2/.
# The object key is assembled with '/' explicitly: S3 keys always use forward
# slashes, whereas os.path.join follows the separator of the local OS.
test2_key = '{}/data/test2/df1_5m.ext'.format(prefix)
boto3.Session().resource('s3').Bucket(bucket).Object(test2_key).upload_file('data/df1_5m.ext')




In [40]:
# Echo the S3 key prefix currently in use.
prefix

'fraudcredit-keras1_8m_9'

In [46]:
# List what sits at .../data/test. The argument has no trailing slash, so the
# listing reports every prefix starting with "test" (the recorded output shows
# both test/ and test2/).
!aws s3 ls s3://demo-saeed/fraudcredit-keras1_8m_9/data/test

                           PRE test/
                           PRE test2/


In [51]:
# Environment variables passed to the serving container of the transform job;
# they switch on TensorFlow Serving request batching and set its timeout and
# maximum batch size.
env = {
    'SAGEMAKER_TFS_ENABLE_BATCHING': 'true',
    'SAGEMAKER_TFS_BATCH_TIMEOUT_MICROS': '50000',
    'SAGEMAKER_TFS_MAX_BATCH_SIZE': '16',
}

# Transformer built from the trained estimator: two ml.p3.2xlarge instances,
# two concurrent requests each, several records per request (MultiRecord).
# NOTE(review): max_payload is expressed in MB per the SageMaker Transformer
# docs -- confirm that 40 is what the model container can accept.
sm_transformer = tf_estimator.transformer(instance_count=2,
                                          instance_type='ml.p3.2xlarge',
                                          max_concurrent_transforms=2,
                                          max_payload=40,
                                          strategy='MultiRecord',
                                          output_path='s3://{}/{}/batch_output/'.format(bucket, prefix),
                                          env=env)

# Start a transform job over every object below data/test/.
# The trailing slash is required: with data_type='S3Prefix' the URI is matched
# as a key-name prefix, so '.../data/test' would also pull in the sibling
# '.../data/test2/' folder (which holds a non-CSV file).
# Original author's note: use input data without ID column (TODO confirm the
# uploaded test CSVs satisfy this).
input_location = 's3://{}/{}/data/test/'.format(bucket, prefix)
sm_transformer.transform(input_location,
                         data_type='S3Prefix',
                         content_type='text/csv',  # alternative tried: 'application/x-recordio-protobuf'
                         split_type='Line'  # alternative tried: 'RecordIO'
                        )

# Block until the transform job finishes (container logs are streamed below).
sm_transformer.wait()



.......................[34mINFO:__main__:starting services[0m
[34mINFO:__main__:using default model name: model[0m
[34mINFO:__main__:tensorflow serving model config: [0m
[34mmodel_config_list: {
  config: {
    name: "model",
    base_path: "/opt/ml/model",
    model_platform: "tensorflow"
  }[0m
[34m}

[0m
[34mINFO:__main__:nginx config: [0m
[34mload_module modules/ngx_http_js_module.so;
[0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr info;
[0m
[34mworker_rlimit_nofile 4096;
[0m
[34mevents {
  worker_connections 2048;[0m
[34m}
[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/json;
  access_log /dev/stdout combined;
  js_include tensorflow-serving.js;

  upstream tfs_upstream {
    server localhost:10001;
  }

  upstream gunicorn_upstream {
    server unix:/tmp/gunicorn.sock fail_timeout=1;
  }

  server {
    listen 8080 deferred;
    client_max_body_size 0;
    client_bod

In [None]:
# Echo the batch-output S3 URI defined earlier in the notebook.
output_path

In [None]:
# import json
# import io
# from urllib.parse import urlparse

# def get_csv_output_from_s3(s3uri, file_name):
#     parsed_url = urlparse(s3uri)

#     bucket_name = parsed_url.netloc

#     prefix = parsed_url.path[1:]

#     s3 = boto3.resource('s3')
#     obj = s3.Object(bucket_name, '{}/{}'.format(prefix, file_name))

#     return obj.get()["Body"].read().decode('utf-8')

In [None]:
# output = get_csv_output_from_s3(output_path[:-1], dataset_test_name+'.out')
# j =json.loads(output)
# n =j['predictions']
# df = pd.DataFrame.from_records(n)
# df.head()