In [1]:
import sagemaker
from sagemaker import get_execution_role

# One shared SageMaker session, reused by the upload, training and inference cells below.
sagemaker_session = sagemaker.Session()

# Execution role attached to this notebook instance; handed to every estimator/model.
role = get_execution_role()

# S3 location for this project's artifacts: key prefix inside the session's default bucket.
prefix = "fraud-detection-isolation-forest"
bucket = sagemaker_session.default_bucket()

In [2]:
! mkdir data
! aws s3 cp s3://flagright-fraud-detection-data/flagright-test-transactions.json ./data

mkdir: cannot create directory ‘data’: File exists
download: s3://flagright-fraud-detection-data/flagright-test-transactions.json to data/flagright-test-transactions.json


In [3]:
# Local directory that holds the downloaded dataset.
WORK_DIRECTORY = "data"

# Upload the transactions file under "<prefix>/train" in the project bucket.
# The value returned by upload_data is reused as the training and transform input below.
train_input = sagemaker_session.upload_data(
    path="/".join((WORK_DIRECTORY, "flagright-test-transactions.json")),
    bucket=bucket,
    key_prefix="/".join((prefix, "train")),
)

In [4]:
# Fetch the preprocessing entry-point script from S3 into the local data directory;
# it is referenced as ./data/preprocess.py when the preprocessing estimator is built.
! aws s3 cp s3://flagright-fraud-detection-data/preprocess.py ./data

download: s3://flagright-fraud-detection-data/preprocess.py to data/preprocess.py


In [5]:
from sagemaker.sklearn.estimator import SKLearn

# Container framework version shared by both SKLearn estimators in this notebook.
FRAMEWORK_VERSION = "1.0-1"

# Entry-point script downloaded from S3 in the previous cell.
script_path = "./data/preprocess.py"

# Estimator that runs the preprocessing script as a SageMaker training job.
sklearn_preprocessor = SKLearn(
    entry_point=script_path,
    framework_version=FRAMEWORK_VERSION,
    instance_type="ml.c4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session,
)

In [6]:
# Launch the preprocessing job, exposing the uploaded dataset as the "train" channel.
sklearn_preprocessor.fit(inputs={"train": train_input})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2023-04-09-13-08-51-962


2023-04-09 13:08:52 Starting - Starting the training job...
2023-04-09 13:09:08 Starting - Preparing the instances for training......
2023-04-09 13:10:18 Downloading - Downloading input data
2023-04-09 13:10:18 Training - Downloading the training image......
2023-04-09 13:11:14 Uploading - Uploading generated training model[34m2023-04-09 13:11:06,126 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2023-04-09 13:11:06,129 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:11:06,138 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2023-04-09 13:11:06,323 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:11:06,334 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:11:06,348 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gp

In [7]:
# Build a batch Transformer backed by the fitted preprocessing estimator.
transformer = sklearn_preprocessor.transformer(
    instance_count=1,
    instance_type="ml.m5.xlarge",
    assemble_with="Line",
    accept="application/json",
)

INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2023-04-09-13-12-05-232


In [8]:
# Run the batch transform over the raw training data (sent as JSON).
transformer.transform(train_input, content_type="application/json")

# Report which job we are blocking on, then wait for it to finish.
transform_job_name = transformer.latest_transform_job.job_name
print("Waiting for transform job: " + transform_job_name)
transformer.wait()

# The transformer's output location becomes the training input for the model cells below.
preprocessed_train = transformer.output_path

INFO:sagemaker:Creating transform job with name: sagemaker-scikit-learn-2023-04-09-13-12-05-758


.........................[34m2023-04-09 13:16:07,636 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:16:07,638 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:16:07,639 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
     

In [9]:
# Fetch the training entry-point script from S3 into the current working directory;
# it is referenced as "train.py" when the training estimator is built.
! aws s3 cp s3://flagright-fraud-detection-data/train.py ./

download: s3://flagright-fraud-detection-data/train.py to ./train.py


In [10]:
from sagemaker.sklearn.estimator import SKLearn

# Entry-point script for model training (downloaded in the previous cell).
train_script_path = "train.py"

# Estimator for the training job; uses the same framework version, instance type,
# role and session as the preprocessing estimator.
sklearn = SKLearn(
    entry_point=train_script_path,
    role=role,
    instance_type="ml.c4.xlarge",
    framework_version=FRAMEWORK_VERSION,
    sagemaker_session=sagemaker_session,
)

In [11]:
# Wrap the batch-transform output location as a JSON training channel definition.
if_train_data = sagemaker.inputs.TrainingInput(
    s3_data=preprocessed_train,
    s3_data_type="S3Prefix",
    content_type="application/json",
    distribution="FullyReplicated",
)

In [12]:
# Launch the training job on the preprocessed data, exposed as the "train" channel.
sklearn.fit(inputs={"train": if_train_data})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2023-04-09-13-16-51-275


2023-04-09 13:16:51 Starting - Starting the training job...
2023-04-09 13:17:05 Starting - Preparing the instances for training......
2023-04-09 13:18:05 Downloading - Downloading input data...
2023-04-09 13:18:30 Training - Downloading the training image...
2023-04-09 13:19:21 Uploading - Uploading generated training model[34m2023-04-09 13:19:12,682 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2023-04-09 13:19:12,685 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:19:12,695 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2023-04-09 13:19:12,897 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:19:12,908 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-09 13:19:12,923 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gp

In [13]:
from sagemaker.model import Model
from sagemaker.pipeline import PipelineModel
import boto3
from time import gmtime, strftime

# UTC timestamp suffix so each run gets distinct model and endpoint names.
timestamp_prefix = strftime("%Y-%m-%d-%H-%M-%S", gmtime())

# Deployable models built from the two fitted estimators:
# the preprocessing step and the model trained on its output.
scikit_learn_inferencee_model = sklearn_preprocessor.create_model()
sk_learn_if_model = sklearn.create_model()

model_name = "inference-pipeline-" + timestamp_prefix
endpoint_name = "inference-pipeline-ep-" + timestamp_prefix

# Chain both models into one PipelineModel (preprocessor first, then the trained model).
# The shared session is passed explicitly, matching the estimators and the Predictor,
# so the pipeline does not fall back to constructing a separate default session.
sm_model = PipelineModel(
    name=model_name,
    role=role,
    models=[scikit_learn_inferencee_model, sk_learn_if_model],
    sagemaker_session=sagemaker_session,
)

# Deploy the pipeline to a single-instance endpoint named endpoint_name.
# NOTE(review): nothing in this notebook deletes the endpoint afterwards — confirm cleanup happens elsewhere.
sm_model.deploy(initial_instance_count=1, instance_type="ml.c4.xlarge", endpoint_name=endpoint_name)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker:Creating model with name: inference-pipeline-2023-04-09-13-20-04
INFO:sagemaker:Creating endpoint-config with name inference-pipeline-ep-2023-04-09-13-20-04
INFO:sagemaker:Creating endpoint with name inference-pipeline-ep-2023-04-09-13-20-04


------!

In [14]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer

# Sample transaction used to smoke-test the deployed inference pipeline.
payload = {
    "destinationAmountDetails": {
        "country": "IN",
        "transactionCurrency": "INR",
        "transactionAmount": 10132.8,
    },
    "transactionState": "CREATED",
    "destinationPaymentDetails": {"method": "GENERIC_BANK_ACCOUNT"},
    "originPaymentDetails": {"method": "GENERIC_BANK_ACCOUNT"},
    "originAmountDetails": {
        "country": "IN",
        "transactionCurrency": "INR",
        "transactionAmount": 10132.8,
    },
    "timestamp": {"$numberLong": "1662358419786"},
    "transactionId": "bd70fcaebc254c23b07b29fd994ba5f2",
    "originUserId": "29529892-22d3-4a74-b6f2-fbe1d5ee8b6f",
}

# Client bound to the deployed endpoint; request bodies are serialized as JSON.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
    serializer=JSONSerializer(),
)

# Invoke the endpoint and show the response exactly as returned.
prediction = predictor.predict(payload)
print(prediction)

b'{"predictions": [1], "scores": [-0.29669666920822557]}'
