# Anomaly Detection Algorithm Resource for Marketplace

## Import Libraries and Create AWS Clients

In [1]:
import base64
import boto3
import docker
import json
import pandas as pd
import requests
import sagemaker
from sagemaker import get_execution_role
import socket
import time
from urllib.parse import urlparse
from joblib import dump, load
import matplotlib.pyplot as plt
import numpy as np


# SageMaker SDK session; the region and the S3 client below are taken from it.
session = sagemaker.Session()
region = session.boto_region_name
account_id = boto3.client("sts").get_caller_identity().get("Account")
role = get_execution_role()

# The low-level SageMaker client is bound to `sagemaker_client` rather than
# `sagemaker`: reusing the module's name would rebind it to the client and
# break any later `sagemaker.<SDK attribute>` access in this kernel.
sagemaker_client = boto3.client("sagemaker")
s3_client = session.boto_session.client("s3")
ecr = boto3.client("ecr")
sm_runtime = boto3.client("sagemaker-runtime")


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


## Build and Push Docker Image to ECR

In [2]:
cd docker

/home/sagemaker-user/docker


In [3]:
# One-time setup: uncomment the next line to install the package that provides the
# `sm-docker` CLI used in the following cell (%pip targets the running kernel's environment).
# %pip install sagemaker-studio-image-build

In [4]:
# Build the image from the Dockerfile in the current directory with the sm-docker CLI.
# Per the captured output below, the build runs on CodeBuild and an ECR repository
# named `sagemaker-studio` is created for the image.
!sm-docker build . --file Dockerfile

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
Created ECR repository sagemaker-studio
................[Container] 2024/08/05 18:57:45.276134 Running on CodeBuild On-demand

[Container] 2024/08/05 18:57:45.276296 Waiting for agent ping
[Container] 2024/08/05 18:57:48.488378 Waiting for DOWNLOAD_SOURCE
[Container] 2024/08/05 18:57:48.732879 Phase is DOWNLOAD_SOURCE
[Container] 2024/08/05 18:57:48.733859 CODEBUILD_SRC_DIR=/codebuild/output/src2639887840/src
[Container] 2024/08/05 18:57:48.734370 YAML location is /codebuild/output/src2639887840/src/buildspec.yml
[Container] 2024/08/05 18:57:48.736355 Setting HTTP client timeout to higher timeout for S3 source
[Container] 2024/08/05 18:57:48.736755 Processing environment variables
[Container] 2024/08/05 18:57:48.778262 No runtime version selected in buildspec.
[Container] 2024/

## Create Algorithm Resource

In [5]:
cd ..

/home/sagemaker-user


In [41]:
# Register the anomaly-detection container as a SageMaker algorithm resource.
# The request includes a validation profile (one training job followed by one
# batch transform job) and sets CertifyForMarketplace=True.
sagemaker_client = boto3.client("sagemaker")

# The same container image is used for training and for inference.
algorithm_image_uri = "361054136331.dkr.ecr.us-east-1.amazonaws.com/sagemaker-studio:latest"
validation_role_arn = (
    "arn:aws:iam::361054136331:role/service-role/"
    "AmazonSageMaker-ExecutionRole-20240715T131631"
)

# Instance-type lists are defined in this cell so it runs on a fresh kernel;
# the validation jobs below use the first entry of the matching list, so the
# validation instance type is always one of the supported types.
supported_realtime_training_instance_types = [
    "ml.m5.large",
    "ml.m5.xlarge",
    "ml.m5.2xlarge",
    "ml.m5.4xlarge",
    "ml.m5.12xlarge",
    "ml.m5.24xlarge",
    "ml.m4.2xlarge",
    "ml.m4.4xlarge",
    "ml.m4.10xlarge",
    "ml.m4.16xlarge",
]

supported_realtime_inference_instance_types = [
    "ml.m5.large",
    "ml.c5.large",
    "ml.c5.xlarge",
    "ml.c5.2xlarge",
    "ml.m5.xlarge",
    "ml.m5.2xlarge",
    "ml.m5.4xlarge",
    "ml.m5.12xlarge",
    "ml.m5.24xlarge",
    "ml.m4.2xlarge",
    "ml.m4.10xlarge",
    "ml.m4.4xlarge",
    "ml.m4.16xlarge",
    "ml.c5.4xlarge",
    "ml.c5.9xlarge",
    "ml.c5.18xlarge",
    "ml.c4.8xlarge",
    "ml.c4.4xlarge",
    "ml.c4.2xlarge",
]

# "ml.m4.4xlarge" was listed twice in the original request; it appears once here.
supported_realtime_transform_instance_types = [
    "ml.m5.large",
    "ml.m4.4xlarge",
    "ml.m5.xlarge",
    "ml.m5.2xlarge",
    "ml.c4.2xlarge",
    "ml.c4.4xlarge",
    "ml.c4.8xlarge",
    "ml.m5.4xlarge",
    "ml.m5.12xlarge",
    "ml.m5.24xlarge",
    "ml.m4.2xlarge",
    "ml.m4.10xlarge",
    "ml.m4.16xlarge",
]

response = sagemaker_client.create_algorithm(
    AlgorithmName="algo-anomaly-detection",
    AlgorithmDescription="Anomaly detection algorithm for training and inference",
    TrainingSpecification={
        "TrainingImage": algorithm_image_uri,
        "SupportedTrainingInstanceTypes": supported_realtime_training_instance_types,
        "SupportsDistributedTraining": False,
        "MetricDefinitions": [],
        "TrainingChannels": [
            {
                "Name": "train",
                "Description": "Training data",
                "SupportedContentTypes": ["application/json"],
                "SupportedInputModes": ["File"]
            }
        ]
    },
    InferenceSpecification={
        "Containers": [
            {
                "Image": algorithm_image_uri
            }
        ],
        "SupportedContentTypes": ["application/json"],
        "SupportedResponseMIMETypes": ["application/json"],
        "SupportedRealtimeInferenceInstanceTypes": supported_realtime_inference_instance_types,
        "SupportedTransformInstanceTypes": supported_realtime_transform_instance_types
    },
    ValidationSpecification={
        "ValidationRole": validation_role_arn,
        "ValidationProfiles": [
            {
                "ProfileName": "custom-training-profile",
                "TrainingJobDefinition": {
                    "TrainingInputMode": "File",
                    "InputDataConfig": [
                        {
                            # Channel name matches the "train" channel declared in TrainingChannels.
                            "ChannelName": "train",
                            "DataSource": {
                                "S3DataSource": {
                                    "S3DataType": "S3Prefix",
                                    "S3Uri": "s3://marketplace-anomaly-detection/inputs/NONANOAMLY_FEATURE_train.json",
                                    "S3DataDistributionType": "FullyReplicated"
                                }
                            },
                            "ContentType": "application/json",
                            "CompressionType": "None"
                        }
                    ],
                    "OutputDataConfig": {
                        "S3OutputPath": "s3://marketplace-anomaly-detection/models/"
                    },
                    "ResourceConfig": {
                        "InstanceType": supported_realtime_training_instance_types[0],
                        "InstanceCount": 1,
                        "VolumeSizeInGB": 50
                    },
                    "StoppingCondition": {
                        "MaxRuntimeInSeconds": 1800
                    }
                },
                "TransformJobDefinition": {
                    "MaxConcurrentTransforms": 4,
                    "MaxPayloadInMB": 6,
                    "BatchStrategy": "MultiRecord",
                    "TransformInput": {
                        "DataSource": {
                            "S3DataSource": {
                                "S3Uri": "s3://marketplace-anomaly-detection/val_data/",
                                "S3DataType": "S3Prefix"
                            }
                        },
                        "ContentType": "application/json"
                    },
                    "TransformOutput": {
                        "S3OutputPath": "s3://marketplace-anomaly-detection/output/",
                        "Accept": "application/json"
                    },
                    "TransformResources": {
                        "InstanceType": supported_realtime_transform_instance_types[0],
                        "InstanceCount": 1
                    },
                    "Environment": {
                      "SAGEMAKER_PROGRAM": "serve.py"
                    }
                }
            }
        ]
    },
    CertifyForMarketplace=True,
)

# Show the ARN of the created algorithm as the cell's output.
response["AlgorithmArn"]


## Invoke Endpoint

In [15]:

# Clients: S3 to fetch the request payload, SageMaker runtime to call the endpoint.
s3 = boto3.client("s3")
runtime_client = boto3.client("sagemaker-runtime")

endpoint_name = "a-d-endpoint"
s3_uri = "s3://marketplace-anomaly-detection/inputs/HIDDENANOAMLY_FEATURE.json"

# Drop the "s3://" scheme, then split once on "/" into bucket name and object key.
uri_without_scheme = s3_uri[len("s3://"):]
bucket, key = uri_without_scheme.split("/", 1)

# Download the payload and decode it as UTF-8 text.
obj = s3.get_object(Bucket=bucket, Key=key)
data = obj["Body"].read().decode("utf-8")

# Parse and re-serialise the payload: this fails early on invalid JSON and
# sends the document in json.dumps' default formatting.
input_data = json.loads(data)
input_data_json = json.dumps(input_data)

response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=input_data_json,
)

# The response body is a stream; read it fully and parse the JSON document.
response_text = response["Body"].read().decode()
result = json.loads(response_text)



## Save results to S3

In [16]:
def save_results_to_s3(results, bucket_name, base_key, s3_client=None):
    """Upload each entry of ``results`` to S3 as a separate JSON object.

    Every ``(key, value)`` pair is serialised with ``json.dumps`` and written to
    ``<base_key>/<key>.json`` in ``bucket_name``. Uploads are best-effort: a
    failed upload is reported with ``print`` and the remaining entries are
    still attempted.

    Args:
        results: Mapping of result name to a JSON-serialisable value.
        bucket_name: Name of the destination S3 bucket.
        base_key: Key prefix for the uploaded objects. Trailing slashes are
            ignored; an empty prefix writes the objects at the bucket root.
        s3_client: Optional object exposing ``put_object`` (for example a boto3
            S3 client). When omitted, a new ``boto3.client('s3')`` is created.

    Raises:
        TypeError or ValueError: Propagated from ``json.dumps`` when a value
            cannot be serialised (serialisation happens outside the
            best-effort ``try`` block).
    """
    s3 = boto3.client('s3') if s3_client is None else s3_client

    # Normalise the prefix so "output" and "output/" yield the same object keys.
    prefix = base_key.rstrip("/")

    for key, value in results.items():
        json_data = json.dumps(value)
        s3_key = f"{prefix}/{key}.json" if prefix else f"{key}.json"
        try:
            s3.put_object(Bucket=bucket_name, Key=s3_key, Body=json_data, ContentType='application/json')
            print(f"Successfully uploaded {s3_key} to S3.")
        except Exception as e:
            # Deliberately broad: report the failure and continue with the next entry.
            print(f"Error uploading {s3_key} to S3: {e}")


# Destination for the per-entry result files: s3://<bucket_name>/<base_key>/<name>.json
bucket_name = "marketplace-anomaly-detection"
base_key = "output"

# `result` is the parsed endpoint response produced by the invoke-endpoint cell.
save_results_to_s3(results=result, bucket_name=bucket_name, base_key=base_key)

Successfully uploaded output/Robust_Covar.json to S3.
Successfully uploaded output/OC_SVM.json to S3.
Successfully uploaded output/OC_SVM_SGD.json to S3.
Successfully uploaded output/Isolation_Forest.json to S3.
Successfully uploaded output/Ensemble.json to S3.
