In [None]:
# default_exp core

# Inference 

> The objective is to establish playbooks that make our ML training and deployment
processes more efficient by leveraging the various components of SageMaker Studio.

## Imports

In [None]:
import sys
import sagemaker
import boto3
import json
import pandas as pd
import numpy as np
import logging
import random
import argparse
import time
import os

from datetime import datetime
from sagemaker import get_execution_role
from sagemaker.spark.processing import PySparkProcessor
from argparse import Namespace, ArgumentParser

# to get the latest sagemaker python sdk
#!pip install -U sagemaker

## Clients

In [None]:
# Execution role returned by the SageMaker SDK for the current environment.
role = get_execution_role()

# Build every AWS client from one shared session so they all resolve the same
# credentials and region, instead of mixing throwaway Session objects with
# clients taken from boto3's implicit default session.
boto_session = boto3.Session()

sm = boto_session.client(service_name='sagemaker')
smfs_runtime = boto_session.client(service_name='sagemaker-featurestore-runtime')

s3_client = boto_session.client('s3')
cf_client = boto_session.client('cloudformation')
kinesis_client = boto_session.client('kinesis')
lambda_client = boto_session.client('lambda')
kda_client = boto_session.client('kinesisanalytics')

##### S3 data location (for testing the streaming)

In [None]:
# Default bucket of the current SageMaker session; the test CSV is read from it.
s3_bucket = sagemaker.Session().default_bucket()
# Key prefix and file name; joined with '/' they form the S3 key of the CSV
# that the streaming test loads.
s3_transformed_data_prefix = 'heidelberg/data'
s3_transformed_data_filename = "transformed.csv"

## Test out the solution, end to end

### Utility functions

1. Build a CloudWatch console URL that shows the results (for example, https://console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:log-groups/log-group/$252Faws$252Flambda$252FHeidelbergInvokeFraudEndpointLambda3 )

1. Put the incoming data into the Kinesis stream


In [None]:
%store -r
def get_cloudwatch_logs_url(start_time, end_time):
    """Return a CloudWatch console URL for the prediction Lambda's newest log stream.

    Polls the Lambda's log group every 10 seconds until the log stream with
    the latest event time has ingested data at or after ``start_time``. It then
    builds a console link to that stream, filtered on the pattern
    ``Prediction`` and on a window from 10 s before ``start_time`` to 40 s
    after ``end_time``.

    Args:
        start_time: Start of the test window in epoch seconds (e.g. ``time.time()``).
        end_time: End of the test window in epoch seconds.

    Returns:
        str: The CloudWatch Logs console URL.

    Note:
        Relies on the notebook global ``predict_lambda_name`` (expected to be
        restored by ``%store -r``). The wait has no timeout: it keeps polling
        until a sufficiently recent log stream shows up.
    """
    log_group_name = '/aws/lambda/' + predict_lambda_name
    cw_client = boto3.client('logs')

    # The log stream timestamps are compared in epoch milliseconds.
    # Use the arguments, not the notebook globals start_test_time/end_test_time.
    start_ms = int(start_time * 1000)
    end_ms = int(end_time * 1000)

    # get the latest log stream for our Lambda that makes fraud predictions
    while True:
        streams = cw_client.describe_log_streams(logGroupName=log_group_name,
                                                 orderBy='LastEventTime',
                                                 descending=True)['logStreams']
        # The log group may not contain any stream yet (or the newest stream may
        # not report an ingestion time); keep waiting instead of failing.
        if streams and streams[0].get('lastIngestionTime', 0) >= start_ms:
            break
        print('waiting for updated log stream...')
        time.sleep(10)

    # The console URL fragment uses its own '$25xx' escaping for '/', '[', '$' and ']'.
    latest_stream = (str(streams[0]['logStreamName'])
                     .replace('/', '$252F')
                     .replace('[$LATEST]', '$255B$2524LATEST$255D'))

    # produce a valid URL to get to that log stream
    region = boto3.session.Session().region_name
    log_group_escaped = log_group_name.replace('/', '$252F')
    cw_url = f'https://console.aws.amazon.com/cloudwatch/home?region={region}#logsV2:log-groups/log-group/{log_group_escaped}'
    time_filter = f'$26start$3D{start_ms - 10000}$26end$3D{end_ms + 40000}'
    full_cw_url = f'{cw_url}/log-events/{latest_stream}$3FfilterPattern$3DPrediction+{time_filter}'
    print('Updated log stream is ready.')
    return full_cw_url

In [None]:
%store -r
def put_to_stream(payload, timestamp):
    """Write one JSON-encoded payload to the Kinesis stream ``STREAM_NAME``.

    Args:
        payload: JSON-serialisable object to send as the record data.
        timestamp: Accepted for the caller's convenience; not used here.

    Returns:
        bool: True when Kinesis answers with HTTP 200, otherwise False
        (the full response is printed in that case).
    """
    serialized = json.dumps(payload)
    print('Sending a new payload: ')
    result = kinesis_client.put_record(StreamName=STREAM_NAME,
                                       Data=serialized,
                                       PartitionKey='shard1')

    http_status = result['ResponseMetadata']['HTTPStatusCode']
    if http_status == 200:
        return True

    # Anything other than 200 is reported and signalled to the caller.
    print("ERROR: Kinesis put_record failed: \n{}".format(json.dumps(result)))
    return False

### Send some payloads, and see the results (Testing purpose)

In [None]:
import io

# Download the transformed CSV from S3 and parse it into a DataFrame.
s3 = boto3.resource('s3')
obj = s3.Object(s3_bucket, '/'.join([s3_transformed_data_prefix, s3_transformed_data_filename]))
body = obj.get()['Body'].read()
df = pd.read_csv(io.BytesIO(body))

# Show the expected target of the first six rows for later comparison.
print("Expected Outputs")
for _, sample in df.head(6).iterrows():
    print(sample['target'])

# The expected output must not be part of the payloads, so drop it first.
df = df.drop(columns=['target'])

# One dict per row (column name -> value) for the same first six rows.
payloads = [
    {column: sample[column] for column in sample.keys()}
    for _, sample in df.head(6).iterrows()
]

Expected Outputs
2.23
2.63
2.29
1.97
1.12
1.26


In [None]:
import time

# Remember when the test started and ended (epoch seconds) so the log lookup
# that follows can be narrowed to this window.
start_test_time = time.time()

failed_payloads = 0
for payload in payloads:
    # put_to_stream returns False when the write was not acknowledged with
    # HTTP 200; count those instead of silently discarding the status.
    if not put_to_stream(payload, time.time()):
        failed_payloads += 1

end_test_time = time.time()

if failed_payloads:
    print(f'{failed_payloads} of {len(payloads)} payloads could not be written to the Kinesis stream')

Sending a new payload: 
Sending a new payload: 
Sending a new payload: 
Sending a new payload: 
Sending a new payload: 
Sending a new payload: 


### Results of above test
The following cell dynamically creates a link to view the results. It waits until the CloudWatch log stream contains the output events for the payloads we just sent. The URL also narrows the view to the output from the specific timeframe of the test.

In [None]:
# `display` must come from IPython.display: importing it from
# IPython.core.display is deprecated (since IPython 7.14).
from IPython.display import display, HTML

# Waits until the Lambda's log stream has caught up with the test window.
full_cw_url = get_cloudwatch_logs_url(start_test_time, end_test_time)
# target="_blank" (with the underscore) opens the console link in a new tab.
display(HTML(f'<b>Review results in this log stream <a target="_blank" href="{full_cw_url}">Lambda fraud detection results</a></b>'))

waiting for updated log stream...
Updated log stream is ready.


### (OPTIONAL) Feed a stream of transactions from an API

In [None]:
import time

# This code needs to run continuously, so we will put it in a pipeline
def callAPI():
    """Continuously pull data from the API and forward it to the Kinesis stream.

    Runs forever. Each iteration fetches data with ``get_data_from_api``,
    transforms it with ``get_trasformed_data``, sends the result with
    ``put_to_stream`` and then sleeps for 10 seconds. A failed write is
    reported on stdout and the loop carries on.

    Note:
        ``get_data_from_api`` and ``get_trasformed_data`` are not visible in
        this part of the notebook; they must be defined before this is called.
    """
    while True:
        api_data = get_data_from_api()
        # Store the result under the same name that is passed on below; the
        # previous misspelt variable made the next line raise NameError.
        api_transformed_data = get_trasformed_data(api_data)
        status = put_to_stream(api_transformed_data, time.time())
        if not status:
            print('Error found during write to Kinesis Stream')
        time.sleep(10)
  


In [None]:
#callAPI()