In [None]:
# Example notebook to predict the next EmailEngagement for a user. This example assumes you have set up Salesforce CDP and connected it to Salesforce Marketing Cloud.
# Prerequisite 1: AWS SageMaker Jupyter notebook with the conda_mxnet_p38 kernel
# Prerequisite 2: Create the connected app in Salesforce by following this article: https://developer.salesforce.com/docs/atlas.en-us.c360a_api.meta/c360a_api/c360a_api_python_connector.htm
# Prerequisite 3: The example below assumes your Salesforce CDP is connected to Marketing Cloud and you have access to the EmailEngagement object
# Prerequisite 4: Upload inference.py to your notebook folder from https://github.com/salesforce-marketingcloud/cdp-calculated-insights/blob/main/inference.py
# Prerequisite 5: In Salesforce Genie, you have created the following Calculated Insight with the name "email_engagement_insight__cio":

#SELECT COUNT(ssot__EmailEngagement__dlm.ssot__Id__c) AS engagement_count__c,
#ssot__EmailEngagement__dlm.ssot__IndividualId__c AS customer_id__c, 
#ssot__EmailEngagement__dlm.ssot__EngagementChannelActionId__c AS 
#engagement_type__c 
#FROM ssot__EmailEngagement__dlm 
#GROUP BY customer_id__c, engagement_type__c


In [None]:
!pip install salesforce-cdp-connector

In [None]:
# Get sklearn version and make sure that image version supports it
import sklearn
print(sklearn.__version__)
!pip install sagemaker --upgrade
# https://docs.aws.amazon.com/sagemaker/latest/dg/sklearn.html

In [None]:
from salesforcecdpconnector.connection import SalesforceCDPConnection
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import sagemaker
import joblib
import boto3
from botocore.exceptions import ClientError
import json
import os
#import pickle
#import tarfile
#from sagemaker.estimator import Estimator
import time
from time import gmtime, strftime
import subprocess


In [None]:
# Notebook configuration, AWS clients, and the Salesforce CDP connection.

# Please update the IAM role with your own SageMaker execution role.
role = "arn:aws:iam::240852588578:role/service-role/AmazonSageMaker-ExecutionRole-20221014T121197"
sagemaker_session = sagemaker.Session()
# Bucket for model artifacts
bucket = sagemaker_session.default_bucket()
print(bucket)

# Prefix for the S3 key of the model archive and for the SageMaker resource names.
project_name = "genie_email_rf_demo"
end_point_prefix = "genie-email-rf-demo"

# Name of the AWS Secrets Manager secret holding the Salesforce credentials.
secret_name = "genie/jdbc"

session = boto3.session.Session()
# Use the region configured for this boto3 session. session.region_name is
# None when no region is configured, so fall back to "us-east-1" instead of
# passing None to the clients below.
region_name = session.region_name or "us-east-1"

s3 = session.resource('s3')

sagemaker_client = session.client(
    service_name='sagemaker',
    region_name=region_name
)

runtime_client = session.client(
    service_name='sagemaker-runtime',
    region_name=region_name
)

# Optional - in case you want to store the secrets in AWS Secrets Manager
secretsmanager_client = session.client(
    service_name='secretsmanager',
    region_name=region_name
)

# A botocore ClientError raised here propagates unchanged.
# For a list of exceptions thrown, see
# https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
get_secret_value_response = secretsmanager_client.get_secret_value(
    SecretId=secret_name
)

# SecretString is parsed as JSON; the keys AppUrl, user, password, clientId
# and clientSecret are read from it below.
cred = json.loads(get_secret_value_response['SecretString'])

conn = SalesforceCDPConnection(
    cred['AppUrl'],
    cred['user'],
    cred['password'],
    cred['clientId'],
    cred['clientSecret']
)

In [None]:
# Optional alternative to AWS Secrets Manager: build the Salesforce CDP
# connection from environment variables so that no credentials are stored in
# the notebook itself.
#
# Required:  SALESFORCE_USERNAME, SALESFORCE_PASSWORD,
#            SALESFORCE_CLIENT_ID, SALESFORCE_CLIENT_SECRET
# Optional:  SALESFORCE_LOGIN_URL (defaults to https://login.salesforce.com/)
#
# When any required variable is unset this cell leaves `conn` untouched, so a
# connection created earlier in the notebook is not replaced by one that
# cannot authenticate.
_sf_required_env = (
    "SALESFORCE_USERNAME",
    "SALESFORCE_PASSWORD",
    "SALESFORCE_CLIENT_ID",
    "SALESFORCE_CLIENT_SECRET",
)
_sf_missing_env = [name for name in _sf_required_env if not os.environ.get(name)]

if _sf_missing_env:
    print(
        "Skipping environment-based Salesforce CDP connection; unset variables: "
        + ", ".join(_sf_missing_env)
    )
else:
    conn = SalesforceCDPConnection(
        os.environ.get("SALESFORCE_LOGIN_URL", "https://login.salesforce.com/"),
        os.environ["SALESFORCE_USERNAME"],
        os.environ["SALESFORCE_PASSWORD"],
        os.environ["SALESFORCE_CLIENT_ID"],
        os.environ["SALESFORCE_CLIENT_SECRET"],
    )

In [None]:
# Load the engagement type and engagement count columns from the
# email_engagement_insight__cio Calculated Insight into a DataFrame.
query = "SELECT engagement_type__c, engagement_count__c FROM email_engagement_insight__cio limit 900000"
curs = conn.cursor()
curs.execute(query)

# Name the columns in the constructor rather than assigning df.columns
# afterwards: the assignment raises a length-mismatch error when the query
# returns no rows, which hides the real problem.
# NOTE(review): assumes fetchall() returns positional rows in SELECT order,
# as the original positional renaming implied - confirm against the connector.
df = pd.DataFrame(
    curs.fetchall(),
    columns=['engagement_type__c', 'engagement_count__c'],
)
if df.empty:
    raise ValueError("Query returned no rows from email_engagement_insight__cio")

df.head(3)

In [None]:
# Split the data into features and label, then hold out 20% for evaluation.
# X is every column except the label; y is the engagement type to predict.
X = df.drop(["engagement_type__c"], axis=1)
y = df["engagement_type__c"]

# A fixed random_state makes the split identical on every run, so results are
# reproducible after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Train a random forest on the training split and report its accuracy on the
# held-out split.
# A fixed random_state makes the fitted forest the same on every run given the
# same training data; without it the printed accuracy varies between runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(accuracy)


In [None]:
# Save model artifacts to S3

# Serialize the trained model to the notebook folder.
with open('model.joblib', 'wb') as f:
    joblib.dump(model, f)

# Build tar file with model data + inference code.
# The exit status is checked so that a failed archive step (for example when
# inference.py has not been uploaded to the notebook folder) stops the
# notebook here instead of uploading a missing or stale model.tar.gz.
bashCommand = "tar -cvpzf model.tar.gz model.joblib inference.py"
process = subprocess.run(bashCommand.split(), capture_output=True)
output, error = process.stdout, process.stderr
if process.returncode != 0:
    raise RuntimeError(
        "tar failed with exit code " + str(process.returncode) + ": "
        + error.decode(errors="replace").strip()
    )

# Upload tar.gz to bucket; model_artifacts is the S3 URI of the uploaded object.
model_key = project_name + '/model.tar.gz'
model_artifacts = f"s3://{bucket}/{model_key}"
response = s3.meta.client.upload_file('model.tar.gz', bucket, model_key)


In [None]:
# Look up the URI of the SageMaker scikit-learn container image for this
# region, framework version and instance type.
_sklearn_image_args = {
    "framework": "sklearn",
    "region": region_name,
    "version": "1.0-1",
    "py_version": "py3",
    "instance_type": "ml.m5.xlarge",
}
image_uri = sagemaker.image_uris.retrieve(**_sklearn_image_args)

In [None]:
# Step 1: Model Creation
# The model name is the endpoint prefix followed by a UTC timestamp.
_model_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = end_point_prefix + _model_timestamp
print(f"Model name: {model_name}")

# Single-model container: the sklearn image plus the uploaded archive, which
# is passed both as the model data and as the submit directory for inference.py.
_model_container = {
    "Image": image_uri,
    "Mode": "SingleModel",
    "ModelDataUrl": model_artifacts,
    "Environment": {
        'SAGEMAKER_SUBMIT_DIRECTORY': model_artifacts,
        'SAGEMAKER_PROGRAM': 'inference.py',
    },
}

create_model_response = sagemaker_client.create_model(
    ModelName=model_name,
    Containers=[_model_container],
    ExecutionRoleArn=role,
)
print(f"Model Arn: {create_model_response['ModelArn']}")


In [None]:
# Step 2: EPC (endpoint configuration) Creation
# The configuration name is the endpoint prefix followed by a UTC timestamp.
_epc_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
sklearn_epc_name = end_point_prefix + _epc_timestamp

# One production variant serving the model created in Step 1 on a single
# ml.c5.large instance.
_sklearn_variant = {
    "VariantName": "sklearnvariant",
    "ModelName": model_name,
    "InstanceType": "ml.c5.large",
    "InitialInstanceCount": 1,
}

endpoint_config_response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=sklearn_epc_name,
    ProductionVariants=[_sklearn_variant],
)
print(f"Endpoint Configuration Arn: {endpoint_config_response['EndpointConfigArn']}")


In [None]:
# Step 3: SageMaker Inference EndPoint Creation
endpoint_name = end_point_prefix + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
create_endpoint_response = sagemaker_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=sklearn_epc_name,
)
print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])


# Monitor creation: poll every 15 seconds while the endpoint is "Creating".
# The sleep comes before each re-poll, so the loop exits as soon as a poll
# reports a different status instead of waiting one extra interval.
describe_endpoint_response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
while describe_endpoint_response["EndpointStatus"] == "Creating":
    print(describe_endpoint_response["EndpointStatus"])
    time.sleep(15)
    describe_endpoint_response = sagemaker_client.describe_endpoint(EndpointName=endpoint_name)
print(describe_endpoint_response["EndpointStatus"])
print(describe_endpoint_response)

# Stop here if the endpoint did not come up, rather than letting the
# invocation cell fail later with a less specific error.
if describe_endpoint_response["EndpointStatus"] != "InService":
    raise RuntimeError(
        "Endpoint " + endpoint_name + " finished with status "
        + describe_endpoint_response["EndpointStatus"] + ": "
        + str(describe_endpoint_response.get("FailureReason", "no failure reason reported"))
    )


In [None]:
# Test SageMaker Inference EndPoint
# Send one sample row as JSON and print the "Output" field of the JSON reply.
endpoint_name = describe_endpoint_response['EndpointName']
content_type = "application/json"

request_body = {"Input": [['2.000000000000000000']]}
# payload is the serialized request; data is its parsed-back copy.
payload = json.dumps(request_body)
data = json.loads(payload)

response = runtime_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=payload,
)
response_text = response['Body'].read().decode()
result = json.loads(response_text)['Output']
print(result)


In [None]:
#Create API Gateway 
# Create an API Gateway to expose the SageMaker inference endpoint through a Lambda function. The Lambda function can also perform any required preprocessing.
# See https://www.youtube.com/watch?v=-iU36P8hizs for a walkthrough. To use pandas in the Lambda function, a Lambda layer is needed.

In [None]:
# Next, we will use Salesforce CDP to consume the inference endpoints.