# Fraud Detector - Model Training and Detector Creation

---

## Setup

Create S3 bucket and upload related training data for model training.

## Implementation

Create the resources, model, and detector with the boto3 Python SDK.

The data set is [project_1_data](./project_1_newaccounts_100k.csv).

#### Environment Setup

In [None]:
!pip install -U pip
!pip install boto3

In [None]:
!pip show boto3

In [None]:
# Paste the ARN of the IAM role to use for data access here before running the
# rest of the notebook. The value is passed as 'dataAccessRoleArn' when the
# model version is created from the training data in S3.
# NOTE(review): presumably the role must allow reading the training bucket - confirm.
DATA_ACCESS_ROLE_ARN = ""

### S3 bucket setup

In [None]:
import boto3
from datetime import datetime
import time

In [None]:
# Derive a bucket name from the caller's account id and the session region,
# then create the bucket if this account does not own it yet.
session = boto3.Session()
region = session.region_name
if region is None:
    # Without a region the bucket name and the LocationConstraint are both
    # meaningless; fail here with a clear message instead of later on upload.
    raise RuntimeError(
        "No AWS region is configured for the default boto3 session; "
        "set one (e.g. AWS_DEFAULT_REGION) before running this cell."
    )
account_id = session.client('sts').get_caller_identity().get('Account')
bucket_name = f"{account_id}-fraud-detector-lab-{region}"
s3 = boto3.client('s3')
try:
    if region == "us-east-1":
        # us-east-1 is the default location and is created without a
        # LocationConstraint.
        s3.create_bucket(Bucket=bucket_name)
    else:
        s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={'LocationConstraint': region},
        )
except s3.exceptions.BucketAlreadyOwnedByYou:
    # Re-running the notebook: the bucket from a previous run is reused.
    # Any other failure (credentials, permissions, name taken by another
    # account) is left to propagate so it is not silently hidden.
    print(f"Bucket {bucket_name} already exists and is owned by this account; reusing it.")


In [None]:
# Copy the training (100k) and evaluation (5k) CSV files from the working
# directory into the bucket, keeping the file name as the object key.
for local_path, object_key in (
    ('./project_1_newaccounts_100k.csv', 'project_1_newaccounts_100k.csv'),
    ('./project_1_newaccounts_5k.csv', 'project_1_newaccounts_5k.csv'),
):
    s3.upload_file(local_path, bucket_name, object_key)

In [None]:
# S3 URI of the 100k training file uploaded above; displayed for a quick visual check.
train_data_uri = f"s3://{bucket_name}/project_1_newaccounts_100k.csv"
train_data_uri

In [None]:
# S3 URI and bare file name of the 5k evaluation file; both are saved with
# %store at the end of this notebook for the prediction notebook.
evaluation_data_uri = f"s3://{bucket_name}/project_1_newaccounts_5k.csv"
evaluation_data_file = "project_1_newaccounts_5k.csv"

### Create Fraud Detector Resources (variables, labels, entity type and event type)

In [None]:
# Amazon Fraud Detector client used by every cell below.
fraud_detector = boto3.client("frauddetector")

# Date stamp (YYYYMMDD) available for suffixing file names.
suffix = f"{datetime.now():%Y%m%d}"

#### Labels Creation
A label classifies an event as fraudulent or legitimate. Once you have created a label, add the label to the event type by calling the PutEventType API.

In [None]:
# Names of the labels that already exist, so existing ones are not re-put.
response = fraud_detector.get_labels()
existing_label_names = [item['name'] for item in response['labels']]

# Label values: '1' marks fraudulent events, '0' marks legitimate ones.
LABEL_FRAUD = '1'
LABEL_LEGIT = '0'

# Create whichever of the two labels is missing, fraud first.
for label_name, label_description in (
    (LABEL_FRAUD, 'label for fraud events'),
    (LABEL_LEGIT, 'label for legit events'),
):
    if label_name not in existing_label_names:
        fraud_detector.put_label(name=label_name, description=label_description)


In [None]:
# Fetch the labels again and display the raw response to see what exists now.
response = fraud_detector.get_labels()
response

#### Variables creation
Variables represent data elements that you want to use in a fraud prediction, such as data from the event that is being evaluated or risk score outputs from Amazon Fraud Detector models or Amazon SageMaker models.



In [None]:
# Fetch the variables currently defined and display the raw response.
# NOTE(review): a single call is made and no nextToken is followed - presumably
# fine while the account holds few variables; confirm if results can span pages.
response = fraud_detector.get_variables()
response

In [None]:
# Names taken from the get_variables response; the creation cell below uses
# this list to skip variables that already exist.
existing_variable_names = [variable['name'] for variable in response['variables']]
existing_variable_names

In [None]:
# Variable names (these match the column names expected in the event data and
# are reused below for the event type and the model training schema).
VARIABLE_IP_ADDRESS = 'ip_address'
VARIABLE_EMAIL_ADDRESS = 'email_address'
VARIABLE_PHONE_NUMBER = 'phone_number'
VARIABLE_USER_AGENT = 'user_agent'
VARIABLE_CUSTOMER_STATE = 'customer_state'
VARIABLE_CUSTOMER_POSTAL = 'customer_postal'
VARIABLE_CUSTOMER_ADDRESS = 'customer_address'
VARIABLE_CUSTOMER_CITY = 'customer_city'

# (variable name, Fraud Detector variable type), in creation order.
# NOTE(review): customer_address is typed SHIPPING_ADDRESS_L1 while the other
# customer_* variables use BILLING_* types - kept as is; confirm it is intended.
variable_specs = (
    (VARIABLE_IP_ADDRESS, 'IP_ADDRESS'),
    (VARIABLE_EMAIL_ADDRESS, 'EMAIL_ADDRESS'),
    (VARIABLE_PHONE_NUMBER, 'PHONE_NUMBER'),
    (VARIABLE_USER_AGENT, 'USERAGENT'),
    (VARIABLE_CUSTOMER_STATE, 'BILLING_STATE'),
    (VARIABLE_CUSTOMER_POSTAL, 'BILLING_ZIP'),
    (VARIABLE_CUSTOMER_ADDRESS, 'SHIPPING_ADDRESS_L1'),
    (VARIABLE_CUSTOMER_CITY, 'BILLING_CITY'),
)

# Value used when an event arrives without the variable. CreateVariable
# documents defaultValue as a required parameter, so it is always supplied.
VARIABLE_DEFAULT_VALUE = '<unknown>'

# Create every variable that is not already defined; all of them are STRING
# values supplied with the event itself.
for variable_name, variable_type in variable_specs:
    if variable_name in existing_variable_names:
        continue
    fraud_detector.create_variable(
        name=variable_name,
        variableType=variable_type,
        dataType='STRING',
        dataSource='EVENT',
        defaultValue=VARIABLE_DEFAULT_VALUE,
    )

#### Entity type

An entity represents who is performing the event. As part of a fraud prediction, you can pass the entity ID to indicate the specific entity who performed the event.

An entity type classifies the entity. Example classifications include customer, merchant, or account.

In [None]:
# Fetch the entity types currently defined and display the raw response.
# NOTE(review): single call, no nextToken handling - confirm one page is enough.
response = fraud_detector.get_entity_types()
response

In [None]:
# Names taken from the get_entity_types response; used below to skip creating
# an entity type that already exists.
existing_entity_type_names = [entity_type['name'] for entity_type in response['entityTypes']]
existing_entity_type_names

In [None]:
# Entity type attached to the event type; created only on first run.
ENTITY_TYPE_NAME = "account"

entity_type_exists = ENTITY_TYPE_NAME in existing_entity_type_names
if not entity_type_exists:
    fraud_detector.put_entity_type(
        name=ENTITY_TYPE_NAME,
        description='Sample account entity type',
    )


#### Event Type

With Amazon Fraud Detector, you generate fraud predictions for events. An event type defines the structure for an individual event sent to Amazon Fraud Detector. Once defined, you can build models and detectors that evaluate the risk for specific event types.

In [None]:
# Fetch the event types currently defined and display the raw response.
# NOTE(review): single call, no nextToken handling - confirm one page is enough.
response = fraud_detector.get_event_types()
response

In [None]:
# Names taken from the get_event_types response; used below to skip creating
# an event type that already exists.
existing_event_type_names = [event_type['name'] for event_type in response['eventTypes']]
existing_event_type_names

In [None]:
# Event type tying together the variables, labels and entity type defined
# above. It is only put when no event type of this name exists yet.
EVENT_TYPE_NAME = "my_new_account_registration"

event_type_variables = [
    VARIABLE_IP_ADDRESS,
    VARIABLE_EMAIL_ADDRESS,
    VARIABLE_PHONE_NUMBER,
    VARIABLE_USER_AGENT,
    VARIABLE_CUSTOMER_STATE,
    VARIABLE_CUSTOMER_POSTAL,
    VARIABLE_CUSTOMER_ADDRESS,
    VARIABLE_CUSTOMER_CITY,
]
event_type_labels = [LABEL_LEGIT, LABEL_FRAUD]
event_type_entities = [ENTITY_TYPE_NAME]

event_type_missing = EVENT_TYPE_NAME not in existing_event_type_names
if event_type_missing:
    fraud_detector.put_event_type(
        name=EVENT_TYPE_NAME,
        description='Sample event type',
        eventVariables=event_type_variables,
        labels=event_type_labels,
        entityTypes=event_type_entities,
    )

#### Model
A model version is the term Amazon Fraud Detector uses for a trained fraud detection machine learning model. All steps in the model training process are fully automated including data validation, data transformation, feature engineering, algorithm selection, training, and model optimization. Creating a model requires selecting the model type and specifying the model version configuration. The model type specifies the algorithms and transformations used to build the model.

In [None]:
# Fetch the models currently defined and display the raw response.
# NOTE(review): single call, no nextToken handling - confirm one page is enough.
response = fraud_detector.get_models()
response

In [None]:
# Model ids taken from the get_models response; used below to skip creating a
# model that already exists.
existing_model_ids = [ model['modelId'] for model in response['models'] ]
existing_model_ids

In [None]:
# The model is only a container; trained model versions are added to it in the
# next cell. It is created once and reused on later runs.
MODEL_ID = "sample_project_x"

model_exists = MODEL_ID in existing_model_ids
if not model_exists:
    fraud_detector.create_model(
        modelId=MODEL_ID,
        eventTypeName=EVENT_TYPE_NAME,
        modelType='ONLINE_FRAUD_INSIGHTS',
    )

In [None]:
# Creating a model version starts model training.
# PLEASE DON'T RUN THIS CELL MULTIPLE TIMES UNLESS IT'S NECESSARY!
# Training takes approx. 50-60 minutes with the existing 100k sample data.

# Variables the model is trained on.
# NOTE(review): VARIABLE_CUSTOMER_ADDRESS is part of the event type but is not
# listed here - presumably deliberate; confirm.
model_variables = [
    VARIABLE_IP_ADDRESS,
    VARIABLE_EMAIL_ADDRESS,
    VARIABLE_PHONE_NUMBER,
    VARIABLE_USER_AGENT,
    VARIABLE_CUSTOMER_STATE,
    VARIABLE_CUSTOMER_POSTAL,
    VARIABLE_CUSTOMER_CITY,
]

# Maps the label values found in the data onto the FRAUD / LEGIT classes.
label_schema = {
    'labelMapper': {
        'FRAUD': [LABEL_FRAUD],
        'LEGIT': [LABEL_LEGIT],
    }
}

# Training data is read from S3 using the configured data access role.
external_events = {
    'dataLocation': train_data_uri,
    'dataAccessRoleArn': DATA_ACCESS_ROLE_ARN,
}

response = fraud_detector.create_model_version(
    modelId=MODEL_ID,
    modelType='ONLINE_FRAUD_INSIGHTS',
    trainingDataSource='EXTERNAL_EVENTS',
    trainingDataSchema={
        'modelVariables': model_variables,
        'labelSchema': label_schema,
    },
    externalEventsDetail=external_events,
)

# Version number of the model version just created; used by the cells below.
modelVersionNumber = response['modelVersionNumber']

In [None]:
def check_model_version_status(status='TRAINING_COMPLETE', poll_seconds=60,
                               failure_statuses=('ERROR', 'TRAINING_CANCELLED')):
    """Block until the model version reaches ``status``.

    Polls ``get_model_version`` for ``MODEL_ID`` / ``modelVersionNumber`` and
    prints the status seen on every poll.

    Args:
        status: Status to wait for (for example 'TRAINING_COMPLETE' or 'ACTIVE').
        poll_seconds: Seconds to sleep between polls.
        failure_statuses: Statuses treated as terminal failures. Reaching one
            of them (when it is not the awaited ``status``) stops the wait.

    Raises:
        RuntimeError: If the model version reports one of ``failure_statuses``
            before reaching ``status``. Without this check the loop would
            never end once training fails or is cancelled.
    """
    print(f"to check model version status till [{status}]")
    while True:
        response = fraud_detector.get_model_version(
            modelId = MODEL_ID,
            modelType = 'ONLINE_FRAUD_INSIGHTS',
            modelVersionNumber = modelVersionNumber
        )
        current_status = response['status']
        print(f"Model version {MODEL_ID}:{modelVersionNumber} status check: {current_status}")
        # Check the awaited status first so a caller may wait for any status.
        if current_status == status:
            return
        if current_status in failure_statuses:
            raise RuntimeError(
                f"Model version {MODEL_ID}:{modelVersionNumber} reached status "
                f"[{current_status}] while waiting for [{status}]"
            )
        time.sleep(poll_seconds)

In [None]:
%%time

# Poll until the model version reports TRAINING_COMPLETE (the helper's default
# target); %%time reports how long the wait took.
check_model_version_status()

In [None]:
# Request activation (deployment) of the trained model version.
model_activation_request = {
    'modelId': MODEL_ID,
    'modelType': 'ONLINE_FRAUD_INSIGHTS',
    'modelVersionNumber': modelVersionNumber,
    'status': 'ACTIVE',
}
fraud_detector.update_model_version_status(**model_activation_request)

In [None]:
%%time

# Poll until the model version reports ACTIVE; %%time reports how long the wait took.
check_model_version_status('ACTIVE')

#### Detector 
A detector contains the detection logic, such as the models and rules, for a particular event that you want to evaluate for fraud. Each detector can evaluate one event type.

A detector can have multiple versions, with each version having a status of DRAFT, ACTIVE, or INACTIVE. Only one detector version can be in ACTIVE status at a time.

In [None]:
# Fetch the detectors currently defined and display the raw response.
# NOTE(review): single call, no nextToken handling - confirm one page is enough.
response = fraud_detector.get_detectors()
response

In [None]:
# Detector ids taken from the get_detectors response; used below to skip
# creating a detector that already exists.
existing_detector_ids = [detector['detectorId'] for detector in response['detectors']]
existing_detector_ids

In [None]:
# Detector that evaluates events of EVENT_TYPE_NAME; created only when no
# detector with this id exists yet.
DETECTOR_ID = "my_new_account_fraud_detector"
if DETECTOR_ID not in existing_detector_ids:
    # The client is named fraud_detector; the former 'fraudDetector' spelling
    # was an undefined name and raised NameError on first run.
    fraud_detector.put_detector(
        detectorId = DETECTOR_ID,
        eventTypeName = EVENT_TYPE_NAME
    )

##### Create Outcome
An outcome is the result of a fraud prediction. Create an outcome for each possible fraud prediction result. For example, you may want outcomes to represent risk levels (high_risk, medium_risk, and low_risk) or actions (approve, review). Once created, you can add one or more outcomes to a rule. As part of the GetEventPrediction response, Amazon Fraud Detector will return the defined outcomes for any matched rule.

In [None]:
# Fetch the outcomes currently defined and display the raw response.
# NOTE(review): single call, no nextToken handling - confirm one page is enough.
response = fraud_detector.get_outcomes()
response

In [None]:
# Names taken from the get_outcomes response; used below to skip creating
# outcomes that already exist.
existing_outcome_names = [ outcome['name'] for outcome in response['outcomes'] ]
existing_outcome_names

In [None]:
# Outcome names, referenced again when the rules are created.
OUTCOME_BLOCK = 'block'
OUTCOME_REVIEW = 'review'
OUTCOME_FRICTION = 'friction'
OUTCOME_APPROVE = 'approve'

# (outcome name, description), in creation order.
outcome_specs = (
    (OUTCOME_BLOCK, 'this outcome blocks the event'),
    (OUTCOME_REVIEW, 'this outcome sidelines event for review'),
    (OUTCOME_FRICTION, 'this outcome frictions the event'),
    (OUTCOME_APPROVE, 'this outcome approves the event'),
)

# Put each outcome that does not exist yet.
for outcome_name, outcome_description in outcome_specs:
    if outcome_name not in existing_outcome_names:
        fraud_detector.put_outcome(
            name=outcome_name,
            description=outcome_description,
        )

##### Create Rules
A rule is a condition that tells Amazon Fraud Detector how to interpret variable values during a fraud prediction. A rule consists of one or more variables, a logic expression, and one or more outcomes. A detector must have at least one associated rule. Rules in a detector are evaluated as part of a fraud prediction.

In [None]:
# Fetch the rules defined on this detector and display the raw response.
# NOTE(review): single call, no nextToken handling - confirm one page is enough.
response = fraud_detector.get_rules(detectorId = DETECTOR_ID)
response

In [None]:
# Rule details returned by get_rules identify a rule by 'ruleId' (they have no
# 'name' field), so the ids are collected here. The list keeps its original
# variable name because the rule-creation cell checks against it.
existing_rule_names = [ rule['ruleId'] for rule in response['ruleDetails'] ]
existing_rule_names

In [None]:
# Rule ids, referenced again when the detector version is created.
RULE_ID_BLOCK = 'rule_block'
RULE_ID_REVIEW = 'rule_review'
RULE_ID_FRICTION = 'rule_friction'
RULE_ID_APPROVE = 'rule_approve'

# Each expression tests the model insight score, which is referenced as
# "${model_id}_insightscore". The four score bands do not overlap:
#   >= 950 block, 855-949 review, 600-854 friction, < 600 approve.
# (rule id, expression, outcome), in creation order.
rule_specs = (
    (RULE_ID_BLOCK,
     f"${MODEL_ID}_insightscore >= 950",
     OUTCOME_BLOCK),
    (RULE_ID_REVIEW,
     f"${MODEL_ID}_insightscore >= 855 and ${MODEL_ID}_insightscore < 950",
     OUTCOME_REVIEW),
    (RULE_ID_FRICTION,
     f"${MODEL_ID}_insightscore >= 600 and ${MODEL_ID}_insightscore < 855",
     OUTCOME_FRICTION),
    (RULE_ID_APPROVE,
     f"${MODEL_ID}_insightscore < 600",
     OUTCOME_APPROVE),
)

# Create each rule that is not already present on the detector.
for spec_rule_id, spec_expression, spec_outcome in rule_specs:
    if spec_rule_id not in existing_rule_names:
        fraud_detector.create_rule(
            ruleId=spec_rule_id,
            detectorId=DETECTOR_ID,
            expression=spec_expression,
            language='DETECTORPL',
            outcomes=[spec_outcome],
        )

##### Create Detector Version
A detector version defines the specific models and rules that will be run as part of the GetEventPrediction request. You can add any of the rules defined within a detector to the detector version. You can also add any model trained on the evaluated event type.

Each detector version has a status of DRAFT, ACTIVE, or INACTIVE. Only one detector version can be in ACTIVE status at a time. During the GetEventPrediction request, Amazon Fraud Detector will use the ACTIVE detector if no DetectorVersion is specified.

In [None]:
# Every rule is referenced at version '1' (the default rule version).
ruleVersion = '1'

# Rule execution mode passed to the detector version (FIRST_MATCHED is the default).
ruleExecutionMode = 'FIRST_MATCHED'

# Rules attached to the detector version, listed from block to approve.
detector_rules = [
    {
        'detectorId': DETECTOR_ID,
        'ruleId': attached_rule_id,
        'ruleVersion': ruleVersion,
    }
    for attached_rule_id in (
        RULE_ID_BLOCK,
        RULE_ID_REVIEW,
        RULE_ID_FRICTION,
        RULE_ID_APPROVE,
    )
]

# The trained model version whose score the rules evaluate.
detector_model_versions = [
    {
        'modelId': MODEL_ID,
        'modelType': 'ONLINE_FRAUD_INSIGHTS',
        'modelVersionNumber': modelVersionNumber,
    }
]

response = fraud_detector.create_detector_version(
    detectorId=DETECTOR_ID,
    rules=detector_rules,
    modelVersions=detector_model_versions,
    ruleExecutionMode=ruleExecutionMode,
)

# Id of the detector version just created; activated in the next cell.
detectorVersionId = response['detectorVersionId']

In [None]:
# Switch the detector version created above to ACTIVE.
detector_activation_request = {
    'detectorId': DETECTOR_ID,
    'detectorVersionId': detectorVersionId,
    'status': 'ACTIVE',
}
fraud_detector.update_detector_version_status(**detector_activation_request)

#### Store variable for prediction notebook's reference

In [None]:
# Persist the names and ids created in this notebook with the %store magic so
# the prediction notebook can reference them.
%store ENTITY_TYPE_NAME EVENT_TYPE_NAME 
%store DETECTOR_ID detectorVersionId 
%store MODEL_ID modelVersionNumber
%store bucket_name evaluation_data_uri evaluation_data_file

In [None]:
# Print the stored values, one per line, in the order they were stored.
stored_values = (
    ENTITY_TYPE_NAME,
    EVENT_TYPE_NAME,
    DETECTOR_ID,
    detectorVersionId,
    MODEL_ID,
    modelVersionNumber,
    bucket_name,
    evaluation_data_uri,
    evaluation_data_file,
)
print(*stored_values, sep="\n")

##### Variables to be stored
account
my_new_account_registration
my_new_account_fraud_detector
1
sample_project_x
1.0
593380422482-fraud-detector-lab-ap-southeast-2
s3://593380422482-fraud-detector-lab-ap-southeast-2/project_1_newaccounts_5k.csv
project_1_newaccounts_5k.csv