In [None]:
#default_exp live.predict

In [None]:
!nbdev_build_lib

Converted 00_jobs.ipynb.
Converted 01-create-sample-data.ipynb.
Converted 02-preprocess.ipynb.
Converted 03-feature-calc.ipynb.
Converted 04-training-data.ipynb.
Converted 05-train-model.ipynb.
Converted 06-submit-training-pipeline.ipynb.
Converted 07-prepare-db.ipynb.
Converted 08-predict.ipynb.
Converted 99-tools.ipynb.
Converted index.ipynb.


In [None]:
#export
from typing import Dict, Optional
from datetime import datetime, timezone, timedelta
import os
import pickle
import aioredis
import asyncio

import pandas as pd
import xgboost as xgb
from dataclasses import dataclass
from hopeit.dataobjects import dataobject
from hopeit.server.serialization import serialize, Serialization, deserialize
from hopeit.server.compression import Compression
from hopeit.app.context import EventContext, PostprocessHook
from hopeit.app.events import Spawn, SHUFFLE
from hopeit.app.api import event_api
from hopeit.app.logger import app_logger

In [None]:
#export
# Order payload for the `live.predict` event; it is declared as the request
# payload in `__api__` below. Field notes are limited to what this module shows.
@dataobject
@dataclass
class OrderInfo:
    order_id: str
    customer_id: str  # used as a lookup key into the feature database
    order_date: datetime
    email: str  # used as a lookup key into the feature database
    ip_addr: str
    # NOTE(review): the model's feature list names the order value `order_amount`,
    # while this payload field is called `amount` -- confirm the mapping downstream.
    amount: float
    # location_lat / location_long are not part of the `features` list in this module.
    location_lat: float
    location_long: float


In [None]:
#export
# Names of the step functions defined in this module that make up the event
# (presumably executed by hopeit in this order -- confirm against engine docs).
__steps__ = ['lookup_features', 'predict']

# API declaration for the event: request payload type plus the documented
# responses. The 404 text is the same message `__postprocess__` returns.
# NOTE(review): "contatining" in the 200 description is a typo for "containing";
# it is a runtime string used in the API description, so it is left as-is here.
__api__ = event_api(
    title="Live: Predict Fraud",
    payload=(OrderInfo, "Order Information"),
    responses={
        200: (dict, "features used for prediction contatining `is_fraud` field as result of prediction"),
        404: (str, "customer or email not found (this example only works for known customer_id and email)")
    }
)

# Module-level logger; calls below pass the EventContext as first argument.
logger = app_logger()

# Lazily initialised by `__init_event__`: the unpickled model and the
# database connection pool. Both stay None until that hook has run.
model = None
db = None

# Ordered list of column names used to build the model input frame in `predict`.
# The order is significant: it defines the column order of the DataFrame.
features = [
    # value of the order being scored
    'order_amount',
    # e-mail usage per customer
    'num_email_by_customer_id',
    'same_email_by_customer_id',
    'known_email_by_customer_id',
    # IP address usage per customer
    'num_ip_addr_by_customer_id',
    'same_ip_addr_by_customer_id',
    'known_ip_addr_by_customer_id',
    # customer usage per e-mail
    'num_customer_id_by_email',
    'same_customer_id_by_email',
    'known_customer_id_by_email',
    # order amount statistics per customer
    'order_amount_mean_by_customer_id',
    'order_amount_std_by_customer_id',
    'order_amount_min_by_customer_id',
    'order_amount_max_by_customer_id',
    'order_amount_sum_by_customer_id',
    # order amount statistics per e-mail
    'order_amount_mean_by_email',
    'order_amount_std_by_email',
    'order_amount_min_by_email',
    'order_amount_max_by_email',
    'order_amount_sum_by_email',
]

In [None]:
#export
async def __init_event__(context: EventContext):
    """
    Initialise the module-level `model` and `db` globals if they are still None.

    The model is unpickled from the file located at
    `context.env['model']['path']` / `context.env['model']['name']`, and the
    database pool is created from `context.env['db']['url']`. Each resource is
    only created when its global is None, so repeated calls reuse the
    existing objects.
    """
    global model, db
    if model is None:
        model_env = context.env['model']
        file_name = os.path.join(model_env['path'], model_env['name'])
        logger.info(context, f"Loading model for prediction from {file_name}...")
        # NOTE: pickle.load can execute arbitrary code contained in the file;
        # the configured path must only ever point at trusted model files.
        with open(file_name, 'rb') as model_file:
            model = pickle.load(model_file)
    if db is None:
        address = context.env['db']['url']
        logger.info(context, f"Connecting to database {address}...")
        db = await aioredis.create_redis_pool(address)
        

In [None]:
#export
async def _lookup_db(key: str):
    """
    Fetch the value stored under `key` in the module-level `db` and decode it.

    Returns None when the key has no value; otherwise the stored bytes are
    passed to `deserialize` with PICKLE4 serialization and LZ4 compression,
    requesting a `dict` result.
    """
    # NOTE(review): PICKLE4 presumably means pickle protocol 4 -- the database
    # contents must therefore be trusted; confirm who is allowed to write to it.
    raw = await db.get(key)
    return None if raw is None else deserialize(raw, Serialization.PICKLE4, Compression.LZ4, dict)

In [None]:
#export
async def lookup_features(payload: OrderInfo, context: EventContext) -> Optional[dict]:
    """
    Look up the stored feature records for the order's customer and e-mail and
    merge them with the incoming order.

    Both lookups run concurrently. The merged dictionary is built with the
    e-mail record first, then the customer record, then the order fields, so
    on key collisions the order fields take precedence.

    The model's input column for the order value is `order_amount`, while the
    payload carries it as `amount`. Without an explicit mapping, `order_amount`
    would keep whatever value was stored in the looked-up records and the
    order being scored would not influence that feature. It is therefore set
    explicitly from `payload.amount`; the original `amount` key is kept too.

    :param payload: OrderInfo, order to be scored
    :param context: EventContext, used for logging
    :return: merged feature dictionary, or None when either the customer or
        the e-mail has no stored record
    :raises AssertionError: if the database connection was not initialised
    """
    logger.info(context, "Looking up features in database...")
    assert db, "Connection to database missing."
    customer_id_features, email_features = await asyncio.gather(
        _lookup_db(payload.customer_id),
        _lookup_db(payload.email)
    )
    if customer_id_features is None or email_features is None:
        return None
    return {
        **email_features,
        **customer_id_features,
        **payload.to_dict(),
        # Score the amount of the current order, not the stored one.
        'order_amount': payload.amount,
    }

In [None]:
#export
async def predict(data: dict, context: EventContext) -> dict:
    """
    Run the loaded model on a single feature dictionary.

    A one-row DataFrame is built from `data`, restricted to (and ordered by)
    the module-level `features` columns, wrapped in an `xgb.DMatrix` and
    passed to `model.predict`. The first prediction is stored in `data` under
    `is_fraud` and the same (mutated) dictionary is returned.

    :param data: dict, merged features for one order
    :param context: EventContext (not used by this step)
    :return: the input dictionary with the `is_fraud` entry added
    """
    frame = pd.DataFrame([data], columns=features)
    scores = model.predict(xgb.DMatrix(frame))
    # .item() turns the first score into a plain Python value
    # (presumably from a numpy scalar -- depends on the model's return type).
    data['is_fraud'] = scores[0].item()
    return data
    

In [None]:
#export
async def __postprocess__(payload: Optional[dict], context: EventContext, response: PostprocessHook) -> dict:
    """
    Turn the result of the steps into the event response.

    A non-None payload is returned unchanged. When the payload is None the
    response status is set to 404 and an explanatory message is returned
    instead; note that in this case the returned value is a `str`, even
    though the signature is annotated as returning `dict`.
    """
    if payload is not None:
        return payload
    response.status = 404
    return "customer or email not found (this example only works for known customer_id and email)"
        

### Test from notebook

In [None]:
from hopeit.testing.apps import config, execute_event
from fraud_poc.live.predict import OrderInfo
import uuid
 
def new_key(): return str(uuid.uuid4())

app_config = config('config/fraud-service.json')
payload = OrderInfo(
    order_id=new_key(),
    customer_id='d555b585-5511-4a16-9f22-819834110239',
    order_date=datetime.now(tz=timezone.utc),
    email='1f5d34b02ef1975d5a82dcfe2e53fad6182e118c',
    ip_addr='test',
    amount=100.0,
    location_lat=0.0,
    location_long=0.0
) 

result = await execute_event(app_config, 'live.predict', payload)
result

2020-07-08 10:23:19,778 | INFO | fraud-poc 0.0.1-service live.predict leo-legion 27044 | __init_event__ module=fraud_poc.live.predict... | track.operation_id=test_operation_id | track.request_id=test_request_id | track.request_ts=2020-07-08T10:23:19.778396+00:00
2020-07-08 10:23:19,778 | INFO | fraud-poc 0.0.1-service live.predict leo-legion 27044 | Loading model for prediction from ./data/model/xgb/latest-ok.pkl... | track.operation_id=test_operation_id | track.request_id=test_request_id | track.request_ts=2020-07-08T10:23:19.778396+00:00
2020-07-08 10:23:19,784 | INFO | fraud-poc 0.0.1-service live.predict leo-legion 27044 | Connecting to database redis://localhost:6379... | track.operation_id=test_operation_id | track.request_id=test_request_id | track.request_ts=2020-07-08T10:23:19.778396+00:00
2020-07-08 10:23:19,786 | INFO | fraud-poc 0.0.1-service live.predict leo-legion 27044 | Looking up features in database... | track.operation_id=test_operation_id | track.request_id=test_req

{'order_id': '06989410-e724-4e0e-ac4e-dd7803be453c',
 'order_date': '2020-07-08T10:23:19.777759+00:00',
 'customer_id': 'd555b585-5511-4a16-9f22-819834110239',
 'ip_addr': 'test',
 'order_amount': 879.7319538388477,
 'email': '1f5d34b02ef1975d5a82dcfe2e53fad6182e118c',
 'customer_id_by_email': "['d555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239', 'd555b585-5511-4a16-9f22-819834110239']",
 'num_customer_id_by_email': 1,
 'last_customer_id_by_email': 'd555b585-5511-4a16-9f22-819834110239',
 'same_customer_id_by_email': 1,
 'known_customer_id_by_email': 1,
 'order_amount_mean_by_email': 516.7076306505403,
 'order_amount_std_by_email': 278.38456075392605,
 'order_amount_min_by_email': 13