# Data Monitoring Automation

This notebook documents the code and progress toward an automated process for monitoring the performance of the fraud model(s) across partners and products.

In [1]:
# Library Imports

import pandas as pd
import numpy as np
import trellis
import os
from avant_python_utils.email import send_email
from datalaketools.connectors.presto_db import PrestoDB
# Presto client instance; its execute_df() method is used further down to run
# the monitoring query and load the result into df_raw.
presto = PrestoDB()
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc,recall_score,precision_score,accuracy_score
###

In [2]:
# Runtime configuration for the weekly monitoring report e-mail.

# Left disabled in the notebook -- TODO confirm whether these are still needed.
#trellis.start()
# fraud = trellis.connect('us_fraud_follower')

# NOTE: __file__ is not defined inside a notebook kernel, so the current working
# directory is used here. When this notebook is converted to a Python script,
# switch to the line below instead:
#parent_dir_path = os.path.dirname(os.path.abspath(__file__))
parent_dir_path = os.getcwd()

subject = 'Avant Model Monitor Weekly Report (Data Only)'

# Look the 'automate_email' entry up once and reuse it for both fields instead
# of calling trellis.keys('automate_email') separately for each one.
_automate_email_keys = trellis.keys('automate_email')
credentials = {
    'username': _automate_email_keys['email'],
    'password': _automate_email_keys['pw'],
}


In [3]:
# SQL query parameters
# loan_window is substituted for {LOAN_WINDOW} in the monitoring query, where it
# is the unit passed to date_trunc() on loan_processing_start_time.
# NOTE(review): presumably any unit Presto's date_trunc accepts (e.g. 'day',
# 'week', 'month') would work -- confirm before changing it.
loan_window = 'week'

In [4]:
# Pull the loan-level monitoring data set from Presto into df_raw.
#
# Columns produced by the query:
#   * loan_id, loan_processing_start_time, status, state, payment_method,
#     product_type      -- taken directly from the loans / applications tables.
#   * entered_lp_week   -- loan_processing_start_time truncated to `loan_window`.
#   * issued            -- 1 when status is one of current / late / paid_off /
#                          charged_off, else 0.
#   * high_confidence_fraud_indicator
#                       -- 1 when the customer's flag is true OR a matching row
#                          exists in confirmed_fraud_logs, else 0.
#   * score_4, score_5  -- the "score" field stored under the
#                          "fraud/en-US/4.1.0" and "fraud/en-US/5.0.0" keys of
#                          fraud_decisions.model_scores, cast to double.
#   * hard_score        -- score_5 when present, otherwise score_4.
#
# Scores come from the most recent fraud decision (row_num = 1, ordered by
# created_at desc) among those created at or after the loan's
# loan_processing_start_time.
#
# Row filters:
#   * the loan must have an installment_number = 1 record whose
#     installment_date is at least 64 days before now (inner join on lp);
#   * loan_processing_start_time > date '2019-06-30'.
#
# NOTE(review): the LEFT JOIN onto loans is followed by an inner JOIN on
#   l.customer_id and a WHERE on l.loan_processing_start_time, so applications
#   without a loan are dropped anyway -- it behaves like an inner join.
# NOTE(review): the LEFT JOIN onto confirmed_fraud_logs is not de-duplicated;
#   if a customer can have more than one row there, loans will be repeated in
#   the result -- verify against the table.
# NOTE(review): the score sub-query only looks at loans from the last 53 weeks
#   up to the start of the current week, while the outer filter uses the fixed
#   date 2019-06-30; loans outside the 53-week window come back with NULL
#   scores -- confirm this is intended.
# NOTE(review): the output column is always called entered_lp_week, even if
#   loan_window is set to something other than 'week'.
df_raw = presto.execute_df('''
SELECT
  l.id as loan_id
, l.loan_processing_start_time
, date_trunc('{LOAN_WINDOW}', l.loan_processing_start_time) as entered_lp_week
, l.status
, case when l.status in ('current','late','paid_off','charged_off') then 1 else 0 end as issued
, case when c.high_confidence_fraud_indicator=true or cfl.id is not null then 1 else 0 end as high_confidence_fraud_indicator
, cast(fd.score_4 as double) as score_4
, cast(fd.score_5 as double) as score_5
, coalesce(cast(fd.score_5 as double), cast(fd.score_4 as double)) as hard_score
, l.state
, l.payment_method
, ca.product_type
FROM avant.dw.customer_applications ca
LEFT JOIN avant.dw.loans l on l.customer_application_id = ca.id
JOIN avant.dw.customers c
  ON c.id = l.customer_id
LEFT JOIN (
  SELECT
    l.id as loan_id
  , json_extract_scalar(fd.model_scores, '$["fraud/en-US/4.1.0"]["score"]') as score_4
  , json_extract_scalar(fd.model_scores, '$["fraud/en-US/5.0.0"]["score"]') as score_5
  , fd.id as fraud_decision_id
  , row_number() over (partition by l.id order by fd.created_at desc) as row_num
  FROM avant.dw.loans l
  JOIN avant.avant_basic.fraud_decisions fd
    ON fd.customer_application_id = l.customer_application_id
    AND fd.created_at AT TIME ZONE 'America/Chicago' >= l.loan_processing_start_time
WHERE l.loan_processing_start_time BETWEEN date_add('week', -53, current_timestamp) AND date_trunc('week',current_timestamp) 
) fd 
  ON fd.loan_id = l.id 
  AND fd.row_num=1
LEFT JOIN avant.avant_basic.confirmed_fraud_logs cfl 
  ON cfl.customer_id = c.id
  JOIN avant.dw.loan_performance_by_installment lp 
  ON lp.loan_id = l.id 
  AND lp.installment_number = 1
  AND lp.installment_date <= date_add('day', -64, current_timestamp)
WHERE l.loan_processing_start_time > date '2019-06-30'
'''.format(LOAN_WINDOW = loan_window))


In [6]:
# Non-null count of every column, split by product type; columns whose count is
# lower than loan_id's contain missing values for that product.
(
    df_raw
    .groupby('product_type')
    .count()
)

Unnamed: 0_level_0,loan_id,loan_processing_start_time,entered_lp_week,status,issued,high_confidence_fraud_indicator,score_4,score_5,hard_score,state,payment_method
product_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
installment,108660,108660,108660,108660,108660,108660,62138,70759,70759,108660,108660
refinance,17933,17933,17933,17933,17933,17933,10198,12771,12771,17933,17933


In [7]:
# Keep only the loans that have a score_5 value (the "fraud/en-US/5.0.0" score
# extracted in the query). Rows with a missing score_5 are dropped even when
# they carry a score_4.
has_score_5 = df_raw['score_5'].notna()
df = df_raw.loc[has_score_5]

In [10]:
# Preview the score-filtered frame; pandas abbreviates the rendering of a long
# frame, so only the leading and trailing rows are shown.
df

Unnamed: 0,loan_id,loan_processing_start_time,entered_lp_week,status,issued,high_confidence_fraud_indicator,score_4,score_5,hard_score,state,payment_method,product_type
0,3854172,2019-12-27 10:24:22.013,2019-12-23 00:00:00.000,paid_off,1,0,0.085483,0.007445,0.007445,AL,ach,refinance
1,3879117,2020-01-23 16:18:37.224,2020-01-20 00:00:00.000,current,1,0,0.283923,0.090194,0.090194,PA,ach,installment
2,3825105,2019-11-27 17:07:09.245,2019-11-25 00:00:00.000,current,1,0,0.947521,0.005129,0.005129,IN,ach,installment
3,3881800,2020-01-27 13:59:38.330,2020-01-27 00:00:00.000,current,1,0,0.797386,0.118190,0.118190,KS,ach,installment
4,3837798,2019-12-09 11:25:46.680,2019-12-09 00:00:00.000,current,1,0,0.127043,0.008085,0.008085,FL,ach,installment
...,...,...,...,...,...,...,...,...,...,...,...,...
126586,3952876,2020-05-28 16:05:30.086,2020-05-25 00:00:00.000,current,1,0,,0.010510,0.010510,FL,ach,refinance
126587,3881550,2020-01-27 09:53:35.056,2020-01-27 00:00:00.000,current,1,0,0.414563,0.007201,0.007201,SC,ach,installment
126588,3929167,2020-03-26 17:19:20.245,2020-03-23 00:00:00.000,current,1,0,,0.015599,0.015599,AZ,ach,installment
126590,3746627,2019-09-23 19:22:12.804,2019-09-23 00:00:00.000,current,1,0,0.002501,0.003177,0.003177,TN,ach,installment
