# Data Monitoring Automation

This notebook documents the code and progress toward an automated process for monitoring the performance of the fraud model(s) across partners and products.

In [1]:
# Library Imports

import pandas as pd
import numpy as np
import trellis
import os
from avant_python_utils.email import send_email
from datalaketools.connectors.presto_db import PrestoDB
# Presto connector instance; the loan-level query cell runs its SQL through presto.execute_df().
presto = PrestoDB()
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc,recall_score,precision_score,accuracy_score
###

In [2]:
# Trellis bootstrap calls, left disabled exactly as in the original cell:
#trellis.start()
# fraud = trellis.connect('us_fraud_follower')

# Resolve parent_dir_path so the same code runs as a notebook and as a script.
# A plain Python script defines __file__, so the script's own directory is used;
# a notebook kernel does not define it (NameError), so fall back to the current
# working directory, which is what the notebook used before.
try:
    parent_dir_path = os.path.dirname(os.path.abspath(__file__))
except NameError:
    parent_dir_path = os.getcwd()

# Subject text for the report (presumably handed to send_email later; that call
# is not part of this section).
subject = 'Avant Model Monitor Weekly Report (Data Only)'

# Look the 'automate_email' secret up once and reuse it for both fields instead
# of calling trellis.keys() twice for the same entry.
_automate_email_keys = trellis.keys('automate_email')
credentials = {
    'username': _automate_email_keys['email'],
    'password': _automate_email_keys['pw'],
}


In [3]:
# Time unit substituted into date_trunc() in the loan-level query; it decides
# how loan_processing_start_time is bucketed (entered_lp_week column).
loan_window = "week"

Query to obtain data at the loan ID level. It keeps only loans whose first installment date is at least 64 days (roughly two months) before the date on which the query is run.

In [16]:
# Loan-level extract used for fraud-model monitoring.
#
# What the query returns per loan: processing start time and its truncated
# bucket (unit = loan_window), status, an issued flag, the confirmed-fraud flag,
# fraud model scores (4.1.0 and 5.0.0, plus hard_score = 5.0.0 falling back to
# 4.1.0), state, payment method, product type and the identity-risk tier flags.
#
# Things to keep in mind when reading the result:
#   * Only loans processed after 2019-06-30 are returned (hard-coded below).
#   * Fraud scores are only looked up for loans processed in the 53 weeks before
#     the start of the current week; loans outside that window come back with
#     null scores.
#   * Only loans whose first installment date is at least 64 days old are kept.
#   * confirmed_fraud_logs is reduced to distinct customer ids before joining,
#     so a customer with several log entries cannot duplicate loan rows.
# NOTE: the SQL text goes through str.format(), so it must not contain any
# curly braces other than the LOAN_WINDOW placeholder.
df_raw = presto.execute_df('''
SELECT
  l.id as loan_id
, l.loan_processing_start_time
, date_trunc('{LOAN_WINDOW}', l.loan_processing_start_time) as entered_lp_week
, l.status
, case when l.status in ('current','late','paid_off','charged_off') then 1 else 0 end as issued
, case when c.high_confidence_fraud_indicator=true or cfl.customer_id is not null then 1 else 0 end as high_confidence_fraud_indicator
, cast(fd.score_4 as double) as score_4
, cast(fd.score_5 as double) as score_5
, coalesce(cast(fd.score_5 as double), cast(fd.score_4 as double)) as hard_score
, l.state
, l.payment_method
, ca.product_type
, vrdt.risk_summary_identity_high
, vrdt.risk_summary_identity_medium
, vrdt.risk_summary_identity_low
FROM avant.dw.customer_applications ca
  -- the inner joins and the WHERE clause on l below drop applications without a loan
LEFT JOIN avant.dw.loans l on l.customer_application_id = ca.id
JOIN avant.dw.customers c
  ON c.id = l.customer_id

  -- getting fraud scores (latest fraud decision created at or after loan processing start)
LEFT JOIN (
  SELECT
    l.id as loan_id
  , json_extract_scalar(fd.model_scores, '$["fraud/en-US/4.1.0"]["score"]') as score_4
  , json_extract_scalar(fd.model_scores, '$["fraud/en-US/5.0.0"]["score"]') as score_5
  , fd.id as fraud_decision_id
  , row_number() over (partition by l.id order by fd.created_at desc) as row_num
  FROM avant.dw.loans l
  JOIN avant.avant_basic.fraud_decisions fd
    ON fd.customer_application_id = l.customer_application_id
    AND fd.created_at AT TIME ZONE 'America/Chicago' >= l.loan_processing_start_time
  WHERE l.loan_processing_start_time BETWEEN date_add('week', -53, current_timestamp) AND date_trunc('week',current_timestamp)
) fd
  ON fd.loan_id = l.id
  AND fd.row_num=1

  -- getting fraud indicator
  -- collapsed to one row per customer so repeated log entries cannot fan out loan rows
LEFT JOIN (
  SELECT DISTINCT customer_id
  FROM avant.avant_basic.confirmed_fraud_logs
) cfl
  ON cfl.customer_id = c.id

  -- filtering for valid loans to evaluate performance on
JOIN avant.dw.loan_performance_by_installment lp
  ON lp.loan_id = l.id
  AND lp.installment_number = 1
  AND lp.installment_date <= date_add('day', -64, current_timestamp)

  -- adding identity tier a loan was assigned to
LEFT JOIN avant.dw_temp_newver.verifications_risks_decisions_test vrdt
  ON ca.id = vrdt.customer_application_id
  AND vrdt.row_num_recent = 1
WHERE l.loan_processing_start_time > date '2019-06-30'
'''.format(LOAN_WINDOW=loan_window))


In [17]:
# Keep only loans that have a 5.0.0 fraud score (score_5). Rows that only carry
# the older score_4 are dropped as well, even though hard_score is populated
# for them.
# .copy() makes df an independent frame, so adding or changing columns on it
# later neither raises SettingWithCopyWarning nor depends on df_raw.
df = df_raw.loc[df_raw['score_5'].notna()].copy()

In [18]:
# Display the filtered frame; the rendered output is truncated to the first and last rows.
df

Unnamed: 0,loan_id,loan_processing_start_time,entered_lp_week,status,issued,high_confidence_fraud_indicator,score_4,score_5,hard_score,state,payment_method,product_type,risk_summary_identity_high,risk_summary_identity_medium,risk_summary_identity_low
0,3900111,2020-02-19 12:00:35.580,2020-02-17 00:00:00.000,current,1,0,0.072424,0.003987,0.003987,AL,ach,installment,False,False,True
1,3886252,2020-02-01 13:46:46.965,2020-01-27 00:00:00.000,current,1,0,0.385041,0.012574,0.012574,FL,ach,installment,False,False,True
3,3887925,2020-02-03 14:56:49.582,2020-02-03 00:00:00.000,paid_off,1,0,0.243956,0.004618,0.004618,CA,ach,installment,False,False,True
5,3881791,2020-01-27 13:45:57.572,2020-01-27 00:00:00.000,current,1,0,0.339094,0.005295,0.005295,NJ,ach,installment,False,False,True
6,3771630,2019-10-12 10:45:23.981,2019-10-07 00:00:00.000,current,1,0,0.001624,0.003473,0.003473,NJ,ach,installment,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126581,3816121,2019-11-19 15:01:21.012,2019-11-18 00:00:00.000,current,1,0,0.000764,0.007814,0.007814,VA,ach,installment,False,False,True
126582,3773277,2019-10-17 14:46:57.282,2019-10-14 00:00:00.000,current,1,0,0.008590,0.003719,0.003719,NE,ach,installment,False,False,True
126586,3753133,2019-09-28 16:03:17.724,2019-09-23 00:00:00.000,charged_off,1,0,0.012544,0.013673,0.013673,MS,ach,installment,False,False,True
126587,3862140,2020-01-04 16:46:50.828,2019-12-30 00:00:00.000,current,1,0,0.114821,0.002712,0.002712,AL,paper_check,installment,False,False,True


TODO: add information on the following:
- Manual Reviews
- Total $ amount of loan