# Data Monitoring Automation

This notebook documents the code and progress as I work my way towards setting up an automated process to monitor the performance of the fraud model(s), across partners and products. 

In [1]:
# Library Imports

import pandas as pd
import numpy as np
import trellis
import os
from avant_python_utils.email import send_email
from datalaketools.connectors.presto_db import PrestoDB
presto = PrestoDB()
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc,recall_score,precision_score,accuracy_score
###

In [2]:
#trellis.start()
# fraud = trellis.connect('us_fraud_follower')
#parent_dir_path = os.path.dirname(os.path.abspath(__file__)) - REMOVE COMMENT IN PYTHON SCRIPT
parent_dir_path = os.getcwd()
subject = 'Avant Model Monitor Weekly Report (Data Only)'
credentials = {'username': trellis.keys('automate_email')['email'], 'password': trellis.keys('automate_email')['pw']}


In [3]:
#SQL query parameters
loan_window = 'week'

In [18]:
df_raw = presto.execute_df('''
SELECT
  l.id as loan_id
, l.loan_processing_start_time
, date_trunc('{LOAN_WINDOW}', l.loan_processing_start_time) as entered_lp_week
, l.status
, case when l.status in ('current','late','paid_off','charged_off') then 1 else 0 end as issued
, case when c.high_confidence_fraud_indicator=true or cfl.id is not null then 1 else 0 end as high_confidence_fraud_indicator
, cast(fd.score_4 as double) as score_4
, cast(fd.score_5 as double) as score_5
, coalesce(cast(fd.score_5 as double), cast(fd.score_4 as double)) as hard_score
, l.state
, l.payment_method
, ca.product_type
FROM avant.dw.customer_applications ca
LEFT JOIN avant.dw.loans l on l.customer_application_id = ca.id
JOIN avant.dw.customers c
  ON c.id = l.customer_id
LEFT JOIN (
  SELECT
    l.id as loan_id
  , json_extract_scalar(fd.model_scores, '$["fraud/en-US/4.1.0"]["score"]') as score_4
  , json_extract_scalar(fd.model_scores, '$["fraud/en-US/5.0.0"]["score"]') as score_5
  , fd.id as fraud_decision_id
  , row_number() over (partition by l.id order by fd.created_at desc) as row_num
  FROM avant.dw.loans l
  JOIN avant.avant_basic.fraud_decisions fd
    ON fd.customer_application_id = l.customer_application_id
    AND fd.created_at AT TIME ZONE 'America/Chicago' >= l.loan_processing_start_time
WHERE l.loan_processing_start_time BETWEEN date_add('week', -53, current_timestamp) AND date_trunc('week',current_timestamp) 
) fd 
  ON fd.loan_id = l.id 
  AND fd.row_num=1
LEFT JOIN avant.avant_basic.confirmed_fraud_logs cfl 
  ON cfl.customer_id = c.id
  JOIN avant.dw.loan_performance_by_installment lp 
  ON lp.loan_id = l.id 
  AND lp.installment_number = 1
  AND lp.installment_date <= date_add('day', -64, current_timestamp)
WHERE l.loan_processing_start_time BETWEEN date_add('week', -53, current_timestamp) AND date_trunc('week',current_timestamp)  
'''.format(LOAN_WINDOW = loan_window))


In [19]:
df_raw

Unnamed: 0,loan_id,loan_processing_start_time,entered_lp_week,status,issued,high_confidence_fraud_indicator,score_4,score_5,hard_score,state,payment_method,product_type
0,3767850,2019-10-09 15:14:33.439,2019-10-07 00:00:00.000,current,1,0,0.001115,0.004296,0.004296,RI,ach,refinance
1,3826097,2019-11-29 07:47:07.405,2019-11-25 00:00:00.000,current,1,0,0.001469,0.004623,0.004623,IN,ach,refinance
2,3847989,2019-12-18 19:22:45.416,2019-12-16 00:00:00.000,late,1,0,0.375087,0.013874,0.013874,TX,ach,installment
3,3844643,2019-12-15 13:59:07.252,2019-12-09 00:00:00.000,paid_off,1,0,0.039189,0.003345,0.003345,PA,ach,installment
4,3954660,2020-06-02 17:54:40.767,2020-06-01 00:00:00.000,current,1,0,,0.008276,0.008276,TN,ach,installment
...,...,...,...,...,...,...,...,...,...,...,...,...
84799,3881203,2020-01-26 17:23:22.610,2020-01-20 00:00:00.000,current,1,0,0.223164,0.005698,0.005698,MI,ach,installment
84800,3756446,2019-10-10 04:03:09.732,2019-10-07 00:00:00.000,current,1,0,0.014203,0.004328,0.004328,WY,ach,installment
84801,3753412,2019-10-02 15:21:45.588,2019-09-30 00:00:00.000,current,1,0,0.058567,0.006587,0.006587,CA,ach,installment
84802,3927463,2020-03-24 18:58:11.947,2020-03-23 00:00:00.000,current,1,0,,0.004856,0.004856,GA,ach,installment


In [11]:
df_raw['loan_id'].nunique()

167172

In [15]:
df_raw.groupby('loan_id').filter(lambda x: len(x) > 1)

Unnamed: 0,loan_id,loan_processing_start_time,entered_lp,status,issued,high_confidence_fraud_indicator,score_4,score_5,hard_score,state,payment_method,product_type
96243,3937681,2020-04-17 07:51:15.885,2020-04-13 00:00:00.000,rejected,0,1,,1.172209,1.172209,MN,ach,installment
96244,3937681,2020-04-17 07:51:15.885,2020-04-13 00:00:00.000,rejected,0,1,,1.172209,1.172209,MN,ach,installment
146740,3943558,2020-05-04 08:37:19.736,2020-05-04 00:00:00.000,rejected,0,1,,0.27134,0.27134,MI,ach,installment
146741,3943558,2020-05-04 08:37:19.736,2020-05-04 00:00:00.000,rejected,0,1,,0.27134,0.27134,MI,ach,installment


In [11]:
df_perf = pd.merge(df_perf_raw, df_raw, how = 'inner', on = 'loan_id')

#Remove records with no fraud score
df = df_raw[df_raw.score_5.notnull()]

In [10]:
df_perf

Unnamed: 0,loan_id,first_30,first_60,first_120,lp3_balance_30,lp6_balance_30,third_30,third_60,sixth_30,sixth_60,...,entered_lp,status,issued,high_confidence_fraud_indicator,score_4,score_5,hard_score,state,payment_method,product_type
0,3827954,0,0,0,0.00,0.00,0.0,0.0,0.0,0.0,...,2019-11-25 00:00:00.000,late,1,0,0.835633,0.010077,0.010077,IL,paper_check,installment
1,3759229,0,0,0,0.00,0.00,0.0,0.0,0.0,0.0,...,2019-09-30 00:00:00.000,current,1,0,0.006809,0.035864,0.035864,FL,ach,installment
2,3890272,0,0,0,0.00,,0.0,0.0,,,...,2020-02-03 00:00:00.000,current,1,0,0.081351,0.006434,0.006434,OH,ach,installment
3,3934824,0,0,0,0.00,,0.0,0.0,,,...,2020-04-06 00:00:00.000,current,1,0,,0.007644,0.007644,KS,ach,installment
4,3867881,0,0,0,0.00,,0.0,0.0,,,...,2020-01-06 00:00:00.000,current,1,0,0.366846,0.005101,0.005101,MN,ach,installment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94884,3798625,0,0,0,0.00,0.00,0.0,0.0,0.0,0.0,...,2019-10-28 00:00:00.000,current,1,0,0.021911,0.004563,0.004563,AZ,paper_check,installment
94885,3803320,0,0,0,0.00,0.00,0.0,0.0,0.0,0.0,...,2019-11-04 00:00:00.000,paid_off,1,0,0.026916,0.030377,0.030377,CT,ach,installment
94886,3891874,0,0,0,0.00,,0.0,0.0,,,...,2020-02-03 00:00:00.000,current,1,0,0.486099,0.026409,0.026409,OH,ach,installment
94887,3805067,0,0,0,0.00,0.00,0.0,0.0,0.0,0.0,...,2019-11-04 00:00:00.000,current,1,0,0.023162,0.016406,0.016406,CA,ach,installment
