In [19]:
import json
import pandas as pd
import numpy as np
import datetime
from airflow.providers.postgres.hooks.postgres import PostgresHook
from airflow.providers.mysql.hooks.mysql import MySqlHook
import seaborn as sns
warehouse_hook = PostgresHook(postgres_conn_id='rds_afsg_ds_prod_postgresql_dwh', schema='afsg_ds_prod_postgresql_dwh')
mifos_hook = MySqlHook(mysql_conn_id='mifos_db', schema='mifostenant-safaricom')
airflow_hook = MySqlHook(mysql_conn_id='mysql_airflow', schema='bloom_pipeline')
middleware_hook = PostgresHook(postgres_conn_id='asante_mfs_middleware', schema='asante_datawarehouse')
jubilee_hook = MySqlHook(mysql_conn_id='mifos_db', schema='mifostenant-jubilee')
solv_hook = MySqlHook(mysql_conn_id='solv_ke', schema='solvke_staging')

#### Get whitelists

In [20]:
warehouse_whitelist = warehouse_hook.get_pandas_df(sql="select * from bloomlive.scoring_results where model_version = (select max(model_version) from bloomlive.scoring_results)")
current_whitelist = airflow_hook.get_pandas_df(sql="select * from bloom_pipeline.current_whitelist_mock")

[[34m2023-06-08 11:07:11,883[0m] {[34mbase.py:[0m73} INFO[0m - Using connection ID 'rds_afsg_ds_prod_postgresql_dwh' for task execution.[0m


[[34m2023-06-08 11:08:28,674[0m] {[34mbase.py:[0m73} INFO[0m - Using connection ID 'mysql_airflow' for task execution.[0m


#### Get missing data

In [54]:
missing_on_warehouse = current_whitelist[~current_whitelist['Store_Number'].astype(str).isin(warehouse_whitelist['store_number'].tolist())]
print(missing_on_warehouse[missing_on_warehouse['Asante_Credit_Limit_1_Day'] > 0])
print(missing_on_warehouse[missing_on_warehouse['Asante_Credit_Limit_7_Day'] > 0])
print(missing_on_warehouse[missing_on_warehouse['Asante_Credit_Limit_21_Day'] > 0])
missing_on_warehouse

Empty DataFrame
Columns: [Store_Number, Asante_Blacklist_Flag, Asante_Credit_Limit_1_Day, Asante_Credit_Limit_7_Day, Asante_Credit_Limit_21_Day, CreatedOn_Date, ModifiedOn_Date, id, record_created_on_timestamp]
Index: []
Empty DataFrame
Columns: [Store_Number, Asante_Blacklist_Flag, Asante_Credit_Limit_1_Day, Asante_Credit_Limit_7_Day, Asante_Credit_Limit_21_Day, CreatedOn_Date, ModifiedOn_Date, id, record_created_on_timestamp]
Index: []
Empty DataFrame
Columns: [Store_Number, Asante_Blacklist_Flag, Asante_Credit_Limit_1_Day, Asante_Credit_Limit_7_Day, Asante_Credit_Limit_21_Day, CreatedOn_Date, ModifiedOn_Date, id, record_created_on_timestamp]
Index: []


Unnamed: 0,Store_Number,Asante_Blacklist_Flag,Asante_Credit_Limit_1_Day,Asante_Credit_Limit_7_Day,Asante_Credit_Limit_21_Day,CreatedOn_Date,ModifiedOn_Date,id,record_created_on_timestamp
1685,7249107,0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407149169,2023-06-08 07:58:43
1697,7322856,0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407149181,2023-06-08 07:58:43
1703,7363190,0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407149187,2023-06-08 07:58:43
1713,7388600,0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407149197,2023-06-08 07:58:43
1715,7390586,0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407149199,2023-06-08 07:58:43
...,...,...,...,...,...,...,...,...,...
188497,7200214,0,0.0,0.0,0.0,2023-05-06 15:37:51,2023-05-06 15:37:51,407335981,2023-06-08 07:59:19
188506,7303256,0,0.0,0.0,0.0,2023-05-06 15:37:51,2023-05-06 15:37:51,407335990,2023-06-08 07:59:19
188525,7387388,0,0.0,0.0,0.0,2023-05-06 15:37:51,2023-05-06 15:37:51,407336009,2023-06-08 07:59:19
188558,7949001,0,0.0,0.0,0.0,2023-05-06 15:37:51,2023-05-06 15:37:51,407336042,2023-06-08 07:59:19


In [22]:
missing_on_saf = warehouse_whitelist[~warehouse_whitelist['store_number'].astype(str).isin(current_whitelist['Store_Number'].tolist())]
missing_on_saf

Unnamed: 0,surrogate_id,store_number,national_id,final_21_limit,final_7_limit,final_1_limit,idm_recommendation,limit_factor_21,limit_factor_7,limit_factor_1,model_version,blacklist_flag,created_at,record_added_to_warehouse_on_timestamp,limit_reason
12702,8113338,003227,24125861,0.0,0.0,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",1,2023-05-31,2023-05-31 13:42:30:231841,A2
29973,8130621,000030,201300000004442884,0.0,0.0,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",1,2023-05-31,2023-05-31 13:42:30:231841,A2
30813,8131461,003215,29333437,0.0,0.0,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",1,2023-05-31,2023-05-31 13:42:30:231841,A2
41087,8141739,058133,9554143,0.0,40300.0,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",0,2023-05-31,2023-05-31 13:42:30:231841,G1
42485,8143138,000085,,0.0,0.0,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",0,2023-05-31,2023-05-31 13:42:30:231841,E1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183951,8284664,074600,C82002/074600,0.0,0.0,0.0,,,,,"2023-012[2023-05-18, 2023-05-31]",0,2023-05-31,2023-05-31 13:42:30:231841,B3
184867,8285580,067800,C82002/067800,0.0,0.0,0.0,,,,,"2023-012[2023-05-18, 2023-05-31]",0,2023-05-31,2023-05-31 13:42:30:231841,B3
186596,8287309,070600,C82002/070600,0.0,0.0,0.0,,,,,"2023-012[2023-05-18, 2023-05-31]",0,2023-05-31,2023-05-31 13:42:30:231841,B3
186740,8287453,016000,C82002/016000,0.0,0.0,0.0,,,,,"2023-012[2023-05-18, 2023-05-31]",0,2023-05-31,2023-05-31 13:42:30:231841,B3


#### Merge datasets

In [44]:
merged = current_whitelist.merge(
    warehouse_whitelist,
    left_on='Store_Number',
    right_on='store_number',
    how='outer'
).dropna(subset=['Store_Number', 'store_number'], how='any')

In [45]:
merged.head()

Unnamed: 0,Store_Number,Asante_Blacklist_Flag,Asante_Credit_Limit_1_Day,Asante_Credit_Limit_7_Day,Asante_Credit_Limit_21_Day,CreatedOn_Date,ModifiedOn_Date,id,record_created_on_timestamp,surrogate_id,...,final_1_limit,idm_recommendation,limit_factor_21,limit_factor_7,limit_factor_1,model_version,blacklist_flag,created_at,record_added_to_warehouse_on_timestamp,limit_reason
0,7346057,0.0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407147484.0,2023-06-08 07:58:42,8161226.0,...,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",0.0,2023-05-31,2023-05-31 13:42:30:231841,E1
1,6026001,0.0,0.0,0.0,0.0,2023-05-25 11:27:57,2023-05-25 11:27:57,407147485.0,2023-06-08 07:58:42,8146702.0,...,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",0.0,2023-05-31,2023-05-31 13:42:30:231841,E1
2,7348921,0.0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407147486.0,2023-06-08 07:58:42,8161297.0,...,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",0.0,2023-05-31,2023-05-31 13:42:30:231841,E1
3,7349309,0.0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407147487.0,2023-06-08 07:58:42,8161314.0,...,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",0.0,2023-05-31,2023-05-31 13:42:30:231841,E1
4,7351485,0.0,0.0,0.0,0.0,2023-05-13 05:34:57,2023-05-13 05:34:57,407147488.0,2023-06-08 07:58:42,8161371.0,...,0.0,Approve,0.5,0.17,0.17,"2023-012[2023-05-18, 2023-05-31]",0.0,2023-05-31,2023-05-31 13:42:30:231841,E1


In [46]:
merged[merged['blacklist_flag'] != merged['Asante_Blacklist_Flag']][['store_number', 'blacklist_flag', 'Asante_Blacklist_Flag', 'Store_Number', 'store_number']]

Unnamed: 0,store_number,blacklist_flag,Asante_Blacklist_Flag,Store_Number,store_number.1


In [47]:
merged[merged['final_1_limit'] != merged['Asante_Credit_Limit_1_Day']][['store_number', 'final_1_limit', 'Asante_Credit_Limit_1_Day']]

Unnamed: 0,store_number,final_1_limit,Asante_Credit_Limit_1_Day


In [48]:
merged[merged['final_7_limit'] != merged['Asante_Credit_Limit_7_Day']][['store_number', 'final_7_limit', 'Asante_Credit_Limit_7_Day']]

Unnamed: 0,store_number,final_7_limit,Asante_Credit_Limit_7_Day


In [49]:
merged[merged['final_21_limit'] != merged['Asante_Credit_Limit_21_Day']][['store_number', 'final_21_limit', 'Asante_Credit_Limit_21_Day']]

Unnamed: 0,store_number,final_21_limit,Asante_Credit_Limit_21_Day


In [51]:
print(merged[merged['blacklist_flag'] == 0].shape[0] - merged[merged['Asante_Blacklist_Flag'] == 0].shape[0])
print(merged[merged['blacklist_flag'] == 1].shape[0] - merged[merged['Asante_Blacklist_Flag'] == 1].shape[0])

0
0
