In [1]:
import sys
import json
import logging
import numpy as np
import datetime
import pandas as pd
from airflow.models import Variable
from pandas.tseries.offsets import MonthEnd
from dateutil.relativedelta import relativedelta
from airflow.providers.mysql.hooks.mysql import MySqlHook
from airflow.providers.postgres.hooks.postgres import PostgresHook

# Database handles used by the notebook cells.
# The Remita staging hook is currently commented out:
# remita_hook = MySqlHook(mysql_conn_id='remita_server', database='remita_staging')
mifos_hook = MySqlHook(
    mysql_conn_id='mifos_db',
    schema='mifostenant-pronto',
)
warehouse_hook = PostgresHook(
    postgres_conn_id='rds_afsg_ds_prod_postgresql_dwh',
    schema='afsg_ds_prod_postgresql_dwh',
)

# Logging: "<HH:MM:SS>: <message>" lines, WARNING level and above.
log_format = "%(asctime)s: %(message)s"
logging.basicConfig(
    level=logging.WARNING,
    format=log_format,
    datefmt="%H:%M:%S",
)

In [7]:
# TEP side of the reconciliation: repayments and disbursements recorded after
# 2023-06-08, stacked into one frame. Each row carries boolean
# is_repayment / is_disbursement flags so the two sources can be separated
# later. Plain `union` (rather than `union all`) drops fully identical rows.
tep_sql = """
        select fund_movement_transaction_id, true as is_repayment, false as is_disbursement, transaction_date_time from bloomlive.raw_tep_repayments where transaction_date_time > '2023-06-08'
        union
        select fund_movement_transaction_id, false as is_repayment, true as is_disbursement, application_date_time as transaction_date_time from bloomlive.raw_tep_disbursements where application_date_time > '2023-06-08'
        """
TEP = warehouse_hook.get_pandas_df(sql=tep_sql)

# M-PESA side: statement exports read from local CSV files.
# skiprows=6 skips the first six lines of each file
# (presumably a report preamble above the header row -- TODO confirm).
mpesa_rep_csv = '~/data/ORG_4018601_Loan Repayment Account_Completed_20230612160944.csv'
mpesa_disb_csv = '~/data/ORG_4018601_Loan Disbursement Account_Completed_20230612160904.csv'
MPESA_rep = pd.read_csv(mpesa_rep_csv, skiprows=6)
MPESA_disb = pd.read_csv(mpesa_disb_csv, skiprows=6)

[[34m2023-06-13 08:28:34,845[0m] {[34mbase.py:[0m73} INFO[0m - Using connection ID 'rds_afsg_ds_prod_postgresql_dwh' for task execution.[0m


In [27]:
# Preview of the repayment statement headers mapped to snake_case names.
# The renamed frame is not assigned, so MPESA_rep itself keeps its original
# column labels (later cells still index it with 'Receipt No.' etc.).
snake_case_columns = {
    'Receipt No.': 'receipt_number',
    'Completion Time': 'completion_time',
    'Initiation Time': 'initiation_time',
    'Transaction Status': 'transaction_status',
    'Paid In': 'paid_in',
    'Balance Confirmed': 'balance_confirmed',
    'Reason Type': 'reason_type',
    'Other Party Info': 'other_party_info',
    'Linked Transaction ID': 'linked_transaction_id',
    'A/C No.': 'account_number',
}
MPESA_rep.rename(columns=snake_case_columns).columns

Unnamed: 0,receipt_number,completion_time,initiation_time,Details,transaction_status,paid_in,Withdrawn,Balance,balance_confirmed,reason_type,other_party_info,linked_transaction_id,account_number
0,RFC6VAC184,12-06-2023 16:07:17,12-06-2023 16:07:17,Loan Soko Repayment,Completed,900.00,,4523729.06,True,Merchant Loan Soko Repayment,7544114 - WEGA BIZNA ENTERPRISE,,
1,RFC2VABUFA,12-06-2023 16:07:15,12-06-2023 16:07:15,Loan Soko Repayment,Completed,12374.40,,4522829.06,True,Merchant Loan Soko Repayment,908842 - DUKA MOJA SHOP B,,
2,RFC6VA07ZI,12-06-2023 16:04:09,12-06-2023 16:04:09,Loan Soko Repayment,Completed,1000.00,,4510454.66,True,Merchant Loan Soko Repayment,417946 - Wakaguku malimaliconnections,,
3,RFC2V9LIUS,12-06-2023 15:59:52,12-06-2023 15:59:52,Loan Soko Repayment,Completed,58.00,,4509454.66,True,Merchant Loan Soko Repayment,7303354 - SUSCET WANJA 5,,
4,RFC7V9HWSZ,12-06-2023 15:58:55,12-06-2023 15:58:55,Loan Soko Repayment,Completed,2213.29,,4509396.66,True,Merchant Loan Soko Repayment,7015065 - NANCY KEMUMA GECHIKO,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1081,RF97L2YSQV,09-06-2023 00:17:50,09-06-2023 00:17:50,Loan Soko Repayment,Completed,200.00,,6036846.73,True,Merchant Loan Soko Repayment,7403114 - DENNIS BOSUBEN,,
1082,RF90L2SHIO,09-06-2023 00:09:05,09-06-2023 00:09:05,Loan Soko Repayment,Completed,6500.00,,6036646.73,True,Merchant Loan Soko Repayment,7009774 - BEATRICE MUMBI,,
1083,RF99L2S33H,09-06-2023 00:07:53,09-06-2023 00:07:53,Loan Soko Repayment,Completed,100.00,,6030146.73,True,Merchant Loan Soko Repayment,7436670 - BENJAMINE KIMUYU KYUNGU,,
1084,RF91L2PKR1,09-06-2023 00:04:21,09-06-2023 00:04:21,Loan Soko Repayment,Completed,409.36,,6030046.73,True,Merchant Loan Soko Repayment,7415050 - NICODEMUS MUTIA SYENGO,,


In [26]:
# Peek at the first five rows of the M-PESA disbursement statement.
MPESA_disb.head(n=5)

Unnamed: 0,Receipt No.,Completion Time,Initiation Time,Details,Transaction Status,Paid In,Withdrawn,Balance,Balance Confirmed,Reason Type,Other Party Info,Linked Transaction ID,A/C No.
0,RFC4VAFG0C,12-06-2023 16:08:16,12-06-2023 16:08:16,SP Commission for Loan Soko,Completed,,-3467.5,331060.7,True,21 days Merchant Loan Soko Disbursement,,,
1,RFC4VAFG0C,12-06-2023 16:08:16,12-06-2023 16:08:16,Merchant Loan Soko Disbursement to 276946 - RO...,Completed,,-95000.0,334528.2,True,21 days Merchant Loan Soko Disbursement,276946 - ROYAL SENATOR PALACE…..,,
2,RFC8VAB2YK,12-06-2023 16:07:08,12-06-2023 16:07:08,SP Commission for Loan Soko,Completed,,-866.25,429528.2,True,7 days Merchant Loan Soko Disbursement,,,
3,RFC8VAB2YK,12-06-2023 16:07:08,12-06-2023 16:07:08,7 days Merchant Loan Soko Disbursement to 7284...,Completed,,-69300.0,430394.45,True,7 days Merchant Loan Soko Disbursement,7284044 - Eunice Kathetha Kinga,,
4,RFC1VA98UD,12-06-2023 16:06:41,12-06-2023 16:06:41,SP Commission for Loan Soko,Completed,,-35.0,499694.45,True,7 days Merchant Loan Soko Disbursement,,,


#### Repayments (REP) present in M-PESA but missing on TEP

In [8]:
# Repayment rows recorded on TEP.
TEP_rep = TEP[TEP['is_repayment']]

# M-PESA repayment rows whose receipt number does not appear among the TEP
# fund_movement_transaction_id values. `.copy()` makes the filtered result an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning.
missing_rep_on_TEP = MPESA_rep[
    ~MPESA_rep['Receipt No.'].isin(TEP_rep['fund_movement_transaction_id'])
].copy()

# Calendar date of initiation; the statement timestamps are day-month-year.
missing_rep_on_TEP['Initiation Time2'] = pd.to_datetime(
    missing_rep_on_TEP['Initiation Time'], format='%d-%m-%Y %H:%M:%S'
).dt.date

# Number of distinct missing receipts per initiation date.
(
    missing_rep_on_TEP
    .drop_duplicates(subset=['Receipt No.'])
    .groupby('Initiation Time2')
    .size()
    .rename('count_of_missing')
)

Initiation Time2
2023-06-09     6
2023-06-10     6
2023-06-11    61
2023-06-12    51
Name: count_of_missing, dtype: int64

#### Disbursements (DISB) present in M-PESA but missing on TEP

In [9]:
# Disbursement rows recorded on TEP. The mask is taken from TEP itself;
# the previous mask referenced `may_june_TEP`, which no cell defines, so the
# cell failed with NameError on a fresh kernel.
TEP_disb = TEP[TEP['is_disbursement']]

# M-PESA disbursement rows whose receipt number does not appear among the TEP
# fund_movement_transaction_id values. `.copy()` makes the filtered result an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning.
missing_disb_on_TEP = MPESA_disb[
    ~MPESA_disb['Receipt No.'].isin(TEP_disb['fund_movement_transaction_id'])
].copy()

# Calendar date of initiation; the statement timestamps are day-month-year.
missing_disb_on_TEP['Initiation Time2'] = pd.to_datetime(
    missing_disb_on_TEP['Initiation Time'], format='%d-%m-%Y %H:%M:%S'
).dt.date

# Number of distinct missing receipts per initiation date. The statement can
# list one receipt on several rows (e.g. a commission row and the disbursement
# row), hence the de-duplication on 'Receipt No.' before counting.
(
    missing_disb_on_TEP
    .drop_duplicates(subset=['Receipt No.'])
    .groupby('Initiation Time2')
    .size()
    .rename('count_of_missing')
)

Initiation Time2
2023-06-09    11
2023-06-10     9
2023-06-11    41
2023-06-12    36
Name: count_of_missing, dtype: int64

In [21]:
# Export the repayments initiated on 2023-06-12 that are missing on TEP:
# identifying columns only, one row per receipt number.
rep_export_day = datetime.date(2023, 6, 12)
rep_export_columns = ['Receipt No.', 'Completion Time', 'Initiation Time', 'Transaction Status']
rep_export = (
    missing_rep_on_TEP
    .loc[missing_rep_on_TEP['Initiation Time2'] == rep_export_day, rep_export_columns]
    .drop_duplicates(subset=['Receipt No.'])
)
rep_export.to_csv('~/data/missing_REP_on_TEP_20230612.csv', index=False)

In [22]:
# Export the disbursements initiated on 2023-06-12 that are missing on TEP:
# identifying columns only, one row per receipt number.
disb_export_day = datetime.date(2023, 6, 12)
disb_export_columns = ['Receipt No.', 'Completion Time', 'Initiation Time', 'Transaction Status']
disb_export = (
    missing_disb_on_TEP
    .loc[missing_disb_on_TEP['Initiation Time2'] == disb_export_day, disb_export_columns]
    .drop_duplicates(subset=['Receipt No.'])
)
disb_export.to_csv('~/data/missing_DISB_on_TEP_20230612.csv', index=False)

#### Repayments (REP) present in M-PESA but missing on the data dumps

In [None]:
# NOTE(review): `may_june_DataDumps` and `may_june_MPESA_rep` are not defined
# in any visible cell, and this cell carries no execution count -- confirm
# where these frames are loaded before running.

# Repayment rows from the data dumps.
may_june_Dump_rep = may_june_DataDumps[may_june_DataDumps['is_repayment']]

# M-PESA repayment rows whose receipt number does not appear among the dump's
# id_trxn_linkd values. `.copy()` makes the filtered result an independent
# frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning.
missing_rep_on_DUMP = may_june_MPESA_rep[
    ~may_june_MPESA_rep['Receipt No.'].isin(may_june_Dump_rep['id_trxn_linkd'])
].copy()

# Calendar date of initiation, parsed as day-month-year.
missing_rep_on_DUMP['Initiation Time2'] = pd.to_datetime(
    missing_rep_on_DUMP['Initiation Time'], format='%d-%m-%Y %H:%M:%S'
).dt.date

# Missing rows per initiation date (rows are not de-duplicated by receipt here).
missing_rep_on_DUMP.groupby('Initiation Time2').size()

#### Disbursements (DISB) present in M-PESA but missing on the data dumps

In [None]:
# NOTE(review): `may_june_DataDumps` and `may_june_MPESA_disb` are not defined
# in any visible cell, and this cell carries no execution count -- confirm
# where these frames are loaded before running.

# Disbursement rows from the data dumps.
may_june_Dump_disb = may_june_DataDumps[may_june_DataDumps['is_disbursement']]

# M-PESA disbursement rows whose receipt number does not appear among the
# dump's id_trxn_linkd values. `.copy()` makes the filtered result an
# independent frame, so adding a column below does not trigger pandas'
# SettingWithCopyWarning.
missing_disb_on_DUMP = may_june_MPESA_disb[
    ~may_june_MPESA_disb['Receipt No.'].isin(may_june_Dump_disb['id_trxn_linkd'])
].copy()

# Calendar date of initiation, parsed as day-month-year.
missing_disb_on_DUMP['Initiation Time2'] = pd.to_datetime(
    missing_disb_on_DUMP['Initiation Time'], format='%d-%m-%Y %H:%M:%S'
).dt.date

# Missing rows per initiation date.
# NOTE(review): rows are not de-duplicated by 'Receipt No.' here; if the
# statement lists a receipt on several rows, this counts each row -- confirm
# whether per-receipt counts are intended.
missing_disb_on_DUMP.groupby('Initiation Time2').size()