#### Import Libraries

In [1]:
import pandas as pd
import urllib
import sys
import json
import datetime
import os
from os import listdir
from os.path import isfile, join
import numpy as np
from dateutil.relativedelta import relativedelta

# Let rendered DataFrames show up to 200 columns and 200 rows before pandas truncates them.
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

## Extract CIR files (only applicable for ADF)
#### Input: CSV with one row per customer, giving the customer identifier (`msisdn`) and the CIR download link (`cir_file_url`)

In [1]:
import pandas as pd
import os

# Input extract. The columns this cell and the download step below rely on are
# 'created' (sort key), 'msisdn' (de-duplication key / output file name) and
# 'cir_file_url' (download link).
user_info = pd.read_csv('all_bureau_adf_jan19_L6m.csv')

# Sort by 'created' descending so that drop_duplicates (which keeps the first
# occurrence) retains the row with the largest 'created' value per msisdn.
user_info = user_info.sort_values('created', ascending = False)

user_info = user_info.drop_duplicates(subset = 'msisdn')

# Directory where you want to store the CIRs
DIR = "C:/Users/Seynse/Desktop/Airtel/CIRs/all_bureau_adf_jan19_L6M/"

# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, replacing the separate exists() check.
os.makedirs(DIR, exist_ok=True)

def getXMLdata(loan_id, xml_url):
    """Download one CIR document and save it as ``DIR + loan_id + '.txt'``.

    Parameters
    ----------
    loan_id : str
        Used verbatim as the output file name (the '.txt' extension is added here).
    xml_url : str
        Address the document is fetched from.

    Returns
    -------
    None
        The function is called for its side effect. When the request fails
        (connection error, timeout, or a 4xx/5xx status) a message is printed
        and no file is written, so an error page is never stored as a CIR.
    """
    import requests  # local import: only this download step needs the package

    try:
        # The timeout keeps a single unresponsive URL from stalling the whole run.
        response = requests.get(xml_url, timeout=60)
        # Turn HTTP error statuses into exceptions instead of saving their body.
        response.raise_for_status()
    except requests.RequestException as err:
        print('Skipping', loan_id, '-', err)
        return

    with open(DIR + loan_id + '.txt', 'wb') as file:
        file.write(response.content)

def _download_cir(row):
    """Fetch the CIR referenced by one ``user_info`` row."""
    # The replace removes every '.0' substring from the stringified msisdn
    # (presumably to undo float formatting such as '9876543210.0' -- confirm).
    file_stem = str(row[('msisdn')]).replace('.0','')
    return getXMLdata(file_stem, row[('cir_file_url')])

user_info.apply(_download_cir, axis=1)

In [2]:
# Folder that is scanned for downloaded CIR files.
mypath = 'C:/Users/Seynse/Desktop/Airtel/CIRs/all_bureau_adf_jan19_L6M/'

# Regular files only; sub-directories are ignored.
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]

loan_names = onlyfiles

# Remove the extension only when it is a trailing '.txt'. A plain
# str.replace('.txt', '') would also delete that text from the middle of a
# name, and the list comprehension (unlike a row-wise apply) still produces a
# valid, empty column when the folder contains no files.
loan_ids = pd.DataFrame({
    'loan_ref_no': [
        name[:-len('.txt')] if name.endswith('.txt') else name
        for name in loan_names
    ]
})
loan_ids

#### Extract and/or Convert parsed response to JSON for data extraction

In [3]:
import pandas as pd
import urllib
import sys
import json
import datetime
import os
from os import listdir
from os.path import isfile, join
import numpy as np
from dateutil.relativedelta import relativedelta

user = os.getenv('username')

# Normalise the answer so that stray whitespace or lower-case input ('adf ')
# is still accepted; the value is compared against 'ADF'/'LOS' here and by the
# extraction functions further down.
running_for = input('ADF/LOS: ').strip().upper() # Options: (LOS/ADF)

if running_for == 'ADF':
    # Location of the CIR text files
    mypath = 'C:/Users/'+user+'/Desktop/LOS Data/Monday Report/XML_extract/new_adf1/'

    # File names double as the loan reference (extension still attached here).
    loan_names = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    onlyfiles = [mypath + name for name in loan_names]

    loan_ids = pd.DataFrame({'loan_ref_no': loan_names})
    file_names = pd.DataFrame({'path': onlyfiles})

    # Both frames share the same positional index, so this pairs each file
    # name with its full path.
    combine = pd.merge(loan_ids, file_names, left_index=True, right_index=True)

    def read_cir(path,loan_ref_no):
        """Return the full text of the file at ``path``.

        ``loan_ref_no`` is accepted for call-site symmetry but not used.
        The ``with`` block guarantees the handle is closed after reading.
        """
        with open(path, 'r') as file:
            return file.read()

    def read_response(trial,loan_ref_no):
        """Return ``str(trial)`` from its first '{' up to, but excluding, its last character.

        The extraction functions append '}' again when running for ADF.
        When no '{' is present ``find`` yields -1 and the slice is the empty
        string. ``loan_ref_no`` is not used.
        """
        text = str(trial)
        start = text.find('{')
        return text[start:-1]

    # List comprehensions (rather than row-wise apply) keep these assignments
    # valid even when the folder is empty.
    combine['parsed_response'] = [
        read_cir(path, ref) for path, ref in zip(combine['path'], combine['loan_ref_no'])
    ]
    combine['final_response'] = [
        read_response(raw, ref) for raw, ref in zip(combine['parsed_response'], combine['loan_ref_no'])
    ]

elif running_for == 'LOS':
    # Responses are already available in a CSV; keep one row per loan and
    # expose the payload under the column name the extraction step reads.
    combine = pd.read_csv('C:/Users/'+user+'/Seynse/parsed_cibil_24jul.csv')
    combine = combine.drop_duplicates('loan_ref_no')
    combine = combine.rename(columns={"parsed_response": "final_response"})

else:
    print("Sorry, invalid input; Cannot Proceed Further!")
    sys.exit()

print(len(combine))
combine.head()

## CIR Data Extraction

In [4]:
# Date Reported
def date_process(trial,loan_ref_no):
    """Append one ('Loan ID', 'Date Processed') row to the global ``date_processed``.

    Parameters
    ----------
    trial : str
        JSON text of the report. When ``running_for`` is 'ADF' a closing '}'
        is appended before parsing; for 'LOS' the text is parsed as is.
    loan_ref_no
        Identifier stored in the 'Loan ID' column.

    Returns
    -------
    int
        Always 0. The result is delivered through the global table.

    Notes
    -----
    Any failure while preparing or parsing the payload, or while reading
    ``j['TUEF']['date_processed']``, stores an empty string as the date.
    ``except Exception`` (instead of a bare ``except``) lets Ctrl-C / kernel
    interrupts propagate. ``pd.concat`` replaces ``DataFrame.append``, which
    no longer exists in pandas 2.x.
    """
    global date_processed
    try:
        if running_for == 'ADF':
            cut = str(trial + "}")
        elif running_for == 'LOS':
            cut = trial
        j = json.loads(cut)
        date = j['TUEF']['date_processed']
    except Exception:
        date = ''
    list_date = [loan_ref_no, date]
    df = pd.DataFrame(np.array(list_date).reshape(1,2), columns = ['Loan ID','Date Processed'])
    if len(date_processed) == 0:
        date_processed = df
    else:
        # Row index is intentionally not reset, matching the former append().
        date_processed = pd.concat([date_processed, df])
    return 0

# Total Loans Ever
def total_loans(trial,loan_ref_no):
    """Append one row per entry of ``j['TL']`` to the global ``all_loans``.

    Parameters
    ----------
    trial : str
        JSON text of the report ('}' is appended first when ``running_for``
        is 'ADF').
    loan_ref_no
        Identifier stored in the 'Loan ID' column of every produced row.

    Returns
    -------
    int
        Always 0. The result is delivered through the global table.

    Notes
    -----
    If an entry cannot be read, the error is printed together with the loan
    reference, processing of this report stops, and the rows collected before
    the failure are still kept. All rows of one call are concatenated in a
    single ``pd.concat`` (``DataFrame.append`` no longer exists in pandas 2.x).
    """
    global all_loans
    columns = ['Loan ID','Count','Type','Loan Status','Secured or Unsecured','Credit Limit','Sanctioned Limit','Date Opened',' Amount Overdue', 'Settlement Amount','Write-off Amount','Suit Filed']
    frames = []
    try:
        if running_for == 'ADF':
            cut = str(trial + "}")
        elif running_for == 'LOS':
            cut = trial
        j = json.loads(cut)
        for k, account in enumerate(j['TL']):
            local_loan = [
                loan_ref_no,
                k,
                account['account_type']['display'],
                account['live_or_close']['display'],
                account['secured_or_unsecured'],
                account['credit_limit'],
                account['sanctioned_amount'],
                account['date_opened'],
                account['amount_overdue'],
                account['settlement_amount'],
                account['written_off_amount_total'],
                account['suit_filed']['display'],
            ]
            # One frame per row keeps numpy's per-row type coercion unchanged.
            frames.append(pd.DataFrame(np.array(local_loan).reshape(1,12), columns = columns))
    except Exception as e:
        print(loan_ref_no, e)
    if frames:
        existing = [all_loans] if len(all_loans) != 0 else []
        all_loans = pd.concat(existing + frames)
    return 0


# Total Inquiries Ever
def total_enq(trial,loan_ref_no):
    """Append one row per entry of ``j['IQ']`` to the global ``all_enq``.

    Parameters
    ----------
    trial : str
        JSON text of the report ('}' is appended first when ``running_for``
        is 'ADF').
    loan_ref_no
        Identifier stored in the 'Loan ID' column of every produced row.

    Returns
    -------
    int
        Always 0. The result is delivered through the global table.

    Notes
    -----
    Extraction is best-effort: a report that cannot be parsed or read stops
    silently, keeping the rows collected so far. ``except Exception`` (not a
    bare ``except``) lets kernel interrupts through. Rows are concatenated
    once per call with ``pd.concat`` (``DataFrame.append`` no longer exists in
    pandas 2.x).
    """
    global all_enq
    columns = ['Loan ID','Type','Loan Amount','Date of Enquiry']
    frames = []
    try:
        if running_for == 'ADF':
            cut = str(trial + "}")
        elif running_for == 'LOS':
            cut = trial
        j = json.loads(cut)
        for enquiry in j['IQ']:
            local_loan = [
                loan_ref_no,
                enquiry['enquiry_purpose']['display'],
                enquiry['enquiry_amount'],
                enquiry['date_of_enquiry'],
            ]
            frames.append(pd.DataFrame(np.array(local_loan).reshape(1,4), columns = columns))
    except Exception:
        # Deliberately quiet, as before: reports without usable enquiries are skipped.
        pass
    if frames:
        existing = [all_enq] if len(all_enq) != 0 else []
        all_enq = pd.concat(existing + frames)
    return 0

# DPD Status for all loans
def total_dpd(trial,loan_ref_no):
    """Append one row per DPD value of every ``j['TL']`` entry to the global ``all_dpd``.

    Parameters
    ----------
    trial : str
        JSON text of the report ('}' is appended first when ``running_for``
        is 'ADF').
    loan_ref_no
        Identifier stored in the 'Loan ID' column of every produced row.

    Returns
    -------
    int
        Always 0. The result is delivered through the global table.

    Notes
    -----
    Prints and increments the global counter ``i`` on every call as a progress
    indicator. Extraction is best-effort: a report that cannot be parsed or
    read stops silently, keeping the rows collected so far. Rows are
    concatenated once per call with ``pd.concat`` (``DataFrame.append`` no
    longer exists in pandas 2.x).
    """
    global i, all_dpd
    print('DPD ',i)
    i+=1
    columns = ['Loan ID','Count','Date Opened','Type','Total DPD Values','Counter','DPD','Month']
    frames = []
    try:
        if running_for == 'ADF':
            cut = str(trial + "}")
        elif running_for == 'LOS':
            cut = trial
        j = json.loads(cut)
        for k, account in enumerate(j['TL']):
            loan_type = account['account_type']['display']
            history = account['dpd_values']
            total_dpd_values = len(history)
            date_opened = account['date_opened']
            for x, entry in enumerate(history):
                local_loan = [loan_ref_no, k, date_opened, loan_type, total_dpd_values, x, entry['dpd'], entry['month']]
                frames.append(pd.DataFrame(np.array(local_loan).reshape(1,8), columns = columns))
    except Exception:
        # Deliberately quiet, as before; `except Exception` still lets interrupts through.
        pass
    if frames:
        existing = [all_dpd] if len(all_dpd) != 0 else []
        all_dpd = pd.concat(existing + frames)
    return 0

def _extract_with(extractor):
    """Call ``extractor(final_response, loan_ref_no)`` for every row of ``combine``.

    The extractor's return values are stored in ``combine['extract']``; the
    extracted data itself accumulates in the extractor's own global table.
    """
    combine['extract'] = combine.apply(
        lambda row: extractor(row[('final_response')], row[('loan_ref_no')]),
        axis=1,
    )

# Each stage resets the progress counter and starts from an empty result table.
i=0
date_processed = pd.DataFrame()
_extract_with(date_process)

i=0
all_enq = pd.DataFrame()
_extract_with(total_enq)

i=0
all_loans = pd.DataFrame()
_extract_with(total_loans)

i=0
all_dpd = pd.DataFrame()
_extract_with(total_dpd)

def date_convert(inp_date):
    """Parse a 'DD-MM-YYYY' or 'DD-MM-YY' string into a ``datetime.date``.

    Parameters
    ----------
    inp_date
        Value to parse.

    Returns
    -------
    datetime.date or the original value
        The parsed date when either format matches; otherwise ``inp_date`` is
        returned unchanged (this includes non-string inputs such as None).
    """
    for fmt in ('%d-%m-%Y', '%d-%m-%y'):
        try:
            return datetime.datetime.strptime(inp_date, fmt).date()
        except (ValueError, TypeError):
            # ValueError: text does not match this format; TypeError: not a string.
            continue
    return inp_date

def _parse_dates_inplace(frame, column):
    """Replace ``frame[column]`` with the row-wise result of ``date_convert``."""
    frame[column] = frame.apply(lambda row: date_convert(row[column]), axis=1)

_parse_dates_inplace(date_processed, 'Date Processed')
_parse_dates_inplace(all_enq, 'Date of Enquiry')
_parse_dates_inplace(all_loans, 'Date Opened')
_parse_dates_inplace(all_dpd, 'Date Opened')

# Prefix a day so the month value can go through the same parser
# (presumably the raw value is 'MM-YY' or 'MM-YYYY' -- confirm against the feed).
all_dpd['Month'] = '01-'+all_dpd['Month']
_parse_dates_inplace(all_dpd, 'Month')

# day=1, +1 month, -1 day moves a date to the last day of its own month.
_to_month_end = relativedelta(day=1, months=+1, days=-1)
all_dpd['Month'] = all_dpd['Month'] + _to_month_end
all_dpd['End of Date'] = all_dpd['Date Opened'] + _to_month_end

def flag_create(diff, x):
    """Return a 0/1 flag comparing ``diff`` with ``x``, or map a status code.

    Parameters
    ----------
    diff
        Left-hand side of the comparison.
    x
        Threshold convertible with ``int()``, or one of the codes below.

    Returns
    -------
    int or None
        * ``x`` numeric: 0 when ``diff > int(x)``, otherwise 1.
        * ``x`` is 'XXX' or 'STD': 0.
        * ``x`` is 'SUB', 'DBT' or 'LSS': 1.
        * anything else, or a ``diff`` that cannot be compared: None.
    """
    try:
        threshold = int(x)
    except (TypeError, ValueError, OverflowError):
        # Not a number: fall back to the code table.
        if x in ('XXX', 'STD'):
            return 0
        if x in ('SUB', 'DBT', 'LSS'):
            return 1
        return None
    try:
        return 0 if diff > threshold else 1
    except (TypeError, ValueError):
        # e.g. diff is None -- no flag can be derived.
        return None
            

# Gap between each DPD month-end and the month-end of the loan's opening date.
all_dpd['diff'] = all_dpd['Month'] - all_dpd['End of Date']
# 1 when that gap is at most 365 days, 0 when it is larger.
# NOTE(review): the column name suggests "last 12 months"; confirm that measuring
# from the opening date (rather than the report date) is intended.
all_dpd['12_month'] = all_dpd.apply(lambda x: flag_create(x['diff'].days,365), axis=1)
# 1 when the DPD value is numeric and >= 30 / >= 90 (or a code flag_create maps to 1).
all_dpd['30+_flag'] = all_dpd.apply(lambda x: flag_create(30, x['DPD']), axis=1)
all_dpd['90+_flag'] = all_dpd.apply(lambda x: flag_create(90, x['DPD']), axis=1)

# Enquiry age relative to the report's processed date, bucketed at 30/60/90 days.
all_enq = pd.merge(all_enq, date_processed, on = 'Loan ID', how = 'left')
all_enq['diff'] = all_enq['Date Processed'] - all_enq['Date of Enquiry']
all_enq['30day_flag'] = all_enq.apply(lambda x: flag_create(x['diff'].days,30), axis=1)
all_enq['60day_flag'] = all_enq.apply(lambda x: flag_create(x['diff'].days,60), axis=1)
all_enq['90day_flag'] = all_enq.apply(lambda x: flag_create(x['diff'].days,90), axis=1)

# Map each loan 'Type' to a 'Category'; types missing from the lookup become 'Other'.
loan_cat = pd.read_csv('loan_category_cibil.csv')
all_loans = pd.merge(all_loans, loan_cat, on = 'Type', how = 'left')
all_loans['Category'] = all_loans['Category'].fillna('Other')

# Strip the file extension left on ADF loan references. The dot is escaped and
# the pattern anchored to the end of the value: the previous '.txt' pattern
# matched ANY character followed by 'txt', anywhere in any string cell.
all_enq = all_enq.replace({r'\.txt$': ''}, regex=True)
all_loans = all_loans.replace({r'\.txt$': ''}, regex=True)
all_dpd = all_dpd.replace({r'\.txt$': ''}, regex=True)
date_processed = date_processed.replace({r'\.txt$': ''}, regex=True)

In [7]:
# Date stamp appended to every export file name.
curr_date = datetime.datetime.today().strftime('%d%b%y') #'01Feb18'

# Write each result table to '<prefix><date>.xlsx' without the row index.
for _prefix, _table in (
    ('enq_all', all_enq),
    ('loans_all', all_loans),
    ('dpd_all', all_dpd),
    ('date_process', date_processed),
):
    _table.to_excel(_prefix + curr_date + '.xlsx', index = False)

### (Deprecated) Checks For ADF Pre-Approved
Note: this section is redundant, because the eligibility data it derives is already available in the DB for customers with CIBIL details.

In [None]:
# Replace the in-memory tables with previously exported workbooks.
# Note that this overwrites whatever the extraction cells above produced.
all_dpd, all_enq, all_loans, date_processed = [
    pd.read_excel(workbook)
    for workbook in (
        'dpd_score_inp_1212_v1.xlsx',
        'enq_score_inp_1212_v1.xlsx',
        'loans_score_inp_1212_v1.xlsx',
        'date_process_score_inp_1212_v1.xlsx',
    )
]

In [5]:
def str_concat(a,b):
    """Return the string forms of ``a`` and ``b`` joined with no separator."""
    return ''.join((str(a), str(b)))

def _account_key(row):
    """Build the per-account join key from a row's 'Loan ID' and 'Count'."""
    # NOTE(review): the two parts are joined without a delimiter, so e.g.
    # ('A1', 23) and ('A12', 3) give the same key -- confirm IDs are fixed-length.
    return str_concat(row['Loan ID'], row['Count'])

all_dpd['ID'] = all_dpd.apply(_account_key, axis=1)
all_loans['ID'] = all_loans.apply(_account_key, axis=1)

# Drop the columns all_dpd already carries so the merge does not duplicate them.
loan_merge = all_loans.drop(columns=['Unnamed: 0','Loan ID','Count','Date Opened','Type'])
dpd_loans = all_dpd.merge(loan_merge, on = 'ID', how = 'left')

def flag_create(diff, x):
    """Return a 0/1 flag comparing ``diff`` with ``x``, or map a status code.

    This repeats the definition from the extraction section, presumably so the
    deprecated section can run after reloading its inputs from Excel.

    Parameters
    ----------
    diff
        Left-hand side of the comparison.
    x
        Threshold convertible with ``int()``, or one of the codes below.

    Returns
    -------
    int or None
        * ``x`` numeric: 0 when ``diff > int(x)``, otherwise 1.
        * ``x`` is 'XXX' or 'STD': 0.
        * ``x`` is 'SUB', 'DBT' or 'LSS': 1.
        * anything else, or a ``diff`` that cannot be compared: None.
    """
    try:
        threshold = int(x)
    except (TypeError, ValueError, OverflowError):
        # Not a number: fall back to the code table.
        if x in ('XXX', 'STD'):
            return 0
        if x in ('SUB', 'DBT', 'LSS'):
            return 1
        return None
    try:
        return 0 if diff > threshold else 1
    except (TypeError, ValueError):
        # e.g. diff is None -- no flag can be derived.
        return None
            
def days_removal(days):
    """Extract the whole number of days from a textual duration.

    Parameters
    ----------
    days : str
        Text such as '123 days 00:00:00', '1 day, 0:00:00', or a plain
        number such as '400' / '400.0'.

    Returns
    -------
    int or None
        The integer that precedes the first 'd', or, when the text contains no
        'd', the text parsed as a number and truncated to an int. None when
        neither works; in that case the input and the error are printed.
    """
    try:
        marker = days.find('d')
        if marker == -1:
            # No unit text. Slicing relative to the -1 that find() returns
            # would silently chop digits ('400' -> 4), so parse the number.
            return int(float(days))
        return int(days[:marker].strip())
    except Exception as e:
        print(days,e)
        return None
            
def _flag_for_window(window):
    """Return a row function that evaluates ``flag_create(row['days'], window)``."""
    return lambda row: flag_create(row['days'], window)

# Numeric day count parsed from the textual form of the 'diff' column.
dpd_loans['days'] = dpd_loans.apply(lambda row: days_removal(str(row['diff'])), axis=1)
dpd_loans['3_month'] = dpd_loans.apply(_flag_for_window(91), axis=1)
dpd_loans['6_month'] = dpd_loans.apply(_flag_for_window(182), axis=1)

def exemption_flag(type_loan, amt_ovd):
    """Return 1 when the overdue amount is within the limit for the loan type, else 0.

    The limit is 5000 for 'Credit Card' and 500 for every other type; an
    amount strictly above the limit yields 0.
    """
    limit = 5000 if type_loan == 'Credit Card' else 500
    return 0 if amt_ovd > limit else 1

def curr_del(exempt, status, dpd):
    """Return 1 when a live, non-exempt account currently shows a delinquency, else 0.

    Parameters
    ----------
    exempt
        Exemption flag; a value of 1 always yields 0.
    status
        Account status; only 'Live Account' can yield 1.
    dpd
        DPD value: one of the codes 'SUB', 'DBT', 'LSS', 'SMA' (-> 1),
        'XXX', 'STD' (-> 0), or something convertible with ``int()``
        (-> 1 when greater than 0).

    Returns
    -------
    int
        0 or 1. A DPD value that is neither a known code nor convertible to an
        integer is reported via ``print`` and counted as 0. Previously that
        path returned None, which a ``!= 0`` filter would treat as delinquent.
    """
    if exempt == 1:
        return 0
    if status != 'Live Account':
        return 0
    if dpd in ['SUB','DBT','LSS','SMA']:
        return 1
    if dpd in ['XXX','STD']:
        return 0
    try:
        return 1 if int(dpd) > 0 else 0
    except Exception as e:
        print(dpd,e)
        return 0
                    
def settle_etc(settle,write_off,suit_filed):
    """Return 1 when the account has a settlement, a write-off or an adverse suit status, else 0.

    A positive ``settle`` or ``write_off`` amount triggers the flag, as does a
    ``suit_filed`` value equal to one of the three statuses listed below.
    """
    adverse_statuses = ('Suit filed','Wilful default','Suit filed (Wilful default)')
    if settle > 0:
        return 1
    if write_off > 0:
        return 1
    return 1 if suit_filed in adverse_statuses else 0

# Row-level indicators derived from the merged DPD/loan table.
dpd_loans['settle_etc'] = dpd_loans.apply(
    lambda row: settle_etc(row['Settlement Amount'], row['Write-off Amount'], row['Suit Filed']),
    axis=1,
)
dpd_loans['exempt'] = dpd_loans.apply(
    lambda row: exemption_flag(row['Type'], row[' Amount Overdue']),
    axis=1,
)
dpd_loans['curr_del'] = dpd_loans.apply(
    lambda row: curr_del(row['exempt'], row['Loan Status'], row['DPD']),
    axis=1,
)

# Reusable row filters.
_not_exempt = dpd_loans['exempt'] == 0
_in_3_month = dpd_loans['3_month'] == 1
_in_12_month = dpd_loans['12_month'] == 1

# One de-duplicated (Loan ID, flag) table per rule.
curr_del_df = dpd_loans.loc[dpd_loans['curr_del'] != 0, ['Loan ID','curr_del']].drop_duplicates()
L3_30 = dpd_loans.loc[
    (dpd_loans['30+_flag'] == 1) & _in_3_month & _not_exempt, ['Loan ID','30+_flag']
].drop_duplicates()
L12_90 = dpd_loans.loc[
    (dpd_loans['90+_flag'] == 1) & _in_12_month & _not_exempt, ['Loan ID','90+_flag']
].drop_duplicates()
settle_etc_df = dpd_loans.loc[
    (dpd_loans['settle_etc'] == 1) & _in_12_month, ['Loan ID','settle_etc']
].drop_duplicates()

# Left-join every rule onto the list of processed reports; a missing match means flag 0.
final_table = pd.merge(date_processed, curr_del_df, on = 'Loan ID', how = 'left').fillna(0).drop(['Unnamed: 0'],axis=1)
for _rule_df in (L3_30, L12_90, settle_etc_df):
    final_table = pd.merge(final_table, _rule_df, on = 'Loan ID', how = 'left').fillna(0)
# final_table.to_excel('ADF_pre_app_analysis.xlsx',index=False)