In [262]:
import json
import pprint
import uuid
import xmltodict
import datetime
from dateutil import parser
import pandas as pd

# Cap the number of rows pandas renders when a DataFrame is displayed.
pd.set_option('display.max_rows', 10)

# Names of the result tables; each one becomes a key of the result dict.
RESULT_TABLE_NAMES = [
    'prequal_results',
    'xml_details',
    'products',
    'credit_profiles',
    'headers',
    'risk_models',
    'consumer_identities',
    'addr_infos',
    'employ_infos',
    'trade_lines',
    'inquiries',
    'info_msgs',
    'enhanced_pm_data',
    'tradeline_amounts',
]

# Load the prequal result records from the JSON file.
with open("./data/prequalresult.json") as f:
    json_file = json.load(f)

Parse each record's XML data string and split its nested objects into the designed tables

In [263]:
# One empty row list per result table name.
RESULT_OBJ_DICT = {table_name: [] for table_name in RESULT_TABLE_NAMES}

# Per table: the attributes whose '@code' sub-value is flattened into a
# '<attribute>_code' entry.
ATTR_CODE_DICT = {
    'risk_models': [
        'ModelIndicator',
        'Evaluation',
    ],
    'addr_infos': [
        'Origination',
        'DwellingType',
        'HomeOwnership',
    ],
    'employ_infos': [
        'Origination',
    ],
    'trade_lines': [
        'SpecialComment',
        'Evaluation',
        'AccountType',
        'TermsDuration',
        'Status',
        'ECOA',
        'OpenOrClosed',
        'RevolvingOrInstallment',
        'MonthlyPaymentType',
        'KOB',
    ],
    'tradeline_amounts': [
        'Qualifier',
    ],
    'enhanced_pm_data': [
        'AccountCondition',
        'PaymentStatus',
        'AccountType',
        'SpecialComment',
    ],
    'inquiries': [
        'Type',
        'Terms',
        'KOB',
    ],
}

# Per table: the date attributes to convert, grouped by the parse mode
# ('short' or 'long') that is passed to parse_to_date.
DATE_ATTR_DICT = {
    'prequal_results': {
        'long': ['created_at', 'bkdate'],
    },
    'headers': {
        'short': ['ReportDate'],
    },
    'addr_infos': {
        'short': ['FirstReportedDate', 'LastUpdatedDate'],
    },
    'employ_infos': {
        'short': ['FirstReportedDate', 'LastUpdatedDate'],
    },
    'trade_lines': {
        'short': [
            'OpenDate',
            'StatusDate',
            'MaxDelinquencyDate',
            'BalanceDate',
            'LastPaymentDate',
        ],
    },
    'enhanced_pm_data': {
        'short': ['InitialPaymentLevelDate'],
    },
    'inquiries': {
        'short': ['Date'],
    },
}

# parse string into date
def parse_to_date(input_str, mode='short'):
    """Convert a date string into a ``datetime.datetime``.

    Parameters
    ----------
    input_str : str or None
        The raw date text. In ``'short'`` mode it must be exactly eight
        digits laid out as month, day, year (``MMDDYYYY``). In any other
        mode it is handed to ``dateutil.parser.parse`` unchanged.
    mode : str
        ``'short'`` selects the fixed eight-digit layout; every other value
        selects the free-form dateutil parser.

    Returns
    -------
    datetime.datetime or None
        The parsed date, or ``None`` when ``input_str`` is ``None`` or blank.

    Raises
    ------
    ValueError
        In ``'short'`` mode, when the text is not eight digits or does not
        form a valid calendar date.
    """
    if input_str is None:
        return None
    if isinstance(input_str, str):
        # Padding would shift the fixed-position slices below.
        input_str = input_str.strip()
        if not input_str:
            return None

    if mode == 'short':
        # Reject anything that is not exactly MMDDYYYY; slicing a longer or
        # shorter string would otherwise produce a wrong date or an opaque error.
        if len(input_str) != 8 or not input_str.isdigit():
            raise ValueError(
                "expected an 8-digit MMDDYYYY date, got {!r}".format(input_str))
        month = int(input_str[:2])
        day = int(input_str[2:4])
        year = int(input_str[-4:])
        return datetime.datetime(year, month, day)

    return parser.parse(input_str)

def _as_list(node):
    """Return an xmltodict child node as a list.

    xmltodict gives a dict for an element that occurs once and a list for an
    element that repeats; an absent or empty element shows up as a missing
    key or ``None``. Normalising lets the loops below treat all cases alike
    instead of iterating over the keys of a single dict.
    """
    if node is None:
        return []
    if isinstance(node, list):
        return node
    return [node]


# Name parts copied from ConsumerIdentity/Name onto the consumer identity row.
name_attrs = ['Surname', 'First', 'Middle', 'Gen']

prequal_results = json_file
for raw_result in prequal_results:
    # Work on a shallow copy so json_file keeps its 'fields' entry and this
    # cell can be re-run without reloading the file.
    prequal_result = dict(raw_result)
    prequal_result.update(prequal_result.pop('fields'))
    prequal_result['created_at'] = parser.parse(prequal_result['created_at'])
    xml_detail_id = "xd_{}".format(uuid.uuid4())
    prequal_result['xml_detail_id'] = xml_detail_id
    RESULT_OBJ_DICT['prequal_results'].append(prequal_result)

    # initializing XML_Detail object
    # The XML comes from the record being processed (the original read it
    # from `result`, a name that is never defined in this notebook).
    xml_dict = xmltodict.parse(prequal_result['xml_data'], dict_constructor=dict)['NetConnectResponse']
    xml_dict['id'] = xml_detail_id
    RESULT_OBJ_DICT['xml_details'].append(xml_dict)
    product_id = "pd_{}".format(uuid.uuid4())
    xml_dict['product_id'] = product_id

    # initializing Product object
    product_obj = xml_dict.pop('Products')
    product_obj['id'] = product_id
    RESULT_OBJ_DICT['products'].append(product_obj)
    credit_profile_id = "cp_{}".format(uuid.uuid4())
    product_obj['credit_profile_id'] = credit_profile_id

    # initializing CreditProfile object
    credit_profile_obj = product_obj.pop('CreditProfile')
    credit_profile_obj['id'] = credit_profile_id
    RESULT_OBJ_DICT['credit_profiles'].append(credit_profile_obj)

    ## handling RiskModels in CreditProfile
    for risk_model in _as_list(credit_profile_obj.pop('RiskModel', None)):
        risk_model['id'] = "rm_{}".format(uuid.uuid4())
        risk_model['CreditProfile_id'] = credit_profile_id
        RESULT_OBJ_DICT['risk_models'].append(risk_model)

    # initializing Header object in CreditProfile
    header_id = "hd_{}".format(uuid.uuid4())
    credit_profile_obj['header_id'] = header_id
    header_obj = credit_profile_obj.pop('Header')
    header_obj['id'] = header_id
    RESULT_OBJ_DICT['headers'].append(header_obj)

    ## handling ConsumerIdentities in CreditProfile
    for consumer_identity in _as_list(credit_profile_obj.pop('ConsumerIdentity', None)):
        consumer_identity['id'] = "ci_{}".format(uuid.uuid4())
        # Flatten the nested Name element; name parts that are absent become ''.
        name_obj = consumer_identity.pop('Name', None) or {}
        for name_attr in name_attrs:
            consumer_identity[name_attr] = name_obj.get(name_attr, '')
        # Make sure every row has a YOB entry even when the element is absent.
        consumer_identity.setdefault('YOB', None)
        name_type = name_obj.get('Type') or {}
        consumer_identity['NameType_code'] = name_type.get('@code', '')
        consumer_identity['CreditProfile_id'] = credit_profile_id
        RESULT_OBJ_DICT['consumer_identities'].append(consumer_identity)

    ## handling AddressInformations in CreditProfile
    for addr_info in _as_list(credit_profile_obj.pop('AddressInformation', None)):
        addr_info['id'] = "ai_{}".format(uuid.uuid4())
        addr_info['CreditProfile_id'] = credit_profile_id
        RESULT_OBJ_DICT['addr_infos'].append(addr_info)

    ## handling EmploymentInformations in CreditProfile
    for employ_info in _as_list(credit_profile_obj.pop('EmploymentInformation', None)):
        employ_info['id'] = "ei_{}".format(uuid.uuid4())
        employ_info['CreditProfile_id'] = credit_profile_id
        RESULT_OBJ_DICT['employ_infos'].append(employ_info)

    ## handling TradeLines in CreditProfile
    for trade_line in _as_list(credit_profile_obj.pop('TradeLine', None)):
        trade_line['id'] = "tl_{}".format(uuid.uuid4())
        trade_line['CreditProfile_id'] = credit_profile_id
        RESULT_OBJ_DICT['trade_lines'].append(trade_line)

        # initializing EnhancedPaymentData object in TradeLine
        enhanced_pm_data_id = "ep_{}".format(uuid.uuid4())
        trade_line['enhanced_pm_data_id'] = enhanced_pm_data_id
        enhanced_pm_data_obj = trade_line.pop('EnhancedPaymentData')
        enhanced_pm_data_obj['id'] = enhanced_pm_data_id
        RESULT_OBJ_DICT['enhanced_pm_data'].append(enhanced_pm_data_obj)

        ## handling Amount object in TradeLine
        for amt_obj in _as_list(trade_line.pop('Amount', None)):
            amt_obj['id'] = "am_{}".format(uuid.uuid4())
            amt_obj['TradeLine_id'] = trade_line['id']
            RESULT_OBJ_DICT['tradeline_amounts'].append(amt_obj)

    # initializing Inquiry object in CreditProfile
    # NOTE(review): this treats Inquiry as a single element. A profile with
    # several Inquiry elements would arrive as a list and fail here; confirm
    # against the data and the intended table design.
    inquiry_id = "iq_{}".format(uuid.uuid4())
    credit_profile_obj['inquiry_id'] = inquiry_id
    inquiry_obj = credit_profile_obj.pop('Inquiry')
    inquiry_obj['id'] = inquiry_id
    RESULT_OBJ_DICT['inquiries'].append(inquiry_obj)

    ## handling InformationalMessages in CreditProfile
    for info_msg in _as_list(credit_profile_obj.pop('InformationalMessage', None)):
        info_msg['id'] = "im_{}".format(uuid.uuid4())
        info_msg['CreditProfile_id'] = credit_profile_id
        RESULT_OBJ_DICT['info_msgs'].append(info_msg)

    
# converting sub-attrs into underscore attrs
# For every table listed in ATTR_CODE_DICT, replace each nested attribute
# (a dict carrying an '@code' entry) with a flat '<attribute>_code' value.
for table_name, code_attr_list in ATTR_CODE_DICT.items():
    for attr_obj in RESULT_OBJ_DICT[table_name]:
        for code_attr in code_attr_list:
            # A missing attribute and an empty XML element (which xmltodict
            # parses to None) both yield an empty code instead of raising.
            code_node = attr_obj.pop(code_attr, None)
            if code_node is None:
                code = ''
            else:
                code = code_node['@code']
            attr_obj["{}_code".format(code_attr)] = code

# converting date strings into datetime obj
# Every date attribute listed in DATE_ATTR_DICT ends up present on each row:
# None when absent, otherwise a datetime (values that already are datetimes
# are left untouched).
for table_name, attrs_by_mode in DATE_ATTR_DICT.items():
    for mode, date_attrs in attrs_by_mode.items():
        for date_attr in date_attrs:
            for row in RESULT_OBJ_DICT[table_name]:
                raw_value = row.setdefault(date_attr, None)
                if raw_value is None or isinstance(raw_value, datetime.datetime):
                    continue
                row[date_attr] = parse_to_date(raw_value, mode)
    
#RESULT_OBJ_DICT

Take a preview of table PrequalResult

In [264]:
# One row per JSON record, with its 'fields' entries merged in and an
# 'xml_detail_id' added by the parsing loop.
pd.DataFrame(RESULT_OBJ_DICT['prequal_results'])

Unnamed: 0,model,pk,loanapp_id,result,error_reason,detail_results,fico_v2,report_type,username,created_at,...,business_credit_card_total_limits,business_credit_card_total_balances,business_total_mortgage_monthly_payments,business_total_monthly_debt_payments,bkdate,fico_v3,total_mortgage_monthly_payments,version,role,xml_detail_id
0,common.prequalresult,1,194,True,no reasons,,695,CreditProfile,cho,2016-08-04 00:29:03.067000+00:00,...,,,,,,,,CF Microloan,applicant,xd_25b9c19e-5220-4c19-adcd-9c34da236866


Take a preview of table Product

In [225]:
# What is left of each 'Products' node after 'CreditProfile' is popped out,
# plus its generated 'id' and the 'credit_profile_id' link.
pd.DataFrame(RESULT_OBJ_DICT['products'])

Unnamed: 0,@xmlns,id,credit_profile_id
0,http://www.experian.com/ARFResponse,pd_c9983288-da1c-433e-8826-7c0877c5c4da,cp_79a06a61-acce-417a-8db6-712c16e34fb4


Take a preview of table CreditProfile

In [226]:
# What is left of each 'CreditProfile' node after its child elements are
# popped out, plus the generated 'id', 'header_id' and 'inquiry_id'.
pd.DataFrame(RESULT_OBJ_DICT['credit_profiles'])

Unnamed: 0,id,header_id,inquiry_id
0,cp_79a06a61-acce-417a-8db6-712c16e34fb4,hd_1bef0e93-ea82-47ac-a61a-f1b4619c2e88,iq_9852c176-8ed8-4469-a10a-e1ee7ee3bfb8


Take a preview of table Header

In [265]:
# One 'Header' node per credit profile; 'ReportDate' has been converted to a
# datetime by the date-conversion step.
pd.DataFrame(RESULT_OBJ_DICT['headers'])

Unnamed: 0,ReportDate,ReportTime,Preamble,ARFVersion,id
0,2017-10-03,140607,TWA1,7,hd_373f2ec2-e90c-4d00-b8b4-8a377cbb5590


Take a preview of table RiskModel

In [156]:
# Risk models linked to their profile by 'CreditProfile_id'; 'ModelIndicator'
# and 'Evaluation' appear as flattened '*_code' columns.
pd.DataFrame(RESULT_OBJ_DICT['risk_models'])

Unnamed: 0,Score,ScoreFactorCodeOne,ScoreFactorCodeTwo,ScoreFactorCodeThree,ScoreFactorCodeFour,id,CreditProfile_id,ModelIndicator_code,Evaluation_code
0,808,10,9,5,,rm_3f81f6bf-6b10-47f4-b21c-06bd3242fec8,cp_83188663-1c2f-41ae-8fed-2e46b31ecaea,F,P
1,807,30,5,11,,rm_17da3e39-4320-4f1e-a642-7f3da2160e71,cp_83188663-1c2f-41ae-8fed-2e46b31ecaea,AA,P


Take a preview of table ConsumerIdentity

In [266]:
# Consumer identities with the nested 'Name' element flattened into
# 'Surname', 'First', 'Middle', 'Gen' and 'NameType_code'.
pd.DataFrame(RESULT_OBJ_DICT['consumer_identities'])

Unnamed: 0,YOB,id,Surname,First,Middle,Gen,NameType_code,CreditProfile_id
0,,ci_e477df8b-4008-4639-b645-cd4982efd4e8,SCALICI,WILLIAM,,,,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a
1,,ci_7c84ffcb-04ec-4263-a7a5-e1da6462d28e,SCALICI,AS,WILLIAM,,N,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a
2,,ci_64ba02f7-b7b0-49a1-a4d6-83f1c83ae99c,SCALICI,WILLIAM,,JR,,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a
3,,ci_1b1702ba-a7aa-4c6d-8ff9-2fe8489d29e3,ANTHONY,WILLIAM,R,,A,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a


Take a preview of table AddressInfo

In [267]:
# 'AddressInformation' entries linked to their profile by 'CreditProfile_id'.
pd.DataFrame(RESULT_OBJ_DICT['addr_infos'])

Unnamed: 0,FirstReportedDate,LastUpdatedDate,TimesReported,LastReportingSubcode,StreetPrefix,StreetName,StreetSuffix,City,State,Zip,CensusGeoCode,id,CreditProfile_id,Origination_code,DwellingType_code,HomeOwnership_code
0,2003-10-27,2015-12-20,0,7370142.0,7324,84TH STREET,CT SW,TACOMA,WA,984986380,,ai_a1762f48-f9b3-4547-94de-4263e3fee583,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a,2,S,
1,2011-01-01,2011-01-01,0,,3260 S,TACOMA,WAY,TACOMA,WA,984094725,,ai_ae22d847-544b-4ec3-b498-db3ca21bee84,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a,1,S,
2,2006-05-28,2010-11-02,12,,3216 S,TACOMA,WAY,TACOMA,WA,984094725,,ai_ad2613e3-0e16-4b26-9b59-fb11e602152e,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a,1,S,


Take a preview of table EmploymentInfo

In [268]:
# 'EmploymentInformation' entries linked to their profile by 'CreditProfile_id'.
pd.DataFrame(RESULT_OBJ_DICT['employ_infos'])

Unnamed: 0,FirstReportedDate,LastUpdatedDate,Name,AddressFirstLine,AddressSecondLine,AddressExtraLine,Zip,id,CreditProfile_id,Origination_code
0,2011-04-08,2011-04-08,GM RETIRED,,,,,ei_2fa7b12b-461d-4eef-962f-2859fdfe7e32,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a,2
1,2009-12-28,2010-06-11,RETIRED,,,,,ei_5defe1d1-f0dd-4601-9bf6-886f5a7c2387,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a,2


Take a preview of table TradeLine

In [269]:
# Trade lines; 'EnhancedPaymentData' and 'Amount' were moved to their own
# tables and are reachable through 'enhanced_pm_data_id' and the trade line 'id'.
pd.DataFrame(RESULT_OBJ_DICT['trade_lines'])

Unnamed: 0,OpenDate,StatusDate,MaxDelinquencyDate,BalanceDate,BalanceAmount,AmountPastDue,ConsumerComment,MonthsHistory,DelinquenciesOver30Days,DelinquenciesOver60Days,...,Evaluation_code,AccountType_code,TermsDuration_code,Status_code,ECOA_code,OpenOrClosed_code,RevolvingOrInstallment_code,MonthlyPaymentType_code,KOB_code,AccountNumber
0,1981-06-01,2017-05-01,,2017-05-19,,,,15,00,00,...,N,18,REV,05,1,C,R,,OC,
1,2007-08-28,2012-03-01,,2012-03-18,,,,56,00,00,...,N,18,REV,05,1,C,R,,BC,
2,2006-06-10,2017-07-01,,2017-07-23,00000000,,,96,00,00,...,N,18,REV,11,2,C,R,,BC,
3,2007-08-28,2012-03-01,,2012-04-28,00000000,,,03,00,00,...,N,18,REV,11,1,C,R,,BC,
4,2002-03-01,2009-04-01,,2009-04-01,00000000,,,86,00,00,...,N,18,REV,11,0,C,R,,NZ,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31,1983-06-01,2017-08-01,,2017-08-07,00000052,,,01,00,00,...,P,18,REV,11,1,O,R,,BC,
32,1984-05-01,2017-08-01,,2017-08-07,00000599,,,01,00,00,...,P,18,001,11,1,O,R,,BC,
33,1984-05-01,2017-08-01,,2017-08-07,00001960,,,01,00,00,...,P,18,REV,11,1,O,R,,BC,
34,2016-08-18,2017-08-01,,2017-08-07,00000000,,,12,00,00,...,P,07,REV,11,1,O,R,,CG,


Take a preview of table Inquiry

In [270]:
# The 'Inquiry' node of each credit profile, referenced from the profile
# through 'inquiry_id'.
pd.DataFrame(RESULT_OBJ_DICT['inquiries'])

Unnamed: 0,Date,Amount,Subcode,SubscriberDisplayName,id,Type_code,Terms_code,KOB_code
0,2016-08-18,UNKNOWN,1640007,SYNCB/BROOKS BROS,iq_6bb7ea2f-e476-45bc-b39b-ee7be1d420e0,31,UNK,FF


Take a preview of table InformationMessage

In [271]:
# 'InformationalMessage' entries linked to their profile by 'CreditProfile_id'.
pd.DataFrame(RESULT_OBJ_DICT['info_msgs'])

Unnamed: 0,MessageNumber,MessageText,id,CreditProfile_id
0,92,0092 REQUESTED PRODUCT OPTION NOT ALLOWED,im_aa4ef9ca-1a32-495b-9881-58b4ed86b851,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a
1,84,0084 SSN MATCHES,im_d59e435e-effa-4940-9dbf-f7e292a4e3fc,cp_652e5d52-7edb-4ce2-8ccd-99b07cf3425a


Take a preview of table EnhancedPaymentData

In [273]:
# One 'EnhancedPaymentData' node per trade line, referenced from the trade
# line through 'enhanced_pm_data_id'.
pd.DataFrame(RESULT_OBJ_DICT['enhanced_pm_data'])

Unnamed: 0,InitialPaymentLevelDate,id,AccountCondition_code,PaymentStatus_code,AccountType_code,SpecialComment_code
0,2017-05-01,ep_d3659dad-8013-4b07-9d53-9537e2d84150,05,11,18,51
1,2012-03-01,ep_82301724-e270-47d6-abf5-3995e0a5c578,05,11,18,40
2,2017-07-01,ep_0cc414ef-dd0f-47d1-850c-b4a32385f4e5,A3,11,18,19
3,2012-03-01,ep_6c2ba90c-bf55-466b-8a8a-612639085aca,A3,11,18,18
4,2009-04-01,ep_3366a684-4f76-43b4-b9fa-5138ed5f33f1,A3,11,18,19
...,...,...,...,...,...,...
31,2017-08-01,ep_971e74f0-0c04-4f45-aa15-bdce405b8995,A1,11,18,
32,2017-08-01,ep_a0157013-f0ce-44f1-8f30-b69cadeac5bf,A1,11,18,
33,2017-08-01,ep_19ab2196-e0ed-4d51-a5b5-7396932116e6,A1,11,18,
34,2017-08-01,ep_7d5881cd-5cb4-41a1-b4c9-83f1339242f9,A1,11,07,


Take a preview of table TradeLineAmount

In [275]:
# 'Amount' entries of the trade lines, linked back through 'TradeLine_id'.
pd.DataFrame(RESULT_OBJ_DICT['tradeline_amounts'])

Unnamed: 0,Value,id,TradeLine_id,Qualifier_code
0,00000350,am_69c1ecd4-c479-4abc-96e6-bafa06ccb755,tl_f3617344-788f-4a6b-a7e8-6354a64933ad,L
1,00000056,am_9313b748-fd75-4b01-ac11-7cff9240fcaf,tl_f3617344-788f-4a6b-a7e8-6354a64933ad,H
2,00011500,am_9d37b91c-c4ef-4e15-9b5b-933c9b9fa62d,tl_2ab34807-4028-4d44-b61f-b943c7714049,L
3,00011440,am_aafa2902-7734-4e99-b73e-2c531917ef8e,tl_2ab34807-4028-4d44-b61f-b943c7714049,H
4,00012900,am_54ad1a0e-390e-4bbf-b94b-26c303538609,tl_e457a829-adca-4bf4-a37a-9e844c407205,L
...,...,...,...,...
67,,am_9bb702ab-0b49-453e-b85c-3259387d4524,tl_92161510-5de1-478e-a355-bb88cd54bb20,
68,00001500,am_f42a7835-0314-4076-a22e-a6e033935fb4,tl_4bbfcc1f-732e-471b-8cd5-92d081206acb,L
69,00000089,am_5d543cf6-4d18-4e68-b914-2a41e7dbd405,tl_4bbfcc1f-732e-471b-8cd5-92d081206acb,H
70,00150000,am_dab66759-a91d-431d-8354-bfcced9cb14f,tl_dfbaba8e-be8e-49e2-8571-29bed7995836,L
