In [1]:
import time
import pandas as pd
import numpy as np
import math

# internal tools
from dsgtools.reporting import make_format
from dsgtools.reporting import TableWriter
from dsgtools.reporting import freq
from dsgtools.reporting import bivariate

Matplotlib is building the font cache; this may take a moment.


In [3]:
# NOTE(review): pd.read_adls is not a stock pandas function; presumably importing
# dsgtools.azure registers it on the pandas namespace — confirm, and consider
# moving this import to the top import cell.
from dsgtools import azure

# Location of the input sample that was submitted for the AIR_231 multi-rep test.
SAMPLE_INPUT_PATH = (
    'Analytics/RnD Projects/Product RnD/Business/_ARMBS_Tickets/2023/'
    'AIR_231 (Multi_rep)/input_sample_DS.parquet'
)

sample = pd.read_adls(SAMPLE_INPUT_PATH)

# Show the column index of the input sample.
sample.columns

Index(['AccountNumber', 'CompanyName', 'AlternateCompanyName',
       'StreetAddressLine1', 'StreetAddressLine2', 'City1', 'State1', 'Zip1',
       'BusinessPhone', 'TaxIdNumber',
       ...
       'BusinessStreetAddress', 'Rep1_StreetAddress', 'Rep2_StreetAddress',
       'Rep3_StreetAddress', 'sufficient_Business', 'sufficient_input_rep1',
       'sufficient_input_rep2', 'sufficient_input_rep3', 'group', 'unique_id'],
      dtype='object', length=127)

In [4]:
# Both result files are read with exactly the same options, so keep them in one place.
_results_read_options = dict(verbose=False, low_memory=False, encoding='iso-8859-1')

# "baseline" is the run whose file name says with1rep; "after" is the with3rep run.
baseline = pd.read_adls(
    "Analytics/RnD Projects/Product RnD/Business/_ARMBS_Tickets/2023/AIR_231 (Multi_rep)/new_data/1010_business_shell_v31_air231_results_with1rep_w20230928-162407_sas_layout_busshell.csv",
    **_results_read_options,
)
after = pd.read_adls(
    "Analytics/RnD Projects/Product RnD/Business/_ARMBS_Tickets/2023/AIR_231 (Multi_rep)/new_data/1010_business_shell_v31_air231_results_with3rep_w20230928-103755_sas_layout_busshell.csv",
    **_results_read_options,
)

# Sanity check: the two runs should have the same number of rows and columns.
for _results_frame in (baseline, after):
    print(_results_frame.shape)

(10001, 3115)
(10001, 3115)


In [5]:
# Peek at the column names of the `after` (with3rep) results frame.
after.columns

Index(['account', 'seq', 'id_powid', 'id_proxid', 'id_seleid', 'id_orgid',
       'id_ultid', 'id_seleid_change_flag', 'id_seleid_change_code',
       'id_weight',
       ...
       'be_b2bfltrecflagbymonsum24mc', 'be_b2bmatrecflagbymonsum24mc',
       'be_b2bopsrecflagbymonsum24mc', 'be_b2bothrecflagbymonsum24mc',
       'be_b2bbalvol24mc', 'be_b2bcarrbalvol24mc', 'be_b2bfltbalvol24mc',
       'be_b2bmatbalvol24mc', 'be_b2bopsbalvol24mc', 'be_b2bothbalvol24mc'],
      dtype='object', length=3115)

In [6]:
from dsgtools.reporting import ImpactAnalysisReport

# Column families, identified by a substring of the column name.
sbfe_attr = [x for x in baseline.columns if "sbfe" in x]
b2b = [x for x in baseline.columns if "b2b" in x]
# rv_attr is "every column that is not SBFE", so it still contains the B2B columns.
# Its contents are unchanged; only the lookup is done through a set.
_sbfe_names = set(sbfe_attr)
rv_attr = [x for x in baseline.columns if x not in _sbfe_names]

# Map every column to the list of values handed to the report as `exceptions`
# (presumably special/sentinel codes that the report treats separately — confirm
# against the ImpactAnalysisReport documentation).
#
# The branches must be mutually exclusive. The earlier version used three
# independent `if`s, and because rv_attr includes the B2B columns, the last
# `if` overwrote every B2B column's exception list with [-1].
# Precedence: B2B wins over SBFE for a name containing both substrings (the
# same outcome the earlier code produced for such names).
exception_dict = dict()
for x in baseline.columns:
    if "b2b" in x:
        exception_dict[x] = [-99999, -99998, -99997]
    elif "sbfe" in x:
        exception_dict[x] = [-99, -98, -97]
    else:
        exception_dict[x] = [-1]

# Compare the two runs account by account and write the report to disk.
cr = ImpactAnalysisReport(
    baseline,
    after,
    keys="account",
    exceptions=exception_dict,
    dataframe_names=("baseline", "after"),
)
cr.to_xlsx("./_temp/comparison_shell.xlsx", overwrite=True)

<dsgtools.reporting.tablewriter.TableWriter at 0x7f674e784700>

In [12]:
# NOTE(review): `field` is not defined in any visible cell. From its use here it
# must be a list of column names that includes "account" — confirm where it is set.
_baseline_fields = baseline[field].copy()
temp_after = after[field].copy()

# unique_id is cast to str in place on `sample`; later cells compare it to a string.
sample["unique_id"] = sample["unique_id"].astype(str)

# Baseline values get the default "_x" suffix and after values the "_y" suffix;
# the sample's `group` label is then attached by matching account to unique_id.
temp_before = (
    _baseline_fields
    .merge(temp_after, on="account")
    .merge(sample[["unique_id", "group"]], left_on="account", right_on="unique_id")
)
print(temp_before.shape)

# Replace missing values with "" — presumably so that a value missing on both
# sides compares as equal in the later `!=` checks (NaN != NaN is True).
temp_before = temp_before.fillna("")

(10000, 321)


In [14]:
# Collect the per-field difference flags for rep2 and rep3 in a single pass.
# The in_rep?_dob flags are left out: rep dob is provided as 0 somehow.
col_2 = []
col_3 = []
for _col_name in temp_before.columns:
    if "_flag" not in _col_name:
        continue
    if "rep2" in _col_name and "in_rep2_dob" not in _col_name:
        col_2.append(_col_name)
    if "rep3" in _col_name and "in_rep3_dob" not in _col_name:
        col_3.append(_col_name)

print(len(col_2))
print(len(col_3))

# Number of rep2 / rep3 fields that differ between the two runs, per account.
temp_before['sum_diff_rep2'] = temp_before[col_2].sum(axis=1)
temp_before['sum_diff_rep3'] = temp_before[col_3].sum(axis=1)

78
78


In [15]:
# Bucket the rep3 difference count at 0 (rendered as "<= 0" and "1+" in the
# output table) and tabulate it by sample group.
fmt = make_format(cuts=[-np.inf, 0, np.inf])
freq(
    "group",
    "sum_diff_rep3",
    df=temp_before,
    format=[None, fmt],
    cross=False,
    observed=True,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Pct,Cuml Count,Cuml Pct
group,sum_diff_rep3,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3rep,1+,7000.0,0.7,7000.0,0.7
0rep,<= 0,1000.0,0.1,8000.0,0.8
1rep,<= 0,1000.0,0.1,9000.0,0.9
2rep,<= 0,1000.0,0.1,10000.0,1.0


In [16]:
# Same tabulation for rep2: difference count bucketed at 0, by sample group.
fmt = make_format(cuts=[-np.inf, 0, np.inf])
freq(
    "group",
    "sum_diff_rep2",
    df=temp_before,
    format=[None, fmt],
    cross=False,
    observed=True,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Pct,Cuml Count,Cuml Pct
group,sum_diff_rep2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3rep,1+,7000.0,0.7,7000.0,0.7
0rep,<= 0,999.0,0.0999,7999.0,0.7999
0rep,1+,1.0,0.0001,8000.0,0.8
1rep,<= 0,1000.0,0.1,9000.0,0.9
2rep,1+,1000.0,0.1,10000.0,1.0


In [23]:
# Look up the input record for unique_id 9617. The comparison is against a
# string, so it relies on unique_id having been cast to str beforehand.
# NOTE(review): this displays a raw input record — clear the output before sharing.
sample.loc[sample["unique_id"] == '9617']

Unnamed: 0,AccountNumber,CompanyName,AlternateCompanyName,StreetAddressLine1,StreetAddressLine2,City1,State1,Zip1,BusinessPhone,TaxIdNumber,BusinessIPAddress,BusinessURL,BusinessEmailAddress,Rep1FirstName,Rep1MiddleName,Rep1LastName,Rep1NameSuffix,Rep1StreetAddressLine1,Rep1StreetAddressLine2,Rep1City,Rep1State,Rep1Zip,Rep1SSN,Rep1DOB,Rep1Age,Rep1DLNumber,Rep1DLState,Rep1HomePhone,Rep1EmailAddress,Rep1FormerLastName,Rep1LexID,ArchiveDate,PowID,ProxID,SeleID,OrgID,UltID,SIC_Code,NAIC_Code,Rep2FirstName,Rep2MiddleName,Rep2LastName,Rep2NameSuffix,Rep2StreetAddressLine1,Rep2StreetAddressLine2,Rep2City,Rep2State,Rep2Zip,Rep2SSN,Rep2DOB,Rep2Age,Rep2DLNumber,Rep2DLState,Rep2HomePhone,Rep2EmailAddress,Rep2FormerLastName,Rep2LexID,Rep3FirstName,Rep3MiddleName,Rep3LastName,Rep3NameSuffix,Rep3StreetAddressLine1,Rep3StreetAddressLine2,Rep3City,Rep3State,Rep3Zip,Rep3SSN,Rep3DOB,Rep3Age,Rep3DLNumber,Rep3DLState,Rep3HomePhone,Rep3EmailAddress,Rep3FormerLastName,Rep3LexID,Rep4FirstName,Rep4MiddleName,Rep4LastName,Rep4NameSuffix,Rep4StreetAddressLine1,Rep4StreetAddressLine2,Rep4City,Rep4State,Rep4Zip,Rep4SSN,Rep4DOB,Rep4Age,Rep4DLNumber,Rep4DLState,Rep4HomePhone,Rep4EmailAddress,Rep4FormerLastName,Rep4LexID,Rep5FirstName,Rep5MiddleName,Rep5LastName,Rep5NameSuffix,Rep5StreetAddressLine1,Rep5StreetAddressLine2,Rep5City,Rep5State,Rep5Zip,Rep5SSN,Rep5DOB,Rep5Age,Rep5DLNumber,Rep5DLState,Rep5HomePhone,Rep5EmailAddress,Rep5FormerLastName,Rep5LexID,ln_project_id,pf_fraud,pf_bad,pf_funded,pf_declined,pf_approved_not_funded,BusinessStreetAddress,Rep1_StreetAddress,Rep2_StreetAddress,Rep3_StreetAddress,sufficient_Business,sufficient_input_rep1,sufficient_input_rep2,sufficient_input_rep3,group,unique_id
9616,BIIDTransactionJune2018_50968041R1098689,"VENTURE.CO HOLDINGS, INC.",,12 EAST 49TH ST 15TH FLOOR,,NEW YORK,NY,10017,8447806797,,,,,JUSTIN,,WICKS,,,,,,,,,,,,,,,,20180629 200513,,,,,,,,ANDREW,,SZABO,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BIID_Transaction_June2018,,,,,,12 EAST 49TH ST 15TH FLOOR,,,,1,0,0,0,0rep,9617


In [22]:
# Display the AccountNumber column of the input sample.
sample["AccountNumber"]

0                                  TMobile7731_0000043457
1                                  TMobile7731_0000030895
2                                   USBank8557_AWB3424810
3                                  TMobile7731_0000031301
4                         cancapital7343_0017233710556183
                              ...                        
9995    SBFEExtract2016_0005010111FCO0901_603532219245...
9996    SBFEExtract2016_0017110111ALF0912_24924667067_001
9997                                  USBank8585_40005409
9998    SBFEExtract2016_0006010111CCS0103_424631521427...
9999    SBFEExtract2016_0013010411VEN0116_9853796001PA...
Name: AccountNumber, Length: 10000, dtype: object

In [18]:
## it's fine the REP2 data is populated
# Show every column of the wide frame (this changes the display option globally).
pd.set_option('display.max_columns', None)

# Rows from the "0rep" group that nevertheless show at least one rep2 difference.
_in_0rep_group = temp_before['group'] == "0rep"
_has_rep2_diff = temp_before["sum_diff_rep2"] > 0
temp_before[_in_0rep_group & _has_rep2_diff]

Unnamed: 0,account,in_rep2_first_x,pop_rep2_last_x,pop_rep2_first_x,e2b_rep2_match_bus_in_first_x,e2b_rep2_match_bus_in_last_x,e2b_rep2_match_bus_in_full_x,out_rep2_addr_status_x,pop_rep2_zip_x,pop_rep2_addr_x,in_rep2_streetaddress1_x,in_rep2_zip_x,in_rep2_city_x,pop_rep2_city_x,out_rep2_state_x,out_rep2_addr_type_x,out_rep2_prim_name_x,out_rep2_zip5_x,out_rep2_lat_x,out_rep2_long_x,out_rep2_geo_block_x,out_rep2_county_x,out_rep2_prim_range_x,pop_rep2_ssn_x,lexid_rep2_x,pop_rep2_state_x,in_rep2_state_x,out_rep2_zip4_x,out_rep2_addr_suffix_x,in_rep2_phone10_x,pop_rep2_phone_x,e2b_rep2_on_inquiry_x,bus2exec_rep2_desc_x,e2b_rep2_match_bus_in_addr_x,in_rep3_first_x,e2b_rep3_match_bus_in_last_x,pop_rep3_first_x,pop_rep3_last_x,e2b_rep3_match_bus_in_first_x,e2b_rep3_match_bus_in_full_x,out_rep3_addr_status_x,in_rep3_streetaddress1_x,pop_rep3_addr_x,in_rep3_zip_x,pop_rep3_zip_x,in_rep3_city_x,pop_rep3_city_x,pop_rep3_state_x,in_rep3_state_x,out_rep3_state_x,out_rep3_prim_name_x,out_rep3_addr_type_x,out_rep3_zip5_x,out_rep3_geo_block_x,out_rep3_long_x,out_rep3_lat_x,out_rep3_county_x,out_rep3_prim_range_x,lexid_rep3_x,in_rep3_phone10_x,pop_rep3_phone_x,bus2exec_rep2_x,out_rep3_zip4_x,e2b_rep2_idsearch_name_x,e2b_rep2_lexid_on_file_x,e2b_rep2_name_on_file_x,e2b_rep2_on_property_cnt_x,e2b_rep2_on_utility_x,e2b_rep2_match_bus_file_full_x,e2b_rep2_paw_match_x,e2b_rep2_match_bus_file_first_x,e2b_rep2_match_bus_file_last_x,e2b_rep2_idsearch_addr_x,e2b_rep2_match_bus_file_addr_x,e2b_rep2_consheader_addr_x,e2b_rep2_addr_on_file_x,e2b_rep2_bus_addr_own_cnt_x,e2b_rep2_match_bus_file_fein_x,e2b_rep2_busheader_ssn_x,e2b_rep2_consheader_ssn_x,e2b_rep2_idsearch_ssn_x,e2b_rep2_ssn_on_file_x,e2b_rep2_distance_addr_x,out_rep3_addr_suffix_x,bus2exec_rep3_desc_x,e2b_rep3_on_inquiry_x,e2b_rep3_match_bus_in_addr_x,pop_rep3_ssn_x,bus2exec_rep3_x,e2b_rep3_name_on_file_x,e2b_rep3_lexid_on_file_x,e2b_rep3_on_property_cnt_x,e2b_rep3_idsearch_name_x,e2b_rep3_paw_match_x,e2b_rep3_on_utility_x,e2b_rep3
_match_bus_file_first_x,e2b_rep3_match_bus_file_last_x,e2b_rep3_match_bus_file_full_x,e2b_rep3_match_bus_file_addr_x,e2b_rep3_idsearch_addr_x,e2b_rep3_consheader_addr_x,e2b_rep3_addr_on_file_x,e2b_rep3_bus_addr_own_cnt_x,e2b_rep3_distance_addr_x,e2b_rep2_busheader_phone_x,e2b_rep2_phn_on_file_x,e2b_rep2_idsearch_phone_x,e2b_rep2_match_bus_file_phn_x,e2b_rep2_consheader_phone_x,e2b_rep3_phn_on_file_x,e2b_rep3_busheader_phone_x,e2b_rep3_idsearch_phone_x,e2b_rep3_match_bus_file_phn_x,e2b_rep3_consheader_phone_x,e2b_rep3_match_bus_file_fein_x,e2b_rep3_idsearch_ssn_x,e2b_rep3_consheader_ssn_x,e2b_rep3_ssn_on_file_x,e2b_rep3_busheader_ssn_x,bus2exec_rep2_title_x,bus2exec_rep3_title_x,in_rep2_dob_x,pop_rep2_dobyear_x,pop_rep2_dob_x,pop_rep2_dobday_x,in_rep3_dob_x,pop_rep3_dob_x,pop_rep3_dobyear_x,pop_rep3_dobday_x,in_rep2_middle_x,pop_rep2_middle_x,in_rep3_middle_x,pop_rep3_middle_x,out_rep2_predir_x,out_rep3_predir_x,e2b_rep2_distance_phone_x,e2b_rep3_distance_phone_x,out_rep2_sec_range_x,out_rep2_unit_desig_x,out_rep3_sec_range_x,out_rep3_unit_desig_x,out_rep2_postdir_x,pop_rep2_dobmonth_x,out_rep3_postdir_x,pop_rep3_dobmonth_x,e2b_rep1_consheader_addr_x,in_rep2_dlnumber_x,pop_rep2_dlnumber_x,pop_rep3_dlnumber_x,in_rep2_dlstate_x,pop_rep2_dlstate_x,in_rep3_dlnumber_x,pop_rep3_dlstate_x,in_rep3_dlstate_x,in_rep3_email_x,in_rep2_email_x,pop_rep3_age_x,pop_rep2_age_x,in_rep3_age_x,in_rep2_age_x,in_rep2_first_y,pop_rep2_last_y,pop_rep2_first_y,e2b_rep2_match_bus_in_first_y,e2b_rep2_match_bus_in_last_y,e2b_rep2_match_bus_in_full_y,out_rep2_addr_status_y,pop_rep2_zip_y,pop_rep2_addr_y,in_rep2_streetaddress1_y,in_rep2_zip_y,in_rep2_city_y,pop_rep2_city_y,out_rep2_state_y,out_rep2_addr_type_y,out_rep2_prim_name_y,out_rep2_zip5_y,out_rep2_lat_y,out_rep2_long_y,out_rep2_geo_block_y,out_rep2_county_y,out_rep2_prim_range_y,pop_rep2_ssn_y,lexid_rep2_y,pop_rep2_state_y,in_rep2_state_y,out_rep2_zip4_y,out_rep2_addr_suffix_y,in_rep2_phone10_y,pop_rep2_phone_y,e2b_rep2_on_inquiry_y,bus2e
xec_rep2_desc_y,e2b_rep2_match_bus_in_addr_y,in_rep3_first_y,e2b_rep3_match_bus_in_last_y,pop_rep3_first_y,pop_rep3_last_y,e2b_rep3_match_bus_in_first_y,e2b_rep3_match_bus_in_full_y,out_rep3_addr_status_y,in_rep3_streetaddress1_y,pop_rep3_addr_y,in_rep3_zip_y,pop_rep3_zip_y,in_rep3_city_y,pop_rep3_city_y,pop_rep3_state_y,in_rep3_state_y,out_rep3_state_y,out_rep3_prim_name_y,out_rep3_addr_type_y,out_rep3_zip5_y,out_rep3_geo_block_y,out_rep3_long_y,out_rep3_lat_y,out_rep3_county_y,out_rep3_prim_range_y,lexid_rep3_y,in_rep3_phone10_y,pop_rep3_phone_y,bus2exec_rep2_y,out_rep3_zip4_y,e2b_rep2_idsearch_name_y,e2b_rep2_lexid_on_file_y,e2b_rep2_name_on_file_y,e2b_rep2_on_property_cnt_y,e2b_rep2_on_utility_y,e2b_rep2_match_bus_file_full_y,e2b_rep2_paw_match_y,e2b_rep2_match_bus_file_first_y,e2b_rep2_match_bus_file_last_y,e2b_rep2_idsearch_addr_y,e2b_rep2_match_bus_file_addr_y,e2b_rep2_consheader_addr_y,e2b_rep2_addr_on_file_y,e2b_rep2_bus_addr_own_cnt_y,e2b_rep2_match_bus_file_fein_y,e2b_rep2_busheader_ssn_y,e2b_rep2_consheader_ssn_y,e2b_rep2_idsearch_ssn_y,e2b_rep2_ssn_on_file_y,e2b_rep2_distance_addr_y,out_rep3_addr_suffix_y,bus2exec_rep3_desc_y,e2b_rep3_on_inquiry_y,e2b_rep3_match_bus_in_addr_y,pop_rep3_ssn_y,bus2exec_rep3_y,e2b_rep3_name_on_file_y,e2b_rep3_lexid_on_file_y,e2b_rep3_on_property_cnt_y,e2b_rep3_idsearch_name_y,e2b_rep3_paw_match_y,e2b_rep3_on_utility_y,e2b_rep3_match_bus_file_first_y,e2b_rep3_match_bus_file_last_y,e2b_rep3_match_bus_file_full_y,e2b_rep3_match_bus_file_addr_y,e2b_rep3_idsearch_addr_y,e2b_rep3_consheader_addr_y,e2b_rep3_addr_on_file_y,e2b_rep3_bus_addr_own_cnt_y,e2b_rep3_distance_addr_y,e2b_rep2_busheader_phone_y,e2b_rep2_phn_on_file_y,e2b_rep2_idsearch_phone_y,e2b_rep2_match_bus_file_phn_y,e2b_rep2_consheader_phone_y,e2b_rep3_phn_on_file_y,e2b_rep3_busheader_phone_y,e2b_rep3_idsearch_phone_y,e2b_rep3_match_bus_file_phn_y,e2b_rep3_consheader_phone_y,e2b_rep3_match_bus_file_fein_y,e2b_rep3_idsearch_ssn_y,e2b_rep3_consheader_ssn_y,e2b_rep3_ssn_o
n_file_y,e2b_rep3_busheader_ssn_y,bus2exec_rep2_title_y,bus2exec_rep3_title_y,in_rep2_dob_y,pop_rep2_dobyear_y,pop_rep2_dob_y,pop_rep2_dobday_y,in_rep3_dob_y,pop_rep3_dob_y,pop_rep3_dobyear_y,pop_rep3_dobday_y,in_rep2_middle_y,pop_rep2_middle_y,in_rep3_middle_y,pop_rep3_middle_y,out_rep2_predir_y,out_rep3_predir_y,e2b_rep2_distance_phone_y,e2b_rep3_distance_phone_y,out_rep2_sec_range_y,out_rep2_unit_desig_y,out_rep3_sec_range_y,out_rep3_unit_desig_y,out_rep2_postdir_y,pop_rep2_dobmonth_y,out_rep3_postdir_y,pop_rep3_dobmonth_y,e2b_rep1_consheader_addr_y,in_rep2_dlnumber_y,pop_rep2_dlnumber_y,pop_rep3_dlnumber_y,in_rep2_dlstate_y,pop_rep2_dlstate_y,in_rep3_dlnumber_y,pop_rep3_dlstate_y,in_rep3_dlstate_y,in_rep3_email_y,in_rep2_email_y,pop_rep3_age_y,pop_rep2_age_y,in_rep3_age_y,in_rep2_age_y,unique_id,group,in_rep2_first_flag,pop_rep2_last_flag,pop_rep2_first_flag,e2b_rep2_match_bus_in_first_flag,e2b_rep2_match_bus_in_last_flag,e2b_rep2_match_bus_in_full_flag,out_rep2_addr_status_flag,pop_rep2_zip_flag,pop_rep2_addr_flag,in_rep2_streetaddress1_flag,in_rep2_zip_flag,in_rep2_city_flag,pop_rep2_city_flag,out_rep2_state_flag,out_rep2_addr_type_flag,out_rep2_prim_name_flag,out_rep2_zip5_flag,out_rep2_lat_flag,out_rep2_long_flag,out_rep2_geo_block_flag,out_rep2_county_flag,out_rep2_prim_range_flag,pop_rep2_ssn_flag,lexid_rep2_flag,pop_rep2_state_flag,in_rep2_state_flag,out_rep2_zip4_flag,out_rep2_addr_suffix_flag,in_rep2_phone10_flag,pop_rep2_phone_flag,e2b_rep2_on_inquiry_flag,bus2exec_rep2_desc_flag,e2b_rep2_match_bus_in_addr_flag,in_rep3_first_flag,e2b_rep3_match_bus_in_last_flag,pop_rep3_first_flag,pop_rep3_last_flag,e2b_rep3_match_bus_in_first_flag,e2b_rep3_match_bus_in_full_flag,out_rep3_addr_status_flag,in_rep3_streetaddress1_flag,pop_rep3_addr_flag,in_rep3_zip_flag,pop_rep3_zip_flag,in_rep3_city_flag,pop_rep3_city_flag,pop_rep3_state_flag,in_rep3_state_flag,out_rep3_state_flag,out_rep3_prim_name_flag,out_rep3_addr_type_flag,out_rep3_zip5_flag,out_rep3_geo_block_flag
,out_rep3_long_flag,out_rep3_lat_flag,out_rep3_county_flag,out_rep3_prim_range_flag,lexid_rep3_flag,in_rep3_phone10_flag,pop_rep3_phone_flag,bus2exec_rep2_flag,out_rep3_zip4_flag,e2b_rep2_idsearch_name_flag,e2b_rep2_lexid_on_file_flag,e2b_rep2_name_on_file_flag,e2b_rep2_on_property_cnt_flag,e2b_rep2_on_utility_flag,e2b_rep2_match_bus_file_full_flag,e2b_rep2_paw_match_flag,e2b_rep2_match_bus_file_first_flag,e2b_rep2_match_bus_file_last_flag,e2b_rep2_idsearch_addr_flag,e2b_rep2_match_bus_file_addr_flag,e2b_rep2_consheader_addr_flag,e2b_rep2_addr_on_file_flag,e2b_rep2_bus_addr_own_cnt_flag,e2b_rep2_match_bus_file_fein_flag,e2b_rep2_busheader_ssn_flag,e2b_rep2_consheader_ssn_flag,e2b_rep2_idsearch_ssn_flag,e2b_rep2_ssn_on_file_flag,e2b_rep2_distance_addr_flag,out_rep3_addr_suffix_flag,bus2exec_rep3_desc_flag,e2b_rep3_on_inquiry_flag,e2b_rep3_match_bus_in_addr_flag,pop_rep3_ssn_flag,bus2exec_rep3_flag,e2b_rep3_name_on_file_flag,e2b_rep3_lexid_on_file_flag,e2b_rep3_on_property_cnt_flag,e2b_rep3_idsearch_name_flag,e2b_rep3_paw_match_flag,e2b_rep3_on_utility_flag,e2b_rep3_match_bus_file_first_flag,e2b_rep3_match_bus_file_last_flag,e2b_rep3_match_bus_file_full_flag,e2b_rep3_match_bus_file_addr_flag,e2b_rep3_idsearch_addr_flag,e2b_rep3_consheader_addr_flag,e2b_rep3_addr_on_file_flag,e2b_rep3_bus_addr_own_cnt_flag,e2b_rep3_distance_addr_flag,e2b_rep2_busheader_phone_flag,e2b_rep2_phn_on_file_flag,e2b_rep2_idsearch_phone_flag,e2b_rep2_match_bus_file_phn_flag,e2b_rep2_consheader_phone_flag,e2b_rep3_phn_on_file_flag,e2b_rep3_busheader_phone_flag,e2b_rep3_idsearch_phone_flag,e2b_rep3_match_bus_file_phn_flag,e2b_rep3_consheader_phone_flag,e2b_rep3_match_bus_file_fein_flag,e2b_rep3_idsearch_ssn_flag,e2b_rep3_consheader_ssn_flag,e2b_rep3_ssn_on_file_flag,e2b_rep3_busheader_ssn_flag,bus2exec_rep2_title_flag,bus2exec_rep3_title_flag,in_rep2_dob_flag,pop_rep2_dobyear_flag,pop_rep2_dob_flag,pop_rep2_dobday_flag,in_rep3_dob_flag,pop_rep3_dob_flag,pop_rep3_dobyear_flag,pop_rep3_dobday_flag
,in_rep2_middle_flag,pop_rep2_middle_flag,in_rep3_middle_flag,pop_rep3_middle_flag,out_rep2_predir_flag,out_rep3_predir_flag,e2b_rep2_distance_phone_flag,e2b_rep3_distance_phone_flag,out_rep2_sec_range_flag,out_rep2_unit_desig_flag,out_rep3_sec_range_flag,out_rep3_unit_desig_flag,out_rep2_postdir_flag,pop_rep2_dobmonth_flag,out_rep3_postdir_flag,pop_rep3_dobmonth_flag,e2b_rep1_consheader_addr_flag,in_rep2_dlnumber_flag,pop_rep2_dlnumber_flag,pop_rep3_dlnumber_flag,in_rep2_dlstate_flag,pop_rep2_dlstate_flag,in_rep3_dlnumber_flag,pop_rep3_dlstate_flag,in_rep3_dlstate_flag,in_rep3_email_flag,in_rep2_email_flag,pop_rep3_age_flag,pop_rep2_age_flag,in_rep3_age_flag,in_rep2_age_flag,sum_diff_rep2,sum_diff_rep3
635,9617,,0,0,-1,-1,-1,,0,0,,,,0,,,,,,,,,,0,,0,,,,,0,-1,,-1,,-1,0,0,-1,-1,,,0,,0,,0,0,,,,,,,,,,,,,0,0,,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,,,-1,-1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,,,,0,0,0,,0,0,0,,0,,0,,,-1,-1,,,,,,0,,0,-1,,0,0,,0,,0,,,,0,0,,,ANDREW,1,1,0,0,0,,0,0,,,,0,,,,,,,,,,0,,0,,,,,0,0,The input authorized rep cannot be linked to t...,-1,,-1,0,0,-1,-1,,,0,,0,,0,0,,,,,,,,,,,,,0,0,,0,0,0,0,0,0,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,,,-1,-1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,,,,0,0,0,,0,0,0,,0,,0,,,-1,-1,,,,,,0,,0,-1,,0,0,,0,,0,,,,0,0,,,9617,0rep,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0


In [13]:
# Fields compared between the two runs. For each name, temp_before holds
# "<name>_x" and "<name>_y" (the default pandas merge suffixes for the left and
# right frame) and gets a new "<name>_flag" column: 1 where the two values
# differ, 0 where they are equal.
# The list is explicit and ordered so the flag columns are created in exactly
# the same order as the one-statement-per-field version it replaces.
_diff_flag_fields = [
    'in_rep2_first',
    'pop_rep2_last',
    'pop_rep2_first',
    'e2b_rep2_match_bus_in_first',
    'e2b_rep2_match_bus_in_last',
    'e2b_rep2_match_bus_in_full',
    'out_rep2_addr_status',
    'pop_rep2_zip',
    'pop_rep2_addr',
    'in_rep2_streetaddress1',
    'in_rep2_zip',
    'in_rep2_city',
    'pop_rep2_city',
    'out_rep2_state',
    'out_rep2_addr_type',
    'out_rep2_prim_name',
    'out_rep2_zip5',
    'out_rep2_lat',
    'out_rep2_long',
    'out_rep2_geo_block',
    'out_rep2_county',
    'out_rep2_prim_range',
    'pop_rep2_ssn',
    'lexid_rep2',
    'pop_rep2_state',
    'in_rep2_state',
    'out_rep2_zip4',
    'out_rep2_addr_suffix',
    'in_rep2_phone10',
    'pop_rep2_phone',
    'e2b_rep2_on_inquiry',
    'bus2exec_rep2_desc',
    'e2b_rep2_match_bus_in_addr',
    'in_rep3_first',
    'e2b_rep3_match_bus_in_last',
    'pop_rep3_first',
    'pop_rep3_last',
    'e2b_rep3_match_bus_in_first',
    'e2b_rep3_match_bus_in_full',
    'out_rep3_addr_status',
    'in_rep3_streetaddress1',
    'pop_rep3_addr',
    'in_rep3_zip',
    'pop_rep3_zip',
    'in_rep3_city',
    'pop_rep3_city',
    'pop_rep3_state',
    'in_rep3_state',
    'out_rep3_state',
    'out_rep3_prim_name',
    'out_rep3_addr_type',
    'out_rep3_zip5',
    'out_rep3_geo_block',
    'out_rep3_long',
    'out_rep3_lat',
    'out_rep3_county',
    'out_rep3_prim_range',
    'lexid_rep3',
    'in_rep3_phone10',
    'pop_rep3_phone',
    'bus2exec_rep2',
    'out_rep3_zip4',
    'e2b_rep2_idsearch_name',
    'e2b_rep2_lexid_on_file',
    'e2b_rep2_name_on_file',
    'e2b_rep2_on_property_cnt',
    'e2b_rep2_on_utility',
    'e2b_rep2_match_bus_file_full',
    'e2b_rep2_paw_match',
    'e2b_rep2_match_bus_file_first',
    'e2b_rep2_match_bus_file_last',
    'e2b_rep2_idsearch_addr',
    'e2b_rep2_match_bus_file_addr',
    'e2b_rep2_consheader_addr',
    'e2b_rep2_addr_on_file',
    'e2b_rep2_bus_addr_own_cnt',
    'e2b_rep2_match_bus_file_fein',
    'e2b_rep2_busheader_ssn',
    'e2b_rep2_consheader_ssn',
    'e2b_rep2_idsearch_ssn',
    'e2b_rep2_ssn_on_file',
    'e2b_rep2_distance_addr',
    'out_rep3_addr_suffix',
    'bus2exec_rep3_desc',
    'e2b_rep3_on_inquiry',
    'e2b_rep3_match_bus_in_addr',
    'pop_rep3_ssn',
    'bus2exec_rep3',
    'e2b_rep3_name_on_file',
    'e2b_rep3_lexid_on_file',
    'e2b_rep3_on_property_cnt',
    'e2b_rep3_idsearch_name',
    'e2b_rep3_paw_match',
    'e2b_rep3_on_utility',
    'e2b_rep3_match_bus_file_first',
    'e2b_rep3_match_bus_file_last',
    'e2b_rep3_match_bus_file_full',
    'e2b_rep3_match_bus_file_addr',
    'e2b_rep3_idsearch_addr',
    'e2b_rep3_consheader_addr',
    'e2b_rep3_addr_on_file',
    'e2b_rep3_bus_addr_own_cnt',
    'e2b_rep3_distance_addr',
    'e2b_rep2_busheader_phone',
    'e2b_rep2_phn_on_file',
    'e2b_rep2_idsearch_phone',
    'e2b_rep2_match_bus_file_phn',
    'e2b_rep2_consheader_phone',
    'e2b_rep3_phn_on_file',
    'e2b_rep3_busheader_phone',
    'e2b_rep3_idsearch_phone',
    'e2b_rep3_match_bus_file_phn',
    'e2b_rep3_consheader_phone',
    'e2b_rep3_match_bus_file_fein',
]

for _field_name in _diff_flag_fields:
    # Element-wise comparison of the two runs' values for this field.
    temp_before[_field_name + '_flag'] = np.where(
        temp_before[_field_name + '_x'] != temp_before[_field_name + '_y'], 1, 0
    )
temp_before['e2b_rep3_idsearch_ssn_flag'] = np.where(temp_before.e2b_rep3_idsearch_ssn_x != temp_before.e2b_rep3_idsearch_ssn_y, 1, 0)
temp_before['e2b_rep3_consheader_ssn_flag'] = np.where(temp_before.e2b_rep3_consheader_ssn_x != temp_before.e2b_rep3_consheader_ssn_y, 1, 0)
temp_before['e2b_rep3_ssn_on_file_flag'] = np.where(temp_before.e2b_rep3_ssn_on_file_x != temp_before.e2b_rep3_ssn_on_file_y, 1, 0)
temp_before['e2b_rep3_busheader_ssn_flag'] = np.where(temp_before.e2b_rep3_busheader_ssn_x != temp_before.e2b_rep3_busheader_ssn_y, 1, 0)
temp_before['bus2exec_rep2_title_flag'] = np.where(temp_before.bus2exec_rep2_title_x != temp_before.bus2exec_rep2_title_y, 1, 0)
temp_before['bus2exec_rep3_title_flag'] = np.where(temp_before.bus2exec_rep3_title_x != temp_before.bus2exec_rep3_title_y, 1, 0)
temp_before['in_rep2_dob_flag'] = np.where(temp_before.in_rep2_dob_x != temp_before.in_rep2_dob_y, 1, 0)
temp_before['pop_rep2_dobyear_flag'] = np.where(temp_before.pop_rep2_dobyear_x != temp_before.pop_rep2_dobyear_y, 1, 0)
temp_before['pop_rep2_dob_flag'] = np.where(temp_before.pop_rep2_dob_x != temp_before.pop_rep2_dob_y, 1, 0)
temp_before['pop_rep2_dobday_flag'] = np.where(temp_before.pop_rep2_dobday_x != temp_before.pop_rep2_dobday_y, 1, 0)
temp_before['in_rep3_dob_flag'] = np.where(temp_before.in_rep3_dob_x != temp_before.in_rep3_dob_y, 1, 0)
temp_before['pop_rep3_dob_flag'] = np.where(temp_before.pop_rep3_dob_x != temp_before.pop_rep3_dob_y, 1, 0)
temp_before['pop_rep3_dobyear_flag'] = np.where(temp_before.pop_rep3_dobyear_x != temp_before.pop_rep3_dobyear_y, 1, 0)
temp_before['pop_rep3_dobday_flag'] = np.where(temp_before.pop_rep3_dobday_x != temp_before.pop_rep3_dobday_y, 1, 0)
temp_before['in_rep2_middle_flag'] = np.where(temp_before.in_rep2_middle_x != temp_before.in_rep2_middle_y, 1, 0)
temp_before['pop_rep2_middle_flag'] = np.where(temp_before.pop_rep2_middle_x != temp_before.pop_rep2_middle_y, 1, 0)
temp_before['in_rep3_middle_flag'] = np.where(temp_before.in_rep3_middle_x != temp_before.in_rep3_middle_y, 1, 0)
temp_before['pop_rep3_middle_flag'] = np.where(temp_before.pop_rep3_middle_x != temp_before.pop_rep3_middle_y, 1, 0)
temp_before['out_rep2_predir_flag'] = np.where(temp_before.out_rep2_predir_x != temp_before.out_rep2_predir_y, 1, 0)
temp_before['out_rep3_predir_flag'] = np.where(temp_before.out_rep3_predir_x != temp_before.out_rep3_predir_y, 1, 0)
temp_before['e2b_rep2_distance_phone_flag'] = np.where(temp_before.e2b_rep2_distance_phone_x != temp_before.e2b_rep2_distance_phone_y, 1, 0)
temp_before['e2b_rep3_distance_phone_flag'] = np.where(temp_before.e2b_rep3_distance_phone_x != temp_before.e2b_rep3_distance_phone_y, 1, 0)
temp_before['out_rep2_sec_range_flag'] = np.where(temp_before.out_rep2_sec_range_x != temp_before.out_rep2_sec_range_y, 1, 0)
temp_before['out_rep2_unit_desig_flag'] = np.where(temp_before.out_rep2_unit_desig_x != temp_before.out_rep2_unit_desig_y, 1, 0)
temp_before['out_rep3_sec_range_flag'] = np.where(temp_before.out_rep3_sec_range_x != temp_before.out_rep3_sec_range_y, 1, 0)
temp_before['out_rep3_unit_desig_flag'] = np.where(temp_before.out_rep3_unit_desig_x != temp_before.out_rep3_unit_desig_y, 1, 0)
temp_before['out_rep2_postdir_flag'] = np.where(temp_before.out_rep2_postdir_x != temp_before.out_rep2_postdir_y, 1, 0)
temp_before['pop_rep2_dobmonth_flag'] = np.where(temp_before.pop_rep2_dobmonth_x != temp_before.pop_rep2_dobmonth_y, 1, 0)
temp_before['out_rep3_postdir_flag'] = np.where(temp_before.out_rep3_postdir_x != temp_before.out_rep3_postdir_y, 1, 0)
temp_before['pop_rep3_dobmonth_flag'] = np.where(temp_before.pop_rep3_dobmonth_x != temp_before.pop_rep3_dobmonth_y, 1, 0)
temp_before['e2b_rep1_consheader_addr_flag'] = np.where(temp_before.e2b_rep1_consheader_addr_x != temp_before.e2b_rep1_consheader_addr_y, 1, 0)
temp_before['in_rep2_dlnumber_flag'] = np.where(temp_before.in_rep2_dlnumber_x != temp_before.in_rep2_dlnumber_y, 1, 0)
temp_before['pop_rep2_dlnumber_flag'] = np.where(temp_before.pop_rep2_dlnumber_x != temp_before.pop_rep2_dlnumber_y, 1, 0)
temp_before['pop_rep3_dlnumber_flag'] = np.where(temp_before.pop_rep3_dlnumber_x != temp_before.pop_rep3_dlnumber_y, 1, 0)
temp_before['in_rep2_dlstate_flag'] = np.where(temp_before.in_rep2_dlstate_x != temp_before.in_rep2_dlstate_y, 1, 0)
temp_before['pop_rep2_dlstate_flag'] = np.where(temp_before.pop_rep2_dlstate_x != temp_before.pop_rep2_dlstate_y, 1, 0)
temp_before['in_rep3_dlnumber_flag'] = np.where(temp_before.in_rep3_dlnumber_x != temp_before.in_rep3_dlnumber_y, 1, 0)
temp_before['pop_rep3_dlstate_flag'] = np.where(temp_before.pop_rep3_dlstate_x != temp_before.pop_rep3_dlstate_y, 1, 0)
temp_before['in_rep3_dlstate_flag'] = np.where(temp_before.in_rep3_dlstate_x != temp_before.in_rep3_dlstate_y, 1, 0)
temp_before['in_rep3_email_flag'] = np.where(temp_before.in_rep3_email_x != temp_before.in_rep3_email_y, 1, 0)
temp_before['in_rep2_email_flag'] = np.where(temp_before.in_rep2_email_x != temp_before.in_rep2_email_y, 1, 0)
temp_before['pop_rep3_age_flag'] = np.where(temp_before.pop_rep3_age_x != temp_before.pop_rep3_age_y, 1, 0)
temp_before['pop_rep2_age_flag'] = np.where(temp_before.pop_rep2_age_x != temp_before.pop_rep2_age_y, 1, 0)
temp_before['in_rep3_age_flag'] = np.where(temp_before.in_rep3_age_x != temp_before.in_rep3_age_y, 1, 0)
temp_before['in_rep2_age_flag'] = np.where(temp_before.in_rep2_age_x != temp_before.in_rep2_age_y, 1, 0)

  temp_before['in_rep2_first_flag'] = np.where(temp_before.in_rep2_first_x != temp_before.in_rep2_first_y, 1, 0)
  temp_before['pop_rep2_last_flag'] = np.where(temp_before.pop_rep2_last_x != temp_before.pop_rep2_last_y, 1, 0)
  temp_before['pop_rep2_first_flag'] = np.where(temp_before.pop_rep2_first_x != temp_before.pop_rep2_first_y, 1, 0)
  temp_before['e2b_rep2_match_bus_in_first_flag'] = np.where(temp_before.e2b_rep2_match_bus_in_first_x != temp_before.e2b_rep2_match_bus_in_first_y, 1, 0)
  temp_before['e2b_rep2_match_bus_in_last_flag'] = np.where(temp_before.e2b_rep2_match_bus_in_last_x != temp_before.e2b_rep2_match_bus_in_last_y, 1, 0)
  temp_before['e2b_rep2_match_bus_in_full_flag'] = np.where(temp_before.e2b_rep2_match_bus_in_full_x != temp_before.e2b_rep2_match_bus_in_full_y, 1, 0)
  temp_before['out_rep2_addr_status_flag'] = np.where(temp_before.out_rep2_addr_status_x != temp_before.out_rep2_addr_status_y, 1, 0)
  temp_before['pop_rep2_zip_flag'] = np.where(temp_before.pop_rep

In [8]:
# Column names used by this cell: the "account" key first, followed by the
# rep1/rep2/rep3 attribute columns. The order is kept exactly as authored because
# downstream selections may depend on it.
# NOTE(review): the attribute names appear to mirror the `<name>_flag` columns
# built in the flag cell above -- confirm the two lists are meant to stay in sync.
field = [
    "account",
    "in_rep2_first",
    "pop_rep2_last",
    "pop_rep2_first",
    "e2b_rep2_match_bus_in_first",
    "e2b_rep2_match_bus_in_last",
    "e2b_rep2_match_bus_in_full",
    "out_rep2_addr_status",
    "pop_rep2_zip",
    "pop_rep2_addr",
    "in_rep2_streetaddress1",
    "in_rep2_zip",
    "in_rep2_city",
    "pop_rep2_city",
    "out_rep2_state",
    "out_rep2_addr_type",
    "out_rep2_prim_name",
    "out_rep2_zip5",
    "out_rep2_lat",
    "out_rep2_long",
    "out_rep2_geo_block",
    "out_rep2_county",
    "out_rep2_prim_range",
    "pop_rep2_ssn",
    "lexid_rep2",
    "pop_rep2_state",
    "in_rep2_state",
    "out_rep2_zip4",
    "out_rep2_addr_suffix",
    "in_rep2_phone10",
    "pop_rep2_phone",
    "e2b_rep2_on_inquiry",
    "bus2exec_rep2_desc",
    "e2b_rep2_match_bus_in_addr",
    "in_rep3_first",
    "e2b_rep3_match_bus_in_last",
    "pop_rep3_first",
    "pop_rep3_last",
    "e2b_rep3_match_bus_in_first",
    "e2b_rep3_match_bus_in_full",
    "out_rep3_addr_status",
    "in_rep3_streetaddress1",
    "pop_rep3_addr",
    "in_rep3_zip",
    "pop_rep3_zip",
    "in_rep3_city",
    "pop_rep3_city",
    "pop_rep3_state",
    "in_rep3_state",
    "out_rep3_state",
    "out_rep3_prim_name",
    "out_rep3_addr_type",
    "out_rep3_zip5",
    "out_rep3_geo_block",
    "out_rep3_long",
    "out_rep3_lat",
    "out_rep3_county",
    "out_rep3_prim_range",
    "lexid_rep3",
    "in_rep3_phone10",
    "pop_rep3_phone",
    "bus2exec_rep2",
    "out_rep3_zip4",
    "e2b_rep2_idsearch_name",
    "e2b_rep2_lexid_on_file",
    "e2b_rep2_name_on_file",
    "e2b_rep2_on_property_cnt",
    "e2b_rep2_on_utility",
    "e2b_rep2_match_bus_file_full",
    "e2b_rep2_paw_match",
    "e2b_rep2_match_bus_file_first",
    "e2b_rep2_match_bus_file_last",
    "e2b_rep2_idsearch_addr",
    "e2b_rep2_match_bus_file_addr",
    "e2b_rep2_consheader_addr",
    "e2b_rep2_addr_on_file",
    "e2b_rep2_bus_addr_own_cnt",
    "e2b_rep2_match_bus_file_fein",
    "e2b_rep2_busheader_ssn",
    "e2b_rep2_consheader_ssn",
    "e2b_rep2_idsearch_ssn",
    "e2b_rep2_ssn_on_file",
    "e2b_rep2_distance_addr",
    "out_rep3_addr_suffix",
    "bus2exec_rep3_desc",
    "e2b_rep3_on_inquiry",
    "e2b_rep3_match_bus_in_addr",
    "pop_rep3_ssn",
    "bus2exec_rep3",
    "e2b_rep3_name_on_file",
    "e2b_rep3_lexid_on_file",
    "e2b_rep3_on_property_cnt",
    "e2b_rep3_idsearch_name",
    "e2b_rep3_paw_match",
    "e2b_rep3_on_utility",
    "e2b_rep3_match_bus_file_first",
    "e2b_rep3_match_bus_file_last",
    "e2b_rep3_match_bus_file_full",
    "e2b_rep3_match_bus_file_addr",
    "e2b_rep3_idsearch_addr",
    "e2b_rep3_consheader_addr",
    "e2b_rep3_addr_on_file",
    "e2b_rep3_bus_addr_own_cnt",
    "e2b_rep3_distance_addr",
    "e2b_rep2_busheader_phone",
    "e2b_rep2_phn_on_file",
    "e2b_rep2_idsearch_phone",
    "e2b_rep2_match_bus_file_phn",
    "e2b_rep2_consheader_phone",
    "e2b_rep3_phn_on_file",
    "e2b_rep3_busheader_phone",
    "e2b_rep3_idsearch_phone",
    "e2b_rep3_match_bus_file_phn",
    "e2b_rep3_consheader_phone",
    "e2b_rep3_match_bus_file_fein",
    "e2b_rep3_idsearch_ssn",
    "e2b_rep3_consheader_ssn",
    "e2b_rep3_ssn_on_file",
    "e2b_rep3_busheader_ssn",
    "bus2exec_rep2_title",
    "bus2exec_rep3_title",
    "in_rep2_dob",
    "pop_rep2_dobyear",
    "pop_rep2_dob",
    "pop_rep2_dobday",
    "in_rep3_dob",
    "pop_rep3_dob",
    "pop_rep3_dobyear",
    "pop_rep3_dobday",
    "in_rep2_middle",
    "pop_rep2_middle",
    "in_rep3_middle",
    "pop_rep3_middle",
    "out_rep2_predir",
    "out_rep3_predir",
    "e2b_rep2_distance_phone",
    "e2b_rep3_distance_phone",
    "out_rep2_sec_range",
    "out_rep2_unit_desig",
    "out_rep3_sec_range",
    "out_rep3_unit_desig",
    "out_rep2_postdir",
    "pop_rep2_dobmonth",
    "out_rep3_postdir",
    "pop_rep3_dobmonth",
    "e2b_rep1_consheader_addr",
    "in_rep2_dlnumber",
    "pop_rep2_dlnumber",
    "pop_rep3_dlnumber",
    "in_rep2_dlstate",
    "pop_rep2_dlstate",
    "in_rep3_dlnumber",
    "pop_rep3_dlstate",
    "in_rep3_dlstate",
    "in_rep3_email",
    "in_rep2_email",
    "pop_rep3_age",
    "pop_rep2_age",
    "in_rep3_age",
    "in_rep2_age",
]