In [None]:
# import libraries
import pandas as pd
# display every column when a DataFrame is shown (no truncation of wide frames)
pd.set_option('display.max_columns', None)
import datetime as dt
import warnings
# NOTE(review): this ignores every warning raised after this point (pandas'
# SettingWithCopyWarning included) - consider narrowing the filter.
warnings.simplefilter("ignore")

In [None]:
# Snapshot the run time once (naive local time); `year` is the reference year
# for the date-of-birth / date-of-incorporation age filter applied further down.
today = dt.datetime.today()
year = today.year

In [None]:
# Prompt for the locations of the three input workbooks (an example path follows each prompt).
# NOTE(review): the first two variable names are crossed relative to the reports they
# receive - `party2port` gets the 4-2A path and `party2party` gets the 4-2B path.
# The cell that reads the files pairs them back up, so the names are left unchanged.
party2port = input("please input file location of report 4-2A: ")  # e.g. ../data_and_output/110817/R4_2_A_Report.xlsx
party2party = input("please input file location of report 4-2B: ")  # e.g. ../data_and_output/110817/R4_2_B_Report.xlsx
prev = input("please input file location of previous quarter's report: ")  # e.g. ../data_and_output/4-2 related_master_q3.xlsx

In [None]:
# Load the three workbooks, in this order: report 4-2a, report 4-2b, previous report.
# `party2port` holds the 4-2A path and `party2party` the 4-2B path (that is how the
# prompts assign them), so each frame receives the report its own name describes.
df_party2party, df_party2port, df_prev = (
    pd.read_excel(workbook)
    for workbook in (
        party2port,   # report 4-2a
        party2party,  # report 4-2b
        prev,         # previous report
    )
)

In [None]:
# Join report 4-2b to report 4-2a so that every related party is tied to a direct
# party that holds an active portfolio.
#   4-2a: direct party     -> related party
#   4-2b: active portfolio -> direct party
#   chain: active portfolio -> (direct party) -> related party
#   join columns: 4-2b 'Party ID' == 4-2a 'Party ID 1'; the related party is 4-2a 'Party ID 2'
# Column names present in both reports get the suffix of the report they came from.

df_curr = pd.merge(
    df_party2port,
    df_party2party,
    how='inner',
    left_on='Party ID',
    right_on='Party ID 1',
    suffixes=('_4-2b', '_4-2a'),
)

In [None]:
# Give the merged columns names that say whether they describe the direct party
# or the related party.
# NOTE(review): 'Is Public_4-2a' comes from report 4-2a but is labelled as a direct
# party attribute here, while the other *_4-2a attributes are labelled as related
# party attributes - confirm which party that flag describes.
# NOTE(review): the '.1' in 'Party Status (CRM).1' looks like the suffix pandas adds
# to a repeated header when reading a sheet - confirm against the source report.
renamed_columns = {
    'Party ID': 'Direct Party ID',
    'Party Status (CRM)': 'Direct Party Status (CRM)',
    'Is Public_4-2a': 'Direct Party Is Public',
    'Relationship Type_4-2a': 'Relationship Type to Direct Party',
    'Party ID 2': 'Related Party ID',
    'Party Name_4-2a': 'Related Party Name',
    'Party Type_4-2a': 'Related Party Type',
    'Date of Incorporation_4-2a': 'Related Party Date of Incorporation',
    'Date of Birth_4-2a': 'Related Party Date of Birth',
    'Party Status (CRM).1': 'Related Party Status (CRM)',
    'EAM/EIA/Finder': 'Related Party Agent Type',
}
df_curr = df_curr.rename(columns=renamed_columns)

In [None]:
# Remove the portfolio-level and 4-2b-side columns that are not carried further,
# together with the join column 'Party ID 1'.
unused_columns = [
    'Portfolio Name',
    'Portfolio Number',
    'Current Status',
    'Portfolio Creation Date',
    'Relationship Type_4-2b',
    'Relationship Sub-Type',
    'RM Name',
    'RM Location Name',
    'RM Team Name',
    'Booking Center Officer Name',
    'Managed By EAM / EIA / Finder',
    'EAM / EIA Name',
    'Finder Name',
    'Relationship (Party Role)',
    'Party Name_4-2b',
    'Party Type_4-2b',
    'Is Public_4-2b',
    'Date of Birth_4-2b',
    'Date of Incorporation_4-2b',
    'Party ID 1',
]
df_curr = df_curr.drop(columns=unused_columns)

In [None]:
# Ask for the label of this reporting batch and stamp it on every current row.
batch_label = input('input batch (eg. 2023 Q4): ')
df_curr['Batch'] = batch_label

In [None]:
# Add the manual tracking columns, each initialised to an empty string.
for tracking_col in ('Remarks', 'Action Required', 'Action Team',
                     'Status of Cleanup', 'Completion Date', 'Valid Exception'):
    df_curr[tracking_col] = ""

In [None]:
# Current report: turn the related party's date columns into plain dates and derive
# the year of each. Values that do not parse as YYYY-MM-DD are coerced to NaT, and
# NaT yields a missing (NaN) year.
curr_date_to_year = (
    ('Related Party Date of Birth', 'dob_year'),
    ('Related Party Date of Incorporation', 'doi_year'),
)
for date_col, year_col in curr_date_to_year:
    parsed = pd.to_datetime(df_curr[date_col], format='%Y-%m-%d', errors='coerce')
    df_curr[date_col] = parsed.dt.date
    df_curr[year_col] = df_curr[date_col].apply(lambda d: d.year)

In [None]:
# Previous report: turn the related party's date columns into plain dates and derive
# the year of each. Values that do not parse as YYYY-MM-DD are coerced to NaT, and
# NaT yields a missing (NaN) year.
prev_date_to_year = (
    ('Related Party Date of Birth', 'dob_year'),
    ('Related Party Date of Incorporation', 'doi_year'),
)
for date_col, year_col in prev_date_to_year:
    parsed = pd.to_datetime(df_prev[date_col], format='%Y-%m-%d', errors='coerce')
    df_prev[date_col] = parsed.dt.date
    df_prev[year_col] = df_prev[date_col].apply(lambda d: d.year)

In [None]:
def _relationship_key(frame):
    """Build one string key per row from direct party ID, related party ID and relationship type.

    The three parts are joined with '|'. Without a delimiter, different ID pairs
    can produce the same string (direct '12' + related '345' vs. direct '123' +
    related '45'), which would make unrelated rows look like the same relationship
    when the current and previous reports are compared.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'Direct Party ID', 'Related Party ID' and
        'Relationship Type to Direct Party'.

    Returns
    -------
    pandas.Series
        String keys, indexed like `frame`.
    """
    direct_id = frame['Direct Party ID'].astype('str')
    related_id = frame['Related Party ID'].astype('str')
    relationship = frame['Relationship Type to Direct Party'].astype('str')
    return direct_id + '|' + related_id + '|' + relationship


# The key is only used to match rows between the current and previous report,
# so both frames must be keyed by the same rule.
# NOTE(review): the IDs are compared by their string form - if one report stores them
# as integers and the other as floats ('123' vs '123.0') the keys will not match; verify.
df_curr['key'] = _relationship_key(df_curr)
df_prev['key'] = _relationship_key(df_prev)

In [None]:
# Current report: keep only active relationships whose type is on the in-scope list.
relationship_list = [
    'Company (of Director)', 'Director',
    'Company (of Shareholder)', 'Shareholder',
    'Foundation | Trust (of Protector)', 'Protector',
    'Company (of Signatory)', 'Signatory',
    'Trust (of Trustee)', 'Trustee',
]

curr_is_active = df_curr['Is Relationship Active'] == 'Yes'
curr_in_scope = df_curr['Relationship Type to Direct Party'].isin(relationship_list)
df_curr = df_curr[curr_is_active & curr_in_scope]

In [None]:
# Previous report: keep only active relationships whose type is on the in-scope list.
relationship_list = [
    'Company (of Director)', 'Director',
    'Company (of Shareholder)', 'Shareholder',
    'Foundation | Trust (of Protector)', 'Protector',
    'Company (of Signatory)', 'Signatory',
    'Trust (of Trustee)', 'Trustee',
]

prev_is_active = df_prev['Is Relationship Active'] == 'Yes'
prev_in_scope = df_prev['Relationship Type to Direct Party'].isin(relationship_list)
df_prev = df_prev[prev_is_active & prev_in_scope]

In [None]:
# Keep rows where the birth year or the incorporation year lies more than 100 years
# before the run year. A missing year compares as False, so a row needs at least
# one usable date to be kept.
curr_over_100 = (year - df_curr['dob_year']).gt(100) | (year - df_curr['doi_year']).gt(100)
df_curr = df_curr[curr_over_100]

prev_over_100 = (year - df_prev['dob_year']).gt(100) | (year - df_prev['doi_year']).gt(100)
df_prev = df_prev[prev_over_100]

In [None]:
# Remove rows that repeat an earlier row in every column (the first occurrence is kept).
df_curr = df_curr[~df_curr.duplicated()]
df_prev = df_prev[~df_prev.duplicated()]

In [None]:
# at this point we have finished preparing the current report
# now merge with the records in the previous report

In [None]:
# keys of the relationships listed in the previous report
prev_parties = df_prev['key'].to_list()

In [None]:
# keys of the relationships listed in the current report
curr_parties = df_curr['key'].to_list()

In [None]:
# keys present in both the previous and the current report (unresolved or exceptions)
both_parties = set(prev_parties) & set(curr_parties)

In [None]:
# keys present in the current report only (new this batch)
new_parties = set(curr_parties) - set(prev_parties)

In [None]:
# total unresolved = old unresolved (both_parties) plus new unresolved (new_parties)
# NOTE(review): when both sets are built from the current keys as above, their union
# is every current key, so this filter keeps all rows.
unresolved_keys = both_parties | new_parties
df_curr = df_curr[df_curr['key'].isin(unresolved_keys)]

In [None]:
# quick look at the first few comparison keys
df_curr['key'].head(n=5)

In [None]:
# Carry the batch label and the manual tracking fields over from the previous report
# for every relationship that appears in both reports, so earlier remarks and
# statuses are not lost when the report is regenerated.
cols = ['Batch', 'Remarks','Action Required','Action Team','Status of Cleanup','Completion Date','Valid Exception']

# Lookup table with exactly one row per key. If the previous report lists a key more
# than once, its first row is used; assigning per-key lists instead would raise a
# length-mismatch error whenever the two reports hold a different number of rows
# for the same key.
prev_tracking = df_prev.drop_duplicates(subset='key', keep='first').set_index('key')[cols]

# Work on an explicit copy so the assignments below never target a view of an
# earlier filter result.
df_curr = df_curr.copy()

carried_over = df_curr['key'].isin(both_parties)
if carried_over.any():
    carried_keys = df_curr.loc[carried_over, 'key']
    for col in cols:
        # One vectorised lookup per column instead of rescanning both frames for
        # every (column, key) pair; values are assigned positionally to the
        # carried-over rows.
        df_curr.loc[carried_over, col] = carried_keys.map(prev_tracking[col]).tolist()

In [None]:
# number of rows per batch label (missing labels are not counted)
df_curr['Batch'].value_counts(dropna=True)

In [None]:
# Remove the helper columns, put the remaining columns into the report layout
# and order the rows by batch.
helper_columns = ['dob_year', 'doi_year', 'key']

report_columns = [
    # tracking fields
    'Batch', 'Remarks', 'Action Required', 'Action Team',
    'Status of Cleanup', 'Completion Date', 'Valid Exception',
    # direct party and relationship
    'Direct Party ID', 'Direct Party Status (CRM)', 'Direct Party Is Public',
    'Is Relationship Active', 'Relationship Effective Start Date',
    'Relationship Effective End Date', 'Relationship Type to Direct Party',
    # related party
    'Related Party ID', 'Related Party Name', 'Related Party Type',
    'Related Party Date of Incorporation', 'Related Party Date of Birth',
    'Related Party Status (CRM)', 'Related Party Agent Type',
]

df_curr = (
    df_curr
    .drop(columns=helper_columns)
    .loc[:, report_columns]
    .sort_values(by='Batch')
)

In [None]:
# write the finished report to an Excel workbook in the working directory (row index omitted)
output_path = 'related_party_dob-doi.xlsx'
df_curr.to_excel(output_path, index=False)