In [2]:
# [Steps to solve the APE task]
# load rowdata and datalive 2 spreadsheets to python memory
# make a copy of datalive - datalive_update because we will update it by operations
# generate a list of policy numbers for per rowdata and datalive
# find new policy numbers, which exist in rawdata but not in datalive 
# remove all empty rows in datalive_update
# add new policy numbers as new rows in datalive_update
# populate 2020-05-01 column in datalive_update table by Premium value in rowdata table, and the matching fields are policy number and time
# for those new added rows, populate 2019-10-01 column in datalive_update table by Premium value in rowdata table, and the matching fields are policy number and time equal to 2019-10-01
# do the same for 2019-11-01, 2019-12-01, 2020-01-01, 2020-02-01, 2020-03-01, 2020-04-01
# for each policy in datalive, if payments < 4 then set Freq to 'ASP'
# for each policy in datalive, if payments >= 4 and all payments are the same then set Freq to 'M'
# for each policy in datalive, if payments >=4 but all payments are not the same then mark as 'Check', also highlight the row as yellow
# for each policy in datalive, if payments >=4 but all payments are not the same then mark 'Check SP' as 'Yes'
# for each row in datalive 'Check SP' as 'Yes', highlight the row as yellow

In [3]:
import pandas as pd
import numpy as np
%matplotlib inline

In [6]:
# Read both worksheets of the sample workbook in a single call: given a list
# of sheet names, read_excel returns a dict of DataFrames keyed by sheet name.
_ape_sheets = pd.read_excel(
    '../sampledata/APESample.xlsx',
    sheet_name=['Raw data', 'Data live'],
)
rowdata = _ape_sheets['Raw data']
datalive = _ape_sheets['Data live']

# All later edits go to this copy, so the frame loaded from 'Data live'
# stays available unchanged for comparison.
datalive_update = datalive.copy()

In [8]:
# Collect every policy number found in each sheet (duplicates and blanks included).
rowdata_policynolist = rowdata['Policy Nr'].values.tolist()
datalive_policynolist = datalive['Policy Number'].values.tolist()

rowdata_policynolist_distinct = list(set(rowdata_policynolist))
datalive_policynolist_distinct = list(set(datalive_policynolist))

# New policies are those present in rowdata but absent from datalive.
# np.setdiff1d returns the sorted, de-duplicated difference.
newpolicynolist = np.setdiff1d(rowdata_policynolist_distinct, datalive_policynolist_distinct).tolist()

# Remove the rows of datalive_update that have no policy number (empty rows).
datalive_update = datalive_update[datalive_update['Policy Number'].notna()]

# Add the new policies as rows with Freq 'ASP'.
# DataFrame.append was removed in pandas 2.0, and appending one row at a time
# copies the whole frame on every iteration; build the rows once and concat.
# Policies already present are skipped so re-running this cell does not
# create duplicate rows.
_existing_policies = set(datalive_update['Policy Number'].tolist())
_policies_to_add = [p for p in newpolicynolist if p not in _existing_policies]
if _policies_to_add:
    _new_policy_rows = pd.DataFrame({
        'Policy Number': _policies_to_add,
        'Freq': ['ASP'] * len(_policies_to_add),
    })
    datalive_update = pd.concat([datalive_update, _new_policy_rows], ignore_index=True, sort=False)

# Always renumber the rows 0..n-1. Previously this only happened as a side
# effect of append(ignore_index=True), so with no new policies the index kept
# the gaps left by the empty-row filter.
datalive_update = datalive_update.reset_index(drop=True)

# Keep only the first 25 columns.
datalive_update = datalive_update.iloc[:, 0:25]

In [10]:
# Populate the 2020-05-01 column of datalive_update with the Premium value
# from rowdata, matching on policy number and time equal to 2020-05-01.
# The target column is addressed by position 11.
# NOTE(review): presumably position 11 is the 2020-05-01 column of the
# 'Data live' layout - confirm against the sheet.

# The date filter does not depend on the row, so apply it once.
_may_2020_rows = rowdata.loc[rowdata['time'] == '2020-05-01']

# Iterate by position: .iloc is positional, so feeding it index labels (as the
# previous version did) writes to the wrong row whenever the index is not
# exactly 0..n-1.
for _row_position, policyno in enumerate(datalive_update['Policy Number'].tolist()):
    premium = _may_2020_rows.loc[_may_2020_rows['Policy Nr'] == policyno, 'Premium']
    if len(premium.values) > 0:
        # When several rowdata rows match, the first one wins.
        datalive_update.iloc[_row_position, 11] = premium.values[0]


'2.1'

In [11]:
# For the newly added policies, populate the 2019-10-01 .. 2020-04-01 columns
# of datalive_update with the Premium value from rowdata, matching on policy
# number and time.

# Month -> column position in datalive_update.
# NOTE(review): positions 4..10 are taken from the original code; confirm they
# line up with the date columns of the 'Data live' sheet.
_history_column_positions = {
    '2019-10-01': 4,
    '2019-11-01': 5,
    '2019-12-01': 6,
    '2020-01-01': 7,
    '2020-02-01': 8,
    '2020-03-01': 9,
    '2020-04-01': 10,
}

for policyno in newpolicynolist:
    _policy_rows = rowdata.loc[rowdata['Policy Nr'] == policyno]
    # Boolean row selector as a plain array so it can be used with .iloc.
    _target_rows = (datalive_update['Policy Number'] == policyno).to_numpy()

    for _month, _column_position in _history_column_positions.items():
        premium = _policy_rows.loc[_policy_rows['time'] == _month, 'Premium']
        if len(premium.values) > 0:
            # Write by POSITION. The previous `.loc[mask, 4]` treated 4 as a
            # column label, which creates a new column named 4 instead of
            # filling the existing date column.
            datalive_update.iloc[_target_rows, _column_position] = premium.values[0]
        


'2.1'

In [23]:
# Classify each policy from the payments found in column positions 4..10:
#   - at least 4 payments, all equal       -> Freq = 'M'
#   - at least 4 payments, not all equal   -> 'Check SP' = 'Yes'
# NOTE(review): the slice 4:11 stops before position 11 (the column filled
# from the 2020-05-01 premiums), so that month is not counted - confirm this
# is intended.
# NOTE(review): the planned rule "fewer than 4 payments -> Freq 'ASP'" is not
# applied here; only newly added rows receive 'ASP' when they are created.
for _row_position, _row_label in enumerate(datalive_update.index):
    _policyno = None
    try:
        _policyno = datalive_update.loc[_row_label, 'Policy Number']
        # Read positionally (.iloc) but write by label (.loc) so the two
        # indexers are never mixed up when the index is not 0..n-1.
        payments = datalive_update.iloc[_row_position, 4:11].tolist()
        # Keep real numeric payments only; blanks (NaN) and text are ignored.
        numberofpayments = [
            float(payment)
            for payment in payments
            if isinstance(payment, (int, float, np.integer, np.floating)) and not pd.isna(payment)
        ]
        paymentamounts = set(numberofpayments)

        # for each policy in datalive, if payments >= 4 and all payments are the same then set Freq to M
        if len(numberofpayments) > 3 and len(paymentamounts) == 1 and datalive_update.loc[_row_label, 'Freq'] != 'M':
            datalive_update.loc[_row_label, 'Freq'] = 'M'
            print('Updated: ', _policyno)
        # for each policy in datalive, if payments >=4 but all payments are not the same then mark 'Check SP' as 'Yes'
        elif len(numberofpayments) > 3 and len(paymentamounts) > 1:
            datalive_update.loc[_row_label, 'Check SP'] = 'Yes'
    except Exception as err:
        # Best effort per row: report the policy and the reason, then carry on.
        # Unlike a bare `except:`, this lets KeyboardInterrupt / SystemExit through.
        print('Err: ', _policyno, err)

In [46]:
def _highlight_check_sp(row):
    """Return one CSS string per cell: yellow background when the row's 'Check SP' is 'Yes'.

    `row` is a single DataFrame row passed by Styler.apply(axis=1). Rows
    without a 'Check SP' value (or frames without that column) get no styling.
    """
    flagged = row.get('Check SP') == 'Yes'
    return ['background-color: yellow' if flagged else '' for _ in row]


# Highlight every row marked 'Check SP' == 'Yes' in yellow.
# The previous version built a Styler with set_properties() but discarded it,
# so the exported workbook carried no formatting at all.
datalive_styled = datalive_update.style.apply(_highlight_check_sp, axis=1)

# Output to an Excel file; exporting through the Styler keeps the highlight.
datalive_styled.to_excel('datalive_update.xlsx', sheet_name='Data live', index=False)