### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import glob

### Define File Paths

In [2]:
# PROFIT_DETAIL exports: keep files whose six characters before '.csv'
# compare (as a string) at or after '202304'.
profit_detail = list(filter(lambda path: path[-10:-4] >= '202304', glob.glob('PROFIT_DETAIL/*_*.csv')))

# bbposmsl merchant snapshots: names ending in six arbitrary characters before '.csv'.
bbpos = glob.glob('bbposmsl-merchant/*-*-??????.csv')

# MDR files: keep those whose path characters [-12:-5] compare at or after '2024-02'.
mdr = list(filter(lambda path: path[-12:-5] >= '2024-02', glob.glob('MDR/*')))

### Process AIP ID.csv

In [4]:
# Load the AIP ID sheet.
aipid = pd.read_csv('AIP ID.csv')

# Merchant numbers whose 'Test' flag equals 1, kept as a plain list for later exclusion.
is_test_row = aipid['Test'] == 1
test = aipid.loc[is_test_row, 'Merchant No'].to_list()

# Reduce to complete (Merchant No, BBPOS Merchant ID) pairs and cast the ID to int.
aipid = (
    aipid[['Merchant No', 'BBPOS Merchant ID']]
    .dropna()
    .assign(**{'BBPOS Merchant ID': lambda pairs: pairs['BBPOS Merchant ID'].map(int)})
)

### Create a DataFrame of all Merchant No / BBPOS Merchant ID combinations

In [19]:
# Build the Merchant No -> BBPOS Merchant ID lookup from every bbposmsl file.
# Frames are collected in a list and concatenated once: growing a DataFrame
# with pd.concat inside the loop is quadratic and relies on concatenating
# with an empty DataFrame.
pairs_per_file = []

for file in bbpos:
    df = pd.read_csv(file, usecols=['Merchant No', 'BBPOS Merchant ID'], dtype=str).dropna()
    # Normalise the merchant number: drop double quotes, surrounding whitespace
    # and leading zeros, then keep at most the first 15 characters.
    df['Merchant No'] = df['Merchant No'].str.replace('"', '').str.strip().str.lstrip('0').str[:15]
    df = df[df['Merchant No'] != '']
    df = df.map(int)
    pairs_per_file.append(df)

# With no input files this stays an empty DataFrame, exactly as before.
mid = pd.concat(pairs_per_file).drop_duplicates() if pairs_per_file else pd.DataFrame()

# One row per Merchant No, keeping the largest BBPOS Merchant ID seen for it.
mid = mid.groupby('Merchant No', as_index=False)['BBPOS Merchant ID'].max()

# Append the pairs from AIP ID.csv.
# NOTE(review): if AIP ID.csv maps a Merchant No to a different ID than the
# one kept above, that Merchant No appears twice here and a later merge on
# 'Merchant No' would duplicate its rows - confirm this cannot happen.
mid = pd.concat([mid, aipid]).drop_duplicates()

### Combine Test Merchant No from AIP ID.csv and Excluded.csv

In [12]:
# The first column of Excluded.csv is matched against 'BBPOS Merchant ID';
# translate those IDs into the Merchant No values they map to in `mid`.
excluded_ids = pd.read_csv('Excluded.csv').iloc[:, 0].to_list()
is_excluded = mid['BBPOS Merchant ID'].isin(excluded_ids)
test_mid = mid.loc[is_excluded, 'Merchant No'].to_list()

In [13]:
# Union of both exclusion lists with duplicates removed.
test = list(set(test + test_mid))

### Preprocess PROFIT_DETAIL and split it into two parts (< 2024-02 and >= 2024-02)

In [None]:
cols = ['Transaction Time', 'Merchant No', 'MCC', 'SME Flag', 'Product', 'Transaction Type', 'Card Organization', 'Card interal', 'Settlement Amount', 'Transaction Fee', 'Service Fee profit']
group_keys = ['Transaction Time', 'Merchant No', 'MCC', 'SME Flag', 'Card Organization', 'Card interal']
monthly_parts, daily_parts = [], []


def _transaction_dates(raw):
    """Return each value of `raw` as a 'YYYY-MM-DD' string.

    The default parse is tried first; if pandas rejects the column with a
    ValueError (the documented failure of `pd.to_datetime`), the column is
    re-parsed with `dayfirst=True`. Only ValueError is caught so that
    KeyboardInterrupt and unrelated errors are no longer swallowed.
    """
    try:
        parsed = pd.to_datetime(raw)
    except ValueError:
        parsed = pd.to_datetime(raw, dayfirst=True)
    return parsed.dt.date.map(str)


for file in profit_detail:
    df = pd.read_csv(file, usecols=cols, dtype={'Merchant No': str, 'MCC': str}, encoding='gbk')

    # Keep in-store products and sale-type transactions only. `.copy()` makes
    # the column assignments below act on an independent frame, not a slice.
    wanted = df['Product'].isin(['POSPRODUCT', 'QR', 'INSTOREQR']) & df['Transaction Type'].isin(['SALES', 'TIPS_ADJUST', 'PRE_AUTH_COMPLETE', 'UNIONPAY_MICROPAY', 'UNIONPAY_SCANCODE'])
    df = df[wanted].copy()

    # Merchant numbers may carry '=' and '"' characters; strip them before the int cast.
    df['Merchant No'] = df['Merchant No'].replace('=|"', '', regex=True)
    df[['Merchant No', 'MCC']] = df[['Merchant No', 'MCC']].map(int)
    df = df[~df['Merchant No'].isin(test)]

    # UnionPay QR transaction types with no card organisation are attributed to UNIONPAY.
    df.loc[df['Transaction Type'].isin(['UNIONPAY_MICROPAY', 'UNIONPAY_SCANCODE']) & df['Card Organization'].isna(), 'Card Organization'] = 'UNIONPAY'
    # Collapse the region flag to FOREIGN (inter/intra-regional) vs DOMESTIC (everything else).
    df['Card interal'] = np.where(df['Card interal'].isin(['INTERREGIONAL', 'INTRAREGIONAL']), 'FOREIGN', 'DOMESTIC')
    df['Cost'] = df['Transaction Fee'].sub(df['Service Fee profit'])

    # Files stamped before 202402 are aggregated per month ('YYYY-MM'),
    # later files per day ('YYYY-MM-DD').
    is_monthly = file[-10:-4] < '202402'
    dates = _transaction_dates(df['Transaction Time'])
    df['Transaction Time'] = dates.str[:7] if is_monthly else dates

    # NOTE: groupby drops rows whose key columns are NaN (pandas default).
    df = df.groupby(group_keys)[['Settlement Amount', 'Cost']].agg(Amount=('Settlement Amount', 'sum'), Count=('Settlement Amount', 'count'), Cost=('Cost', 'sum'))
    (monthly_parts if is_monthly else daily_parts).append(df)

# Concatenate once per partition; an empty partition stays an empty DataFrame as before.
monthly = pd.concat(monthly_parts) if monthly_parts else pd.DataFrame()
daily = pd.concat(daily_parts) if daily_parts else pd.DataFrame()

### Merge Merchant ID into PROFIT_DETAIL

In [15]:
# Turn the group keys back into columns, then left-join the
# Merchant No -> BBPOS Merchant ID lookup onto both aggregates.
monthly = pd.merge(monthly.reset_index(), mid, how='left', on='Merchant No')
daily = pd.merge(daily.reset_index(), mid, how='left', on='Merchant No')

In [16]:
# Number of rows that found no BBPOS Merchant ID in the lookup (monthly, then daily).
print(
    monthly['BBPOS Merchant ID'].isna().sum(),
    daily['BBPOS Merchant ID'].isna().sum(),
    sep='\n',
)

0
0


In [17]:
# Manual override: April 2024 rows of Merchant No 852999994029349 are assigned BBPOS Merchant ID 17372.
in_april_2024 = daily['Transaction Time'].str[:7] == '2024-04'
is_overridden_merchant = daily['Merchant No'] == 852999994029349
daily.loc[in_april_2024 & is_overridden_merchant, 'BBPOS Merchant ID'] = 17372

### Create Dataframe for monthly Mdr from bbposmsl

In [18]:
cols = ['BBPOS Merchant ID', 'MdrVisa', 'MdrVisa Foreign', 'MdrMaster', 'MdrMaster Foreign', 'MdrCUP', 'MdrJCB']
rate_frames = []

for file in bbpos:
    df = pd.read_csv(file, dtype={'Merchant No': str, 'BBPOS Merchant ID': str})
    # Keep whichever of the wanted columns this file has, in file order;
    # `.copy()` so the assignments below act on an independent frame.
    df = df[[c for c in df.columns if c in cols]].dropna(subset='BBPOS Merchant ID').copy()
    df['BBPOS Merchant ID'] = df['BBPOS Merchant ID'].map(int)
    # Rates may be written as percentages with a '%' sign: strip it and convert to fractions.
    rate_cols = [c for c in df.columns if c != 'BBPOS Merchant ID']
    df[rate_cols] = df[rate_cols].replace('%', '', regex=True).map(float).div(100)
    # The last six characters before '.csv' become the 'YYYY-MM' period.
    df['Transaction Time'] = file[-10:-6] + '-' + file[-6:-4]
    rate_frames.append(df)

# Concatenate once instead of growing the frame inside the loop.
# Exact duplicate rows are dropped: the later left merges on
# ('Transaction Time', 'BBPOS Merchant ID') would otherwise emit one output
# row per duplicate, and the single-value `.item()` lookups would fail.
bbpos_mdr = pd.concat(rate_frames).drop_duplicates() if rate_frames else pd.DataFrame()

### Merge Mdr (< 2024-02)

In [19]:
# The monthly part uses only the non-foreign rate columns; join them by period and merchant ID.
monthly_rates = bbpos_mdr.drop(columns=['MdrVisa Foreign', 'MdrMaster Foreign'])
monthly = pd.merge(monthly, monthly_rates, how='left', on=['Transaction Time', 'BBPOS Merchant ID'])

In [20]:
# Card organisation -> name of the rate column that applies to it.
mdr_map = dict(VISA='MdrVisa', MASTERCARD='MdrMaster', UNIONPAY='MdrCUP', JCB='MdrJCB')

# Copy the matching rate into 'Mdr'; organisations absent from the map keep NaN.
for card_org, rate_col in mdr_map.items():
    uses_card_org = monthly['Card Organization'] == card_org
    monthly.loc[uses_card_org, 'Mdr'] = monthly[rate_col]

In [21]:
# Rows whose rate is missing or zero, excluding BBPOS Merchant ID 3466.
mdr_missing_or_zero = monthly['Mdr'].isna() | monthly['Mdr'].eq(0)
not_merchant_3466 = monthly['BBPOS Merchant ID'].ne(3466)
no_mdr = monthly.loc[mdr_missing_or_zero & not_merchant_3466, ['Transaction Time', 'BBPOS Merchant ID', 'Card Organization']]

In [22]:
# For every row without a usable rate, borrow the same merchant's rate from
# the following month, or failing that from the preceding month. A candidate
# counts only if exactly one bbpos_mdr row matches and its value is neither
# NaN nor 0. Rows with no usable neighbour are printed and left unchanged.
# Explicit checks replace the previous bare `except:` blocks, which also
# swallowed KeyboardInterrupt and unrelated errors.
for row in no_mdr.itertuples():
    # row = (index label, 'Transaction Time', 'BBPOS Merchant ID', 'Card Organization')
    rate_col = mdr_map.get(row[3])
    month_start = pd.to_datetime(row[1], errors='coerce')
    adjusted_mdr = None

    if rate_col in bbpos_mdr.columns and pd.notna(month_start):
        same_merchant = bbpos_mdr['BBPOS Merchant ID'] == row[2]
        for month_shift in (1, -1):  # next month first, then previous month
            neighbour = str(month_start + pd.tseries.offsets.DateOffset(months=month_shift))[:7]
            candidates = bbpos_mdr.loc[(bbpos_mdr['Transaction Time'] == neighbour) & same_merchant, rate_col]
            if len(candidates) != 1:
                continue
            value = candidates.item()
            if pd.notna(value) and value != 0:
                adjusted_mdr = value
                break

    if adjusted_mdr is None:
        print(row)
        continue
    monthly.loc[row[0], 'Mdr'] = adjusted_mdr

In [23]:
# Display the rows that entered the fallback, with their current 'Mdr'.
monthly.loc[no_mdr.index, :]

Unnamed: 0,Transaction Time,Merchant No,MCC,SME Flag,Card Organization,Card interal,Amount,Count,Cost,BBPOS Merchant ID,MdrVisa,MdrMaster,MdrCUP,MdrJCB,Mdr
52587,2023-07,852999957320148,5732,SME,MASTERCARD,DOMESTIC,1400.0,1,13.63,2523,,,,,0.015
160576,2024-01,852999994022149,5814,SME,VISA,DOMESTIC,204.0,1,1.92,15012,0.0,0.0,0.0,0.0,0.014


In [24]:
# Revenue = aggregated amount x applicable rate.
monthly['Revenue'] = monthly['Amount'] * monthly['Mdr']

### Create Dataframe for daily Mdr from MDR

In [25]:
cols = ['transaction_date', 'merchant_id', 'VISA DOMESTIC', 'VISA FOREIGN', 'MASTER DOMESTIC', 'MASTER FOREIGN', 'UNIONPAY DOMESTIC', 'JCB DOMESTIC']
daily_rate_frames = []

for file in mdr:
    daily_rate_frames.append(pd.read_csv(file, usecols=cols))

# Concatenate once (no empty-DataFrame seed, no quadratic re-copying);
# with no MDR files this stays an empty DataFrame as before.
daily_mdr = pd.concat(daily_rate_frames) if daily_rate_frames else pd.DataFrame()

# Align the key column names with the transaction frames. A new frame is
# assigned instead of renaming in place, so re-running the cell is harmless.
daily_mdr = daily_mdr.rename(columns={'transaction_date': 'Transaction Time', 'merchant_id': 'BBPOS Merchant ID'})

In [26]:
# Within each scheme, let the foreign and domestic rates fill each other's gaps
# (foreign is filled from domestic first, then domestic from foreign).
for foreign_col, domestic_col in (('VISA FOREIGN', 'VISA DOMESTIC'), ('MASTER FOREIGN', 'MASTER DOMESTIC')):
    daily_mdr[foreign_col] = daily_mdr[foreign_col].fillna(daily_mdr[domestic_col])
    daily_mdr[domestic_col] = daily_mdr[domestic_col].fillna(daily_mdr[foreign_col])

### Merge Mdr (>= 2024-02)

In [27]:
# Split on the date string: 2024-02-01..2024-02-05 (inclusive) vs 2024-02-06 onwards.
# NOTE(review): rows dated before 2024-02-01 land in neither part - confirm that is intended.
txn_day = daily['Transaction Time']
daily1 = daily[txn_day.between('2024-02-01', '2024-02-05')]
daily2 = daily[txn_day >= '2024-02-06']

In [28]:
# bbposmsl rate column -> equivalent column name in the daily MDR files.
cols_map = {'MdrVisa': 'VISA DOMESTIC', 'MdrVisa Foreign': 'VISA FOREIGN', 'MdrMaster': 'MASTER DOMESTIC', 'MdrMaster Foreign': 'MASTER FOREIGN', 'MdrCUP': 'UNIONPAY DOMESTIC', 'MdrJCB': 'JCB DOMESTIC'}

# The 2024-01 bbposmsl rates, renamed to the daily column names, serve the
# 2024-02-01..05 rows; missing foreign rates fall back to the domestic rate.
bbpos_mdr_2401 = bbpos_mdr[bbpos_mdr['Transaction Time'] == '2024-01'].drop(columns='Transaction Time').rename(columns=cols_map)
bbpos_mdr_2401 = bbpos_mdr_2401.fillna({'VISA FOREIGN': bbpos_mdr_2401['VISA DOMESTIC'], 'MASTER FOREIGN': bbpos_mdr_2401['MASTER DOMESTIC']})

# Both merges must keep the original row labels of `daily`. A plain
# `DataFrame.merge` returns a fresh 0..n-1 index, so daily1 used to be
# relabelled while daily2 kept its labels; after concatenation the two parts
# could share labels, which breaks label-aligned `.loc` assignments and makes
# per-label writes hit the wrong rows. daily1 now round-trips its index the
# same way daily2 does. `rename_axis(None)` keeps daily1's index unnamed, as
# it was before this change.
daily1 = daily1.reset_index().merge(bbpos_mdr_2401, how='left', on='BBPOS Merchant ID').set_index('index').rename_axis(None)
daily2 = daily2.reset_index().merge(daily_mdr, how='left', on=['Transaction Time', 'BBPOS Merchant ID']).set_index('index')

In [29]:
# Stack the two date ranges back into a single frame.
daily = pd.concat((daily1, daily2))

In [30]:
# Pick the rate column per card organisation. VISA and MASTERCARD have
# separate DOMESTIC/FOREIGN columns (MASTERCARD's are prefixed 'MASTER');
# the others always use their '<organisation> DOMESTIC' column.
for card_org in mdr_map:
    is_card_org = daily['Card Organization'] == card_org
    if card_org not in ('VISA', 'MASTERCARD'):
        daily.loc[is_card_org, 'Mdr'] = daily[f'{card_org} DOMESTIC']
        continue
    for region in ('DOMESTIC', 'FOREIGN'):
        in_region = daily['Card interal'] == region
        daily.loc[is_card_org & in_region, 'Mdr'] = daily[f'{card_org[:6]} {region}']

In [31]:
# Daily rows whose rate is missing or zero, excluding BBPOS Merchant ID 3466.
mdr_missing_or_zero = daily['Mdr'].isna() | daily['Mdr'].eq(0)
not_merchant_3466 = daily['BBPOS Merchant ID'].ne(3466)
no_mdr = daily.loc[mdr_missing_or_zero & not_merchant_3466, ['Transaction Time', 'BBPOS Merchant ID', 'Card Organization', 'Card interal']]

In [32]:
# Daily column name -> bbposmsl column name (inverse of cols_map).
cols_map_swap = {v: k for k, v in cols_map.items()}

# For every daily row without a usable rate, fall back to the bbposmsl rate
# of the same month and merchant. The value is used only if exactly one
# bbpos_mdr row matches and it is neither NaN nor 0; otherwise the row is
# printed and left unchanged. Explicit checks replace the previous bare
# `except:`, which also swallowed KeyboardInterrupt and unrelated errors.
# NOTE(review): 'UNIONPAY FOREIGN' / 'JCB FOREIGN' have no entry in cols_map,
# so such rows are always printed here - confirm that is intended.
for row in no_mdr.itertuples():
    # row = (index label, 'Transaction Time', 'BBPOS Merchant ID', 'Card Organization', 'Card interal')
    card_type = row[3][:6] if row[3] == 'MASTERCARD' else row[3]
    rate_col = cols_map_swap.get(f'{card_type} {row[4]}')
    adjusted_mdr = None

    if rate_col in bbpos_mdr.columns:
        same_month = bbpos_mdr['Transaction Time'] == str(row[1])[:7]
        same_merchant = bbpos_mdr['BBPOS Merchant ID'] == row[2]
        candidates = bbpos_mdr.loc[same_month & same_merchant, rate_col]
        if len(candidates) == 1:
            adjusted_mdr = candidates.item()

    if adjusted_mdr is None or pd.isna(adjusted_mdr) or adjusted_mdr == 0:
        print(row)
        continue
    daily.loc[row[0], 'Mdr'] = adjusted_mdr

In [33]:
# Display the daily rows that entered the fallback, with their current 'Mdr'.
daily.loc[no_mdr.index, :]

Unnamed: 0,Transaction Time,Merchant No,MCC,SME Flag,Card Organization,Card interal,Amount,Count,Cost,BBPOS Merchant ID,VISA DOMESTIC,MASTER DOMESTIC,UNIONPAY DOMESTIC,JCB DOMESTIC,VISA FOREIGN,MASTER FOREIGN,Mdr
159372,2024-03-07,852999956910388,5691,SME,MASTERCARD,DOMESTIC,900.0,1,8.89,11930,0.015,,,,0.015,,0.015


In [34]:
# Revenue = aggregated amount x applicable rate.
daily['Revenue'] = daily['Amount'] * daily['Mdr']

### Combine two parts

In [37]:
# Roll both parts up to (month, BBPOS Merchant ID, MCC, SME Flag, card organisation, region).
# The daily part is keyed on the first seven characters ('YYYY-MM') of its date string.
dimension_cols = ['BBPOS Merchant ID', 'MCC', 'SME Flag', 'Card Organization', 'Card interal']
value_cols = ['Amount', 'Count', 'Cost', 'Revenue']

monthly = monthly.groupby(['Transaction Time'] + dimension_cols)[value_cols].sum()
daily = daily.groupby([daily['Transaction Time'].str[:7]] + dimension_cols)[value_cols].sum()

In [38]:
# Stack the two monthly-level aggregates and turn the group keys back into columns.
result = pd.concat([monthly, daily])
result = result.reset_index()

In [39]:
# For BBPOS Merchant ID 3466 the revenue is set to the transaction count.
is_merchant_3466 = result['BBPOS Merchant ID'] == 3466
result.loc[is_merchant_3466, 'Revenue'] = result['Count']

In [40]:
# Net profit = revenue minus cost.
result['Net Profit'] = result['Revenue'] - result['Cost']

In [41]:
# Write the final table without the index column.
result.to_csv(path_or_buf='AIP Month.csv', index=False)