In [None]:
try:
  df_platform
  print("Data already loaded")
except NameError:
  print("Loading data...")
  %run data.ipynb


In [None]:
import pandas as pd

# PayPal row types that are left out of the reconciliation set.
ignored_types = [
    'Reversal of General Account Hold',
    'General Currency Conversion',
    'Hold on Available Balance',
    'User Initiated Currency Conversion',
    'Hold on Balance for Dispute Investigation',
    'Cancellation of Hold for Dispute Resolution',
]

# Keep every PayPal row whose `Type` is not ignored; copy so the result is an
# independent frame rather than a view of `df_paypal`.
is_ignored_type = df_paypal['Type'].isin(ignored_types)
df_paypal_reconciliation = df_paypal[~is_ignored_type].copy()
df_paypal_reconciliation.head()

In [None]:
# A platform row is PayPal-related when either its payment method service or
# its payout method type is PAYPAL.
paid_with_paypal = df_platform['paymentMethod.service'] == 'PAYPAL'
paid_out_with_paypal = df_platform['payoutMethod.type'] == 'PAYPAL'
df_platform_paypal_related = df_platform[paid_with_paypal | paid_out_with_paypal]

In [None]:
from utils import matchByDateAndValue


def _percent(part, whole):
    """Return `part / whole` as a percentage rounded to 2 decimals (0.0 when `whole` is 0)."""
    return round(part / whole * 100, 2) if whole else 0.0


# First pass: join platform rows to PayPal rows on IDs
# (platform `merchantId` against PayPal `Transaction ID`).
# The left join followed by dropna keeps only platform rows that found a PayPal row.
df_platform_paypal = pd.merge(
    df_platform_paypal_related.dropna(subset=['merchantId']),
    df_paypal_reconciliation,
    left_on='merchantId',
    right_on='Transaction ID',
    how='left',
).dropna(subset=['Transaction ID'])
print('Successfully merged {} rows out of {}'.format(len(df_platform_paypal), len(df_platform_paypal_related)))

# Rows on each side that the ID join did not pair up; these feed the second pass.
df_left_platform = df_platform_paypal_related[~df_platform_paypal_related['id'].isin(df_platform_paypal['id'])]
df_left_paypal = df_paypal_reconciliation[~df_paypal_reconciliation['Transaction ID'].isin(df_platform_paypal['Transaction ID'])]

# Second pass: pair the leftovers by date and amount.
# NOTE(review): '2 hours' and 0.05 look like the time window and value tolerance
# of `matchByDateAndValue` — confirm against utils.
df_second_pass_paypal = matchByDateAndValue(
    df_left_platform, ['createdAt', 'amountInHostCurrency.value'],
    df_left_paypal, ['Datetime', 'Gross'],
    '2 hours', 0.05,
).dropna(subset=['df2key'])
# `df2key` is dropped before appending the second-pass rows to the matched set.
df_platform_paypal = pd.concat(
    [df_platform_paypal, df_second_pass_paypal.loc[:, df_second_pass_paypal.columns != 'df2key']],
    ignore_index=True,
)

# Recompute the leftovers on BOTH sides now that the second pass has been merged in.
# Previously only the PayPal side was refreshed, so the "without a corresponding
# PayPal transaction" count below still included rows matched by the second pass.
# Assumes second-pass rows carry the platform `id` column — TODO confirm; if they
# do not, this yields the same frame as the first-pass leftovers.
df_left_platform = df_platform_paypal_related[~df_platform_paypal_related['id'].isin(df_platform_paypal['id'])]
df_left_paypal = df_paypal_reconciliation[~df_paypal_reconciliation['Transaction ID'].isin(df_platform_paypal['Transaction ID'])]
# Also drop PayPal rows that a matched row points at through `Reference Txn ID`.
df_left_paypal = df_left_paypal[~(df_left_paypal['Transaction ID'].isin(df_platform_paypal['Reference Txn ID']))]
print('Successfully merged {} rows out of {} from Value and Date'.format(len(df_second_pass_paypal), len(df_platform_paypal_related)))

# PayPal-side match count is the complement of the unmatched PayPal rows, so the
# percentage always agrees with the "without a corresponding platform transaction" line.
paypal_matched_count = len(df_paypal_reconciliation) - len(df_left_paypal)

print(f"Platform transactions that have a corresponding PayPal transaction: {len(df_platform_paypal)}")
print(f"Platform transactions without a corresponding PayPal transaction: {len(df_left_platform)}")
print(f"Percentage of Platform transactions that have a corresponding PayPal transaction: {_percent(len(df_platform_paypal), len(df_platform_paypal_related))}%")
print(f"PayPal transactions without a corresponding platform transaction: {len(df_left_paypal)}")
print(f"Percentage of PayPal transactions that have a corresponding Platform transaction: {_percent(paypal_matched_count, len(df_paypal_reconciliation))}%")


In [None]:
# How often each `Datetime` value occurs among the still-unmatched PayPal rows.
unmatched_paypal_datetimes = df_left_paypal['Datetime']
unmatched_paypal_datetimes.value_counts()

In [None]:
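# Optional export of the matched (platform id, PayPal Transaction ID) pairs — currently disabled.
# NOTE(review): `merged_df` is not defined in the visible cells; presumably
# `df_platform_paypal` is meant — confirm before re-enabling.
# merged_df[['id', 'Transaction ID']].to_csv('platform_paypal_reconciliation.csv', index=False)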

In [None]:
# Lift the row-display cap so the full count tables are shown (global pandas option).
pd.set_option('display.max_rows', None)

# Break the unmatched PayPal rows down by column; the first two tables are printed
# as text and the `Type` table is the cell's displayed result.
for printed_column in ('Balance Impact', 'Status'):
    print(df_left_paypal.value_counts(printed_column))
df_left_paypal.value_counts('Type')

In [None]:
# Unmatched PayPal rows whose `Type` is exactly 'Express Checkout Payment'.
is_express_checkout = df_left_paypal['Type'] == 'Express Checkout Payment'
df_left_paypal[is_express_checkout]

In [None]:
# Print the value counts of the matched platform/PayPal rows for each column in turn.
for summary_column in ('Balance Impact', 'Status', 'Type'):
    print(df_platform_paypal.value_counts(summary_column))

In [None]:
# Show up to 10 unmatched platform rows. The sample size is capped at the frame
# length because `sample` raises ValueError when asked for more rows than exist,
# and a fixed seed keeps the displayed rows stable across reruns.
df_left_platform.sample(n=min(10, len(df_left_platform)), random_state=42)

In [None]:
from utils import matchByDateAndValue

# Exclude PayPal rows already paired by the date/value pass. This filter is
# idempotent, so re-running the cell does not change the result.
df_left_paypal = df_left_paypal[~df_left_paypal['Transaction ID'].isin(df_second_pass_paypal['Transaction ID'])]

# `df_platform_paypal` already contains the second-pass rows (they are concatenated
# into it when the second pass runs), so adding len(df_second_pass_paypal) to it
# double counts them. The matched share is taken as the complement of the
# unmatched PayPal rows instead, which also guards against an empty PayPal frame.
paypal_total = len(df_paypal_reconciliation)
paypal_matched = paypal_total - len(df_left_paypal)
paypal_matched_pct = round(paypal_matched / paypal_total * 100, 2) if paypal_total else 0.0

# No bank matching has happened at this point, so the messages only refer to the Platform.
print(f"PayPal transactions matched to a Platform transaction by date and value: {len(df_second_pass_paypal)}")
print(f"PayPal transactions without a corresponding Platform transaction: {len(df_left_paypal)} out of {paypal_total}")
print(f"Percentage of PayPal transactions that have a corresponding Platform transaction: {paypal_matched_pct}%")
df_left_paypal.head(10)

In [None]:
# Preview the first five rows paired by the date/value pass.
df_second_pass_paypal.head(n=5)

In [None]:
# Try to pair the PayPal rows that have no Platform match with bank rows by date
# and amount, keeping only rows for which a bank row was found (`df2key` not null).
# NOTE(review): '2 days' and 0.05 look like the time window and value tolerance
# of `matchByDateAndValue` — confirm against utils.
df_paypal_bank = matchByDateAndValue(
    df_left_paypal, ['Datetime', 'Gross'],
    df_banks, ['Post Date', 'Debit'],
    '2 days', 0.05,
).dropna(subset=['df2key'])

# Remove the bank-matched rows from the unmatched PayPal set.
df_left_paypal = df_left_paypal[~df_left_paypal['Transaction ID'].isin(df_paypal_bank['Transaction ID'])]

# The accounted-for share is the complement of the unmatched PayPal rows. Summing
# len(df_paypal_bank) and len(df_platform_paypal) mixes PayPal rows with platform
# rows and can disagree with the unmatched count printed below.
paypal_total = len(df_paypal_reconciliation)
paypal_accounted = paypal_total - len(df_left_paypal)
paypal_accounted_pct = round(paypal_accounted / paypal_total * 100, 2) if paypal_total else 0.0

print(f"PayPal transactions with a corresponding Bank transaction: {len(df_paypal_bank)}")
print(f"PayPal transactions without either a corresponding Platform transaction or a Bank transaction: {len(df_left_paypal)} out of {paypal_total}")
print(f"Percentage of PayPal transactions that have a corresponding transaction either on the Platform or on the Bank: {paypal_accounted_pct}%")
df_paypal_bank.head(10)