In [None]:
try:
  df_platform
  print("Data already loaded")
except NameError:
  print("Loading data...")
  %run data.ipynb

In [None]:
import pandas as pd

# Reconciling Stripe

## Automatic reconciliation by merchantId

In [None]:
# Work on a copy of df_stripe so the reconciliation steps below never touch
# the frame that was loaded (df_stripe itself is defined outside this cell).
df_stripe_reconciliation = df_stripe.copy()

In [None]:
# Preview the first rows of the Stripe working copy.
df_stripe_reconciliation.head()

In [None]:
# Keep only the platform rows that are contributions paid through Stripe:
# kind must equal 'CONTRIBUTION' and paymentMethod.service must equal 'STRIPE'.
df_platform_stripe_related = df_platform.loc[
  df_platform['kind'].eq('CONTRIBUTION')
  & df_platform['paymentMethod.service'].eq('STRIPE')
]

In [None]:
# Preview the first rows of the Stripe-related platform transactions.
df_platform_stripe_related.head()

In [None]:
# First pass: exact match on identifiers.
# Platform rows that carry a merchantId are left-joined onto the Stripe rows
# (merchantId == stripe.id). Rows for which no Stripe row was found come out of
# the join with an empty stripe.id and are dropped, so df_platform_stripe holds
# only the reconciled pairs.
df_platform_stripe = (
  df_platform_stripe_related
  .dropna(subset=['merchantId'])
  .merge(df_stripe_reconciliation, how='left', left_on='merchantId', right_on='stripe.id')
  .dropna(subset=['stripe.id'])
)
print('Successfully merged {} rows out of {}'.format(len(df_platform_stripe), len(df_platform_stripe_related)))

# Anti-joins: whatever was not reconciled above, on each side.
df_left_platform = df_platform_stripe_related.loc[lambda rows: ~rows['id'].isin(df_platform_stripe['id'])]
df_left_stripe = df_stripe_reconciliation.loc[lambda rows: ~rows['stripe.id'].isin(df_platform_stripe['stripe.id'])]

from utils import matchByDateAndValue
# Second pass: platform rows that found no Stripe row by id are compared with
# the still-unmatched Stripe rows on creation date and amount, using the
# project helper. Rows for which the helper reports no df2key are discarded.
# NOTE(review): the exact meaning of timeDelta / valueDelta / absolute lives in
# utils.matchByDateAndValue — presumably tolerances on date and amount; confirm.
df_second_pass_stripe = matchByDateAndValue(
  df_left_platform, ['createdAt', 'amountInHostCurrency.value'],
  df_left_stripe, ['Created date (UTC)', 'Amount'],
  timeDelta='2 days', valueDelta=0.05, absolute=True,
).dropna(subset=['df2key'])

# Append the new pairs to the reconciled set; df2key is the helper's bookkeeping
# column and is not part of the first-pass frame, so it is left out.
df_platform_stripe = pd.concat(
  [df_platform_stripe, df_second_pass_stripe.drop(columns='df2key')],
  ignore_index=True,
)

# Refresh BOTH leftover frames. Previously only the Stripe side was refreshed,
# so df_left_platform still contained the rows matched just above and the
# summary reported them as unmatched.
# NOTE(review): assumes the helper's result keeps the platform 'id' column — confirm.
df_left_platform = df_platform_stripe_related[~df_platform_stripe_related['id'].isin(df_platform_stripe['id'])]
df_left_stripe = df_stripe_reconciliation[~df_stripe_reconciliation['stripe.id'].isin(df_platform_stripe['stripe.id'])]
print('Successfully merged {} rows out of {} from Value and Date'.format(len(df_second_pass_stripe), len(df_platform_stripe_related)))

def _percentage(part, whole):
  """Return part/whole as a percentage rounded to 2 decimals, or 0.0 when whole is 0."""
  # Guard: an empty input frame would otherwise raise ZeroDivisionError.
  return round(part / whole * 100, 2) if whole else 0.0

# Stripe rows that ended up paired = all Stripe rows minus the leftover ones.
# Counting on the Stripe side keeps this figure consistent with the
# "without a corresponding platform transaction" line and bounded by 100%,
# even when several platform rows share one Stripe id.
stripe_matched_count = len(df_stripe_reconciliation) - len(df_left_stripe)

print(f"Platform transactions that have a corresponding Stripe transaction: {len(df_platform_stripe)}")
print(f"Platform transactions without a corresponding Stripe transaction: {len(df_left_platform)}")
print(f"Percentage of Platform transactions that have a corresponding Stripe transaction: {_percentage(len(df_platform_stripe), len(df_platform_stripe_related))}%")
print(f"Stripe transactions without a corresponding platform transaction: {len(df_left_stripe)}")
print(f"Percentage of Stripe transactions that have a corresponding Platform transaction: {_percentage(stripe_matched_count, len(df_stripe_reconciliation))}%")
