In [149]:
import pandas as pd
import glob
import requests
import json
import time

# Project notes

- Identify transactions that appear on any of the accounts (banks, Stripe, Wise, PayPal) but are missing from Open Collective (OC)
- Identify any transactions on the platform that are missing from the accounts
- Prep a balance/p&l with minimal wrangling

Project steps:

- Clarify project intention and goal (fill in project assessment in Make OC)
- Map out data 
- Write scripts 
- Analyze and prepare accounts

# Load config

In [150]:
# Reporting scope: which fiscal host and which year the notebook reconciles.
host = "opensource"
year = "2023"

# Reporting window passed to the APIs as ISO-8601 timestamps.
# NOTE(review): both bounds sit at 22:59:59.999Z rather than 23:59:59.999Z —
# presumably a local-time year boundary; confirm this is intended.
dateFrom = "2022-12-31T22:59:59.999Z"
dateTo = "2023-12-31T22:59:59.999Z"

In [151]:
# Read credentials from the environment; load_dotenv() first merges a local
# .env file into os.environ so nothing secret has to live in the notebook.
import os
from dotenv import load_dotenv
load_dotenv()

# API tokens (None when the variable is not set)
octoken = os.environ.get('OCTOKEN')
wisetoken = os.environ.get('WISETOKEN')

# profile id of the target Wise profile
wiseprofile = os.environ.get('WISEPROFILE')

In [152]:
# Switches controlling whether each source is fetched again or read from the
# cached files; set one to True for the first run, then back to False.
reloadOC = reloadWise = reloadStripe = False

# Get data

TODO: The query below returns null for every `merchantId`. We need the `merchantId` of each transaction, where available, to reconcile with Stripe, Wise and PayPal.

In [153]:
# load transactions from open collective or from local file

def _post_graphql_with_retry(url, payload, headers, max_retries=10, retry_delay=10, timeout=300):
    """POST a GraphQL payload and return the decoded JSON body.

    Transport failures and undecodable bodies are retried up to ``max_retries``
    times, sleeping ``retry_delay`` seconds between attempts.

    Raises:
        RuntimeError: when every attempt failed, or when the server answered
            with a body that carries no ``data`` (e.g. an authentication or
            query error).
    """
    last_error_name = None
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(url, json=payload, headers=headers, timeout=timeout)
            json_data = response.json()
        except (requests.exceptions.RequestException, ValueError) as error:
            # Only the exception type is reported: the request URL carries the
            # personal token and requests includes the URL in its messages.
            last_error_name = type(error).__name__
            print(f"Request failed. Attempt: {attempt} ({last_error_name})")
            if attempt < max_retries:
                time.sleep(retry_delay)
            continue

        if not isinstance(json_data, dict) or not json_data.get('data'):
            errors = json_data.get('errors') if isinstance(json_data, dict) else json_data
            raise RuntimeError(
                f"GraphQL request returned no data (HTTP {response.status_code}): {errors}")
        if json_data.get('errors'):
            # Partial results are kept (best effort), but made visible.
            print(f"Warning: response contains {len(json_data['errors'])} GraphQL error(s); "
                  f"first: {json_data['errors'][0].get('message')}")
        return json_data

    raise RuntimeError(
        f"Maximum retry attempts reached ({max_retries}); last error: {last_error_name}")


def fetch_transactions(page_size=10000, max_retries=10, retry_delay=10, page_delay=5, timeout=300):
    """Download every transaction of the configured host from Open Collective.

    Reads the module-level ``octoken``, ``host``, ``dateFrom`` and ``dateTo``
    and pages through the GraphQL ``transactions`` collection until
    ``totalCount`` nodes have been collected.

    Args:
        page_size: number of transactions requested per page (``limit``).
        max_retries: attempts per page before giving up.
        retry_delay: seconds to wait between failed attempts.
        page_delay: seconds to wait between successful pages.
        timeout: per-request timeout in seconds.

    Returns:
        list of transaction dicts exactly as returned in ``nodes``.

    Raises:
        RuntimeError: if a page cannot be fetched or the response has no data.
    """
    # NOTE(review): the token is sent both in the query string and as a Bearer
    # header; the query string can end up in logs. Confirm which mechanism the
    # API requires and drop the other.
    url = f"https://opencollective.com/api/graphql/v2?personalToken={octoken}"

    # Your GraphQL query
    query = """
    query TransactionsTable(
        $hostAccount: AccountReferenceInput,
        $limit: Int!,
        $offset: Int!,
        $type: TransactionType,
        $paymentMethodType: [PaymentMethodType],
        $dateFrom: DateTime,
        $dateTo: DateTime,
        $kind: [TransactionKind],
        $includeIncognitoTransactions: Boolean,
        $includeGiftCardTransactions: Boolean,
        $includeChildrenTransactions: Boolean,
        $virtualCard: [VirtualCardReferenceInput],
        $orderBy: ChronologicalOrderInput,
        $group: String,
        $includeHost: Boolean,
        $expense: ExpenseReferenceInput,
        $order: OrderReferenceInput
      ) {
        transactions(
          host: $hostAccount
          limit: $limit
          offset: $offset
          type: $type
          paymentMethodType: $paymentMethodType
          dateFrom: $dateFrom
          dateTo: $dateTo
          kind: $kind
          includeIncognitoTransactions: $includeIncognitoTransactions
          includeGiftCardTransactions: $includeGiftCardTransactions
          includeChildrenTransactions: $includeChildrenTransactions
          includeDebts: true
          virtualCard: $virtualCard
          orderBy: $orderBy
          group: $group
          includeHost: $includeHost
          expense: $expense
          order: $order
        ) {
          ...TransactionsTableQueryCollectionFragment
          __typename
        }
      }
      
      fragment TransactionsTableQueryCollectionFragment on TransactionCollection {
        totalCount
        offset
        limit
        nodes {
          id
          uuid
          kind
          amount {
            currency
            valueInCents
            __typename
          }
          amountInHostCurrency {
            currency
            valueInCents
            __typename
          }
          netAmount {
            currency
            valueInCents
            __typename
          }
          netAmountInHostCurrency {
            currency
            valueInCents
            __typename
          }
          paymentProcessorFee {
            currency
            valueInCents
            __typename
          }
          paymentMethod {
            name
            service
            sourcePaymentMethod {
              id
              name
              service
              __typename
            }
            type
          }
          giftCardEmitterAccount {  
            id
            name
            slug
            __typename
          }
          group
          type
          description
          createdAt
          merchantId
          isRefunded
          isRefund
          refundTransaction {
            id
          }
          isOrderRejected
          account {
            ... on AccountWithParent {
              parent {
                id
                slug
                name
              }
            }
            id
            legacyId
            name
            slug
            isIncognito
            type
            __typename
          }
          oppositeAccount {
            id
            legacyId
            name
            slug
            isIncognito
            type
            __typename
          }
          expense {
            id
            type
            description
            invoiceInfo 
            tags
            payee {
              id
              name
              slug
              type
              __typename
            }
            __typename
          }
          permissions {
            id
            canRefund
            canDownloadInvoice
            canReject
            __typename
          }
          __typename
        }
        __typename
      }
    """

    headers = {
        "Authorization": f"Bearer {octoken}",
        "Content-Type": "application/json"
    }

    # Initial variables setup
    variables = {
        "hostAccount": {"slug": host},
        "includeIncognitoTransactions": True,
        "includeChildrenTransactions": True,
        "limit": page_size,  # Keep a sensible number to avoid server strain
        "offset": 0,  # Advanced by one page after each successful request
        "dateFrom": dateFrom,
        "dateTo": dateTo,
        "orderBy": {"field": "CREATED_AT", "direction": "DESC"},
        "includeHost": True
    }

    all_transactions = []  # To hold all transactions
    while True:
        json_data = _post_graphql_with_retry(
            url,
            {'query': query, 'variables': variables},
            headers,
            max_retries=max_retries,
            retry_delay=retry_delay,
            timeout=timeout,
        )

        page = json_data['data'].get('transactions')
        if page is None:
            raise RuntimeError("GraphQL response contains no 'transactions' collection")

        # Extract data
        transactions = page['nodes']
        all_transactions.extend(transactions)

        # Pagination: Update offset
        variables['offset'] += variables['limit']

        # Check if all transactions are fetched
        if len(all_transactions) >= page['totalCount']:
            break

        # An empty page before totalCount is reached would otherwise loop forever.
        if not transactions:
            print(f"Warning: received an empty page after {len(all_transactions)} of "
                  f"{page['totalCount']} transactions; stopping.")
            break

        # print progress
        print(f'Fetched {len(all_transactions)} transactions')
        # pause between pages to avoid server strain
        time.sleep(page_delay)

    return all_transactions

# if the account has a (non-null) parent, replace the account with that parent
def replace_account_with_parent(transaction):
  """Collapse a child account onto its parent, in place.

  The transaction's ``account`` is replaced by ``account['parent']`` only when
  that parent is actually populated. A transaction whose account is missing or
  None, or whose ``parent`` is absent or null, is left untouched so the
  account information is never replaced by None.

  Args:
    transaction: transaction dict; mutated in place.

  Returns:
    The same ``transaction`` object.
  """
  account = transaction.get('account')
  if isinstance(account, dict) and account.get('parent'):
    transaction['account'] = account['parent']
  return transaction

# run the parent substitution over every transaction
def post_process_transactions(all_transactions):
  """Return a new list holding each transaction after replace_account_with_parent."""
  processed = [replace_account_with_parent(entry) for entry in all_transactions]
  return processed

if reloadOC:
  # Create the output folder before the (slow) download so that the fetched
  # data cannot be lost to a FileNotFoundError when it is written out.
  os.makedirs(f'data/{host}/{year}', exist_ok=True)
  # fetch, then collapse child accounts onto their parents
  all_transactions = post_process_transactions(fetch_transactions())
  # dump all transactions to a json file
  with open(f'data/{host}/{year}/{host}_{year}_all_platform_transactions.json', 'w') as f:
      json.dump(all_transactions, f, indent=2)
  # flatten the nested records into a dataframe
  df_platform_transactions = pd.json_normalize(all_transactions)
  # save dataframe as pickle
  df_platform_transactions.to_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')
else:
  # reuse the dataframe cached by a previous run
  df_platform_transactions = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')


In [154]:
# get unique transaction kind
transaction_kinds = df_platform_transactions['kind'].unique()

# sample up to 2 transactions for each kind; a kind with a single transaction
# would make a fixed sample(2) raise ValueError
df_sample = df_platform_transactions.groupby('kind').apply(lambda x: x.sample(min(len(x), 2)))

df_sample

Unnamed: 0_level_0,Unnamed: 1_level_0,id,uuid,kind,paymentMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,expense,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
kind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1
ADDED_FUNDS,100932,0d5a1901-fcf8-4c6a-ad1b-e1b404a9e915,0d5a1901-fcf8-4c6a-ad1b-e1b404a9e915,ADDED_FUNDS,,,ec966901-a4c4-4405-aa37-5216efd42318,CREDIT,GitHub Sponsors Contributions,2023-11-09T22:10:04.567Z,,False,False,,False,,Credit,USD,126927,Amount,USD,126927,Amount,USD,126927,Amount,USD,126927,Amount,USD,0,Amount,eng0kzdy-vor4pzbz-kb9pbma8-37xlw95j,448749.0,Play Framework,playframework,False,COLLECTIVE,Collective,03k0exgz-nm8yj64w-y88q5wao-9r7b4dlv,164160,GitHub Sponsors,github-sponsors,False,ORGANIZATION,Organization,mvrwng4k-j03dpbwe-x8zpz57o-yl9e8xba,True,True,True,TransactionPermissions,,OPENCOLLECTIVE,,HOST,,,,,,,,,,,,,,,,
ADDED_FUNDS,228758,5508b0c7-fb23-4a01-8cdf-dda20dbb68c4,5508b0c7-fb23-4a01-8cdf-dda20dbb68c4,ADDED_FUNDS,,,de9cdeb5-ddbe-45d7-8489-b80992423aeb,CREDIT,GitHub Sponsors payment,2023-09-25T19:41:49.968Z,,False,False,,False,,Credit,USD,1300,Amount,USD,1300,Amount,USD,1300,Amount,USD,1300,Amount,USD,0,Amount,rxg0j35l-zkwm6v5o-zzxqvoe9-8n47daby,522296.0,Scoop,scoopinstaller,False,COLLECTIVE,Collective,03k0exgz-nm8yj64w-y88q5wao-9r7b4dlv,164160,GitHub Sponsors,github-sponsors,False,ORGANIZATION,Organization,v349mrwg-z75lpybg-0856a08d-jeybknox,True,True,True,TransactionPermissions,,OPENCOLLECTIVE,,HOST,,,,,,,,,,,,,,,,
BALANCE_TRANSFER,736325,330fff1e-1a1b-4ecf-93b2-83e41a39d0b1,330fff1e-1a1b-4ecf-93b2-83e41a39d0b1,BALANCE_TRANSFER,,,da6595e6-315e-4754-a9fb-6937b12c6414,CREDIT,Financial contribution to Open Source Diversity,2023-02-07T14:17:21.352Z,,False,False,,False,,Credit,USD,3704,Amount,USD,3704,Amount,USD,3704,Amount,USD,3704,Amount,USD,0,Amount,rvedj9wr-oz3a56d3-kyop7blg-8x4m0ykn,19378.0,Open Source Diversity,opensourcediversity,False,COLLECTIVE,Collective,zzaxon79-3jy8gplx-0adpbrkd-emwl5v04,56993,🌼 Open Source Diversity contribution day,open-source-diversity-contribution-day-19378ev,False,EVENT,Event,ax8emk7l-nw54q7gz-9wxqgyvj-0o93zdrb,True,True,True,TransactionPermissions,🌼 Open Source Diversity contribution day (Event),OPENCOLLECTIVE,,COLLECTIVE,,,,,,,,,,,,,,,,
BALANCE_TRANSFER,671107,ebfec948-939c-43c7-aab5-e724c6ac9b87,ebfec948-939c-43c7-aab5-e724c6ac9b87,BALANCE_TRANSFER,,,ff00ec42-819c-48bb-8a08-164b631462f6,DEBIT,Financial contribution to Open Source Collective,2023-03-04T17:09:53.883Z,,False,False,,False,,Debit,USD,-4865,Amount,USD,-4865,Amount,USD,-4865,Amount,USD,-4865,Amount,USD,0,Amount,nmlo94zn-7x08dpoj-mao6ewga-3vjbrky5,61951.0,Ferdi,getferdi,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,8rzownxl-9e50pxm5-gvw6ymvb-dgk7j43a,False,True,False,TransactionPermissions,Ferdi (Collective),OPENCOLLECTIVE,,COLLECTIVE,,,,,,,,,,,,,,,,
CONTRIBUTION,461105,5e3d4581-a56c-4cef-91ce-aa00f9cbbea3,5e3d4581-a56c-4cef-91ce-aa00f9cbbea3,CONTRIBUTION,,,025d0d5c-1bae-4448-abed-55b709da1421,CREDIT,Financial contribution to Terms of Service; Di...,2023-06-07T22:40:46.882Z,1BJ95041Y13429021,False,False,,False,,Credit,USD,1000,Amount,USD,1000,Amount,USD,901,Amount,USD,901,Amount,USD,-99,Amount,0n4gx0br-ov5m96n0-4wqd8lk3-ey7jzwan,959.0,Terms of Service; Didn’t Read,tosdr,False,COLLECTIVE,Collective,e0adkjrv-8xwm6979-vl4q7z5n-4l93bgoy,656994,Guest,guest-89d21662,False,INDIVIDUAL,Individual,gm9bnk80-437xqr70-xgepvzeo-ljdayw5r,True,True,True,TransactionPermissions,,PAYPAL,,PAYMENT,,,,,,,,,,,,,,,,
CONTRIBUTION,115411,0829d135-0e65-4b5b-a051-a95f12f5947c,0829d135-0e65-4b5b-a051-a95f12f5947c,CONTRIBUTION,,,29d800dc-998b-42b8-9cd0-be7f99c0b605,CREDIT,Monthly financial contribution to OpenCore Leg...,2023-11-02T11:05:24.446Z,ch_3O7yiEBYycQg1OMf1QEz9xcM,False,False,,False,,Credit,USD,300,Amount,USD,300,Amount,USD,257,Amount,USD,257,Amount,USD,-43,Amount,mvrwng4k-j03dpbm0-da9qz57o-yl9e8xba,648434.0,OpenCore Legacy Patcher,opencore-legacy-patcher,False,COLLECTIVE,Collective,8k03reyd-5agmq504-y9bplbwo-z7j4nxv9,673733,Wei,wei6,False,INDIVIDUAL,Individual,mywxoz34-09rl6kek-dvy6venb-dj7gk85a,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,
EXPENSE,451765,2c5c3aa9-921f-4726-ac0d-e52dc14eb741,2c5c3aa9-921f-4726-ac0d-e52dc14eb741,EXPENSE,,,49542ccf-ea1d-4a96-a5ce-6f145bf928cd,DEBIT,Emmet support (February 2023),2023-06-16T20:15:59.232Z,717025155,False,False,,False,,Debit,USD,-500000,Amount,USD,-500000,Amount,USD,-500039,Amount,USD,-500039,Amount,USD,-39,Amount,x8k03rey-d5agmq55-e5gqlbwo-z7j4nxv9,55981.0,Emmet,emmet,False,COLLECTIVE,Collective,dgm9bnk8-0437xqrj-yj5pvzeo-ljdayw5r,55980,Sergey Chikuyonok,sergey-chikuyonok,False,INDIVIDUAL,Individual,eng0kzdy-vor4pzym-9zrqbma8-37xlw95j,False,False,False,TransactionPermissions,,,,,03k0exgz-nm8yj64k-k38q5wao-9r7b4dlv,INVOICE,Emmet support (February 2023),,[],dgm9bnk8-0437xqrj-yj5pvzeo-ljdayw5r,Sergey Chikuyonok,sergey-chikuyonok,INDIVIDUAL,Individual,Expense,,,,,
EXPENSE,318814,de72e2ca-3bc6-47ba-bfd1-1e806269523e,de72e2ca-3bc6-47ba-bfd1-1e806269523e,EXPENSE,,,42455575-2156-4b71-ab22-e3eb75d23789,DEBIT,$300 bug bounty claim for https://github.com/j...,2023-08-08T18:06:03.938Z,767803188,False,False,,False,,Debit,USD,-30000,Amount,USD,-30000,Amount,USD,-30039,Amount,USD,-30039,Amount,USD,-39,Amount,ov349mrw-gz75lpyy-4npa08dj-eybknoxd,10350.0,JHipster,generator-jhipster,False,COLLECTIVE,Collective,vjrkx5lm-nv904qjy-7zbq8bwa-7zdygoe3,91316,Marcelo Boveto Shima,marcelo-boveto-shima,False,INDIVIDUAL,Individual,jrkx5lmn-v904qjrn-lam68bwa-7zdygoe3,False,False,False,TransactionPermissions,,,,,5ax8emk7-lnw54q7w-4vxpgyvj-0o93zdrb,INVOICE,$300 bug bounty claim for https://github.com/j...,,[],vjrkx5lm-nv904qjy-7zbq8bwa-7zdygoe3,Marcelo Boveto Shima,marcelo-boveto-shima,INDIVIDUAL,Individual,Expense,,,,,
HOST_FEE,269842,367157fc-989e-487e-9638-9bf9eda90c3e,367157fc-989e-487e-9638-9bf9eda90c3e,HOST_FEE,,,4fc17edc-7f63-4b8f-a148-be578f8c88b6,DEBIT,Host Fee,2023-09-01T22:07:51.275Z,,False,False,,False,,Debit,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,0,Amount,8a47byg9-nxozdp8l-zlr6mjlv-03rek5w8,196752.0,Swiper,swiper,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,3k0exgzn-m8yj649r-ywmp5wao-9r7b4dlv,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,
HOST_FEE,460528,898b06db-c1a1-4012-a9b7-c2e4d5a34eb3,898b06db-c1a1-4012-a9b7-c2e4d5a34eb3,HOST_FEE,,,72df71da-4ce4-449a-b567-d3604921846b,CREDIT,Host Fee,2023-06-08T11:24:03.433Z,,False,False,,False,,Credit,USD,1000,Amount,USD,1000,Amount,USD,1000,Amount,USD,1000,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,8a47byg9-nxozdp8e-k7pmjlv0-3rek5w8k,10352,nodemon,nodemon,False,COLLECTIVE,Collective,ejoxl3az-45w9palv-eymqy870-mgkbrvdn,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,


In [155]:
# load transactions from wise

# Define the base URL for the Wise API
BASE_URL = 'https://api.transferwise.com/v1/transfers'

def fetch_all_transfers(profile_id=None, status=None, source_currency=None, 
                        target_currency=None, created_date_start=None, 
                        created_date_end=None, limit=100, timeout=60):
    """Page through the Wise transfers endpoint and return flattened records.

    Uses the module-level ``wisetoken`` as Bearer token. Filters that are left
    as None (or otherwise falsy) are not sent.

    Args:
        profile_id: value for the ``profile`` query parameter.
        status: value for the ``status`` query parameter.
        source_currency: value for the ``sourceCurrency`` query parameter.
        target_currency: value for the ``targetCurrency`` query parameter.
        created_date_start: value for the ``createdDateStart`` query parameter.
        created_date_end: value for the ``createdDateEnd`` query parameter.
        limit: page size requested from the API.
        timeout: per-request timeout in seconds.

    Returns:
        list of dicts, one per transfer, holding the selected fields.

    Raises:
        requests.HTTPError: on a non-success HTTP status.
        RuntimeError: if the response body is not a list of transfers.
        KeyError: if a transfer lacks one of the expected fields.
    """
    headers = {
        'Authorization': f'Bearer {wisetoken}'
    }

    # The filters do not change between pages, so build them once.
    optional_filters = {
        'profile': profile_id,
        'status': status,
        'sourceCurrency': source_currency,
        'targetCurrency': target_currency,
        'createdDateStart': created_date_start,
        'createdDateEnd': created_date_end,
    }
    query_params = {key: value for key, value in optional_filters.items() if value}
    query_params['limit'] = limit

    offset = 0
    all_transfers = []

    while True:
        query_params['offset'] = offset

        # Make the GET request to the Wise API
        response = requests.get(BASE_URL, headers=headers, params=query_params, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        # Break if there are no more results
        if not data:
            break

        # An error payload is a dict; iterating it as transfers would fail
        # with a confusing TypeError further down.
        if not isinstance(data, list):
            raise RuntimeError(f'Unexpected response from the Wise API: {data!r}')

        for transfer in data:
            all_transfers.append({
                'id': transfer['id'],
                'user': transfer['user'],
                'targetAccount': transfer['targetAccount'],
                'sourceAccount': transfer['sourceAccount'],
                'quote': transfer['quote'],
                'quoteUuid': transfer['quoteUuid'],
                'status': transfer['status'],
                'reference': transfer['reference'],
                'rate': transfer['rate'],
                'created': transfer['created'],
                'business': transfer['business'],
                'transferRequest': transfer['transferRequest'],
                # 'details' may be missing or null
                'details.reference': (transfer.get('details') or {}).get('reference'),
                'hasActiveIssues': transfer['hasActiveIssues'],
                'sourceCurrency': transfer['sourceCurrency'],
                'sourceValue': transfer['sourceValue'],
                'targetCurrency': transfer['targetCurrency'],
                'targetValue': transfer['targetValue'],
                'customerTransactionId': transfer['customerTransactionId']
            })

        # Report progress without dumping the raw transfer records.
        print(f'Fetched {len(all_transfers)} transfers')

        # Advance by what was actually received so that nothing is skipped
        # if the server returns fewer rows than requested.
        offset += len(data)

    return all_transfers

if reloadWise:
    transfers = fetch_all_transfers(profile_id=wiseprofile, created_date_start=dateFrom, created_date_end=dateTo)

    # Build the dataframe in a single call: growing it with pd.concat one row
    # at a time is quadratic and leaves every row with index label 0.
    df_wise_transactions = pd.DataFrame(transfers)

    # make sure the output folder exists, then save dataframe as pickle
    os.makedirs(f'data/{host}/{year}', exist_ok=True)
    df_wise_transactions.to_pickle(f'data/{host}/{year}/df_{host}_{year}_wise_transactions.pkl')

In [156]:
# Convert the Stripe CSV export into the cached pickle (only when requested)
if reloadStripe:
    # columns kept from the export, in the order they appear in the result
    stripe_columns = [
        'id', 'Description', 'Seller Message',
        'Amount', 'Amount Refunded', 'Currency',
        'Converted Amount', 'Converted Amount Refunded',
        'Fee', 'Taxes On Fee', 'Converted Currency',
        'Mode', 'Status', 'Statement Descriptor',
        'Customer ID', 'Customer Description',
        'Captured', 'Card ID', 'PaymentIntent ID',
        'Application Fee', 'Application ID',
        'from (metadata)', 'to (metadata)',
    ]
    stripe_export = pd.read_csv(f'data/{host}/{year}/{host}_{year}_stripe.csv')
    # selecting by list raises KeyError if the export lacks a column
    df_stripe_in = stripe_export[stripe_columns]
    df_stripe_in.to_pickle(f'data/{host}/{year}/df_{host}_{year}_stripe_transactions.pkl')
    


In [157]:
def _load_bank_csvs(directory):
    """Concatenate every CSV export in ``directory`` into one dataframe.

    Files are read in sorted name order so the row order is reproducible, and
    columns that are empty in every row are dropped.

    Raises:
        FileNotFoundError: if the directory contains no ``*.csv`` file.
    """
    csv_paths = sorted(glob.glob(f'{directory}/*.csv'))
    if not csv_paths:
        raise FileNotFoundError(f'No bank CSV exports found in {directory}')
    combined = pd.concat([pd.read_csv(path) for path in csv_paths], ignore_index=True)
    return combined.dropna(axis=1, how='all')


# load open collective data
df_platform = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')

# load stripe data
df_stripe = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_stripe_transactions.pkl')

# load wise data
df_wise = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_wise_transactions.pkl')

# load bank data (one folder of CSV exports per bank account); the folder path
# is passed directly instead of being stored in a variable named `dir`, which
# shadowed the builtin
df_bank_1 = _load_bank_csvs(f'data/{host}/{year}/oscbank_1')
df_bank_2 = _load_bank_csvs(f'data/{host}/{year}/oscbank_2')

# Post process data

In [158]:
# post process

# keep only Stripe rows whose Status is neither 'Failed' nor 'Pending'
df_stripe = df_stripe[~df_stripe['Status'].isin(['Failed', 'Pending'])]

# keep only Wise rows whose status is not 'cancelled'
df_wise = df_wise.loc[df_wise['status'].ne('cancelled')]

# Inspect transactions dataframe

In [159]:
# lift the column display limit so wide frames are shown in full
pd.options.display.max_columns = None
df_platform.sample(n=10)

Unnamed: 0,id,uuid,kind,paymentMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,expense,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
146013,09e6fc7f-cf99-4a09-a508-2f40bbf499fc,09e6fc7f-cf99-4a09-a508-2f40bbf499fc,CONTRIBUTION,,,e2c934d6-1044-4ab1-adea-90032f6706ed,CREDIT,Monthly financial contribution to LinuxServer ...,2023-11-01T02:10:36.091Z,ch_3O7Tt7BYycQg1OMf1WCOXQAQ,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,448,Amount,USD,448,Amount,USD,-52,Amount,5ax8emk7-lnw54q7x-bow6gyvj-0o93zdrb,52019.0,LinuxServer,linuxserver,False,COLLECTIVE,Collective,lk9mbw7y-48r3zq35-5vn60ej5-lavnodgx,58014,MountainMaster,mountainmaster,False,INDIVIDUAL,Individual,zaxon793-jy8gpld0-ax96brkd-emwl5v04,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,
612850,48d56130-dd23-4b45-abd6-d6faefe31c2d,48d56130-dd23-4b45-abd6-d6faefe31c2d,HOST_FEE,,,9966a38c-ef48-4b05-9aa1-77c083e0e8d8,DEBIT,Host Fee,2023-04-02T06:11:46.218Z,,False,False,,False,,Debit,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,0,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,3z8arxve-ymko60nw-8mbpgl5n-bj9w704d,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,
215073,71adef05-0d87-4c15-9743-bc2c8afb34b3,71adef05-0d87-4c15-9743-bc2c8afb34b3,HOST_FEE_SHARE,,,90fd9f09-a508-4dec-a1dd-e8485548332f,DEBIT,Host Fee Share,2023-10-01T05:04:53.568Z,,False,False,,False,,Debit,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,mlo94zn7-x08dpovd-50o6ewga-3vjbrky5,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,
747376,8b4d2e72-77df-4afb-b3b9-1a8dfbb04ec5,8b4d2e72-77df-4afb-b3b9-1a8dfbb04ec5,HOST_FEE,,,88f87188-a896-4937-8fcf-bc8e56311b3f,CREDIT,Host Fee,2023-02-02T09:10:01.465Z,,False,False,,False,,Credit,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914,Logseq,logseq,False,COLLECTIVE,Collective,rxg0j35l-zkwm6vmb-xwx6voe9-8n47daby,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,
481812,0700ae41-2c6f-43b3-882d-f930d1fc57e1,0700ae41-2c6f-43b3-882d-f930d1fc57e1,HOST_FEE,,,088d3cf5-8dd0-4b9c-b9e3-1c45a3449734,CREDIT,Host Fee,2023-06-01T23:07:01.394Z,,False,False,,False,,Credit,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914,Logseq,logseq,False,COLLECTIVE,Collective,zaxon793-jy8gpl78-nn96brkd-emwl5v04,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,
708202,e4a2d1d9-5ed8-464f-88a4-a848320757cf,e4a2d1d9-5ed8-464f-88a4-a848320757cf,CONTRIBUTION,,,d2652a20-2d2b-4b39-a134-751e2afe0ced,CREDIT,Monthly financial contribution to vue (Backers),2023-03-01T02:21:27.210Z,ch_3MgfIGBYycQg1OMf0mTbXOIE,False,False,,False,,Credit,USD,200,Amount,USD,200,Amount,USD,164,Amount,USD,164,Amount,USD,-36,Amount,nmlo94zn-7x08dpob-awqewga3-vjbrky5m,903.0,vue,vuejs,False,COLLECTIVE,Collective,bvrgbk35-7l4x96ez-zoj6omew-a0jdyzn8,72377,Bright Rain,brightrain,False,ORGANIZATION,Organization,ax8emk7l-nw54q7gk-n8mqgyvj-0o93zdrb,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,
38578,700a99db-92fb-4746-b871-fe718ea132ba,700a99db-92fb-4746-b871-fe718ea132ba,HOST_FEE,,,07e1022a-ac29-4091-bf8f-c14b76bfe5ee,DEBIT,Host Fee,2023-12-02T12:10:06.187Z,,False,False,,False,,Debit,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,0,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,vrgbk357-l4x96eyw-orkpomew-a0jdyzn8,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,
332532,8b566b05-0736-4051-9c77-c7e7a38d2d27,8b566b05-0736-4051-9c77-c7e7a38d2d27,HOST_FEE_SHARE,,,18bc3eda-80c6-400a-83bf-dcc89e46578e,DEBIT,Host Fee Share,2023-08-02T10:05:11.285Z,,False,False,,False,,Debit,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,3k0exgzn-m8yj643x-d9jp5wao-9r7b4dlv,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,
510149,f15a38ac-b902-4f31-8052-d1507578a6cb,f15a38ac-b902-4f31-8052-d1507578a6cb,CONTRIBUTION,,,4a4184d9-ca06-4da4-b1e8-662b4068dd8e,CREDIT,Monthly financial contribution to FOSSBilling ...,2023-05-27T10:43:12.945Z,9YH874806A416331X,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,434,Amount,USD,434,Amount,USD,-66,Amount,n4gx0bro-v5m96n0a-zeyqd8lk-3ey7jzwa,511783.0,FOSSBilling,fossbilling,False,COLLECTIVE,Collective,x8k03rey-d5agmq5w-5xyqlbwo-z7j4nxv9,154457,Henry Weismann,henry-weismann,False,INDIVIDUAL,Individual,vedj9wro-z3a56dl3-g4zp7blg-8x4m0ykn,True,True,True,TransactionPermissions,,PAYPAL,,SUBSCRIPTION,,,,,,,,,,,,,,,,
607736,9c5cd889-b4b3-493b-9036-ca82aeec8d53,9c5cd889-b4b3-493b-9036-ca82aeec8d53,HOST_FEE_SHARE,,,d5a63d2c-8f5b-4995-8632-1d683c9fab14,DEBIT,Host Fee Share,2023-04-02T11:09:14.348Z,,False,False,,False,,Debit,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,gnxdwzj3-le5mpw8a-rmvpy8rv-bko04a97,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,


In [160]:
# number of platform transactions per kind
df_platform.groupby('kind').size()

kind
ADDED_FUNDS                        2616
BALANCE_TRANSFER                    116
CONTRIBUTION                     193827
EXPENSE                            7582
HOST_FEE                         385894
HOST_FEE_SHARE                   192946
HOST_FEE_SHARE_DEBT               61479
PAYMENT_PROCESSOR_COVER             728
PAYMENT_PROCESSOR_DISPUTE_FEE        18
PLATFORM_TIP_DEBT                    11
PREPAID_PAYMENT_METHOD               13
dtype: int64

# Reconciling Stripe

TODO: Before we can do this, we need the `merchantId` values for the platform transactions.

In [161]:
# work on an independent (deep) copy of df_stripe for the reconciliation
df_stripe_reconciliation = df_stripe.copy(deep=True)

In [162]:
# preview the first five rows
df_stripe_reconciliation.head(n=5)

Unnamed: 0,id,Description,Seller Message,Amount,Amount Refunded,Currency,Converted Amount,Converted Amount Refunded,Fee,Taxes On Fee,Converted Currency,Mode,Status,Statement Descriptor,Customer ID,Customer Description,Captured,Card ID,PaymentIntent ID,Application Fee,Application ID,from (metadata),to (metadata)
0,ch_3OTXgHBYycQg1OMf0IfKN8Sw,Monthly financial contribution to Logseq (Back...,Payment complete.,5.0,0.0,usd,5.0,0.0,0.77,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,cus_PI7wZ2sLFYmMPi,https://opencollective.com/sanket-sharma,True,pm_1OTXmVBYycQg1OMfzXUwCvAm,pi_3OTXgHBYycQg1OMf0PkaNI3M,0.25,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,https://opencollective.com/sanket-sharma,https://opencollective.com/logseq
3,ch_3OTXVMBYycQg1OMf0qVZ5tjm,Financial contribution to OpenSCAD,Payment complete.,50.0,0.0,usd,50.0,0.0,4.25,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTXVpBYycQg1OMfien2j8Cr,pi_3OTXVMBYycQg1OMf02f4Vhyl,2.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/openscad
4,ch_3OTULSBYycQg1OMf10wlDceK,Monthly financial contribution to LibreELEC,Payment complete.,10.0,0.0,usd,10.0,0.0,1.24,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,cus_PI4UZkic5DVDO1,https://opencollective.com/daniel-dolejska,True,pm_1OTULSBYycQg1OMf7Lt0BwEi,pi_3OTULSBYycQg1OMf1saV7WWG,0.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,https://opencollective.com/daniel-dolejska,https://opencollective.com/libreelec
5,ch_3OTUG8BYycQg1OMf0FRuZULZ,Financial contribution to Spotube,Payment complete.,5.0,0.0,usd,5.0,0.0,0.77,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTUI5BYycQg1OMfAbkDmXhS,pi_3OTUG8BYycQg1OMf0AcGFa2r,0.25,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/spotube
7,ch_3OTT8EBYycQg1OMf0gR3PzrY,Financial contribution to .fmbot (.fmbot lifet...,Payment complete.,49.99,0.0,usd,49.99,0.0,4.25,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTT94BYycQg1OMf9AOHyAVx,pi_3OTT8EBYycQg1OMf0xdn31JV,2.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/fmbot


In [163]:
# Platform transactions that are contributions paid through Stripe:
# kind == CONTRIBUTION and paymentMethod.service == STRIPE
is_contribution = df_platform['kind'] == 'CONTRIBUTION'
paid_via_stripe = df_platform['paymentMethod.service'] == 'STRIPE'
df_platform_stripe = df_platform[is_contribution & paid_via_stripe]

In [164]:
# Preview the first five Stripe-paid contributions recorded on the platform
df_platform_stripe.head(5)

Unnamed: 0,id,uuid,kind,paymentMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,expense,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
3,20e9111c-db7b-4002-8dc1-1c2538ffd6b2,20e9111c-db7b-4002-8dc1-1c2538ffd6b2,CONTRIBUTION,,,00e1e29f-3348-4ebd-9598-c080adc4cbaf,CREDIT,Monthly financial contribution to Logseq (Back...,2023-12-31T22:46:58.403Z,ch_3OTXgHBYycQg1OMf0IfKN8Sw,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,448,Amount,USD,448,Amount,USD,-52,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,n4gx0bro-v5m96nkd-3ay6d8lk-3ey7jzwa,744511,Sanket Sharma,sanket-sharma,False,INDIVIDUAL,Individual,8k03reyd-5agmq5r7-rlyplbwo-z7j4nxv9,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,
7,f3fb4940-1026-4440-bd85-bc3c0cb439b0,f3fb4940-1026-4440-bd85-bc3c0cb439b0,CONTRIBUTION,,,9931763a-79b1-4141-923f-216db174f56e,CREDIT,Financial contribution to OpenSCAD,2023-12-31T22:29:44.179Z,ch_3OTXVMBYycQg1OMf0qVZ5tjm,False,False,,False,,Credit,USD,5000,Amount,USD,5000,Amount,USD,4825,Amount,USD,4825,Amount,USD,-175,Amount,rmvrwng4-kj03dpbk-ljzpz57o-yl9e8xba,132610.0,OpenSCAD,openscad,False,COLLECTIVE,Collective,n4gx0bro-v5m96n0l-878qd8lk-3ey7jzwa,467683,Holland Hopson,guest-8761eb54,False,INDIVIDUAL,Individual,ywz9j4av-god8pg8g-8ly6mr35-nxklb0e7,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,
46,e438b64b-473b-405b-be1c-67c7cfc58fba,e438b64b-473b-405b-be1c-67c7cfc58fba,CONTRIBUTION,,,91d79b1b-e38d-4559-be03-ad4a5f5fa301,CREDIT,Monthly financial contribution to LibreELEC,2023-12-31T19:06:49.150Z,ch_3OTULSBYycQg1OMf10wlDceK,False,False,,False,,Credit,USD,1000,Amount,USD,1000,Amount,USD,926,Amount,USD,926,Amount,USD,-74,Amount,88rzownx-l9e50pxv-3vb6ymvb-dgk7j43a,204839.0,LibreELEC,libreelec,False,COLLECTIVE,Collective,e0adkjrv-8xwm69oy-zwl67z5n-4l93bgoy,744470,Daniel Dolejska,daniel-dolejska,False,INDIVIDUAL,Individual,eng0kzdy-vor4pzez-e7bpbma8-37xlw95j,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,
55,8ee3d5b0-b031-4b06-b7ab-69a8419900c2,8ee3d5b0-b031-4b06-b7ab-69a8419900c2,CONTRIBUTION,,,0870251c-c506-421d-b969-9b98f303bb90,CREDIT,Financial contribution to Spotube,2023-12-31T19:03:20.581Z,ch_3OTUG8BYycQg1OMf0FRuZULZ,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,448,Amount,USD,448,Amount,USD,-52,Amount,3kzxy4v0-7wlr6mvy-5m76mj9n-o8agdbe5,475166.0,Spotube,spotube,False,COLLECTIVE,Collective,zaxon793-jy8gpl0r-4k9pbrkd-emwl5v04,744469,Ignacho,guest-a4555b91,False,INDIVIDUAL,Individual,8rzownxl-9e50pxox-orkpymvb-dgk7j43a,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,
64,0c7db04c-bb75-44cf-9f4a-149c07b09fd4,0c7db04c-bb75-44cf-9f4a-149c07b09fd4,CONTRIBUTION,,,315eb508-ca18-462a-bf43-5689b25f729d,CREDIT,Financial contribution to .fmbot (.fmbot lifet...,2023-12-31T17:49:57.026Z,ch_3OTT8EBYycQg1OMf0gR3PzrY,False,False,,False,,Credit,USD,4999,Amount,USD,4999,Amount,USD,4824,Amount,USD,4824,Amount,USD,-175,Amount,ggnxdwzj-3le5mpwe-zrvpy8rv-bko04a97,126137.0,.fmbot,fmbot,False,COLLECTIVE,Collective,mywxoz34-09rl6k3m-nvyqvenb-dj7gk85a,744444,Ijipop,guest-44e5fa66,False,INDIVIDUAL,Individual,vrgbk357-l4x96ey9-je5pomew-a0jdyzn8,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,


In [165]:
# Attach the matching platform transaction id to every Stripe row.
# The Stripe `id` is compared with the platform `merchantId`; the platform
# transaction `id` lands in a new column called `id_platform_transaction`, which
# stays null for Stripe rows that have no counterpart on the platform.

# Build a lookup with exactly one row per merchantId before merging:
# - platform rows without a merchantId can never match a Stripe row, and null keys
#   would otherwise be paired with each other by the merge
# - a merchantId that occurs more than once would duplicate the Stripe row in a
#   left merge and inflate the matched/unmatched counts; keep the first occurrence
platform_lookup = (
    df_platform[['merchantId', 'id']]
    .dropna(subset=['merchantId'])
    .drop_duplicates(subset='merchantId', keep='first')
    .rename(columns={'id': 'id_platform_transaction'})
)

# Left merge keeps every Stripe row. A leftover `id_platform_transaction` column
# from a previous run is dropped first so the cell can be re-executed safely, and
# the helper `merchantId` key column is removed again after the merge.
merged_df = (
    df_stripe_reconciliation
    .drop(columns='id_platform_transaction', errors='ignore')
    .merge(
        platform_lookup,
        left_on='id',
        right_on='merchantId',
        how='left',
        validate='many_to_one',  # fail loudly if the lookup is ever not unique
    )
    .drop(columns='merchantId')
)

# Assign the resulting dataframe back to the reconciliation frame
df_stripe_reconciliation = merged_df

In [175]:
# Flag the Stripe rows for which the merge found a platform transaction id
has_platform_match = df_stripe_reconciliation['id_platform_transaction'].notnull()

matched_count = int(has_platform_match.sum())
unmatched_count = int((~has_platform_match).sum())
total_count = len(df_stripe_reconciliation)

# Report matched rows, unmatched rows, and the matched share of all rows
print(f"Stripe transactions that have a corresponding platform transaction: {matched_count}")
print(f"Stripe transactions without a corresponding platform transaction: {unmatched_count}")
print(f"Percentage of Stripe transactions that have a corresponding platform transaction: {round(matched_count/total_count*100, 2)}%")

Stripe transactions that have a corresponding platform transaction: 131588
Stripe transactions without a corresponding platform transaction: 432
Percentage of Stripe transactions that have a corresponding platform transaction: 99.67%
