In [1]:
import pandas as pd
import glob
import requests
import json
import time

# Project notes

- Identify transactions that are missing from OC that are on any of the accounts (meaning banks, stripe, wise, paypal)
- Identify any transactions on the platform that are missing from the accounts
- Prep a balance/p&l with minimal wrangling

Project steps:

- Clarify project intention and goal (fill in project assessment in Make OC)
- Map out data 
- Write scripts 
- Analyze and prepare accounts

# Load config

In [2]:
# host variables
host = 'opensource'
dateFrom = '2022-12-31T22:59:59.999Z'
dateTo = '2023-12-31T22:59:59.999Z'
year = '2023'

In [3]:
# load API tokens from .env file
import os
from dotenv import load_dotenv
load_dotenv()
octoken = os.getenv('OCTOKEN')
wisetoken = os.getenv('WISETOKEN')

# set the profile id of the target Wise profile
wiseprofile = os.getenv('WISEPROFILE')

In [4]:
# Reload the data or not - set these to false after the first run to avoid reloading
reloadOC = False
reloadWise = False
reloadStripe = True

In [5]:
import locale
locale.getpreferredencoding()

'utf-8'

# Get data

In [6]:
# load transactions from open collective or from local file

def fetch_transactions():
    url = f"https://opencollective.com/api/graphql/v2?personalToken={octoken}"

    # Your GraphQL query
    query = """
    query TransactionsTable(
        $hostAccount: AccountReferenceInput,
        $limit: Int!,
        $offset: Int!,
        $type: TransactionType,
        $paymentMethodType: [PaymentMethodType],
        $dateFrom: DateTime,
        $dateTo: DateTime,
        $kind: [TransactionKind],
        $includeIncognitoTransactions: Boolean,
        $includeGiftCardTransactions: Boolean,
        $includeChildrenTransactions: Boolean,
        $virtualCard: [VirtualCardReferenceInput],
        $orderBy: ChronologicalOrderInput,
        $group: String,
        $includeHost: Boolean,
        $expense: ExpenseReferenceInput,
        $order: OrderReferenceInput
      ) {
        transactions(
          host: $hostAccount
          limit: $limit
          offset: $offset
          type: $type
          paymentMethodType: $paymentMethodType
          dateFrom: $dateFrom
          dateTo: $dateTo
          kind: $kind
          includeIncognitoTransactions: $includeIncognitoTransactions
          includeGiftCardTransactions: $includeGiftCardTransactions
          includeChildrenTransactions: $includeChildrenTransactions
          includeDebts: true
          virtualCard: $virtualCard
          orderBy: $orderBy
          group: $group
          includeHost: $includeHost
          expense: $expense
          order: $order
        ) {
          ...TransactionsTableQueryCollectionFragment
          __typename
        }
      }
      
      fragment TransactionsTableQueryCollectionFragment on TransactionCollection {
        totalCount
        offset
        limit
        nodes {
          id
          uuid
          kind
          amount {
            currency
            valueInCents
            __typename
          }
          amountInHostCurrency {
            currency
            valueInCents
            __typename
          }
          netAmount {
            currency
            valueInCents
            __typename
          }
          netAmountInHostCurrency {
            currency
            valueInCents
            __typename
          }
          paymentProcessorFee {
            currency
            valueInCents
            __typename
          }
          paymentMethod {
            name
            service
            sourcePaymentMethod {
              id
              name
              service
              __typename
            }
            type
          }
          payoutMethod {
            type
            name
            type
          }
          giftCardEmitterAccount {  
            id
            name
            slug
            __typename
          }
          group
          type
          description
          createdAt
          merchantId
          isRefunded
          isRefund
          refundTransaction {
            id
          }
          isOrderRejected
          account {
            ... on AccountWithParent {
              parent {
                id
                slug
                name
              }
            }
            id
            legacyId
            name
            slug
            isIncognito
            type
            __typename
          }
          oppositeAccount {
            id
            legacyId
            name
            slug
            isIncognito
            type
            __typename
          }
          expense {
            id
            type
            description
            invoiceInfo 
            tags
            payee {
              id
              name
              slug
              type
              __typename
            }
            __typename
          }
          permissions {
            id
            canRefund
            canDownloadInvoice
            canReject
            __typename
          }
          __typename
        }
        __typename
      }
    """

    headers = {
        "Authorization": f"Bearer {octoken}",
        "Content-Type": "application/json"
    }

    # Initial variables setup
    variables = {
        "hostAccount": {"slug": host},
        "includeIncognitoTransactions": True,
        "includeChildrenTransactions": True,
        "limit": 10000,  # Adjust if necessary but keep a sensible number to avoid server strain
        "offset": 0,  # Will be adjusted for each subsequent request
        "dateFrom": dateFrom,
        "dateTo": dateTo,
        "orderBy": {"field": "CREATED_AT", "direction": "DESC"},
        "includeHost": True
    }

    all_transactions = []  # To hold all transactions
    while True:
        
        # Try catch around json_data = response.json() to avoid errors

        max_retries = 10  # Setting the maximum number of retries
        retries = 0  # Initial retry count
        response = ''

        while retries < max_retries:
            try:
                # Make the HTTP request
                response = requests.post(
                    url, json={'query': query, 'variables': variables}, headers=headers)
                json_data = response.json()
                break 
            except Exception as e:
                print("Request failed. Attempt:", retries + 1)
                print(e)
                retries += 1 
                time.sleep(10)

        if retries == max_retries:
            print("Maximum retry attempts reached. Exiting.")
        # Extract data
        transactions = json_data['data']['transactions']['nodes']
        all_transactions.extend(transactions)

        # Pagination: Update offset
        variables['offset'] += variables['limit']

        # Check if all transactions are fetched
        if len(all_transactions) >= json_data['data']['transactions']['totalCount']:
            break
        
        # print progress
        print(f'Fetched {len(all_transactions)} transactions')
        # sleep for 10 seconds to avoid server strain
        time.sleep(5)

    return all_transactions

# if account has property parent, replace account with parent
def replace_account_with_parent(transaction):
  if 'parent' in transaction['account']:
    transaction['account'] = transaction['account']['parent']
  return transaction

# apply the function to all transactions
def post_process_transactions(all_transactions):
  return list(map(replace_account_with_parent, all_transactions))

if reloadOC:
  all_transactions = fetch_transactions()
  # dump all transactions to a json file
  post_process_transactions(all_transactions)
  with open(f'data/{host}/{year}/{host}_{year}_all_platform_transactions.json', 'w') as f:
      json.dump(all_transactions, f, indent=2)
  # convert the json file to a dataframe
  df_platform_transactions = pd.json_normalize(all_transactions)
  # save dataframe as pickle
  df_platform_transactions.to_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')
else:
  df_platform_transactions = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')


Fetched 10000 transactions
Fetched 20000 transactions
Fetched 30000 transactions
Fetched 40000 transactions
Fetched 50000 transactions
Fetched 60000 transactions
Fetched 70000 transactions
Fetched 80000 transactions
Fetched 90000 transactions
Fetched 100000 transactions
Fetched 110000 transactions
Fetched 120000 transactions
Fetched 130000 transactions
Fetched 140000 transactions
Fetched 150000 transactions
Fetched 160000 transactions
Fetched 170000 transactions
Fetched 180000 transactions
Fetched 190000 transactions
Fetched 200000 transactions
Fetched 210000 transactions
Fetched 220000 transactions
Fetched 230000 transactions
Fetched 240000 transactions
Fetched 250000 transactions
Fetched 260000 transactions
Fetched 270000 transactions
Fetched 280000 transactions
Fetched 290000 transactions
Fetched 300000 transactions
Fetched 310000 transactions
Fetched 320000 transactions
Fetched 330000 transactions
Request failed. Attempt: 1
Expecting value: line 1 column 1 (char 0)
Fetched 340000 t

In [8]:
# load transactions from wise

# Define the base URL for the Wise API
BASE_URL = 'https://api.transferwise.com/v1/transfers'

def fetch_all_transfers(profile_id=None, status=None, source_currency=None, 
                        target_currency=None, created_date_start=None, 
                        created_date_end=None, limit=100):
    headers = {
        'Authorization': f'Bearer {wisetoken}'
    }
    offset = 0
    all_transfers = []

    while True:
        # Prepare query parameters
        query_params = {'limit': limit, 'offset': offset}
        if profile_id:
            query_params['profile'] = profile_id
        if status:
            query_params['status'] = status
        if source_currency:
            query_params['sourceCurrency'] = source_currency
        if target_currency:
            query_params['targetCurrency'] = target_currency
        if created_date_start:
            query_params['createdDateStart'] = created_date_start
        if created_date_end:
            query_params['createdDateEnd'] = created_date_end

        # Make the GET request to the Wise API
        response = requests.get(BASE_URL, headers=headers, params=query_params)
        data = response.json()

        print(data)

        # Break if there are no more results
        if not data:
            break

        for transfer in data:
            all_transfers.append({
                'id': transfer['id'],
                'user': transfer['user'],
                'targetAccount': transfer['targetAccount'],
                'sourceAccount': transfer['sourceAccount'],
                'quote': transfer['quote'],
                'quoteUuid': transfer['quoteUuid'],
                'status': transfer['status'],
                'reference': transfer['reference'],
                'rate': transfer['rate'],
                'created': transfer['created'],
                'business': transfer['business'],
                'transferRequest': transfer['transferRequest'],
                'details.reference': transfer.get('details', {}).get('reference'),
                'hasActiveIssues': transfer['hasActiveIssues'],
                'sourceCurrency': transfer['sourceCurrency'],
                'sourceValue': transfer['sourceValue'],
                'targetCurrency': transfer['targetCurrency'],
                'targetValue': transfer['targetValue'],
                'customerTransactionId': transfer['customerTransactionId']
            })

        # Update the offset for the next iteration
        offset += limit

    return all_transfers

if reloadWise:
    transfers = fetch_all_transfers(profile_id=wiseprofile, created_date_start=dateFrom, created_date_end=dateTo)

    # create transfers dataframe
    df_wise_transactions = pd.DataFrame()

    for transfer in transfers:
        # add to dataframe with concat
        df_wise_transactions = pd.concat([df_wise_transactions, pd.DataFrame([transfer])])

    # save dataframe as pickle
    df_wise_transactions.to_pickle(f'data/{host}/{year}/df_{host}_{year}_wise_transactions.pkl')

In [9]:
# load transactions from stripe export
if reloadStripe:
    df_stripe_in = pd.read_csv(f'data/{host}/{year}/{host}_{year}_stripe.csv')
    # ensure only relevant columns
    df_stripe_in = df_stripe_in[[
    'id',
    'Created date (UTC)',
    'Description',
    'Seller Message',
    'Amount',
    'Amount Refunded',
    'Currency',
    'Converted Amount',
    'Converted Amount Refunded',
    'Fee',
    'Taxes On Fee',
    'Converted Currency',
    'Mode',
    'Status',
    'Statement Descriptor',
    'Customer ID',
    'Customer Description',
    'Captured',
    'Card ID',
    'PaymentIntent ID',
    'Application Fee',
    'Application ID',
    'from (metadata)',
    'to (metadata)'
    ]]
    df_stripe_in.to_pickle(f'data/{host}/{year}/df_{host}_{year}_stripe_transactions.pkl')
    


  df_stripe_in = pd.read_csv(f'data/{host}/{year}/{host}_{year}_stripe.csv')


In [10]:
# load open collective data
df_platform = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')

# load stripe data
df_stripe = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_stripe_transactions.pkl')

# load wise data
df_wise = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_wise_transactions.pkl')

# load bank data
dir = f'data/{host}/{year}/oscbank_1'
df_bank_1 = pd.concat([pd.read_csv(f) for f in glob.glob(f'{dir}/*.csv')], ignore_index=True)
df_bank_1 = df_bank_1.dropna(axis=1, how='all')

dir = f'data/{host}/{year}/oscbank_2'
df_bank_2 = pd.concat([pd.read_csv(f) for f in glob.glob(f'{dir}/*.csv')], ignore_index=True)
df_bank_2 = df_bank_2.dropna(axis=1, how='all')

# Post process data

In [11]:
# post process

# remove failed and pending transactions from Stripe
df_stripe = df_stripe[df_stripe['Status'] != 'Failed']
df_stripe = df_stripe[df_stripe['Status'] != 'Pending']

# remove cancelled from Wise
df_wise = df_wise[df_wise['status'] != 'cancelled']

# Inspect transactions dataframe

In [12]:
# get unique transaction kind
transaction_kinds = df_platform_transactions['kind'].unique()

# sample 2 transactions for each kind
df_sample = df_platform_transactions.groupby('kind').apply(lambda x: x.sample(2))

df_sample

Unnamed: 0_level_0,Unnamed: 1_level_0,id,uuid,kind,paymentMethod,payoutMethod,giftCardEmitterAccount,group,type,description,createdAt,...,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,payoutMethod.type,payoutMethod.name,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
kind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ADDED_FUNDS,596777,2dc4c418-f5c5-47a7-88f3-0f84f2a3b357,2dc4c418-f5c5-47a7-88f3-0f84f2a3b357,ADDED_FUNDS,,,,5ad7b31e-fd14-4629-affc-c0af2e46eb37,CREDIT,GitHub Sponsors payment,2023-04-08T18:10:55.771Z,...,,,,,,,,,,
ADDED_FUNDS,667897,595e994a-de7e-4a86-89d8-09c148ea7456,595e994a-de7e-4a86-89d8-09c148ea7456,ADDED_FUNDS,,,,3d476175-87e9-45bf-b7c9-c8e136e011b2,CREDIT,GitHub Sponsors payment,2023-03-07T00:39:19.520Z,...,,,,,,,,,,
BALANCE_TRANSFER,655320,c82dfdbd-4939-4a9e-ad2b-d3ed42ca190e,c82dfdbd-4939-4a9e-ad2b-d3ed42ca190e,BALANCE_TRANSFER,,,,abec7d44-fac7-49ea-84f4-35ef44f32332,CREDIT,Financial contribution to IFC.js,2023-03-19T19:34:57.662Z,...,,,,,,,,,,
BALANCE_TRANSFER,799996,3fa54306-3edd-4178-bfd9-8e8aba7de49b,3fa54306-3edd-4178-bfd9-8e8aba7de49b,BALANCE_TRANSFER,,,,c7fe1cdc-a29e-45ab-a9a3-ef52aa80b6f9,DEBIT,Financial contribution to Open Source Collective,2023-01-10T13:22:49.613Z,...,,,,,,,,,,
CONTRIBUTION,618017,e8a571a5-187c-48be-aec0-6ccc9a6c9b51,e8a571a5-187c-48be-aec0-6ccc9a6c9b51,CONTRIBUTION,,,,992f24ba-26a9-4021-a62c-6fe67b046e7e,CREDIT,Monthly financial contribution to JHipster (Ba...,2023-04-01T23:10:41.550Z,...,,,,,,,,,,
CONTRIBUTION,670346,965205dd-08d8-498c-a4ef-8d3466d894cd,965205dd-08d8-498c-a4ef-8d3466d894cd,CONTRIBUTION,,,,2083135a-baea-47a6-9cb3-f5ab32a9ccd4,CREDIT,Monthly financial contribution to Logseq (Back...,2023-03-05T11:48:24.595Z,...,,,,,,,,,,
EXPENSE,643441,76680a4d-e74c-47a5-b836-42283f914ff9,76680a4d-e74c-47a5-b836-42283f914ff9,EXPENSE,,,,676fb308-a0b7-4214-bb3f-34eec2a19c7c,DEBIT,Amazon Web Services (Dec 2021 - March 2023),2023-03-30T17:05:03.279Z,...,INDIVIDUAL,Individual,Expense,,PAYPAL,,,,,
EXPENSE,510628,88bc4d90-f907-4f69-bdf4-ea2087b1fdb0,88bc4d90-f907-4f69-bdf4-ea2087b1fdb0,EXPENSE,,,,3e84c951-07b3-41db-a81e-d4a1c67781a7,DEBIT,AnkiDroid Development - Feb 2023,2023-05-26T15:01:14.464Z,...,INDIVIDUAL,Individual,Expense,,PAYPAL,,,,,
HOST_FEE,110435,5ef59acc-3fed-46b3-9f74-e0f80844f50e,5ef59acc-3fed-46b3-9f74-e0f80844f50e,HOST_FEE,,,,d233cfda-e48c-4e0e-989c-5a3cbdc22856,CREDIT,Host Fee,2023-11-02T15:18:48.502Z,...,,,,,,,,,,
HOST_FEE,517140,bf902c22-7d87-42d6-aa8c-4138e06f95ac,bf902c22-7d87-42d6-aa8c-4138e06f95ac,HOST_FEE,,,,d7cc5288-0b8c-4e18-8a15-da67f4113c6e,CREDIT,Host Fee,2023-05-21T10:52:24.314Z,...,,,,,,,,,,


In [13]:
# show all columns
pd.set_option('display.max_columns', None)
df_platform.sample(10)

Unnamed: 0,id,uuid,kind,paymentMethod,payoutMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,expense,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,payoutMethod.type,payoutMethod.name,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
657467,1d378403-fdc4-4440-ab97-1254d1dbca22,1d378403-fdc4-4440-ab97-1254d1dbca22,HOST_FEE_SHARE,,,,826b4b89-b2fa-43e2-ac18-a872a131322f,DEBIT,Host Fee Share,2023-03-17T10:31:18.771Z,,False,False,,False,,Debit,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,gnxdwzj3-le5mpwmw-bdvqy8rv-bko04a97,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
837507,129f8cdb-a745-4fce-b508-2e41f143c025,129f8cdb-a745-4fce-b508-2e41f143c025,HOST_FEE,,,,97449dad-76dc-48f0-b8c8-46f109132693,CREDIT,Host Fee,2023-01-01T05:04:28.841Z,,False,False,,False,,Credit,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,4rxg0j35-lzkwm6v3-v5xpvoe9-8n47daby,139424,NativeScript,nativescript,False,COLLECTIVE,Collective,mywxoz34-09rl6k9m-mjbqvenb-dj7gk85a,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
45022,ff31fed8-9cdd-476c-b65c-7959d02986be,ff31fed8-9cdd-476c-b65c-7959d02986be,HOST_FEE,,,,0922f837-9878-4fee-952d-6e34c78f5eac,CREDIT,Host Fee,2023-12-02T07:03:18.632Z,,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,500,Amount,USD,500,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,7mywxoz3-409rl6kg-wgnpvenb-dj7gk85a,13868,Framework7,framework7,False,COLLECTIVE,Collective,3kzxy4v0-7wlr6m0n-xympmj9n-o8agdbe5,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
397500,7e7ab7b6-cb7a-47c9-bfc7-ab4275283650,7e7ab7b6-cb7a-47c9-bfc7-ab4275283650,CONTRIBUTION,,,,b11d0710-4cc7-43fe-9d3b-32628f527170,CREDIT,Monthly financial contribution to Apple M1 bui...,2023-07-03T10:02:48.627Z,0H2728403H500332J,False,False,,False,,Credit,USD,1000,Amount,USD,1000,Amount,USD,901,Amount,USD,901,Amount,USD,-99,Amount,ggnxdwzj-3le5mpw7-4kjqy8rv-bko04a97,,IfcOpenShell,opensourcebim,,,,zaxon793-jy8gpl3b-3x9pbrkd-emwl5v04,539821,StefStap,stefstap,False,INDIVIDUAL,Individual,vedj9wro-z3a56dlj-5eap7blg-8x4m0ykn,True,True,True,TransactionPermissions,,PAYPAL,,SUBSCRIPTION,,,,,,,,,,,,,,,,,,
516241,c23e3901-d627-4e9b-b423-2cf0d2131233,c23e3901-d627-4e9b-b423-2cf0d2131233,HOST_FEE_SHARE_DEBT,,,,e6cf1eeb-33dc-43ec-92b4-43acbc7e9594,CREDIT,Host Fee Share owed to Open Collective,2023-05-22T11:03:37.170Z,,False,False,,False,,Credit,USD,10,Amount,USD,10,Amount,USD,10,Amount,USD,10,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,mywxoz34-09rl6ka0-jl4pvenb-dj7gk85a,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
636595,d9e3e0ff-fc96-4861-b0be-becf7f9885ca,d9e3e0ff-fc96-4861-b0be-becf7f9885ca,CONTRIBUTION,,,,ae4b726e-4883-47bd-81de-2f816838bc71,CREDIT,Monthly financial contribution to webpack (Bac...,2023-04-01T04:05:24.146Z,ch_3MrvgqBYycQg1OMf0KsK3jDc,False,False,,False,,Credit,USD,200,Amount,USD,200,Amount,USD,162,Amount,USD,162,Amount,USD,-38,Amount,wov349mr-wgz75lpy-mzpa08dj-eybknoxd,302.0,webpack,webpack,False,COLLECTIVE,Collective,8a47byg9-nxozdp8y-5r7qmjlv-03rek5w8,26504,Flip a coin,coin-flip-flip-a-coin,False,INDIVIDUAL,Individual,e0adkjrv-8xwm69vb-b9kq7z5n-4l93bgoy,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,,,
431524,25c4e78a-ae1a-49eb-b056-8555149581ad,25c4e78a-ae1a-49eb-b056-8555149581ad,HOST_FEE_SHARE,,,,43ff3767-e04b-4aa8-8d56-943accb0e8b9,DEBIT,Host Fee Share,2023-07-01T04:05:17.762Z,,False,False,,False,,Debit,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,8k03reyd-5agmq5l9-37oplbwo-z7j4nxv9,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
224174,7c1708b9-2c13-4241-95b7-f49f3d367831,7c1708b9-2c13-4241-95b7-f49f3d367831,HOST_FEE,,,,3d7b6971-3f05-405c-a538-867cc47fc39f,DEBIT,Host Fee,2023-09-29T23:59:31.937Z,,False,False,,False,,Debit,USD,-690,Amount,USD,-690,Amount,USD,-690,Amount,USD,-690,Amount,USD,0,Amount,3k0exgzn-m8yj64rm-nemp5wao-9r7b4dlv,501660.0,GitLink,gitlink,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,gnxdwzj3-le5mpwgk-xeeqy8rv-bko04a97,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
412558,f109fc7f-3cd6-486b-b466-eb6f4e5fde40,f109fc7f-3cd6-486b-b466-eb6f4e5fde40,HOST_FEE,,,,d4a7c07d-55d2-437c-937f-8062e2c6f100,CREDIT,Host Fee,2023-07-02T00:05:52.297Z,,False,False,,False,,Credit,USD,20,Amount,USD,20,Amount,USD,20,Amount,USD,20,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,rmvrwng4-kj03dpbm-g5aqz57o-yl9e8xba,16458,Simple DNSCrypt,simplednscrypt,False,COLLECTIVE,Collective,vrgbk357-l4x96ele-5zjqomew-a0jdyzn8,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
140425,de010e73-2a56-4b3f-b6ba-42a974064927,de010e73-2a56-4b3f-b6ba-42a974064927,HOST_FEE,,,,75ea54ab-280d-4121-870c-4b03faadd6b1,DEBIT,Host Fee,2023-11-01T09:02:17.225Z,,False,False,,False,,Debit,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,0,Amount,ggnxdwzj-3le5mpw4-ee9qy8rv-bko04a97,143661.0,Coc.nvim,cocnvim,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,mywxoz34-09rl6keb-3ol6venb-dj7gk85a,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,


In [14]:
# count transactions by kind
df_platform.groupby(['kind']).size()

kind
ADDED_FUNDS                        2616
BALANCE_TRANSFER                    116
CONTRIBUTION                     193827
EXPENSE                            7582
HOST_FEE                         385894
HOST_FEE_SHARE                   192946
HOST_FEE_SHARE_DEBT               61479
PAYMENT_PROCESSOR_COVER             728
PAYMENT_PROCESSOR_DISPUTE_FEE        18
PLATFORM_TIP_DEBT                    11
PREPAID_PAYMENT_METHOD               13
dtype: int64

# Reconciling Stripe

## Automatic reconciliation by merchantId

In [15]:
# copy df_strip to df_s
df_stripe_reconciliation = df_stripe.copy()

In [16]:
df_stripe_reconciliation.head()

Unnamed: 0,id,Created date (UTC),Description,Seller Message,Amount,Amount Refunded,Currency,Converted Amount,Converted Amount Refunded,Fee,Taxes On Fee,Converted Currency,Mode,Status,Statement Descriptor,Customer ID,Customer Description,Captured,Card ID,PaymentIntent ID,Application Fee,Application ID,from (metadata),to (metadata)
0,ch_3OTXgHBYycQg1OMf0IfKN8Sw,2023-12-31 22:46:55,Monthly financial contribution to Logseq (Back...,Payment complete.,5.0,0.0,usd,5.0,0.0,0.77,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,cus_PI7wZ2sLFYmMPi,https://opencollective.com/sanket-sharma,True,pm_1OTXmVBYycQg1OMfzXUwCvAm,pi_3OTXgHBYycQg1OMf0PkaNI3M,0.25,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,https://opencollective.com/sanket-sharma,https://opencollective.com/logseq
3,ch_3OTXVMBYycQg1OMf0qVZ5tjm,2023-12-31 22:29:41,Financial contribution to OpenSCAD,Payment complete.,50.0,0.0,usd,50.0,0.0,4.25,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTXVpBYycQg1OMfien2j8Cr,pi_3OTXVMBYycQg1OMf02f4Vhyl,2.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/openscad
4,ch_3OTULSBYycQg1OMf10wlDceK,2023-12-31 19:06:47,Monthly financial contribution to LibreELEC,Payment complete.,10.0,0.0,usd,10.0,0.0,1.24,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,cus_PI4UZkic5DVDO1,https://opencollective.com/daniel-dolejska,True,pm_1OTULSBYycQg1OMf7Lt0BwEi,pi_3OTULSBYycQg1OMf1saV7WWG,0.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,https://opencollective.com/daniel-dolejska,https://opencollective.com/libreelec
5,ch_3OTUG8BYycQg1OMf0FRuZULZ,2023-12-31 19:03:18,Financial contribution to Spotube,Payment complete.,5.0,0.0,usd,5.0,0.0,0.77,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTUI5BYycQg1OMfAbkDmXhS,pi_3OTUG8BYycQg1OMf0AcGFa2r,0.25,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/spotube
7,ch_3OTT8EBYycQg1OMf0gR3PzrY,2023-12-31 17:49:54,Financial contribution to .fmbot (.fmbot lifet...,Payment complete.,49.99,0.0,usd,49.99,0.0,4.25,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTT94BYycQg1OMf9AOHyAVx,pi_3OTT8EBYycQg1OMf0xdn31JV,2.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/fmbot


In [17]:
# df_platform where kind is CONTRIBUTION and paymentMethod.service is STRIPE
df_platform_stripe = df_platform[(df_platform['kind'] == 'CONTRIBUTION') & (df_platform['paymentMethod.service'] == 'STRIPE')]

In [18]:
df_platform_stripe.head()

Unnamed: 0,id,uuid,kind,paymentMethod,payoutMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,expense,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,payoutMethod.type,payoutMethod.name,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
3,20e9111c-db7b-4002-8dc1-1c2538ffd6b2,20e9111c-db7b-4002-8dc1-1c2538ffd6b2,CONTRIBUTION,,,,00e1e29f-3348-4ebd-9598-c080adc4cbaf,CREDIT,Monthly financial contribution to Logseq (Back...,2023-12-31T22:46:58.403Z,ch_3OTXgHBYycQg1OMf0IfKN8Sw,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,448,Amount,USD,448,Amount,USD,-52,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,n4gx0bro-v5m96nkd-3ay6d8lk-3ey7jzwa,744511,Sanket Sharma,sanket-sharma,False,INDIVIDUAL,Individual,8k03reyd-5agmq5r7-rlyplbwo-z7j4nxv9,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,,,
7,f3fb4940-1026-4440-bd85-bc3c0cb439b0,f3fb4940-1026-4440-bd85-bc3c0cb439b0,CONTRIBUTION,,,,9931763a-79b1-4141-923f-216db174f56e,CREDIT,Financial contribution to OpenSCAD,2023-12-31T22:29:44.179Z,ch_3OTXVMBYycQg1OMf0qVZ5tjm,False,False,,False,,Credit,USD,5000,Amount,USD,5000,Amount,USD,4825,Amount,USD,4825,Amount,USD,-175,Amount,rmvrwng4-kj03dpbk-ljzpz57o-yl9e8xba,132610.0,OpenSCAD,openscad,False,COLLECTIVE,Collective,n4gx0bro-v5m96n0l-878qd8lk-3ey7jzwa,467683,Holland Hopson,guest-8761eb54,False,INDIVIDUAL,Individual,ywz9j4av-god8pg8g-8ly6mr35-nxklb0e7,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,,,
46,e438b64b-473b-405b-be1c-67c7cfc58fba,e438b64b-473b-405b-be1c-67c7cfc58fba,CONTRIBUTION,,,,91d79b1b-e38d-4559-be03-ad4a5f5fa301,CREDIT,Monthly financial contribution to LibreELEC,2023-12-31T19:06:49.150Z,ch_3OTULSBYycQg1OMf10wlDceK,False,False,,False,,Credit,USD,1000,Amount,USD,1000,Amount,USD,926,Amount,USD,926,Amount,USD,-74,Amount,88rzownx-l9e50pxv-3vb6ymvb-dgk7j43a,204839.0,LibreELEC,libreelec,False,COLLECTIVE,Collective,e0adkjrv-8xwm69oy-zwl67z5n-4l93bgoy,744470,Daniel Dolejska,daniel-dolejska,False,INDIVIDUAL,Individual,eng0kzdy-vor4pzez-e7bpbma8-37xlw95j,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,,,
55,8ee3d5b0-b031-4b06-b7ab-69a8419900c2,8ee3d5b0-b031-4b06-b7ab-69a8419900c2,CONTRIBUTION,,,,0870251c-c506-421d-b969-9b98f303bb90,CREDIT,Financial contribution to Spotube,2023-12-31T19:03:20.581Z,ch_3OTUG8BYycQg1OMf0FRuZULZ,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,448,Amount,USD,448,Amount,USD,-52,Amount,3kzxy4v0-7wlr6mvy-5m76mj9n-o8agdbe5,475166.0,Spotube,spotube,False,COLLECTIVE,Collective,zaxon793-jy8gpl0r-4k9pbrkd-emwl5v04,744469,Ignacho,guest-a4555b91,False,INDIVIDUAL,Individual,8rzownxl-9e50pxox-orkpymvb-dgk7j43a,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,,,
64,0c7db04c-bb75-44cf-9f4a-149c07b09fd4,0c7db04c-bb75-44cf-9f4a-149c07b09fd4,CONTRIBUTION,,,,315eb508-ca18-462a-bf43-5689b25f729d,CREDIT,Financial contribution to .fmbot (.fmbot lifet...,2023-12-31T17:49:57.026Z,ch_3OTT8EBYycQg1OMf0gR3PzrY,False,False,,False,,Credit,USD,4999,Amount,USD,4999,Amount,USD,4824,Amount,USD,4824,Amount,USD,-175,Amount,ggnxdwzj-3le5mpwe-zrvpy8rv-bko04a97,126137.0,.fmbot,fmbot,False,COLLECTIVE,Collective,mywxoz34-09rl6k3m-nvyqvenb-dj7gk85a,744444,Ijipop,guest-44e5fa66,False,INDIVIDUAL,Individual,vrgbk357-l4x96ey9-je5pomew-a0jdyzn8,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,,,


In [19]:
# for each row in df_stripe, find the corresponding row in df_platform, match df_stripe['id'] with df_platform['merchantId']
# if the row exists, add the platform transaction id to df_stripe in a new column called platform_transaction_id
# if the row does not exist, set the platform_transaction_id to None

# Merge the two dataframes on the 'id' from df_stripe and 'merchantId' from df_platform
merged_df = pd.merge(df_stripe_reconciliation, df_platform[['merchantId', 'id']], left_on='id', right_on='merchantId', how='left', suffixes=('', '_platform_transaction'))

# Drop the extra 'merchantId' column from the merge
merged_df.drop(columns='merchantId', inplace=True)

# Assign the resulting dataframe back to df_stripe
df_stripe_reconciliation = merged_df

In [20]:
# count the number of Stripe transactions that have a id_platform_transaction
print(f"Stripe transactions that have a corresponding platform transaction: {len(df_stripe_reconciliation[df_stripe_reconciliation['id_platform_transaction'].notnull()])}")

# count the number of Stripe transactions that do not have a id_platform_transaction
print(f"Stripe transactions without a corresponding platform transaction: {len(df_stripe_reconciliation[df_stripe_reconciliation['id_platform_transaction'].isnull()])}")

# print the percentage of Stripe transactions that have a corresponding platform transaction
print(f"Percentage of Stripe transactions that have a corresponding platform transaction: {round(len(df_stripe_reconciliation[df_stripe_reconciliation['id_platform_transaction'].notnull()])/len(df_stripe_reconciliation)*100, 2)}%")

Stripe transactions that have a corresponding platform transaction: 131588
Stripe transactions without a corresponding platform transaction: 432
Percentage of Stripe transactions that have a corresponding platform transaction: 99.67%


## Reconciling remaining Stripe transactions

In [21]:
df_stripe_remainder = df_stripe_reconciliation[df_stripe_reconciliation['id_platform_transaction'].isnull()]

In [22]:
df_stripe_remainder.head()

Unnamed: 0,id,Created date (UTC),Description,Seller Message,Amount,Amount Refunded,Currency,Converted Amount,Converted Amount Refunded,Fee,Taxes On Fee,Converted Currency,Mode,Status,Statement Descriptor,Customer ID,Customer Description,Captured,Card ID,PaymentIntent ID,Application Fee,Application ID,from (metadata),to (metadata),id_platform_transaction
116,ch_3OSjH1BYycQg1OMf1SrSebC3,2023-12-29 16:51:06,Contribution to manjaro (US),Payment complete.,1.0,0.0,usd,1.0,0.0,0.33,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OSjH3BYycQg1OMfAPu0eks5,pi_3OSjH1BYycQg1OMf1hLSOPbZ,,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/manjaro-us,
117,ch_3OSjH1BYycQg1OMf03ruobeK,2023-12-29 16:51:06,Contribution to manjaro (US),Payment complete.,1.0,0.0,usd,1.0,0.0,0.33,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OSjH3BYycQg1OMfOEXlem38,pi_3OSjH1BYycQg1OMf0l0hNQLu,,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/manjaro-us,
118,ch_3OSjH1BYycQg1OMf0fkywX7k,2023-12-29 16:51:06,Contribution to manjaro (US),Payment complete.,1.0,0.0,usd,1.0,0.0,0.33,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OSjH3BYycQg1OMftpAiBEfm,pi_3OSjH1BYycQg1OMf0QvSjaGu,,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/manjaro-us,
119,ch_3OSjH0BYycQg1OMf04HatfLy,2023-12-29 16:51:06,Contribution to manjaro (US),Payment complete.,1.0,0.0,usd,1.0,0.0,0.33,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OSjH3BYycQg1OMfqhqMfElP,pi_3OSjH0BYycQg1OMf0xkJN2s7,,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/manjaro-us,
175,ch_3OSN05BYycQg1OMf1xfVvxkZ,2023-12-28 17:04:15,Contribution to manjaro (US),Payment complete.,1.0,0.0,usd,1.0,0.0,0.33,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OSN0EBYycQg1OMf6jMM1Jzf,pi_3OSN05BYycQg1OMf13FUXY3w,,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/manjaro-us,


In [23]:
# count df_stripe_remainder by 'Status'
df_stripe_remainder.groupby(['Status']).size()

Status
Paid        143
Refunded    289
dtype: int64

In [24]:
# count df_stripe_remainder by 'Seller Message'
df_stripe_remainder.groupby(['Seller Message']).size()

Seller Message
One of your rules allowed this payment, overriding Stripe's risk evaluation and any address or CVC checks.     41
Payment complete.                                                                                             376
Stripe evaluated this payment as having elevated risk, and placed it in your manual review queue.              15
dtype: int64

In [25]:
# sum amount of df_stripe_remainder by status
df_stripe_remainder.groupby(['Status'])['Amount'].sum()

Status
Paid        3907.0
Refunded     293.0
Name: Amount, dtype: float64

In [26]:
# count df_stripe_remainder by 'Created date (UTC)' date only
# create a new column with the date only
df_stripe_remainder['Created date'] = pd.to_datetime(df_stripe_remainder['Created date (UTC)']).dt.date

df_stripe_remainder.groupby(['Created date']).size()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_stripe_remainder['Created date'] = pd.to_datetime(df_stripe_remainder['Created date (UTC)']).dt.date


Created date
2023-01-25     1
2023-03-13    10
2023-03-14    15
2023-03-15     2
2023-03-16     7
              ..
2023-06-29     8
2023-06-30     2
2023-12-27     2
2023-12-28     5
2023-12-29     4
Length: 73, dtype: int64

In [27]:
df_stripe_remainder.to_csv(f'data/missing_osc_stripe_transactions.csv')

In [28]:
# count df_stripe_remainder by 'to (metadata)'
df_stripe_remainder.groupby(['to (metadata)']).size()

to (metadata)
https://opencollective.com/advanced-bim-frontend-developer-course      1
https://opencollective.com/curl                                       37
https://opencollective.com/hledger                                     1
https://opencollective.com/manjaro-us                                 10
https://opencollective.com/mvvmcross                                   1
https://opencollective.com/openaddresses                             324
dtype: int64

In [29]:
df_platform.head()

Unnamed: 0,id,uuid,kind,paymentMethod,payoutMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,expense,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,payoutMethod.type,payoutMethod.name,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
0,a985a0d8-c94e-4173-9918-3246e3231021,a985a0d8-c94e-4173-9918-3246e3231021,HOST_FEE_SHARE,,,,00e1e29f-3348-4ebd-9598-c080adc4cbaf,DEBIT,Host Fee Share,2023-12-31T22:46:58.361Z,,False,False,,False,,Debit,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,-25,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,vedj9wro-z3a56dzb-zllq7blg-8x4m0ykn,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
1,377540a1-80e4-45dd-8cf8-44e5a8d04f4d,377540a1-80e4-45dd-8cf8-44e5a8d04f4d,HOST_FEE,,,,00e1e29f-3348-4ebd-9598-c080adc4cbaf,CREDIT,Host Fee,2023-12-31T22:46:58.361Z,,False,False,,False,,Credit,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914,Logseq,logseq,False,COLLECTIVE,Collective,vrgbk357-l4x96ey9-yl4pomew-a0jdyzn8,True,True,True,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
2,d8b98e5b-60a2-4bb0-8e66-a0528ecaaf7c,d8b98e5b-60a2-4bb0-8e66-a0528ecaaf7c,HOST_FEE,,,,00e1e29f-3348-4ebd-9598-c080adc4cbaf,DEBIT,Host Fee,2023-12-31T22:46:58.352Z,,False,False,,False,,Debit,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,0,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,a47byg9n-xozdp8n7-nlmpmjlv-03rek5w8,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,
3,20e9111c-db7b-4002-8dc1-1c2538ffd6b2,20e9111c-db7b-4002-8dc1-1c2538ffd6b2,CONTRIBUTION,,,,00e1e29f-3348-4ebd-9598-c080adc4cbaf,CREDIT,Monthly financial contribution to Logseq (Back...,2023-12-31T22:46:58.403Z,ch_3OTXgHBYycQg1OMf0IfKN8Sw,False,False,,False,,Credit,USD,500,Amount,USD,500,Amount,USD,448,Amount,USD,448,Amount,USD,-52,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,n4gx0bro-v5m96nkd-3ay6d8lk-3ey7jzwa,744511,Sanket Sharma,sanket-sharma,False,INDIVIDUAL,Individual,8k03reyd-5agmq5r7-rlyplbwo-z7j4nxv9,True,True,True,TransactionPermissions,,STRIPE,,CREDITCARD,,,,,,,,,,,,,,,,,,
4,aafd8364-3df8-4d34-b751-55b048be7332,aafd8364-3df8-4d34-b751-55b048be7332,HOST_FEE_SHARE,,,,9931763a-79b1-4141-923f-216db174f56e,DEBIT,Host Fee Share,2023-12-31T22:29:44.108Z,,False,False,,False,,Debit,USD,-250,Amount,USD,-250,Amount,USD,-250,Amount,USD,-250,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,mywxoz34-09rl6krl-r7bqvenb-dj7gk85a,False,True,False,TransactionPermissions,,,,,,,,,,,,,,,,,,,,,,


# Reconciling Wise

In [38]:
df_wise_reconciliation = df_wise.copy()
print(df_wise_reconciliation.info())
df_wise_reconciliation.head()

<class 'pandas.core.frame.DataFrame'>
Index: 3463 entries, 0 to 0
Data columns (total 19 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     3463 non-null   int64  
 1   user                   3463 non-null   int64  
 2   targetAccount          3463 non-null   int64  
 3   sourceAccount          3427 non-null   object 
 4   quote                  0 non-null      object 
 5   quoteUuid              3463 non-null   object 
 6   status                 3463 non-null   object 
 7   reference              3463 non-null   object 
 8   rate                   3463 non-null   float64
 9   created                3463 non-null   object 
 10  business               3463 non-null   int64  
 11  transferRequest        0 non-null      object 
 12  details.reference      3463 non-null   object 
 13  hasActiveIssues        3463 non-null   bool   
 14  sourceCurrency         3463 non-null   object 
 15  sourceValue 

Unnamed: 0,id,user,targetAccount,sourceAccount,quote,quoteUuid,status,reference,rate,created,business,transferRequest,details.reference,hasActiveIssues,sourceCurrency,sourceValue,targetCurrency,targetValue,customerTransactionId
0,575093811,6421549,298725929,59807023,,be48f661-368c-430b-82f9-94a101390e59,outgoing_payment_sent,,1.0,2023-01-02 00:07:08,4556280,,,False,GBP,46.86,GBP,46.86,72d112b5-775a-416e-a0d0-df6219c9d708
0,575919595,6421549,219325936,59807023,,21416ea6-87fd-4bbd-8a64-2082c6894abc,outgoing_payment_sent,,1.0,2023-01-02 23:47:59,4556280,,,False,USD,1550.0,USD,1550.0,9e7ca373-3eb6-45a2-9403-9e9d9f786410
0,575919769,6421549,333432475,59807023,,5f8c9bc9-0e3e-4748-9ef1-945507d73bb9,outgoing_payment_sent,,0.93705,2023-01-02 23:48:25,4556280,,,False,USD,71.79,EUR,67.27,f4d246b7-5c51-4430-9698-54e8750fd3c9
0,575919989,6421549,333432571,59807023,,778efd01-6d36-4b01-994a-253d2c064655,outgoing_payment_sent,,0.9372,2023-01-02 23:49:02,4556280,,,False,USD,500.0,EUR,468.6,b2abdbb0-2f61-4dc4-bad5-bd04cd0cdaa6
0,575920084,6421549,266354361,59807023,,59ff5aba-aa30-4fed-ad78-4160aca5dc60,outgoing_payment_sent,,1.0,2023-01-02 23:49:16,4556280,,,False,USD,265.0,USD,265.0,1841e526-f886-42d4-9cd4-c1afc5d017e9


In [51]:
df_platform_wise = df_platform[(df_platform['kind'] == 'EXPENSE') & (df_platform['payoutMethod.type'] == 'BANK_ACCOUNT')]
print(df_platform_wise.info())
df_platform_wise.head()

<class 'pandas.core.frame.DataFrame'>
Index: 3483 entries, 3986 to 819753
Data columns (total 73 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   id                                    3483 non-null   object 
 1   uuid                                  3483 non-null   object 
 2   kind                                  3483 non-null   object 
 3   paymentMethod                         0 non-null      float64
 4   payoutMethod                          0 non-null      float64
 5   giftCardEmitterAccount                0 non-null      float64
 6   group                                 3483 non-null   object 
 7   type                                  3483 non-null   object 
 8   description                           3483 non-null   object 
 9   createdAt                             3483 non-null   object 
 10  merchantId                            3431 non-null   object 
 11  isRefunded       

Unnamed: 0,id,uuid,kind,paymentMethod,payoutMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,expense,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,refundTransaction.id,payoutMethod.type,payoutMethod.name,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
3986,b3978b93-6791-474d-91e8-2ee20a19caa7,b3978b93-6791-474d-91e8-2ee20a19caa7,EXPENSE,,,,e344e7cb-9e21-4368-89ac-86c7af1ca89c,DEBIT,Repo and module support,2023-12-28T17:09:36.200Z,914831281,False,False,,False,,Debit,USD,-12000,Amount,USD,-12000,Amount,USD,-12039,Amount,USD,-12039,Amount,USD,-39,Amount,dgm9bnk8-0437xqr3-kw5qvzeo-ljdayw5r,46356.0,dbatools,dbatools,False,COLLECTIVE,Collective,rmvrwng4-kj03dpb3-b8l6z57o-yl9e8xba,46358,Shawn Melton,wsmelton,False,INDIVIDUAL,Individual,8k03reyd-5agmq5rz-dyyplbwo-z7j4nxv9,False,False,False,TransactionPermissions,,,,,vjrkx5lm-nv904qjz-eanp8bwa-7zdygoe3,INVOICE,Repo and module support,,[],rmvrwng4-kj03dpb3-b8l6z57o-yl9e8xba,Shawn Melton,wsmelton,INDIVIDUAL,Individual,Expense,,BANK_ACCOUNT,,,,,
3987,89ac51ae-2ff1-4786-88a3-052deedd71fe,89ac51ae-2ff1-4786-88a3-052deedd71fe,EXPENSE,,,,101aef0d-4c40-4924-95b6-48b4ed506103,DEBIT,Developer gift,2023-12-28T17:08:55.206Z,914901258,False,False,,False,,Debit,USD,-221386,Amount,USD,-221386,Amount,USD,-222281,Amount,USD,-222281,Amount,USD,-895,Amount,vjrkx5lm-nv904qjl-9lz68bwa-7zdygoe3,30588.0,Stellarium,stellarium,False,COLLECTIVE,Collective,53kzxy4v-07wlr6mk-rk7qmj9n-o8agdbe5,30590,Georg Zotti,georg-zotti,False,INDIVIDUAL,Individual,vrgbk357-l4x96eyv-bj4pomew-a0jdyzn8,False,False,False,TransactionPermissions,,,,,5ax8emk7-lnw54q7g-mgdqgyvj-0o93zdrb,INVOICE,Developer gift,,[],53kzxy4v-07wlr6mk-rk7qmj9n-o8agdbe5,Georg Zotti,georg-zotti,INDIVIDUAL,Individual,Expense,,BANK_ACCOUNT,,,,,
3994,3985e9cb-6e7e-4b56-806e-c8623c909db9,3985e9cb-6e7e-4b56-806e-c8623c909db9,EXPENSE,,,,4ff8a423-6ac4-47ad-bf6b-d97875c4c01c,DEBIT,Yii 3 and yiiframework.com development,2023-12-28T16:35:15.005Z,914833331,False,False,,False,,Debit,USD,-146400,Amount,USD,-146400,Amount,USD,-146439,Amount,USD,-146439,Amount,USD,-39,Amount,eeng0kzd-yvor4pz8-ggbpbma8-37xlw95j,30649.0,Yii Software,yiisoft,False,COLLECTIVE,Collective,rxg0j35l-zkwm6v5x-layqvoe9-8n47daby,451148,Alexey Rogachev,alexey-rogachev,False,INDIVIDUAL,Individual,eng0kzdy-vor4pzev-5lgpbma8-37xlw95j,False,False,False,TransactionPermissions,,,,,vjrkx5lm-nv904qjz-e7np8bwa-7zdygoe3,INVOICE,Yii 3 and yiiframework.com development,,[],rxg0j35l-zkwm6v5x-layqvoe9-8n47daby,Alexey Rogachev,alexey-rogachev,INDIVIDUAL,Individual,Expense,,BANK_ACCOUNT,,,,,
4008,1ea98914-c16d-48b9-ba66-80cd18df2d4c,1ea98914-c16d-48b9-ba66-80cd18df2d4c,EXPENSE,,,,c38b0530-a4aa-4799-ac6d-8cc68ca11cab,DEBIT,$500 bounty claim for https://github.com/jhips...,2023-12-28T16:06:53.760Z,914834934,False,False,,False,,Debit,USD,-50001,Amount,USD,-50001,Amount,USD,-50228,Amount,USD,-50228,Amount,USD,-227,Amount,ov349mrw-gz75lpyy-4npa08dj-eybknoxd,10350.0,JHipster,generator-jhipster,False,COLLECTIVE,Collective,53kzxy4v-07wlr6mv-44b6mj9n-o8agdbe5,20442,Aurélien Mino,murdos,False,INDIVIDUAL,Individual,eng0kzdy-vor4pzev-50gpbma8-37xlw95j,False,False,False,TransactionPermissions,,,,,re0adkjr-v8xwm69v-9lbq7z5n-4l93bgoy,INVOICE,$500 bounty claim for https://github.com/jhips...,,[],53kzxy4v-07wlr6mv-44b6mj9n-o8agdbe5,Aurélien Mino,murdos,INDIVIDUAL,Individual,Expense,,BANK_ACCOUNT,,,,,
4009,c8aa3c08-c4b9-45a8-bc9c-e0bc9e8c6121,c8aa3c08-c4b9-45a8-bc9c-e0bc9e8c6121,EXPENSE,,,,691222c4-eaa6-42ba-84a8-e1c0d094c299,DEBIT,Community Management and Support in December 2...,2023-12-28T16:06:24.099Z,914834378,False,False,,False,,Debit,USD,-443976,Amount,USD,-443976,Amount,USD,-445739,Amount,USD,-445739,Amount,USD,-1763,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,ywz9j4av-god8pgmk-zk9pmr35-nxklb0e7,466223,Ramses Oudt,rroudt,False,INDIVIDUAL,Individual,ywz9j4av-god8pg8y-4a36mr35-nxklb0e7,False,False,False,TransactionPermissions,,,,,zzaxon79-3jy8gplo-57jpbrkd-emwl5v04,INVOICE,Community Management and Support in December 2...,Chamber of Commerce # the Netherlands: 8084579...,[],ywz9j4av-god8pgmk-zk9pmr35-nxklb0e7,Ramses Oudt,rroudt,INDIVIDUAL,Individual,Expense,,BANK_ACCOUNT,,,,,


In [50]:
merged_df = pd.merge(df_wise_reconciliation, df_platform_wise[['merchantId', 'id']].dropna(subset=['merchantId']).astype({'merchantId': 'int64'}), left_on='id', right_on='merchantId', how='left', suffixes=('', '_platform_transaction'))

# Drop the extra 'merchantId' column from the merge
# merged_df.drop(columns='merchantId', inplace=True)

# Assign the resulting dataframe back to df_stripe
df_wise_reconciliation = merged_df

print(f"Wise transactions that have a corresponding platform transaction: {len(df_wise_reconciliation[df_wise_reconciliation['id_platform_transaction'].notnull()])}")
print(f"Wise transactions without a corresponding platform transaction: {len(df_wise_reconciliation[df_wise_reconciliation['id_platform_transaction'].isnull()])}")
print(f"Percentage of Wise transactions that have a corresponding platform transaction: {round(len(df_wise_reconciliation[df_wise_reconciliation['id_platform_transaction'].notnull()])/len(df_wise_reconciliation)*100, 2)}%")

df_wise_reconciliation.head()

Wise transactions that have a corresponding platform transaction: 3403
Wise transactions without a corresponding platform transaction: 60
Percentage of Wise transactions that have a corresponding platform transaction: 98.27%


Unnamed: 0,id,user,targetAccount,sourceAccount,quote,quoteUuid,status,reference,rate,created,business,transferRequest,details.reference,hasActiveIssues,sourceCurrency,sourceValue,targetCurrency,targetValue,customerTransactionId,merchantId,id_platform_transaction
0,575093811,6421549,298725929,59807023,,be48f661-368c-430b-82f9-94a101390e59,outgoing_payment_sent,,1.0,2023-01-02 00:07:08,4556280,,,False,GBP,46.86,GBP,46.86,72d112b5-775a-416e-a0d0-df6219c9d708,575093811.0,94dbd855-92f9-41cf-a47d-2d296aae4eef
1,575919595,6421549,219325936,59807023,,21416ea6-87fd-4bbd-8a64-2082c6894abc,outgoing_payment_sent,,1.0,2023-01-02 23:47:59,4556280,,,False,USD,1550.0,USD,1550.0,9e7ca373-3eb6-45a2-9403-9e9d9f786410,575919595.0,aa44fee3-916a-42eb-915d-a28ef1ebc67a
2,575919769,6421549,333432475,59807023,,5f8c9bc9-0e3e-4748-9ef1-945507d73bb9,outgoing_payment_sent,,0.93705,2023-01-02 23:48:25,4556280,,,False,USD,71.79,EUR,67.27,f4d246b7-5c51-4430-9698-54e8750fd3c9,575919769.0,42d3544a-9157-4b98-a340-8b35b31f8378
3,575919989,6421549,333432571,59807023,,778efd01-6d36-4b01-994a-253d2c064655,outgoing_payment_sent,,0.9372,2023-01-02 23:49:02,4556280,,,False,USD,500.0,EUR,468.6,b2abdbb0-2f61-4dc4-bad5-bd04cd0cdaa6,575919989.0,0c4091c2-2072-4c49-a7c2-90ae8155744d
4,575920084,6421549,266354361,59807023,,59ff5aba-aa30-4fed-ad78-4160aca5dc60,outgoing_payment_sent,,1.0,2023-01-02 23:49:16,4556280,,,False,USD,265.0,USD,265.0,1841e526-f886-42d4-9cd4-c1afc5d017e9,575920084.0,77b9834e-3989-478a-82e9-540dbf4339cf


In [58]:
df_wise_remainder = df_wise_reconciliation[df_wise_reconciliation['id_platform_transaction'].isnull()]
df_wise_remainder.sample(10)

Unnamed: 0,id,user,targetAccount,sourceAccount,quote,quoteUuid,status,reference,rate,created,business,transferRequest,details.reference,hasActiveIssues,sourceCurrency,sourceValue,targetCurrency,targetValue,customerTransactionId,merchantId,id_platform_transaction
228,596046428,29739303,59807023,,,719ddd54-d03f-4919-b677-73d8b97f5eb7,outgoing_payment_sent,,1.0,2023-01-29 23:41:09,4556280,,,False,USD,240000.0,USD,240000.0,719ddd54-d03f-4919-b677-73d8b97f5eb7,,
1022,665203574,6421549,373146940,59807023.0,,eef65bf3-68cc-40b8-84bb-61475a161a95,funds_refunded,,1.0,2023-04-20 15:25:51,4556280,,,False,USD,2500.0,USD,2500.0,90e751f2-849b-4ab4-833f-f1488cd58065,,
718,639795561,42089850,352322636,59807023.0,,126efc32-9929-4dd4-8e6f-49ce7ec855da,outgoing_payment_sent,Correction for expense number 124655,1.0,2023-03-22 18:41:30,4556280,,Correction for expense number 124655,False,USD,36.71,USD,36.71,126efc32-9929-4dd4-8e6f-49ce7ec855da,,
1633,720250675,42089850,59807023,,,451c2e7d-60b0-421b-a127-02447d66a7c6,outgoing_payment_sent,,1.0,2023-06-20 16:23:01,4556280,,,False,USD,200000.0,USD,200000.0,451c2e7d-60b0-421b-a127-02447d66a7c6,,
1030,668545804,6421549,59807023,,,1c66d9c2-d8c3-453b-a5b5-201060b37141,outgoing_payment_sent,AutoCredit,1.0,2023-04-25 06:49:17,4556280,,AutoCredit,False,USD,114996.48,USD,114996.48,d46dfe3d-9f2c-477a-9f76-bf5782a1c1d1,,
3087,875562410,42089850,59807023,,,d034f636-5909-4fec-b16b-723d48282f98,outgoing_payment_sent,,1.0,2023-11-22 17:47:10,4556280,,,False,USD,200000.0,USD,200000.0,d034f636-5909-4fec-b16b-723d48282f98,,
2310,790965132,6421549,392402279,59807023.0,,ad4f47e7-ab2e-4e0a-a6e4-43b1ce2ccedd,funds_refunded,OC,4.9509,2023-09-01 11:32:19,4556280,,OC,False,USD,526.41,BRL,2606.21,05b31803-b128-41d3-a6fb-6ff33820c504,,
2714,839365943,42089850,59807023,,,e2a4bafa-9ab0-4e44-9ec6-f3ccc6e7d234,outgoing_payment_sent,,1.0,2023-10-18 19:12:25,4556280,,,False,USD,200000.0,USD,200000.0,e2a4bafa-9ab0-4e44-9ec6-f3ccc6e7d234,,
1650,723244668,6421549,393787757,59807023.0,,e03dc849-c3a0-4d39-97af-00d7f1b58ce5,funds_refunded,OC,4.6765,2023-06-23 16:12:55,4556280,,OC,False,USD,300.0,MYR,1402.95,803a01c9-3f9f-4005-a32a-1a9a8486c591,,
1813,733974762,42089850,59807023,,,23d83bca-5a47-4238-ad0f-30c8b73948ca,outgoing_payment_sent,,1.0,2023-07-05 02:21:21,4556280,,,False,USD,140000.0,USD,140000.0,23d83bca-5a47-4238-ad0f-30c8b73948ca,,


# Reconciling Bank

# Reconciling Paypal