In [55]:
import pandas as pd
import glob
import requests
import json
import time

# Project notes

- Identify transactions that appear on any of the accounts (banks, Stripe, Wise, PayPal) but are missing from Open Collective (OC)
- Identify any transactions on the platform that are missing from the accounts
- Prep a balance/p&l with minimal wrangling

Project steps:

- Clarify project intention and goal (fill in project assessment in Make OC)
- Map out data 
- Write scripts 
- Analyze and prepare accounts

# Load config

In [56]:
# host variables
# Slug of the Open Collective host to query; also used to build the data/{host}/{year}/ paths.
host = 'opensource'
# ISO-8601 timestamps (UTC, "Z" suffix) bounding the transaction query and the Wise transfer fetch.
# NOTE(review): 22:59:59.999Z looks like local midnight in a UTC+1 timezone - confirm this is intended.
dateFrom = '2022-12-31T22:59:59.999Z'
dateTo = '2023-01-05T22:59:59.999Z'
# In the cells shown here this is only used to build data/{host}/{year}/ file paths, not to filter data.
year = '2023'

In [57]:
# load API tokens from .env file
import os
from dotenv import load_dotenv
load_dotenv()
# os.getenv returns None when a variable is unset, so a missing entry is not
# reported here; it only surfaces later when the token is used in a request.
octoken = os.getenv('OCTOKEN')
wisetoken = os.getenv('WISETOKEN')

# set the profile id of the target Wise profile
wiseprofile = os.getenv('WISEPROFILE')

In [58]:
# Reload the data or not - set these to false after the first run to avoid reloading
# True: fetch from the Open Collective API and overwrite the local JSON/pickle; False: read the existing pickle.
reloadOC = True
# True: fetch transfers from the Wise API and write the Wise pickle; False: skip the fetch.
reloadWise = False
# True: rebuild the Stripe pickle from the exported CSV; False: skip the import.
reloadStripe = False

# Get data

TODO: Query below returns null for all merchantId. Need to get merchantId for all transactions where available to reconcile with Stripe, Wise and PayPal.

In [59]:
# load transactions from open collective or from local file

def fetch_transactions():
    """Fetch all transactions of the configured host from the Open Collective GraphQL API.

    Reads the module-level ``octoken``, ``host``, ``dateFrom`` and ``dateTo``
    values and pages through the ``transactions`` query with limit/offset
    until the reported ``totalCount`` has been collected or the server
    returns an empty page.

    Returns:
        list[dict]: the raw transaction nodes, in the order returned by the
        API (newest first, per the ``orderBy`` variable). If the API answers
        with GraphQL errors, they are printed and whatever was collected up
        to that point is returned.
    """
    # NOTE(review): the personal token is sent both in the URL and as a Bearer
    # header. URLs tend to end up in logs - confirm which form the API needs
    # and drop the other.
    url = f"https://opencollective.com/api/graphql/v2?personalToken={octoken}"

    # Your GraphQL query
    query = """
    query TransactionsTable(
        $hostAccount: AccountReferenceInput,
        $limit: Int!,
        $offset: Int!,
        $type: TransactionType,
        $paymentMethodType: [PaymentMethodType],
        $dateFrom: DateTime,
        $dateTo: DateTime,
        $kind: [TransactionKind],
        $includeIncognitoTransactions: Boolean,
        $includeGiftCardTransactions: Boolean,
        $includeChildrenTransactions: Boolean,
        $virtualCard: [VirtualCardReferenceInput],
        $orderBy: ChronologicalOrderInput,
        $group: String,
        $includeHost: Boolean,
        $expense: ExpenseReferenceInput,
        $order: OrderReferenceInput
      ) {
        transactions(
          host: $hostAccount
          limit: $limit
          offset: $offset
          type: $type
          paymentMethodType: $paymentMethodType
          dateFrom: $dateFrom
          dateTo: $dateTo
          kind: $kind
          includeIncognitoTransactions: $includeIncognitoTransactions
          includeGiftCardTransactions: $includeGiftCardTransactions
          includeChildrenTransactions: $includeChildrenTransactions
          includeDebts: true
          virtualCard: $virtualCard
          orderBy: $orderBy
          group: $group
          includeHost: $includeHost
          expense: $expense
          order: $order
        ) {
          ...TransactionsTableQueryCollectionFragment
          __typename
        }
      }
      
      fragment TransactionsTableQueryCollectionFragment on TransactionCollection {
        totalCount
        offset
        limit
        nodes {
          id
          uuid
          kind
          amount {
            currency
            valueInCents
            __typename
          }
          amountInHostCurrency {
            currency
            valueInCents
            __typename
          }
          netAmount {
            currency
            valueInCents
            __typename
          }
          netAmountInHostCurrency {
            currency
            valueInCents
            __typename
          }
          paymentProcessorFee {
            currency
            valueInCents
            __typename
          }
          paymentMethod {
            name
            service
            sourcePaymentMethod {
              id
              name
              service
              __typename
            }
            type
          }
          giftCardEmitterAccount {  
            id
            name
            slug
            __typename
          }
          group
          type
          description
          createdAt
          merchantId
          isRefunded
          isRefund
          refundTransaction {
            id
          }
          isOrderRejected
          account {
            ... on AccountWithParent {
              parent {
                id
                slug
                name
              }
            }
            id
            legacyId
            name
            slug
            isIncognito
            type
            __typename
          }
          oppositeAccount {
            id
            legacyId
            name
            slug
            isIncognito
            type
            __typename
          }
          expense {
            id
            type
            description
            invoiceInfo 
            tags
            payee {
              id
              name
              slug
              type
              __typename
            }
            __typename
          }
          permissions {
            id
            canRefund
            canDownloadInvoice
            canReject
            __typename
          }
          __typename
        }
        __typename
      }
    """

    headers = {
        "Authorization": f"Bearer {octoken}",
        "Content-Type": "application/json"
    }

    # Initial variables setup
    variables = {
        "hostAccount": {"slug": host},
        "includeIncognitoTransactions": True,
        "includeChildrenTransactions": True,
        "limit": 10000,  # Adjust if necessary but keep a sensible number to avoid server strain
        "offset": 0,  # Will be adjusted for each subsequent request
        "dateFrom": dateFrom,
        "dateTo": dateTo,
        "orderBy": {"field": "CREATED_AT", "direction": "DESC"},
        "includeHost": True
    }

    all_transactions = []  # To hold all transactions
    while True:
        # Make the HTTP request
        response = requests.post(
            url, json={'query': query, 'variables': variables}, headers=headers)
        json_data = response.json()

        # GraphQL-level errors: report them and stop, returning what we have so far
        if 'errors' in json_data:
            print(json.dumps(json_data['errors']))
            break

        # Extract data
        page = json_data['data']['transactions']
        transactions = page['nodes']

        # An empty page means there is nothing left to read. Without this
        # guard the loop would spin forever if totalCount is never reached.
        if not transactions:
            break
        all_transactions.extend(transactions)

        # Pagination: advance by the number of rows actually received, so no
        # rows are skipped if the server returns fewer than the requested limit.
        variables['offset'] += len(transactions)

        # Check if all transactions are fetched
        if len(all_transactions) >= page['totalCount']:
            break

        # print progress
        print(f'Fetched {len(all_transactions)} transactions')
        # pause for 1 second between pages to avoid server strain
        time.sleep(1)

    return all_transactions

# if account has property parent, replace account with parent
def replace_account_with_parent(transaction):
  """Replace a transaction's ``account`` with that account's ``parent``, if it has one.

  The transaction dict is modified in place and also returned. The account is
  left untouched when it is missing/None, has no ``parent`` key, or when the
  ``parent`` value is empty (e.g. null in the API response) - replacing the
  account with None would throw away the only account information we have.
  """
  account = transaction.get('account')
  if isinstance(account, dict) and account.get('parent'):
    transaction['account'] = account['parent']
  return transaction

# apply the function to all transactions
def post_process_transactions(all_transactions):
  """Run replace_account_with_parent on every transaction and return the results as a list."""
  return [replace_account_with_parent(transaction) for transaction in all_transactions]

if reloadOC:
  # fetch from the API and post-process; use the returned list explicitly
  # instead of relying on post_process_transactions mutating its argument
  all_transactions = post_process_transactions(fetch_transactions())
  # make sure the output folder exists so a long fetch is not lost on a fresh checkout
  os.makedirs(f'data/{host}/{year}', exist_ok=True)
  # dump all transactions to a json file
  with open(f'data/{host}/{year}/{host}_{year}_all_platform_transactions.json', 'w') as f:
      json.dump(all_transactions, f, indent=2)
  # flatten the nested records into a dataframe (nested keys become dotted column names)
  df_platform_transactions = pd.json_normalize(all_transactions)
  # save dataframe as pickle
  df_platform_transactions.to_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')
else:
  # reuse the pickle written by a previous run
  df_platform_transactions = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')


Fetched 10000 transactions
Fetched 20000 transactions
Fetched 30000 transactions
Fetched 40000 transactions


In [60]:
df_platform_transactions.head(20)

Unnamed: 0,id,uuid,kind,paymentMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,refundTransaction.id,expense,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
0,5cd2b637-1bb7-478c-8a41-c4ddfe334a65,5cd2b637-1bb7-478c-8a41-c4ddfe334a65,EXPENSE,,,f7aa0ca7-da7f-4b29-9aa0-745bd0385086,DEBIT,Gusto 2022 expenses,2023-01-05T22:52:47.280Z,,False,False,,False,Debit,USD,-27300,Amount,USD,-27300,Amount,USD,-27300,Amount,USD,-27300,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,e0adkjrv-8xwm6974-97lq7z5n-4l93bgoy,593894,Gusto,gusto1,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq37-75dp0ej5-lavnodgx,RECEIPT,Gusto 2022 expenses,,[expenses],e0adkjrv-8xwm6974-97lq7z5n-4l93bgoy,Gusto,gusto1,ORGANIZATION,Organization,Expense,a47byg9n-xozdp8wm-jwbpmjlv-03rek5w8,False,True,False,TransactionPermissions,,,,,,,,,,
1,9fbfa17b-527d-415c-89d3-191c19b91f1f,9fbfa17b-527d-415c-89d3-191c19b91f1f,EXPENSE,,,654a0ed4-b01c-40c0-a641-6267c8b08fd1,CREDIT,"Refund of ""Gusto 2022 expenses""",2023-01-05T22:50:01.188Z,,False,True,,False,Credit,USD,27300,Amount,USD,27300,Amount,USD,27300,Amount,USD,27300,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,vrgbk357-l4x96ea0-v5apomew-a0jdyzn8,488377,Lauren Gardner,lauren-gardner,False,INDIVIDUAL,Individual,lk9mbw7y-48r3zq37-75dp0ej5-lavnodgx,RECEIPT,Gusto 2022 expenses,,[expenses],e0adkjrv-8xwm6974-97lq7z5n-4l93bgoy,Gusto,gusto1,ORGANIZATION,Organization,Expense,mlo94zn7-x08dpomd-8m36ewga-3vjbrky5,False,True,False,TransactionPermissions,5b7055ac-51e3-477c-a0be-bfd99bcde510,,,,,,,,,
2,c41b3327-7937-4895-97d9-4b455260ce1c,c41b3327-7937-4895-97d9-4b455260ce1c,EXPENSE,,,8ad6db44-04e3-41f0-bc52-8141184a65b8,DEBIT,Swiper maintenance & support (December): 9.0.0...,2023-01-05T22:45:13.395Z,578584322,False,False,,False,Debit,USD,-700000,Amount,USD,-700000,Amount,USD,-700039,Amount,USD,-700039,Amount,USD,-39,Amount,8a47byg9-nxozdp8l-zlr6mjlv-03rek5w8,196752.0,Swiper,swiper,False,COLLECTIVE,Collective,5ax8emk7-lnw54q75-05dpgyvj-0o93zdrb,13867,Vladimir Kharlampidi,vladimirkharlampidi,False,INDIVIDUAL,Individual,bvrgbk35-7l4x96e9-0waqomew-a0jdyzn8,INVOICE,Swiper maintenance & support (December): 9.0.0...,,[],5ax8emk7-lnw54q75-05dpgyvj-0o93zdrb,Vladimir Kharlampidi,vladimirkharlampidi,INDIVIDUAL,Individual,Expense,e0adkjrv-8xwm69wv-mmbp7z5n-4l93bgoy,False,False,False,TransactionPermissions,,,,,,,,,,
3,6d820938-94f3-45bb-86d5-388dc4784534,6d820938-94f3-45bb-86d5-388dc4784534,EXPENSE,,,6850d3aa-2d28-4a1f-b476-ac31707ce314,DEBIT,Framework7 maintenance & support (December): 7...,2023-01-05T22:45:13.418Z,578584188,False,False,,False,Debit,USD,-280000,Amount,USD,-280000,Amount,USD,-280039,Amount,USD,-280039,Amount,USD,-39,Amount,7mywxoz3-409rl6kg-wgnpvenb-dj7gk85a,13868.0,Framework7,framework7,False,COLLECTIVE,Collective,5ax8emk7-lnw54q75-05dpgyvj-0o93zdrb,13867,Vladimir Kharlampidi,vladimirkharlampidi,False,INDIVIDUAL,Individual,rvedj9wr-oz3a56db-d9gq7blg-8x4m0ykn,INVOICE,Framework7 maintenance & support (December): 7...,,[],5ax8emk7-lnw54q75-05dpgyvj-0o93zdrb,Vladimir Kharlampidi,vladimirkharlampidi,INDIVIDUAL,Individual,Expense,rxg0j35l-zkwm6vkm-3kbqvoe9-8n47daby,False,False,False,TransactionPermissions,,,,,,,,,,
4,c3207e07-e519-4204-94cd-f4cba48e8d99,c3207e07-e519-4204-94cd-f4cba48e8d99,HOST_FEE_SHARE,,,07ec6e68-f456-4e58-87b6-f5b7c350e003,DEBIT,Host Fee Share,2023-01-05T22:38:24.718Z,,False,False,,False,Debit,USD,-60,Amount,USD,-60,Amount,USD,-60,Amount,USD,-60,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,,,,,,,,,,,,vedj9wro-z3a56dd0-ooa67blg-8x4m0ykn,False,True,False,TransactionPermissions,,,,,,,,,,
5,602e5a40-5a3f-49a7-bdf9-f84e52dc91d8,602e5a40-5a3f-49a7-bdf9-f84e52dc91d8,HOST_FEE,,,07ec6e68-f456-4e58-87b6-f5b7c350e003,CREDIT,Host Fee,2023-01-05T22:38:24.718Z,,False,False,,False,Credit,USD,120,Amount,USD,120,Amount,USD,120,Amount,USD,120,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,5ax8emk7-lnw54q7z-5kxpgyvj-0o93zdrb,28419,Tusky,tusky,False,COLLECTIVE,Collective,,,,,,,,,,,,vrgbk357-l4x96e0r-88zqomew-a0jdyzn8,True,True,True,TransactionPermissions,,,,,,,,,,
6,c4ed53a1-9f9b-41c5-93e1-7dc3e4422c0e,c4ed53a1-9f9b-41c5-93e1-7dc3e4422c0e,HOST_FEE,,,07ec6e68-f456-4e58-87b6-f5b7c350e003,DEBIT,Host Fee,2023-01-05T22:38:24.710Z,,False,False,,False,Debit,USD,-120,Amount,USD,-120,Amount,USD,-120,Amount,USD,-120,Amount,USD,0,Amount,5ax8emk7-lnw54q7z-5kxpgyvj-0o93zdrb,28419.0,Tusky,tusky,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,,,,,,,,,,,,a47byg9n-xozdp8wm-jjbpmjlv-03rek5w8,False,True,False,TransactionPermissions,,,,,,,,,,
7,edfefb0a-6daf-4686-a861-c4cb4b6a8891,edfefb0a-6daf-4686-a861-c4cb4b6a8891,CONTRIBUTION,,,07ec6e68-f456-4e58-87b6-f5b7c350e003,CREDIT,Yearly financial contribution to Tusky (A doll...,2023-01-05T22:38:24.774Z,ch_3MN24nBYycQg1OMf1ecefPjV,False,False,,False,Credit,USD,1200,Amount,USD,1200,Amount,USD,1123,Amount,USD,1123,Amount,USD,-77,Amount,5ax8emk7-lnw54q7z-5kxpgyvj-0o93zdrb,28419.0,Tusky,tusky,False,COLLECTIVE,Collective,jrkx5lmn-v904qjw0-x9np8bwa-7zdygoe3,593416,Simoto,simoto,False,INDIVIDUAL,Individual,,,,,,,,,,,,8k03reyd-5agmq5wm-jjkqlbwo-z7j4nxv9,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
8,e020d3c6-d94f-4eeb-8a4b-1634b5935707,e020d3c6-d94f-4eeb-8a4b-1634b5935707,HOST_FEE_SHARE_DEBT,,,221ba575-4d1b-481c-8f81-3a0559f55f18,CREDIT,Host Fee Share owed to Open Collective,2023-01-05T22:27:04.118Z,,False,False,,False,Credit,USD,500,Amount,USD,500,Amount,USD,500,Amount,USD,500,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,,,,,,,,,,,,eng0kzdy-vor4pzkm-44bpbma8-37xlw95j,True,True,True,TransactionPermissions,,,,,,,,,,
9,3a570b48-ff96-45b9-a914-c5030acc79f9,3a570b48-ff96-45b9-a914-c5030acc79f9,HOST_FEE_SHARE,,,221ba575-4d1b-481c-8f81-3a0559f55f18,DEBIT,Host Fee Share,2023-01-05T22:27:04.118Z,,False,False,,False,Debit,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,,,,,,,,,,,,e0adkjrv-8xwm69wv-mabp7z5n-4l93bgoy,False,True,False,TransactionPermissions,,,,,,,,,,


In [61]:
# load transactions from wise

# Define the base URL for the Wise API
BASE_URL = 'https://api.transferwise.com/v1/transfers'

def fetch_all_transfers(profile_id=None, status=None, source_currency=None,
                        target_currency=None, created_date_start=None,
                        created_date_end=None, limit=100):
    """Fetch all transfers from the Wise API, paging with limit/offset.

    Each argument except ``limit`` is an optional filter; filters that are
    None/empty are left out of the request. Requests go to the module-level
    ``BASE_URL`` and authenticate with the module-level ``wisetoken``.

    Args:
        profile_id: sent as the ``profile`` query parameter.
        status: sent as ``status``.
        source_currency: sent as ``sourceCurrency``.
        target_currency: sent as ``targetCurrency``.
        created_date_start: sent as ``createdDateStart``.
        created_date_end: sent as ``createdDateEnd``.
        limit: page size; the offset advances by this amount per request.

    Returns:
        list[dict]: one flat dict per transfer with the selected fields.

    Raises:
        requests.HTTPError: if the API answers with an HTTP error status.
        ValueError: if a page is not a JSON list (e.g. an error payload).
        KeyError: if a transfer lacks one of the expected fields.
    """
    headers = {
        'Authorization': f'Bearer {wisetoken}'
    }

    # The optional filters are the same for every page, so build them once.
    optional_filters = {
        'profile': profile_id,
        'status': status,
        'sourceCurrency': source_currency,
        'targetCurrency': target_currency,
        'createdDateStart': created_date_start,
        'createdDateEnd': created_date_end,
    }
    filter_params = {key: value for key, value in optional_filters.items() if value}

    offset = 0
    all_transfers = []

    while True:
        query_params = {'limit': limit, 'offset': offset, **filter_params}

        # Make the GET request to the Wise API
        response = requests.get(BASE_URL, headers=headers, params=query_params)
        # Fail with a clear HTTP error instead of trying to parse an error body as transfers
        response.raise_for_status()
        data = response.json()

        # Break if there are no more results
        if not data:
            break

        # A dict here would be iterated key by key and crash with an obscure TypeError
        if not isinstance(data, list):
            raise ValueError(f'Unexpected response from Wise API: {data!r}')

        for transfer in data:
            all_transfers.append({
                'id': transfer['id'],
                'user': transfer['user'],
                'targetAccount': transfer['targetAccount'],
                'sourceAccount': transfer['sourceAccount'],
                'quote': transfer['quote'],
                'quoteUuid': transfer['quoteUuid'],
                'status': transfer['status'],
                'reference': transfer['reference'],
                'rate': transfer['rate'],
                'created': transfer['created'],
                'business': transfer['business'],
                'transferRequest': transfer['transferRequest'],
                # "details" may be absent or null; either way fall back to None
                'details.reference': (transfer.get('details') or {}).get('reference'),
                'hasActiveIssues': transfer['hasActiveIssues'],
                'sourceCurrency': transfer['sourceCurrency'],
                'sourceValue': transfer['sourceValue'],
                'targetCurrency': transfer['targetCurrency'],
                'targetValue': transfer['targetValue'],
                'customerTransactionId': transfer['customerTransactionId']
            })

        # print progress (a count only - the raw payload is not echoed into the notebook output)
        print(f'Fetched {len(all_transfers)} transfers')

        # Update the offset for the next iteration
        offset += limit

    return all_transfers

if reloadWise:
    transfers = fetch_all_transfers(profile_id=wiseprofile, created_date_start=dateFrom, created_date_end=dateTo)

    # Build the dataframe from the list of records in one step. Concatenating
    # one single-row frame per transfer is quadratic and leaves every row with
    # index 0; this gives a clean 0..n-1 index instead.
    df_wise_transactions = pd.DataFrame(transfers)

    # save dataframe as pickle
    df_wise_transactions.to_pickle(f'data/{host}/{year}/df_{host}_{year}_wise_transactions.pkl')

In [62]:
# load transactions from stripe export
if reloadStripe:
    # Read the Stripe CSV export and keep only the columns listed below, in this order
    df_stripe_in = pd.read_csv(f'data/{host}/{year}/{host}_{year}_stripe.csv').loc[:, [
        'id',
        'Description',
        'Seller Message',
        'Amount',
        'Amount Refunded',
        'Currency',
        'Converted Amount',
        'Converted Amount Refunded',
        'Fee',
        'Taxes On Fee',
        'Converted Currency',
        'Mode',
        'Status',
        'Statement Descriptor',
        'Customer ID',
        'Customer Description',
        'Captured',
        'Card ID',
        'PaymentIntent ID',
        'Application Fee',
        'Application ID',
        'from (metadata)',
        'to (metadata)',
    ]]
    # Cache the trimmed frame as a pickle for the loading cell
    df_stripe_in.to_pickle(f'data/{host}/{year}/df_{host}_{year}_stripe_transactions.pkl')
    


In [63]:
def load_bank_csvs(directory):
    """Read every ``*.csv`` file in ``directory`` into one dataframe.

    Files are read in sorted path order so the row order is the same on every
    run. Columns that are empty in every row are dropped.

    Raises:
        FileNotFoundError: if the directory contains no CSV files.
    """
    csv_paths = sorted(glob.glob(f'{directory}/*.csv'))
    if not csv_paths:
        raise FileNotFoundError(f'No CSV files found in {directory}')
    combined = pd.concat([pd.read_csv(path) for path in csv_paths], ignore_index=True)
    return combined.dropna(axis=1, how='all')


# load open collective data
df_platform = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_all_platform_transactions.pkl')

# load stripe data
df_stripe = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_stripe_transactions.pkl')

# load wise data
df_wise = pd.read_pickle(f'data/{host}/{year}/df_{host}_{year}_wise_transactions.pkl')

# load bank data (paths are passed directly so the builtin `dir` is not shadowed)
df_bank_1 = load_bank_csvs(f'data/{host}/{year}/oscbank_1')
df_bank_2 = load_bank_csvs(f'data/{host}/{year}/oscbank_2')

# Post process data

In [64]:
# post process

# keep only Stripe rows whose Status is neither Failed nor Pending
df_stripe = df_stripe[~df_stripe['Status'].isin(['Failed', 'Pending'])]

# keep only Wise rows whose status is not cancelled
df_wise = df_wise.loc[df_wise['status'].ne('cancelled')]

# Inspect transactions dataframe

In [65]:
# show all columns
# This display option is global: it stays in effect for every later cell in the session.
pd.set_option('display.max_columns', None)
# sample() is called without random_state, so a different set of rows is shown on each run.
df_platform.sample(10)

Unnamed: 0,id,uuid,kind,paymentMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,refundTransaction.id,expense,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
31273,b86d6804-dd81-46cf-bf06-fb321a2cec11,b86d6804-dd81-46cf-bf06-fb321a2cec11,CONTRIBUTION,,,b7b0e213-8beb-41c0-8cd8-48ecd895c3cb,CREDIT,Monthly financial contribution to Svelte (backer),2023-01-01T07:08:21.253Z,ch_3MLLeYBYycQg1OMf0IpVfK5B,False,False,,False,Credit,USD,500,Amount,USD,500,Amount,USD,450,Amount,USD,450,Amount,USD,-50,Amount,0n4gx0br-ov5m96nj-arwpd8lk-3ey7jzwa,159983.0,Svelte,svelte,False,COLLECTIVE,Collective,7mywxoz3-409rl6kd-lkwpvenb-dj7gk85a,101592,Phillip Malboeuf,phillip-malboeuf,False,INDIVIDUAL,Individual,,,,,,,,,,,,jrkx5lmn-v904qj49-rbzq8bwa-7zdygoe3,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
11277,70881eaa-916a-41ce-95bc-6c03e9e56cec,70881eaa-916a-41ce-95bc-6c03e9e56cec,CONTRIBUTION,,,682792f4-c505-415d-a9ff-8bcfab76ef85,CREDIT,Monthly financial contribution to date-fns (Br...,2023-01-02T05:08:36.291Z,ch_3MLgGDBYycQg1OMf1szyiJrd,False,False,,False,Credit,USD,5000,Amount,USD,5000,Amount,USD,4775,Amount,USD,4775,Amount,USD,-225,Amount,3vjrkx5l-mnv904qj-0k68bwa7-zdygoe3d,440.0,date-fns,date-fns,False,COLLECTIVE,Collective,re0adkjr-v8xwm695-vk4p7z5n-4l93bgoy,48546,One Beyond,dcsl-software,False,ORGANIZATION,Organization,,,,,,,,,,,,a47byg9n-xozdp8wa-k39pmjlv-03rek5w8,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
20794,594d754d-f8c3-49e1-abb0-3237ac4e6e71,594d754d-f8c3-49e1-abb0-3237ac4e6e71,CONTRIBUTION,,,814867d0-2af3-4791-8a8e-9419d2224d26,CREDIT,Monthly financial contribution to Open Web Doc...,2023-01-01T18:15:41.460Z,ch_3MLW4MBYycQg1OMf1neP7uMS,False,False,,False,Credit,USD,500,Amount,USD,500,Amount,USD,455,Amount,USD,455,Amount,USD,-45,Amount,vjrkx5lm-nv904qjz-aamp8bwa-7zdygoe3,173688.0,Open Web Docs,open-web-docs,False,COLLECTIVE,Collective,7mywxoz3-409rl6kg-l4bpvenb-dj7gk85a,18820,Mark Reeves,heymarkreeves,False,INDIVIDUAL,Individual,,,,,,,,,,,,ywz9j4av-god8pg5y-9g36mr35-nxklb0e7,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
14708,824231bd-b425-4560-918a-3bf6f57bbcf2,824231bd-b425-4560-918a-3bf6f57bbcf2,HOST_FEE,,,1198acdc-b4bb-4825-a5cc-6acf90dd582a,CREDIT,Host Fee,2023-01-02T01:07:43.200Z,,False,False,,False,Credit,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,50,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,rvedj9wr-oz3a56dx-ngp7blg8-x4m0ykny,10106,Laradock,laradock,False,COLLECTIVE,Collective,,,,,,,,,,,,rxg0j35l-zkwm6vk8-agyqvoe9-8n47daby,True,True,True,TransactionPermissions,,,,,,,,,,
29867,c7226e62-0e5c-4c3d-a9fc-c2c315fcafcc,c7226e62-0e5c-4c3d-a9fc-c2c315fcafcc,HOST_FEE,,,ea52578e-1144-4399-9777-6501495e8b69,CREDIT,Host Fee,2023-01-01T09:04:04.792Z,,False,False,,False,Credit,USD,5000,Amount,USD,5000,Amount,USD,5000,Amount,USD,5000,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,8a47byg9-nxozdp8m-xx9qmjlv-03rek5w8,167104,Ruffle,ruffle,False,COLLECTIVE,Collective,,,,,,,,,,,,3k0exgzn-m8yj64wd-4j8q5wao-9r7b4dlv,True,True,True,TransactionPermissions,,,,,,,,,,
31973,857660cc-8942-40c2-93c7-38574b57007f,857660cc-8942-40c2-93c7-38574b57007f,CONTRIBUTION,,,cf4d1df9-6d2b-4b98-a50c-db60fcc73fab,CREDIT,Monthly financial contribution to Dark Reader ...,2023-01-01T06:12:44.095Z,ch_3MLKmjBYycQg1OMf0s3XeoDZ,False,False,,False,Credit,USD,200,Amount,USD,200,Amount,USD,164,Amount,USD,164,Amount,USD,-36,Amount,nmlo94zn-7x08dpox-7xpewga3-vjbrky5m,13775.0,Dark Reader,darkreader,False,COLLECTIVE,Collective,7ywz9j4a-vgod8pgb-7836mr35-nxklb0e7,166323,incognito,incognito-2252b359,True,INDIVIDUAL,Individual,,,,,,,,,,,,rxg0j35l-zkwm6vka-maxqvoe9-8n47daby,True,True,True,TransactionPermissions,,,,,,,,,,
7601,79b8cd77-0f32-4d3d-b147-994218ad25c8,79b8cd77-0f32-4d3d-b147-994218ad25c8,HOST_FEE,,,5bd78141-4188-4362-84c3-abb199b88358,DEBIT,Host Fee,2023-01-02T10:08:34.382Z,,False,False,,False,Debit,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,-50,Amount,USD,0,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,,,,,,,,,,,,3z8arxve-ymko605j-zx5pgl5n-bj9w704d,False,True,False,TransactionPermissions,,,,,,,,,,
39008,930279e2-ce12-4696-a96c-83e926ab74f9,930279e2-ce12-4696-a96c-83e926ab74f9,HOST_FEE_SHARE,,,8aceea64-da18-4a7a-a879-89fa9f6f7784,DEBIT,Host Fee Share,2023-01-01T01:06:21.511Z,,False,False,,False,Debit,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,0,Amount,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004.0,Open Source Collective,opensource,False,ORGANIZATION,Organization,lk9mbw7y-48r3zq3w-g860ej5l-avnodgx7,8686,Open Collective,opencollective,False,ORGANIZATION,Organization,,,,,,,,,,,,vrgbk357-l4x96e0g-0r5qomew-a0jdyzn8,False,True,False,TransactionPermissions,,,,,,,,,,
18318,def57b2d-d580-4f88-8ade-b98483cf8658,def57b2d-d580-4f88-8ade-b98483cf8658,CONTRIBUTION,,,3db07957-00b3-4cb1-a021-a931bc802493,CREDIT,Monthly financial contribution to StashApp (ba...,2023-01-01T21:05:30.885Z,ch_3MLYiiBYycQg1OMf1RYf97hB,False,False,,False,Credit,USD,500,Amount,USD,500,Amount,USD,455,Amount,USD,455,Amount,USD,-45,Amount,nmlo94zn-7x08dpor-933pewga-3vjbrky5,73803.0,StashApp,stashapp,False,COLLECTIVE,Collective,mywxoz34-09rl6kg9-deypvenb-dj7gk85a,488292,posttost147,guest-f7956a12,False,INDIVIDUAL,Individual,,,,,,,,,,,,a47byg9n-xozdp8w7-j07pmjlv-03rek5w8,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
12957,ad9452db-b834-4126-8d94-018e3948a7c8,ad9452db-b834-4126-8d94-018e3948a7c8,HOST_FEE,,,a1c4b94b-456d-4628-8779-0c8c26a2c0f1,DEBIT,Host Fee,2023-01-02T03:13:03.840Z,,False,False,,False,Debit,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,-500,Amount,USD,0,Amount,eng0kzdy-vor4pzb4-4vgpbma8-37xlw95j,475949.0,IFC.js,ifcjs,False,COLLECTIVE,Collective,8a47byg9-nxozdp80-xm6mjlv0-3rek5w8k,11004,Open Source Collective,opensource,False,ORGANIZATION,Organization,,,,,,,,,,,,zaxon793-jy8gplkn-brnqbrkd-emwl5v04,False,True,False,TransactionPermissions,,,,,,,,,,


In [66]:
# count transactions by kind
df_platform.groupby(['kind']).size()

kind
ADDED_FUNDS                    1
CONTRIBUTION               10194
EXPENSE                      140
HOST_FEE                   19912
HOST_FEE_SHARE              9956
HOST_FEE_SHARE_DEBT          847
PAYMENT_PROCESSOR_COVER       22
dtype: int64

# Reconciling Stripe

TODO: Before we can do this, we need to get the merchantIDs.

In [67]:
# copy df_stripe to df_s so later changes to df_s leave df_stripe untouched
df_s = df_stripe.copy()

In [68]:
df_s.head()

Unnamed: 0,id,Description,Seller Message,Amount,Amount Refunded,Currency,Converted Amount,Converted Amount Refunded,Fee,Taxes On Fee,Converted Currency,Mode,Status,Statement Descriptor,Customer ID,Customer Description,Captured,Card ID,PaymentIntent ID,Application Fee,Application ID,from (metadata),to (metadata)
0,ch_3OTXgHBYycQg1OMf0IfKN8Sw,Monthly financial contribution to Logseq (Back...,Payment complete.,5.0,0.0,usd,5.0,0.0,0.77,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,cus_PI7wZ2sLFYmMPi,https://opencollective.com/sanket-sharma,True,pm_1OTXmVBYycQg1OMfzXUwCvAm,pi_3OTXgHBYycQg1OMf0PkaNI3M,0.25,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,https://opencollective.com/sanket-sharma,https://opencollective.com/logseq
3,ch_3OTXVMBYycQg1OMf0qVZ5tjm,Financial contribution to OpenSCAD,Payment complete.,50.0,0.0,usd,50.0,0.0,4.25,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTXVpBYycQg1OMfien2j8Cr,pi_3OTXVMBYycQg1OMf02f4Vhyl,2.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/openscad
4,ch_3OTULSBYycQg1OMf10wlDceK,Monthly financial contribution to LibreELEC,Payment complete.,10.0,0.0,usd,10.0,0.0,1.24,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,cus_PI4UZkic5DVDO1,https://opencollective.com/daniel-dolejska,True,pm_1OTULSBYycQg1OMf7Lt0BwEi,pi_3OTULSBYycQg1OMf1saV7WWG,0.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,https://opencollective.com/daniel-dolejska,https://opencollective.com/libreelec
5,ch_3OTUG8BYycQg1OMf0FRuZULZ,Financial contribution to Spotube,Payment complete.,5.0,0.0,usd,5.0,0.0,0.77,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTUI5BYycQg1OMfAbkDmXhS,pi_3OTUG8BYycQg1OMf0AcGFa2r,0.25,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/spotube
7,ch_3OTT8EBYycQg1OMf0gR3PzrY,Financial contribution to .fmbot (.fmbot lifet...,Payment complete.,49.99,0.0,usd,49.99,0.0,4.25,0.0,usd,Live,Paid,OPEN SOURCE COLLECTIVE,,,True,pm_1OTT94BYycQg1OMf9AOHyAVx,pi_3OTT8EBYycQg1OMf0xdn31JV,2.5,ca_68FQ4jN0XMVhxpnk6gAptwvx90S9VYXF,,https://opencollective.com/fmbot


In [69]:
# platform rows that are contributions paid through Stripe
df_p = df_platform.loc[
    df_platform['kind'].eq('CONTRIBUTION')
    & df_platform['paymentMethod.service'].eq('STRIPE')
]

In [70]:
df_p.head()

Unnamed: 0,id,uuid,kind,paymentMethod,giftCardEmitterAccount,group,type,description,createdAt,merchantId,isRefunded,isRefund,refundTransaction,isOrderRejected,__typename,amount.currency,amount.valueInCents,amount.__typename,amountInHostCurrency.currency,amountInHostCurrency.valueInCents,amountInHostCurrency.__typename,netAmount.currency,netAmount.valueInCents,netAmount.__typename,netAmountInHostCurrency.currency,netAmountInHostCurrency.valueInCents,netAmountInHostCurrency.__typename,paymentProcessorFee.currency,paymentProcessorFee.valueInCents,paymentProcessorFee.__typename,account.id,account.legacyId,account.name,account.slug,account.isIncognito,account.type,account.__typename,oppositeAccount.id,oppositeAccount.legacyId,oppositeAccount.name,oppositeAccount.slug,oppositeAccount.isIncognito,oppositeAccount.type,oppositeAccount.__typename,expense.id,expense.type,expense.description,expense.invoiceInfo,expense.tags,expense.payee.id,expense.payee.name,expense.payee.slug,expense.payee.type,expense.payee.__typename,expense.__typename,permissions.id,permissions.canRefund,permissions.canDownloadInvoice,permissions.canReject,permissions.__typename,refundTransaction.id,expense,paymentMethod.name,paymentMethod.service,paymentMethod.sourcePaymentMethod,paymentMethod.type,giftCardEmitterAccount.id,giftCardEmitterAccount.name,giftCardEmitterAccount.slug,giftCardEmitterAccount.__typename
7,edfefb0a-6daf-4686-a861-c4cb4b6a8891,edfefb0a-6daf-4686-a861-c4cb4b6a8891,CONTRIBUTION,,,07ec6e68-f456-4e58-87b6-f5b7c350e003,CREDIT,Yearly financial contribution to Tusky (A doll...,2023-01-05T22:38:24.774Z,ch_3MN24nBYycQg1OMf1ecefPjV,False,False,,False,Credit,USD,1200,Amount,USD,1200,Amount,USD,1123,Amount,USD,1123,Amount,USD,-77,Amount,5ax8emk7-lnw54q7z-5kxpgyvj-0o93zdrb,28419.0,Tusky,tusky,False,COLLECTIVE,Collective,jrkx5lmn-v904qjw0-x9np8bwa-7zdygoe3,593416,Simoto,simoto,False,INDIVIDUAL,Individual,,,,,,,,,,,,8k03reyd-5agmq5wm-jjkqlbwo-z7j4nxv9,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
38,dfe894e2-697e-4f9b-9f0e-9d03786407bd,dfe894e2-697e-4f9b-9f0e-9d03786407bd,CONTRIBUTION,,,d95ca6a7-b65c-4822-ace7-f7cef41ad9b7,CREDIT,Monthly financial contribution to Logseq (Back...,2023-01-05T21:33:03.573Z,ch_3MN13YBYycQg1OMf1UQcVB0a,False,False,,False,Credit,USD,500,Amount,USD,500,Amount,USD,450,Amount,USD,450,Amount,USD,-50,Amount,lk9mbw7y-48r3zq3m-gddp0ej5-lavnodgx,166914.0,Logseq,logseq,False,COLLECTIVE,Collective,ejoxl3az-45w9pawj-b0k6y870-mgkbrvdn,593865,Steve Kwan,steve-kwan,False,INDIVIDUAL,Individual,,,,,,,,,,,,n4gx0bro-v5m96nje-ovypd8lk-3ey7jzwa,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
47,6f9ab182-241b-48ec-bc8b-610bdcab4505,6f9ab182-241b-48ec-bc8b-610bdcab4505,CONTRIBUTION,,,5241ed21-53f1-4935-b23b-4e9b614ff3c5,CREDIT,Financial contribution to StashApp,2023-01-05T21:10:34.033Z,ch_3MN0hnBYycQg1OMf1ODqgAyu,False,False,,False,Credit,USD,2000,Amount,USD,2000,Amount,USD,1912,Amount,USD,1912,Amount,USD,-88,Amount,nmlo94zn-7x08dpor-933pewga-3vjbrky5,73803.0,StashApp,stashapp,False,COLLECTIVE,Collective,3k0exgzn-m8yj64r0-485p5wao-9r7b4dlv,593860,Guest,guest-df2cf5e0,False,INDIVIDUAL,Individual,,,,,,,,,,,,ax8emk7l-nw54q7wg-jvypgyvj-0o93zdrb,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
51,d0cb2f78-f31d-4782-8ed7-2c42418db162,d0cb2f78-f31d-4782-8ed7-2c42418db162,CONTRIBUTION,,,05e93c6e-ed60-46f9-86a3-50ff7ed28442,CREDIT,Monthly financial contribution to Stride Game ...,2023-01-05T21:06:09.434Z,ch_3MN0dWBYycQg1OMf0XBqnOIb,False,False,,False,Credit,USD,1000,Amount,USD,1000,Amount,USD,941,Amount,USD,941,Amount,USD,-59,Amount,gnxdwzj3-le5mpwj3-wg8qy8rv-bko04a97,439885.0,Stride Game Engine,stride3d,False,COLLECTIVE,Collective,v349mrwg-z75lpy78-rr8pa08d-jeybknox,593826,Code Monkey,shaun-kohanowski,False,INDIVIDUAL,Individual,,,,,,,,,,,,gnxdwzj3-le5mpwkm-40epy8rv-bko04a97,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,
55,1cb859ec-bcb9-439e-96dd-3615055b612a,1cb859ec-bcb9-439e-96dd-3615055b612a,CONTRIBUTION,,,e970a853-fb8b-46c5-8174-2217eba8a88c,CREDIT,Financial contribution to Embed into UI framew...,2023-01-05T21:04:49.059Z,ch_3MN0cEBYycQg1OMf07pPLLP9,False,False,,False,Credit,USD,2000,Amount,USD,2000,Amount,USD,1912,Amount,USD,1912,Amount,USD,-88,Amount,gnxdwzj3-le5mpwj3-wg8qy8rv-bko04a97,,Stride Game Engine,stride3d,,,,v349mrwg-z75lpy78-rr8pa08d-jeybknox,593826,Code Monkey,shaun-kohanowski,False,INDIVIDUAL,Individual,,,,,,,,,,,,3z8arxve-ymko605l-jrwpgl5n-bj9w704d,True,True,True,TransactionPermissions,,,,STRIPE,,CREDITCARD,,,,


In [71]:
# Attach the matching platform transaction id to every Stripe row.
#
# A Stripe row matches a platform row when df_s['id'] equals
# df_platform['merchantId']. The platform id is stored on df_s in the column
# 'id_platform_transaction'; Stripe rows without a match get NaN there.
#
# NOTE(review): df_p (Stripe contributions only) is built in an earlier cell,
# but the match below runs against the full df_platform - confirm that is intended.

PLATFORM_ID_COL = 'id_platform_transaction'

# Build a merchantId -> platform id lookup with exactly one entry per merchantId:
# - rows without a merchantId can never match a Stripe id, so they are dropped
# - if several platform rows share a merchantId, only the first is kept, so a
#   Stripe row is never duplicated by the match (a plain left merge would
#   repeat it once per matching platform row and inflate the counts below)
platform_lookup = (
    df_platform[['merchantId', 'id']]
    .dropna(subset=['merchantId'])
    .drop_duplicates(subset='merchantId', keep='first')
    .set_index('merchantId')['id']
)

# Reset to a fresh 0..n-1 index (the same index a merge would have produced),
# then write the looked-up id. assign() overwrites the column if it already
# exists, so re-running this cell gives the same result instead of failing
# on a duplicate column.
stripe_rows = df_s.reset_index(drop=True)
df_s = stripe_rows.assign(**{PLATFORM_ID_COL: stripe_rows['id'].map(platform_lookup)})

In [72]:
# Number of Stripe rows that were matched to a platform transaction
# (i.e. id_platform_transaction is populated)
df_s['id_platform_transaction'].notna().sum()

9114

In [73]:
# Number of Stripe rows with no platform transaction:
# total rows minus the rows whose id_platform_transaction is populated
len(df_s) - df_s['id_platform_transaction'].count()

122653