In [1]:
import pdftotext
import os
import re
import pandas as pd
import gspread

BANK_STATEMENTS_FILE_PATH='/Users/jaredyu/Desktop/finances/finance_tracker_app/data/bank_statements'
CREDIT_CARD_STATEMENTS_FILE_PATH='/Users/jaredyu/Desktop/finances/finance_tracker_app/data/credit_card_statements'
SPREADSHEET_KEY=os.environ['SPREADSHEET_KEY']

# Update Reference Table

In [2]:
update_date_df = pd.DataFrame({
    'update_date': [
        '05/27/2024'
    ]
})

In [3]:
gc = gspread.service_account()
finance_tracker_db_spreadsheet = gc.open_by_key(SPREADSHEET_KEY)
update_reference_table_worksheet = finance_tracker_db_spreadsheet.worksheet('update_reference_table')
update_reference_table_worksheet.update([update_date_df.columns.values.tolist()] + update_date_df.values.tolist())
update_reference_table_worksheet.format("C:D", {"numberFormat": {"type": "DATE_TIME"}})

{'spreadsheetId': '1CAyyf2kr-pS7LNX1a_0ithw6niL3Js3K4ZEOlwDViZY',
 'replies': [{}]}

# East West Bank

In [4]:
# EDGE CASE: MULTI-PAGE STATEMENTS
# EDGE CASE: MULTI-LINE TRANSACTIONS
# EDGE CASE: CHECK DEPOSITS BREAK PYPDF

In [5]:
def pair_sequentially(numbers):
    # Check if the list has an even length
    if len(numbers) % 2 != 0:
        raise ValueError("The list must have an even length")

    # Create the list of pairs
    paired_list = []
    for i in range(0, len(numbers), 2):
        paired_list.append(numbers[i:i+2])
    
    return paired_list

def remove_indices(lst, indices):
    # Sort indices in descending order to avoid reindexing issues
    indices = sorted(indices, reverse=True)
    
    # Remove elements at each index
    for index in indices:
        if 0 <= index < len(lst):
            lst.pop(index)
        else:
            raise IndexError(f"Index {index} is out of bounds for list of length {len(lst)}")
    
    return lst

def has_consecutive_values(data):
  """
  This function checks if a list contains any consecutive values (adjacent duplicates).

  Args:
      data: A list of any data type.

  Returns:
      True if there are consecutive values, False otherwise.
  """
  if len(data) <= 1:
    return False  # Need at least 2 elements for consecutive values

  # Iterate through the list, checking for adjacent duplicates
  for i in range(1, len(data)):
    if data[i] == data[i-1] + 1:
      return True
  return False

def concatenate_multi_line_transactions(transaction_lines_list, bank, year, month):
    """
    Edit credit and debit line lists to concatenate strings from multi-line items.
    """
    transaction_lines_list = transaction_lines_list.copy()
    date_pattern = r"^\d{2}-\d{2}$"
    bad_idx_list = []
    for idx, txc_line in enumerate(transaction_lines_list):
        if not bool(re.match(date_pattern, txc_line[:5])):
            bad_idx_list.append(idx)

    if has_consecutive_values(data=bad_idx_list):
        raise Exception(f'Transactions with more than two lines found. Info: {bank}, {year}, {month}')

    if len(bad_idx_list) > 0:
        complete_bad_idx_list = []
        for i in bad_idx_list:
            complete_bad_idx_list.append(i - 1)
            complete_bad_idx_list.append(i)

        paired_short_line_list = pair_sequentially(complete_bad_idx_list)

        # extract them from the original list and concatenate them together
        combined_line_list = []
        for paired_lines in paired_short_line_list:
            combined_line = ''.join(transaction_lines_list[paired_lines[0]:paired_lines[1]+1])
            combined_line_list.append(combined_line)

        # drop the original items
        transaction_lines_list = remove_indices(transaction_lines_list, complete_bad_idx_list)

        return transaction_lines_list + combined_line_list
    else:
        return transaction_lines_list

def parse_lines_with_regex(lines, transaction_pattern):
    # Ref.: https://levelup.gitconnected.com/creating-a-bank-statement-parser-with-python-9223b895ebae
    transactions = []
    for line in lines:
        match = re.search(pattern=transaction_pattern, string=line)
        if match:
            transactions.append(match.groupdict())

    return pd.DataFrame(transactions)

transaction_pattern = (
        r"(?P<transaction_date>\d+-\d+)\s*"
        r"(?P<description>.*?)\s*"
        r"(?P<amount>[\d.,]+)$"
)

def parse_east_west_bank_bank_statements_by_year(bank, year, bank_statements_file_path):
    """
    Go through a year of monthly bank statements for a given bank and parse
    the statements and return a df.
    """
    file_path = os.path.join(bank_statements_file_path, bank, year)
    monthly_bank_statement_list = os.listdir(file_path)
    monthly_bank_statement_list = [i for i in monthly_bank_statement_list if i != '.DS_Store']
    transactions_df_list = []

    for monthly_bank_statement_file in monthly_bank_statement_list:
        month = monthly_bank_statement_file.split('_')[1].split('.')[0]
        with open(os.path.join(file_path, monthly_bank_statement_file), "rb") as file:
            pdf = pdftotext.PDF(file, physical=True)
            if len(pdf) == 2:
                first_page = pdf[0]
                lines1 = first_page.split("\n")
                lines1 = [i.lstrip() for i in lines1]
                lines1 = [i for i in lines1 if i != '']
                lines2 = None
            elif len(pdf) == 3:
                first_page = pdf[0]
                second_page = pdf[1]
                lines1 = first_page.split("\n")
                second_page = pdf[1]
                lines2 = second_page.split("\n")

                lines1 = [i.lstrip() for i in lines1]
                lines1 = [i for i in lines1 if i != '']
                lines2 = [i.lstrip() for i in lines2]
                lines2 = [i for i in lines2 if i != '']
            else:
                pdf_length = len(pdf)
                raise Exception(f'New length for pdf ({pdf_length}), time to set new rules. Info: {bank}, {year}, {month}')
            
            # for line in lines1:
            #     print(line.lstrip())

        try:
            credits_line_idx = lines1.index('CREDITS')
            credit_balance_exists = True
        except:
            credit_balance_exists = False
        try:
            debits_line_idx = lines1.index('DEBITS')
            debits_line_on_first_page = True
        except:
            debits_line_on_first_page = False
            raise Exception(f'DEBITS not on first page, time to set new rules. Info: {bank}, {year}, {month}')

        try:
            daily_balances_line_idx = lines1.index('DAILY BALANCES')
            daily_balances_on_first_page = True
        except:
            daily_balances_line_idx = lines2.index('DAILY BALANCES')
            daily_balances_on_first_page = False
        credit_lines = lines1[credits_line_idx + 2:debits_line_idx]

        try: # check if the DEBITS balance is multi-page
            if len([i for i in lines2 if i[:4] == 'Date']) > 1:
                debit_balance_multi_page = True
            else:
                debit_balance_multi_page = False
        except:
            debit_balance_multi_page = False

        # No CREDITS and DEBITS does not extend to second
        if not credit_balance_exists and not debit_balance_multi_page:
            debit_lines = lines1[debits_line_idx + 2:daily_balances_line_idx]
            debit_lines = concatenate_multi_line_transactions(debit_lines, bank, year, month)
            debit_df = parse_lines_with_regex(lines=debit_lines, transaction_pattern=transaction_pattern)
            debit_df['amount'] = debit_df['amount'].apply(lambda x: -1 * float(x.replace(',' , '')))

            # debit_df['bank'] = bank
            debit_df['year'] = year
            debit_df['month'] = month
            transactions_df_list.append(debit_df)
            continue
        # DAILY BALANCES on first page and DEBITS does not extend to second
        elif daily_balances_on_first_page and not debit_balance_multi_page:
            debit_lines = lines1[debits_line_idx + 2:daily_balances_line_idx]
        # DEBITS on first page but extends to second
        elif debits_line_on_first_page and debit_balance_multi_page:
            second_page_debit_date_line_idx = lines2.index([i for i in lines2 if i[:4] == 'Date'][0])
            debit_lines = lines1[debits_line_idx + 2:] + lines2[second_page_debit_date_line_idx + 1:daily_balances_line_idx]
        # DAILY BALANCES not on first page and DEBITS does not extend to second
        elif not daily_balances_on_first_page and not debit_balance_multi_page:
            debit_lines = lines1[debits_line_idx + 2:]
        else:
            raise Exception(f'Uncaught case. Info: {bank}, {year}, {month}')

        credit_lines = concatenate_multi_line_transactions(credit_lines, bank, year, month)
        debit_lines = concatenate_multi_line_transactions(debit_lines, bank, year, month)

        credit_df = parse_lines_with_regex(lines=credit_lines, transaction_pattern=transaction_pattern)
        debit_df = parse_lines_with_regex(lines=debit_lines, transaction_pattern=transaction_pattern)

        credit_df['amount'] = credit_df['amount'].apply(lambda x: float(x.replace(',' , '')))
        debit_df['amount'] = debit_df['amount'].apply(lambda x: -1 * float(x.replace(',' , '')))

        transactions_df = pd.concat([credit_df, debit_df])

        # transactions_df['bank'] = bank
        transactions_df['year'] = year
        transactions_df['month'] = month
        transactions_df_list.append(transactions_df)

    combined_transactions_df = pd.concat(transactions_df_list)

    return combined_transactions_df

In [6]:
east_west_bank_annual_beginning_ending_balance_df = pd.DataFrame({
    '2020': [2459.25, 15619.65],
    '2021': [15619.65, 14552.04],
    '2022': [14552.04, 3046.51],
    '2023': [3046.51, 19634.88],
    '2024': [19634.88, 2982.74],
})

# transactions_2019_df = parse_bank_statements_by_year(
#     bank='east_west_bank',
#     year='2019',
#     bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
# )

transactions_2020_df = parse_east_west_bank_bank_statements_by_year(
    bank='east_west_bank',
    year='2020',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)
transactions_2021_df = parse_east_west_bank_bank_statements_by_year(
    bank='east_west_bank',
    year='2021',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)
transactions_2022_df = parse_east_west_bank_bank_statements_by_year(
    bank='east_west_bank',
    year='2022',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)
transactions_2023_df = parse_east_west_bank_bank_statements_by_year(
    bank='east_west_bank',
    year='2023',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)
transactions_2024_df = parse_east_west_bank_bank_statements_by_year(
    bank='east_west_bank',
    year='2024',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)

def approx_sum(x):
    return round(sum(x), 2)

def test_annual_balance_east_west_bank(df, year, ref_df):
    tmp = round(ref_df[year][0] + approx_sum(df['amount']), 2)
    assert tmp == ref_df[year][1]

# test for regressions
test_annual_balance_east_west_bank(
    df=transactions_2020_df,
    year='2020',
    ref_df=east_west_bank_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank(
    df=transactions_2021_df,
    year='2021',
    ref_df=east_west_bank_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank(
    df=transactions_2022_df,
    year='2022',
    ref_df=east_west_bank_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank(
    df=transactions_2023_df,
    year='2023',
    ref_df=east_west_bank_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank(
    df=transactions_2024_df,
    year='2024',
    ref_df=east_west_bank_annual_beginning_ending_balance_df
)

total_transactions_df = pd.concat([
    transactions_2020_df,
    transactions_2021_df,
    transactions_2022_df,
    transactions_2023_df,
    transactions_2024_df
])

total_transactions_df['transaction_date_month'] = total_transactions_df['transaction_date'].apply(lambda x: x.split('-')[0])
assert sum(total_transactions_df['transaction_date_month'] != total_transactions_df['month']) == 0
total_transactions_df['transaction_date'] = total_transactions_df['year'] + '-' + total_transactions_df['transaction_date']
total_transactions_df.drop(['year', 'month', 'transaction_date_month'], inplace=True, axis=1)
total_transactions_df.sort_values(by=['transaction_date', 'amount', 'description'], ascending=[True, False, True], inplace=True)
total_transactions_df.reset_index(drop=True, inplace=True)

In [7]:
total_transactions_df

Unnamed: 0,transaction_date,description,amount
0,2020-01-02,Deposit,510.00
1,2020-01-06,Preauth Debit VENMO PAYMENT 200106,-160.00
2,2020-01-06,Preauth Debit VENMO PAYMENT 200106,-180.00
3,2020-01-06,Preauth Debit VENMO PAYMENT 200106,-240.00
4,2020-01-06,Preauth Debit VENMO PAYMENT 200106,-370.00
...,...,...,...
621,2024-04-08,Pre-Auth Credit CPRIME INC DIRECT ...,5839.26
622,2024-04-08,Preauth Debit GOLDMAN SACHS BA COLLECTI...,-6300.00
623,2024-04-23,Pre-Auth Credit CPRIME INC DIRECT ...,3235.25
624,2024-04-23,Preauth Debit GOLDMAN SACHS BA COLLECTI...,-3000.00


In [31]:
gc = gspread.service_account()

finance_tracker_db_spreadsheet = gc.open_by_key(SPREADSHEET_KEY)
east_west_bank_worksheet = finance_tracker_db_spreadsheet.worksheet('east_west_bank_bank_statements')
east_west_bank_worksheet.update([total_transactions_df.columns.values.tolist()] + total_transactions_df.values.tolist())
# east_west_bank_worksheet.format("C:C", {"numberFormat": {"type": "CURRENCY"}})
# east_west_bank_worksheet.format("A", {"numberFormat": {"type": "DATE_TIME"}})

{'spreadsheetId': '1CAyyf2kr-pS7LNX1a_0ithw6niL3Js3K4ZEOlwDViZY',
 'updatedRange': 'east_west_bank_bank_statements!A1:C627',
 'updatedRows': 627,
 'updatedColumns': 3,
 'updatedCells': 1881}

In [8]:
# build some unit tests
# create truth table, check for regressions
# rules:
# pdf generally has length +1, ignore last blank page
# CREDITS should be in first page
# DEBITS should be in first page (could be in second, haven't seen it)
# DAILY BALANCES could be in first or second
# some transactions are multi-line
# capture the transactions under CREDITS and DEBITS correctly

# get the beginning and ending balance per month/year in a reference table

# Marcus

In [87]:
def parse_lines_with_regex(lines, transaction_pattern):
    # Ref.: https://levelup.gitconnected.com/creating-a-bank-statement-parser-with-python-9223b895ebae
    transactions = []
    for line in lines:
        match = re.search(pattern=transaction_pattern, string=line)
        if match:
            transactions.append(match.groupdict())

    return pd.DataFrame(transactions)

transaction_pattern = (
    r"(?P<transaction_date>\d+/\d+/\d+)\s*"
    r"(?P<description>.*?)(?=\$)"
    r"(?P<credit_debit>.*?)(?=\s)\s*"
    r"(?P<balance>.*)"
)

def currency_to_float(x):
    return float(x.replace('$', '').replace(',', ''))

def parse_marcus_bank_statements_by_year(bank, year, bank_statements_file_path):
    """
    Go through a year of monthly bank statements for a given bank and parse
    the statements and return a df.
    """
    file_path = os.path.join(bank_statements_file_path, bank, year)
    monthly_bank_statement_list = os.listdir(file_path)
    monthly_bank_statement_list = [i for i in monthly_bank_statement_list if i != '.DS_Store']
    transactions_df_list = []
    for monthly_bank_statement_file in monthly_bank_statement_list:
        month = monthly_bank_statement_file.split('_')[1].split('.')[0]
        with open(os.path.join(file_path, monthly_bank_statement_file), "rb") as file:
            pdf = pdftotext.PDF(file, physical=True)
            if len(pdf) == 1:
                first_page = pdf[0]
                lines1 = first_page.split("\n")
                lines1 = [i.lstrip() for i in lines1]
                lines1 = [i for i in lines1 if i != '']
                lines2 = None
            elif len(pdf) == 2:
                first_page = pdf[0]
                lines1 = first_page.split("\n")
                second_page = pdf[1]
                lines2 = second_page.split("\n")

                lines1 = [i.lstrip() for i in lines1]
                lines1 = [i for i in lines1 if i != '']
                lines2 = [i.lstrip() for i in lines2]
                lines2 = [i for i in lines2 if i != '']
            else:
                pdf_length = len(pdf)
                raise Exception(f'New length for pdf ({pdf_length}), time to set new rules. Info: {bank}, {year}, {month}')

        if lines2 is None:
            transactions_forward_list = lines1[lines1.index('ACCOUNT ACTIVITY'):]
        else:
            transactions_forward_list = lines1[lines1.index('ACCOUNT ACTIVITY'):] + lines2[lines2.index('ACCOUNT ACTIVITY (continued)'):]

        transaction_lines_list = transactions_forward_list
        transaction_lines_list = transaction_lines_list.copy()
        beginning_balance_list = transaction_lines_list.copy()

        # get the beginning entry for reference
        beginning_balance_entry = [i for i in beginning_balance_list if 'Beginning Balance' in i][0]
        beginning_balance_dict = re.search(
            pattern=(
                r"(?P<transaction_date>\d+/\d+/\d+)\s*"
                r"(?P<description>.*?)(?=\$)"
                r"(?P<balance>.*)"
            ),
            string=beginning_balance_entry
        ).groupdict()

        # parse the other lines
        date_pattern = r"^\d{2}/\d{2}/\d{4}"
        bad_idx_list = []
        for idx, txc_line in enumerate(transaction_lines_list):
            if not bool(re.match(date_pattern, txc_line[:10])):
                bad_idx_list.append(idx)

        transaction_lines_list = remove_indices(transaction_lines_list, bad_idx_list)
        transaction_lines_list = [
            i for i in transaction_lines_list if all(substring not in i for substring in ['Beginning Balance', 'Ending Balance'])
        ]

        transactions_df = parse_lines_with_regex(transaction_lines_list, transaction_pattern)
        transactions_df['credit_debit'] = transactions_df['credit_debit'].apply(currency_to_float)
        transactions_df['balance'] = transactions_df['balance'].apply(currency_to_float)
        beginning_balance_df = pd.DataFrame([beginning_balance_dict])
        beginning_balance_df['balance'] = beginning_balance_df['balance'].apply(currency_to_float)
        transactions_df = pd.concat(
            [
                beginning_balance_df,
                transactions_df
            ]
        )
        transactions_df['description'] = transactions_df['description'].apply(lambda x: x.rstrip())

        # for reversal charges which have negative values in the credit statement
        transactions_df['credit_debit'] = abs(transactions_df['credit_debit'])

        credit_debit_multiplier_list = []
        transactions_df.reset_index(drop=True, inplace=True) # fix idx for the iterrows
        for idx, row in transactions_df.iterrows():
            cur_balance = row['balance']
            if idx > 0:
                if cur_balance > prev_balance:
                    credit_debit_multiplier = 1
                else:
                    credit_debit_multiplier = -1
            else:
                credit_debit_multiplier = 1
            credit_debit_multiplier_list.append(credit_debit_multiplier)
            prev_balance = row['balance']

        transactions_df['credit_debit_multiplier'] = credit_debit_multiplier_list
        transactions_df['credit_debit'] = transactions_df['credit_debit'] * transactions_df['credit_debit_multiplier']
        transactions_df.drop(['credit_debit_multiplier'], axis=1, inplace=True)
        transactions_df = transactions_df.iloc[1:,:].copy() # drop the Beginning Balance
        transactions_df.sort_values(by='transaction_date', ascending=True, inplace=True)
        transactions_df_list.append(transactions_df)

    return pd.concat(transactions_df_list)

## Need to delete the extra pages in the pdf

In [102]:
marcus_annual_beginning_ending_balance_df = pd.DataFrame({
    '2021': [0, 11719.53],
    '2022': [11719.53, 4877.32],
    '2023': [4877.32, 28770.03],
    '2024': [28770.03, 30478.36],
})

def approx_sum(x):
    return round(sum(x), 2)

def test_annual_balance_marcus(df, year, ref_df):
    tmp = round(ref_df[year][0] + approx_sum(df['credit_debit']), 2)
    assert tmp == ref_df[year][1]

transactions_2021_df = parse_marcus_bank_statements_by_year(
    bank='marcus',
    year='2021',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)
transactions_2022_df = parse_marcus_bank_statements_by_year(
    bank='marcus',
    year='2022',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)
transactions_2023_df = parse_marcus_bank_statements_by_year(
    bank='marcus',
    year='2023',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)
transactions_2024_df = parse_marcus_bank_statements_by_year(
    bank='marcus',
    year='2024',
    bank_statements_file_path=BANK_STATEMENTS_FILE_PATH,
)

# test for regressions
test_annual_balance_marcus(
    df=transactions_2021_df,
    year='2021',
    ref_df=marcus_annual_beginning_ending_balance_df
)
test_annual_balance_marcus(
    df=transactions_2022_df,
    year='2022',
    ref_df=marcus_annual_beginning_ending_balance_df
)
test_annual_balance_marcus(
    df=transactions_2023_df,
    year='2023',
    ref_df=marcus_annual_beginning_ending_balance_df
)
test_annual_balance_marcus(
    df=transactions_2024_df,
    year='2024',
    ref_df=marcus_annual_beginning_ending_balance_df
)

transactions_df = pd.concat([
    transactions_2021_df,
    transactions_2022_df,
    transactions_2023_df,
    transactions_2024_df,
])

transactions_df['transaction_date'] = pd.to_datetime(transactions_df['transaction_date'])
transactions_df.sort_values(by='transaction_date', ascending=True, inplace=True)
transactions_df.reset_index(drop=True, inplace=True)
transactions_df['transaction_date'] = transactions_df['transaction_date'].astype(str)

KeyError: 'credit_debit'

In [None]:
description_category_mapping = {
    'ACH Deposit': 'Deposit',
    'ACH Withdrawal': 'Withdrawal',
    'Interest Paid': 'Interest',
    'SAV Decrease Int Paid': 'Interest',
    'SAV Increase Int Paid': 'Interest',
}


for k, v in description_category_mapping.items():
    transactions_df.loc[
        transactions_df['description'].str.contains(k),
        'category'
    ] = v

transactions_df.loc[transactions_df['category'].isna(), 'category'] = 'Other'

In [103]:
gc = gspread.service_account()
finance_tracker_db_spreadsheet = gc.open_by_key(SPREADSHEET_KEY)
marcus_worksheet = finance_tracker_db_spreadsheet.worksheet('marcus_bank_statements')
marcus_worksheet.update([transactions_df.columns.values.tolist()] + transactions_df.values.tolist())
marcus_worksheet.format("C:D", {"numberFormat": {"type": "CURRENCY"}})

{'spreadsheetId': '1CAyyf2kr-pS7LNX1a_0ithw6niL3Js3K4ZEOlwDViZY',
 'replies': [{}]}

# East West Bank Credit Card

In [98]:
def parse_lines_with_regex(lines, transaction_pattern):
    # Ref.: https://levelup.gitconnected.com/creating-a-bank-statement-parser-with-python-9223b895ebae
    transactions = []
    for line in lines:
        match = re.search(pattern=transaction_pattern, string=line)
        if match:
            transactions.append(match.groupdict())

    return pd.DataFrame(transactions)

transaction_pattern = (
    r"(?P<post_date>\d{2}/\d{2})\s*"
    r"(?P<transaction_date>\d{2}/\d{2})\s*"
    r"(?P<ref_num>\S*)\s*"
    r"(?P<description>.*?)(?=\$)"
    r"(?P<amount>\S.*)"
)

def clean_amount_col(x):
    # remove ($), (,), (alphabetical)
    return float(re.sub(r"[^\d|\.]", "", x.replace('$', '')))

def check_list_len_bool(l):
    if len(l) > 0:
        return True
    else:
        return False

def remove_indices(lst, indices):
    # Sort indices in descending order to avoid reindexing issues
    indices = sorted(indices, reverse=True)
    
    # Remove elements at each index
    for index in indices:
        if 0 <= index < len(lst):
            lst.pop(index)
        else:
            raise IndexError(f"Index {index} is out of bounds for list of length {len(lst)}")
    
    return lst

def drop_multiline_transactions(transaction_lines):
    transaction_lines = transaction_lines.copy()
    bad_idx_list = []
    for idx, i in enumerate(transaction_lines):
        # if not bool(re.match(r"^(\d{2}/\d{2})\s*(\d{2}/\d{2})", i)):
        if not bool(re.match(r"^(\d{2}/\d{2})\s*", i)):
            bad_idx_list.append(idx)
    transaction_lines = remove_indices(lst=transaction_lines, indices=bad_idx_list)
    return transaction_lines

def insert_na_for_missing_txn_date_and_ref_num(transaction_lines):
    # works only for when there's a post date and no transaction date
    null_idx_list = []
    for idx, i in enumerate(transaction_lines):
        if bool(re.match(r"^(\d{2}/\d{2})\s{8,}", i)):
            null_idx_list.append(idx)

    for idx in null_idx_list:
        transaction_lines[idx] = transaction_lines[idx][:8] + \
            transaction_lines[idx][:5] + \
            '    NA' + \
            transaction_lines[idx][10:]
    return transaction_lines

def process_transaction_lines(
    payments_and_other_credits_lines=[],
    purchases_and_other_debits_lines=[],
    fees_lines=[],
    interest_charged_lines=[],
):
    """
    Return the transactions df from the transaction lines.
    """
    try:
        payments_and_other_credits_lines = drop_multiline_transactions(payments_and_other_credits_lines)
        payments_and_other_credits_lines = insert_na_for_missing_txn_date_and_ref_num(payments_and_other_credits_lines)
        payments_and_other_credits_df = parse_lines_with_regex(payments_and_other_credits_lines, transaction_pattern)
        payments_and_other_credits_df['amount'] = payments_and_other_credits_df['amount'].apply(clean_amount_col)
        payments_and_other_credits_df['amount'] = payments_and_other_credits_df['amount'] * -1 # make negative
    except:
        payments_and_other_credits_df = pd.DataFrame([])
    try:
        purchases_and_other_debits_lines = drop_multiline_transactions(purchases_and_other_debits_lines)
        purchases_and_other_debits_lines = insert_na_for_missing_txn_date_and_ref_num(purchases_and_other_debits_lines)
        purchases_and_other_debits_df = parse_lines_with_regex(purchases_and_other_debits_lines, transaction_pattern)
        purchases_and_other_debits_df['amount'] = purchases_and_other_debits_df['amount'].apply(clean_amount_col)
    except:
        purchases_and_other_debits_df = pd.DataFrame([])
    try:
        fees_lines = drop_multiline_transactions(fees_lines)
        fees_lines = insert_na_for_missing_txn_date_and_ref_num(fees_lines)
        fees_df = parse_lines_with_regex(fees_lines, transaction_pattern)
        fees_df['amount'] = fees_df['amount'].apply(clean_amount_col)
    except:
        fees_lines_df = pd.DataFrame([])
    try:
        interest_charged_lines = drop_multiline_transactions(interest_charged_lines)
        interest_charged_lines = insert_na_for_missing_txn_date_and_ref_num(interest_charged_lines)
        interest_charged_df = parse_lines_with_regex(interest_charged_lines, transaction_pattern)
        interest_charged_df['amount'] = interest_charged_df['amount'].apply(clean_amount_col)
    except:
        interest_charged_df = pd.DataFrame([])

    transactions_df = pd.concat([
        payments_and_other_credits_df,
        purchases_and_other_debits_df,
        fees_df,
        interest_charged_df,
    ])
    transactions_df.drop(['ref_num'], axis=1, inplace=True)
    return transactions_df

def add_year_to_date(first_page, transactions_df):
    """
    Add the year to the date columns. Handle cases with two years (Jan/Dec).
    """
    date_range = first_page.split("\n")[0].split('Statement')[1].split('Page')[0].lstrip().rstrip()
    start_date = date_range.split(' - ')[0]
    end_date = date_range.split(' - ')[1]
    start_year = start_date[6:]
    start_month = start_date[:2]
    end_year = end_date[6:]
    end_month = end_date[:2]
    if start_year == end_year:
        transactions_df['post_date'] = transactions_df['post_date'] + '/' + start_year
        transactions_df['transaction_date'] = transactions_df['transaction_date'] + '/' + start_year
    else:
        month_year_mapping = {
            start_month: start_year,
            end_month: end_year,
        }
        transactions_df['post_date_month'] = transactions_df['post_date'].apply(lambda x: x.split('/')[0])
        transactions_df['transaction_date_month'] = transactions_df['transaction_date'].apply(lambda x: x.split('/')[0])
        transactions_df['post_date'] = transactions_df['post_date'] + \
            '/' + \
            transactions_df['post_date_month'].apply(lambda x: month_year_mapping[x])
        transactions_df['transaction_date'] = transactions_df['transaction_date'] + \
            '/' + \
            transactions_df['transaction_date_month'].apply(lambda x: month_year_mapping[x])
        transactions_df.drop(['post_date_month', 'transaction_date_month'], axis=1, inplace=True)

    transactions_df['date_range'] = date_range
    return transactions_df

def check_for_indices(lines, keyword, exact_match=True):
    idx_list = []
    for idx, i in enumerate(lines):
        if exact_match:
            if i == keyword:
                idx_list.append(idx)
        else:
            if i[:len(keyword)] == keyword:
                idx_list.append(idx)
    return idx_list

def parse_east_west_bank_credit_card_statements_by_year(credit_card, year, credit_card_statements_file_path):
    """
    Go through a year of monthly bank statements for a given bank and parse
    the statements and return a df.
    """
    file_path = os.path.join(credit_card_statements_file_path, credit_card, year)
    monthly_credit_card_statement_list = os.listdir(file_path)
    monthly_credit_card_statement_list = [i for i in monthly_credit_card_statement_list if i != '.DS_Store']
    transactions_df_list = []
    for monthly_credit_card_statement_file in monthly_credit_card_statement_list:
        month = monthly_credit_card_statement_file.split('_')[1].split('.')[0]
        with open(os.path.join(file_path, monthly_credit_card_statement_file), "rb") as file:
            pdf = pdftotext.PDF(file, physical=True)

        # collect lines
        lines_list = []
        for page in pdf:
            lines = page.split("\n")
            lines = [i.lstrip() for i in lines]
            lines = [i for i in lines if i != '']
            if [i for i in lines if 'Transactions' in i]:
                lines_list += lines

        payments_and_other_credits_start_idx_list = check_for_indices(lines_list, 'Payments and Other Credits')
        purchases_and_other_debits_start_idx_list = check_for_indices(lines_list, 'Purchases and Other Debits')
        total_this_period_idx_list = check_for_indices(lines_list, 'TOTAL THIS PERIOD', False)
        fees_start_idx_list = check_for_indices(lines_list, 'Fees')
        total_fees_this_period_idx_list = check_for_indices(lines_list, 'TOTAL FEES THIS PERIOD', False)
        interest_charged_start_idx_list = check_for_indices(lines_list, 'Interest Charged')
        interest_charged_idx_list = check_for_indices(lines_list, 'TOTAL INTEREST THIS PERIOD', False)
        if payments_and_other_credits_start_idx_list:
            payments_and_other_credits_lines = lines_list[
                payments_and_other_credits_start_idx_list[0] + 3: \
                total_this_period_idx_list[0]
            ]
        else:
            payments_and_other_credits_lines = []
        if purchases_and_other_debits_start_idx_list:
            purchases_and_other_debits_lines = lines_list[
                purchases_and_other_debits_start_idx_list[0] + 3: \
                total_this_period_idx_list[-1]
            ]
        else:
            purchases_and_other_debits_lines = []
        if fees_start_idx_list:
            fees_lines = lines_list[
                fees_start_idx_list[0] + 3: \
                total_fees_this_period_idx_list[0]
            ]
        else:
            fees_lines = []
        if interest_charged_start_idx_list:
            interest_charged_lines = lines_list[
                interest_charged_start_idx_list[0] + 3: \
                interest_charged_idx_list[0]
            ]
        else:
            interest_charged_lines = []

        transactions_df = process_transaction_lines(
            payments_and_other_credits_lines,
            purchases_and_other_debits_lines,
            fees_lines,
            interest_charged_lines,
        )
        transactions_df = add_year_to_date(pdf[2], transactions_df)
        transactions_df_list.append(transactions_df)
    return pd.concat(transactions_df_list)

In [99]:
east_west_bank_credit_card_annual_beginning_ending_balance_df = pd.DataFrame({
    '2017': [1937.56, 1748.56],
    '2018': [1748.56, 1790.84],
    '2019': [1790.84, 45.21],
    '2020': [45.21, 1080.03],
    '2021': [1080.03, 99.31],
    '2022': [99.31, 137.52],
    '2023': [137.52, 82.56],
    '2024': [82.56, -275.03],
})

transactions_2017_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2017',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)
transactions_2018_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2018',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)
transactions_2019_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2019',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)
transactions_2020_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2020',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)
transactions_2021_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2021',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)
transactions_2022_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2022',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)
transactions_2023_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2023',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)
transactions_2024_df = parse_east_west_bank_credit_card_statements_by_year(
    credit_card='east_west_bank',
    year='2024',
    credit_card_statements_file_path=CREDIT_CARD_STATEMENTS_FILE_PATH,
)

def approx_sum(x):
    return round(sum(x), 2)

def test_annual_balance_east_west_bank_credit_card(df, year, ref_df):
    tmp = round(ref_df[year][0] + approx_sum(df['amount']), 2)
    assert tmp == ref_df[year][1]

# test for regressions
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2017_df,
    year='2017',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2018_df,
    year='2018',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2019_df,
    year='2019',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2020_df,
    year='2020',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2021_df,
    year='2021',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2022_df,
    year='2022',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2023_df,
    year='2023',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)
test_annual_balance_east_west_bank_credit_card(
    df=transactions_2024_df,
    year='2024',
    ref_df=east_west_bank_credit_card_annual_beginning_ending_balance_df
)

total_transactions_df = pd.concat([
    transactions_2017_df,
    transactions_2018_df,
    transactions_2019_df,
    transactions_2020_df,
    transactions_2021_df,
    transactions_2022_df,
    transactions_2023_df,
    transactions_2024_df
])

total_transactions_df['post_date_fmt'] = pd.to_datetime(total_transactions_df['post_date'])
total_transactions_df['transaction_date_fmt'] = pd.to_datetime(total_transactions_df['transaction_date'])
total_transactions_df.sort_values(
    by=['post_date_fmt', 'transaction_date_fmt', 'amount', 'description'],
    ascending=[True, True, True, True],
    inplace=True
)
total_transactions_df.drop(['post_date_fmt', 'transaction_date_fmt'], axis=1, inplace=True)
total_transactions_df.reset_index(drop=True, inplace=True)

In [100]:
description_category_mapping = {
    'WAL-MART': 'Groceries',
    'WALMART': 'Groceries',
    'COSTCO WHSE': 'Groceries',
    '99 RANCH': 'Groceries',
    "KIM'S MART": 'Groceries',
    'ALBERTSONS': 'Groceries',
    'SAFEWAY': 'Groceries',
    'MANNA ORIENTAL': 'Groceries',
    'LUCKY ': 'Groceries',
    'MARINA FOOD': 'Groceries',
    'SPIRIT MARKET': 'Groceries',
    'WESTLAKE IGA MARKE': 'Groceries',
    'FOOD 4 LESS': 'Groceries',
    'SMART AND FINAL': 'Groceries',
    "TRADER JOE'S": 'Groceries',
    'OSAKA MARKET': 'Groceries',
    'DUFFL': 'Groceries',
    '7-ELEVEN': 'Groceries',
    'WHOLEFDS': 'Groceries',
    'KROGER': 'Groceries',
    'HANKOOK': 'Groceries',
    'Prime Video': 'Amazon',
    'AMZN': 'Amazon',
    'AmazonPckupCampusLckr': 'Amazon',
    'Amazon': 'Amazon',
    'aliexpress': 'E-commerce',
    'GEARBEST': 'E-commerce',
    'TARGET': 'Target',
    'CVS': 'CVS',
    'BEST BUY': 'Best Buy',
    'BESTBUY': 'Best Buy',
    'VENMO': 'Venmo',
    'PAYPAL': 'PayPal',
    '76 - ': 'Gas',
    'COSTCO GAS': 'Gas',
    'SHELL OIL': 'Gas',
    'CHEVRON': 'Gas',
    'ROTTEN ROBBIE': 'Gas',
    'EXXONMOBIL': 'Gas',
    'CONSERV FUEL': 'Gas',
    'SAN PASO TRUCK STOP': 'Gas',
    'FUEL DEPOT': 'Gas',
    'VALERO': 'Gas',
    'BEL AIR': 'Gas',
    'PARKING': 'Parking',
    'UCD TAPS': 'Parking',
    'UCSB TPS': 'Parking',
    'MTA METER': 'Parking',
    'PARKMOBILE': 'Parking',
    'CHIPOTLE': 'Restaurant',
    'PHO KING 4': 'Restaurant',
    'PHO MAI': 'Restaurant',
    'SUBWAY': 'Restaurant',
    'HUB MARKET': 'Restaurant',
    'UCD SOUTH SILO': 'Restaurant',
    'MCDONALD': 'Restaurant',
    'Taqueria': 'Restaurant',
    'STARBUCKS': 'Restaurant',
    'SUJU': 'Restaurant',
    'SLAP FACE': 'Restaurant',
    'TACOS': 'Restaurant',
    'CMSVEND': 'Restaurant',
    'SWEETHONEY': 'Restaurant',
    'TEASPOON': 'Restaurant',
    'JOE THE JUICE': 'Restaurant',
    'SWEETFIN': 'Restaurant',
    'TEXAS RDHSE': 'Restaurant',
    'YOSHINOYA': 'Restaurant',
    'HUNGRYPANDA': 'Restaurant',
    'Nikka Ramen': 'Restaurant',
    'EINSTEINMOBILEAPP': 'Restaurant',
    'COLDSTONE': 'Restaurant',
    'BUFFALO WILD WINGS': 'Restaurant',
    'BUFFET': 'Restaurant',
    "PING'S BISTRO": 'Restaurant',
    'SUSHI': 'Restaurant',
    "OPPI'Z": 'Restaurant',
    "McDonald's": 'Restaurant',
    'RESTAURANT': 'Restaurant',
    'DINER': 'Restaurant',
    'UNA MAS': 'Restaurant',
    'CANTEEN VENDING': 'Restaurant', 
    'SWEET AND SHAVE': 'Restaurant',
    'ABERDEEN CAFE': 'Restaurant',
    "RAJA'S TANDOOR": 'Restaurant',
    "DOMINO'S": 'Restaurant',
    'CAFE': 'Restaurant',
    'THE HABIT': 'Restaurant',
    'THE SHOP CAFE': 'Restaurant',
    'PEETS COFFEE': 'Restaurant',
    'BLAZE PIZZA': 'Restaurant',
    'DUTCHBROSCO': 'Restaurant',
    'THE LUNCH BOX': 'Restaurant',
    'THE GURKHA KITCHEN': 'Restaurant',
    'RESTAURA': 'Restaurant',
    'LANZHOU': 'Restaurant',
    'GEN KOREAN BBQ': 'Restaurant',
    'Barbareno': 'Restaurant',
    'FASTRAK': 'Government',
    'DMV': 'Government',
    'KAISER': 'Kaiser',
    'APPLE.COM/BILL': 'Recurring Payments',
    'KAISER PAY': 'Recurring Payments',
    'Audible': 'Recurring Payments',
    'LASTPASS': 'Recurring Payments',
    'ITUNES.COM/BILL': 'Recurring Payments',
    'PROACTIV': 'Recurring Payments',
    'AAA INSURANCE': 'Recurring Payments',
    'ELLIOTT WAVE': 'Trading',
    'TRADE IDEAS': 'Trading',
    '4X SOLUTIONS': 'Trading',
    'LIVETRADERS': 'Trading',
    'SIERRA CHART': 'Trading',
    'G7FX': 'Trading',
    'WASH LAUNDRY': 'Laundry',
    'HOSTEL WORLD': 'Travel',
    ' HK ': 'Travel',
    'HONG KO': 'Travel',
    'ISQUARE': 'Travel',
    'JASONS': 'Travel',
    "WATSON'S": 'Travel',
    ' HO': 'Travel',
    'MANNINGS': 'Travel',
    'FRGN TRANS FEE-': 'Travel',
    'BEIJING CN': 'Travel',
    'BEIJING': 'Travel',
    'SHENZHEN CN': 'Travel',
    'SUP TAHOE': 'Travel',
    'AIRBNB': 'Travel',
    'DOLE PLANTATION': 'Travel',
    'OAHU': 'Travel',
    'HEARST CASTLE': 'Travel',
    'ISLAND PACKERS': 'Travel',
    'TURO': 'Travel',
    'FOX THEATRE': 'Travel',
    'TICKETSATWORK': 'Travel',
    'AIR CAN': 'Airplane Tickets',
    'UNITED ': 'Airplane Tickets',
    'AIR TRANS': 'Airplane Tickets',
    'SPIRIT': 'Airplane Tickets',
    'CITY COLLEGE': 'Education',
    'CCSF': 'Education',
    'OHLONE': 'Education',
    'COLLEGE': 'Education',
    'UCD': 'Education',
    'UDEMY': 'Education',
    'CHEGG': 'Education',
    'DATACAMP': 'Education',
    'COURSERA': 'Education',
    'MEMRISECOM': 'Education',
    'GRAMMARLY': 'Education',
    'CHINESE ZERO': 'Education',
    'KAPLAN': 'Education',
    'WYZANT': 'Education',
    'AMC': 'Movies',
    'adidas': 'Clothes',
    "VICTORIA'S SECRET": 'Clothes',
    'MACY': 'Clothes',
    'EXPRESS': 'Clothes',
    'SAKS': 'Clothes',
    'TANDY LEATHER': 'Clothes',
    'ADIDAS': 'Clothes',
    "LEVI'S OUTLET": 'Clothes',
    'DOCKERS': 'Clothes',
    'FITNESS': 'Fitness',
    'BADMINTON': 'Fitness',
    'UCSB RECREATION': 'Fitness',
    'luxiaojunbarbell': 'Fitness',
    'BALLROOM CONNECTION': 'Fitness',
    'PAYMENT': 'CC Payment',
    'PAYMENT THANK': 'CC Payment',
    'PAYMENT THANK YOU': 'CC Payment',
    'PAYMENT THANK YOU': 'CC Payment',
    'POINTS REDEEMED': 'Credit Card',
    'ANNUAL MEMBERSHIP FEE': 'Credit Card',
    'CREDIT ADJUSTMENT': 'Credit Card',
    'ONLINE SPEND PROMOTION': 'Credit Card',
}


for k, v in description_category_mapping.items():
    total_transactions_df.loc[
        total_transactions_df['description'].str.contains(k),
        'category'
    ] = v

In [78]:
# pd.set_option('max_colwidth', 1000)
pd.set_option("display.max_colwidth", None)
# total_transactions_df.style.set_properties(subset=['text'], **{'width': '1000px'})

In [84]:
total_transactions_df.loc[total_transactions_df['category'].isna(), 'category'] = 'Other'

In [85]:
total_transactions_df.groupby(['category'])['amount'].sum()

category
Airplane Tickets        3865.73
Amazon                  7933.23
Best Buy                1382.45
CC Payment           -118568.32
Clothes                  937.54
Credit Card            -1110.04
E-commerce               319.54
Education               6073.84
Fitness                 1198.28
Gas                     4778.37
Government               946.00
Groceries               8507.37
Kaiser                  3732.68
Laundry                  245.00
Movies                    14.74
Other                  14368.73
Parking                  712.85
PayPal                  1023.94
Recurring Payments     20467.04
Restaurant              2245.80
Target                   189.80
Trading                 9564.92
Travel                  3377.76
Venmo                  25580.16
Name: amount, dtype: float64

In [81]:
# list(set(total_transactions_df.loc[
#     total_transactions_df['category'].isna(),
#     'description'
# ]))
total_transactions_df.loc[
    total_transactions_df['category'].isna(),
    ['description', 'amount']
].sort_values(by='amount').head(50)

Unnamed: 0,description,amount
1941,EIG*BLUEHOST.COM 888-4014678 UT,-395.28
1848,GLANBIA PERFORMANCE AURORA IL,-33.86
2539,"5401 PROV CR Groupon, Inc. 312-28864",-33.04
2435,GUM.CO/CC* CURTIS EINS HTTPSGUMROAD. CA,-30.0
2567,FH* BLUE PLANET ADVENT WWW.BLUEPLANE CO,-26.58
1764,UQ MILPITAS US0048 MILPITAS CA,-22.04
1493,ERENTERPLAN1889 INSURA 888-205-8118 TX,-0.64
708,USPS PO 0585860456 WOODLAND CA,0.49
1557,THE UPS STORE 0533 WOODLAND CA,1.11
446,PCCD - MERRITT OAKLAND CA,2.0


In [86]:
gc = gspread.service_account()
finance_tracker_db_spreadsheet = gc.open_by_key(SPREADSHEET_KEY)
east_west_bank_credit_card_statements_worksheet = finance_tracker_db_spreadsheet.worksheet('east_west_bank_credit_card_statements')
east_west_bank_credit_card_statements_worksheet.update([total_transactions_df.columns.values.tolist()] + total_transactions_df.values.tolist())
east_west_bank_credit_card_statements_worksheet.format("D:D", {"numberFormat": {"type": "CURRENCY"}})
east_west_bank_credit_card_statements_worksheet.format("A:B", {"numberFormat": {"type": "DATE_TIME"}})

{'spreadsheetId': '1CAyyf2kr-pS7LNX1a_0ithw6niL3Js3K4ZEOlwDViZY',
 'replies': [{}]}

# Discover Credit Card

In [19]:
DISCOVER_CREDIT_CARD_STATEMENTS_FILE_PATH='/Users/jaredyu/Desktop/finances/finance_tracker_app/data/credit_card_statements/discover'
statement_filename_list = os.listdir(DISCOVER_CREDIT_CARD_STATEMENTS_FILE_PATH)
df = pd.read_csv(
    os.path.join(
        DISCOVER_CREDIT_CARD_STATEMENTS_FILE_PATH,
        statement_filename_list[0]
    )
)

In [21]:
gc = gspread.service_account()
finance_tracker_db_spreadsheet = gc.open_by_key(SPREADSHEET_KEY)
east_west_bank_credit_card_statements_worksheet = finance_tracker_db_spreadsheet.worksheet('discover_credit_card_statements')
east_west_bank_credit_card_statements_worksheet.update([df.columns.values.tolist()] + df.values.tolist())
east_west_bank_credit_card_statements_worksheet.format("D:D", {"numberFormat": {"type": "CURRENCY"}})
east_west_bank_credit_card_statements_worksheet.format("A:B", {"numberFormat": {"type": "DATE_TIME"}})

{'spreadsheetId': '1CAyyf2kr-pS7LNX1a_0ithw6niL3Js3K4ZEOlwDViZY',
 'replies': [{}]}

In [24]:
round(sum(df['Amount']), 2)

0.0