Test NB

In [15]:
import asyncio
import json
from autogen_agentchat.agents import AssistantAgent, CodeExecutorAgent, UserProxyAgent
from autogen_agentchat.messages import TextMessage
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import TextMentionTermination, MaxMessageTermination
from agents.prompts.unstructured_text_parser_message import UNSTRUCTURED_TEXT_PARSER_SYSTEM_MESSAGE
from autogen_ext.models.openai import OpenAIChatCompletionClient
from typing import List, Dict
import pandas as pd
import json
import pypdf
from dotenv import load_dotenv
import os


In [16]:
load_dotenv()

True

In [17]:

def extract_text_from_pdf(file_path: str) -> str:
    """
    Extracts all text content from a given PDF file.

    This tool's purpose is to provide the raw text data from a PDF to an LLM agent.
    The agent will then be responsible for writing code to parse this unstructured text.

    Args:
        file_path (str): The local path to the PDF file.

    Returns:
        str: A single string containing all the extracted text from the PDF.
             Returns an error message string if extraction fails.
    """
    try:
        reader = pypdf.PdfReader(file_path)
        full_text = ""
        for page in reader.pages:
            full_text += page.extract_text() + "\n--- End of Page ---\n"
        return full_text
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"


In [18]:
parsed_text = extract_text_from_pdf('temp/test_statement2.pdf')

In [None]:
import pypdf
import os
from typing import List

def convert_pdfs_in_dir(input_dir: str, output_dir: str = "temp") -> List[str]:
    """
    Scans a directory for PDF files, extracts text, and saves each to a text file.

    This function searches the specified input directory for any files with a '.pdf'
    extension. It then extracts the text from each PDF and saves it to a new file 
    in the output directory. The output files are named sequentially 
    (e.g., statement1.txt, statement2.txt).

    Args:
        input_dir (str): The path to the directory containing the PDF files.
        output_dir (str): The name of the directory to save the text files.
                          Defaults to 'temp'.

    Returns:
        List[str]: A list of paths to the newly created text files.
                   Returns an empty list if the input directory doesn't exist
                   or contains no PDF files.
    """
    # Check if the input directory exists
    if not os.path.isdir(input_dir):
        print(f"Error: Input directory '{input_dir}' not found.")
        return []

    # Find all files in the directory that end with .pdf (case-insensitive)
    pdf_files = [
        os.path.join(input_dir, filename)
        for filename in os.listdir(input_dir)
        if filename.lower().endswith(".pdf") and os.path.isfile(os.path.join(input_dir, filename))
    ]

    if not pdf_files:
        print(f"No PDF files found in '{input_dir}'.")
        return []

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    created_text_files = []

    # Enumerate through the list of discovered PDFs to process them
    for i, file_path in enumerate(pdf_files, start=1):
        try:
            print(f"Processing '{file_path}'...")
            reader = pypdf.PdfReader(file_path)
            full_text = ""
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    full_text += page_text + "\n"
            
            # Define the output file name and path
            output_filename = f"statement{i}.txt"
            output_path = os.path.join(output_dir, output_filename)
            
            # Write the extracted text to the new file
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(full_text)
            
            created_text_files.append(output_path)
            print(f"Successfully created '{output_path}'")

        except Exception as e:
            print(f"Error processing file {file_path}: {str(e)}")
            
    return created_text_files

    

In [None]:
source_directory = "temp"

# Call the function to convert all PDFs in the directory
saved_files = convert_pdfs_in_dir(source_directory)

print("\n--- Conversion Complete ---")
if saved_files:
    print("The following text files were created in the 'temp' directory:")
    for file in saved_files:
        print(f"- {file}")
else:
    print("No files were created.")

In [19]:
parsed_text



"Account Summary Previous Balance $2,255.91 Payments - $2,255.91 Other Credits - $901.15 Transactions + $2,607.90 Cash Advances + $0.00 Fees Charged + $0.00 Interest Charged + $0.00 New Balance = $1,706.75 Credit Limit $30,000.00 Available Credit  (as of Jul 18, 2025) $28,293.25 Cash Advance Credit Limit $9,000.00 Available Credit for Cash Advances $9,000.00 \nPayment Information Payment Due Date Aug 12, 2025 For online and phone payments, the deadline is 12 midnight ET, except on the statement closing date when the deadline is 8 p.m. ET. Upcoming statement closing date: August 18, 2025 New Balance $1,706.75 Minimum Payment Due $25.00 LATE PAYMENT WARNING:  If we do not receive your minimum payment by your due date, you may have to pay a late fee of up to $40.00.  \nMINIMUM PAYMENT WARNING:  Even if you make no more charges with this card, if you make only the minimum payment each month we estimate you will never pay off the balance shown on this statement because your payment will be less than the interest charged each month. If you make more than the minimum payment each period, you will pay less in interest and pay off your balance sooner. For example, if you instead paid $68.00 per month, you would pay off the balance shown on this statement in around 3 years. \n \nIf you would like information about credit counseling services, call 888-326-8055. \n     \nAccount Notifications \nPlease visit capitalone.com for your most current Rewards Program Terms and Conditions. You can also find changes to your Rewards by logging into your account and navigating to the Rewards FAQ section. \nPage 1 of 5 Venture X Card | Visa Infinite ending in 6346 Jun 18, 2025 - Jul 18, 2025   |  31 days in Billing Cycle \nPay or manage your account at  capitalone.com Customer Service: 800-227-4825 See reverse for Important Information \n Rewards Summary               Rewards as of: 07/18/2025 Rewards Balance 31,736 Track and redeem your rewards with our mobile app or on  capitalone.com Previous Balance Earned This Period Redeemed this period 27,392 4,344 0\nMOHIT AGGARWAL 15237 BROOKSTONE ST FONTANA, CA 92336-4051 \nPayment Due Date:  Aug 12, 2025 Account ending in 6346 New Balance $1,706.75 Minimum Payment Due $25.00 Amount Enclosed $  ________ Capital One P.O. Box 60519 City of Industry\xa0\xa0 CA 91716-0519 FDDTAFFAFDDAAFAFTTAFAATDDAFAADAFATAAFDTTDFDDDDFFDATTAFAFDDATTDTFT Please send us this portion of your statement and only one check (or one money order) payable to Capital One to ensure your payment is processed promptly. Allow at least seven business days for delivery. 1 4147099322906346 18 1706752255910025004 \n--- End of Page ---\nHow can IAvoid Paying Interest Charges?  If you pay your New Balance in full by the due date  each month ,we will not charge interest on new transactions that post to the purchase balance. If you have been paying in full  without Interest Charges, but fail to pay your next New Balance in full, we will charge interest on the unpaid balance. Interest Charges on Cash Advances and Special Transfers start on the transaction date. Promotional offers may allow you to pay less than the total New Balance and avoid paying interest on new transactions that post to your purchase balance. See the front of your statement for additional information.   How is the Interest Charge Determined?  Interest Charges accrue from the date of the transaction, date the transaction is processed or the first day of the Billing Cycle. Interest accrues daily on every unpaid amount until it is paid in full. Interest accrued during aBilling Cycle posts to your account at the end of the Billing cycle and appears on your next statement. You may owe Interest Charges even if you pay the entire New Balance one month, but did not do so the prior month. Once you start accruing Interest Charges, you generally must pay your New Balance in full two consecutive Billing Cycles before Interest Charges stop being posted to your Statement. Interest Charges are added to the corresponding segment of your account.   Do you assess aMinimum Interest Charge? We may assess aminimum Interest Charge of $0.00 for each Billing Cycle if your account is subject to an Interest Charge.   How do you Calculate the Interest Charge?  We use amethod called Average Daily Balance (including new transactions).  1. First, for each segment we take the beginning balance each day and add in new transactions and the periodic Interest Charge on the previous day's balance. Then we subtract any payments and credits for that segment as of that day. The result is the daily balance for each segment. However, if your previous statement balance was zero or acredit amount, new transactions which post to your purchase segment are not added to the daily balance.   2. Next, for each segment, we add the daily balances together and divide the sum by the number of days in the Billing Cycle. The result is the Average Daily Balance for each segment.  3. At the end of each Billing Cycle, we multiply your Average Daily Balance for each segment by the daily periodic rate (APR divided by 365) for that segment, and then we multiply the result by the number of days in the Billing Cycle. We add the Interest Charges for all segments together. The result is your total Interest Charge for the Billing Cycle.  The Average Daily Balance is referred to as the Balance Subject to Interest Rate in the Interest Charge Calculation section of this Statement.  NOTE: Due to rounding or aminimum Interest Charge, this calculation may vary slightly from the Interest Charge actually assessed.   How can IAvoid Membership Fees? If aRenewal Notice is printed on this statement, you may avoid paying an annual membership Fee by contacting Customer Service fewer than 40 days after the annual membership Fee was assessed to request that we close your account. To avoid paying amonthly membership Fee, close your account and we will stop assessing your monthly membership Fee.  How can IClose My Account? You can contact Customer Service anytime to request that we close your account. \nHow do you Process Payments? When you make apayment, you authorize us to initiate an ACH or electronic payment that will be debited from your bank account or other related account. When you provide a check or check information to make a payment, you authorize us to use information from the check to make aone-time ACH or other electronic transfer from your bank account. We may also process it as acheck transaction. Funds may be withdrawn from your bank account as soon as the same day we process your payment.   How do you Apply My Payment? We generally apply payments up to your Minimum Payment first to the balance with the lowest APR (including 0% APR), and then to balances with higher APRs. We apply any part of your payment exceeding your Minimum Payment to the balance with the highest APR, and then to balances with lower APRs.  Billing Rights Summary  (Does not Apply to Small Business Accounts) What To Do If You Think You Find AMistake On Your Statement: If you think there is an error on your statement, write to us at: P.O. Box 30285, Salt Lake City, UT 84130-0285.   In your letter, give us the following information: • Account information: Your name and account number.  • Dollar amount: The dollar amount of the suspected error. •Description of Problem: If you think there is an error on your bill, describe what you believe is wrong and why you believe it is amistake. You must contact us within 60 days after the error appeared on your statement. You must notify us of any potential errors in writing. You may call us or notify us electronically, but if you do we are not required to investigate any potential errors and you may have to pay the amount in question. We will notify you in writing within 30 days of our receipt of your letter. While we investigate whether or not there has been an error, the following are true: •We cannot try to collect the amount in question, or report you as delinquent on that amount. The charge in question may remain on your statement, and we may continue to charge you interest on that amount. But, if we determine that we made amistake, you will not have to pay the amount in question or any interest or other fees related to that amount. •While you do not have to pay the amount in question until we send you anotice about the outcome of our investigation, you are responsible for the remainder of your balance. •We can apply any unpaid amount against your credit limit. Within 90 days of our receipt of your letter, we will send you awritten notice explaining either that we corrected the error (to appear on your next statement) or the reasons we believe the bill is correct.  Your Rights If You Are Dissatisfied With Your Purchase: If you are dissatisfied with the goods or services that you have purchased with your credit card, and you have tried in good faith to correct the problem with the merchant, you may have the right not to pay the remaining amount due on the purchase. To use this right, the following must be true: 1) You must have used your credit card for the purchase. Purchases made with cash advances from an ATM or with acheck that accesses your credit card account do not qualify; and  2) You must not yet have fully paid for the purchase.  If all of the criteria above are met and you are still dissatisfied with the purchase, contact us in writing at: P.O. Box 30285, Salt Lake City, UT 84130-0285. While we investigate, the same rules apply to the disputed amount as discussed above. After we finish our investigation, we will tell you our decision. At that point, if we think you owe an amount and you do not pay we may report you as delinquent.  © 2023 Capital One. Capital One is a federally registered service mark ETC-08 07/13/2023 \nHow do I Make Payments?  You may make your payment in several ways: 1. Online Banking by logging into your account; 2. Capital One Mobile Banking app for approved electronic devices; 3. Calling the telephone number listed on the front of this statement and providing the required payment information; 4. Sending mail payments to the address on the front of this statement with the payment coupon or your account information. What is the cutoff time to make a same day payment? ¨For online and phone payments, payments submitted by 12 midnight ET will typically post on the same day. However, if you pay your bill on your statement closing date, payments made after 8 p.m. ET will post on the following day. Keep in mind, it may take another business day or longer for credit to be available.\xa0 ¨For mail, as of the business day we receive it, as long as it is received by 5 p.m. local time at our processing center. You must send the bottom portion of this statement and your check to the payment address on the front of this statement. Please allow at least seven (7) business days for mail delivery. Mailed payments received by us at any other location or payments in any other form may not be credited as of the day we receive them. 6c2a9fc9-060c-4bb0-a1eb-68f5f90b2237 \nChanging your mailing address? You can change your address by signing into your account online or by calling Customer Service. Any written request on this form will not be honored. \n                Pay online at capitalone.com                Pay using the Capital One mobile app                                Customer Service 800-227-4825 \n\n--- End of Page ---\nAdditional Information on the next page \n     \n Page 2 of 5 Venture X Card | Visa Infinite ending in 6346 Jun 18, 2025 - Jul 18, 2025   |  31 days in Billing Cycle \nTransactions Visit capitalone.com  to see detailed transactions. MOHIT AGGARWAL #6346: Payments, Credits and Adjustments Trans Date Post Date Description Amount Jun 25 Jun 27 LULLABI INN920-8683135WI - $125.97 Jul 7 Jul 7 CAPITAL ONE MOBILE PYMT - $2,255.91 Jul 13 Jul 15 HOMEGOODS 275RCH CUCAMONGACA - $21.53 MOHIT AGGARWAL #6346: Transactions Trans Date Post Date Description Amount Jun 17 Jun 18 MED*POMONA VALLEY HOSP MC909-865-9500CA $249.60 Jun 22 Jun 23 BAPS SHAYONA-LA RETAILCHINO HILLSCA $25.95 Jun 22 Jun 23 BAPS SHAYONA-LA RETAILCHINO HILLSCA $8.00 Jun 25 Jun 27 TARGET        00019588FONTANACA $10.18 Jun 28 Jun 30 SQ *THE COPPER QBig Bear LakeCA $6.95 Jun 29 Jun 30 OLIVE GARDEN ZK 0026426RIALTOCA $72.80 Jun 30 Jul 1 LINQ LV DEP-NOSHW-CXL8662094732NV $186.96 Jun 30 Jul 1 BOOST MOBILEHTTPS://WWW.DCO $16.49 Jun 30 Jul 1 UCW STARBUCKSUNIVERSAL CITCA $23.33 Jun 30 Jul 1 PALACE CAFEUNIVERSAL CITCA $30.71 Jun 30 Jul 1 PALACE CAFEUNIVERSAL CITCA $15.35 Jul 1 Jul 3 ANIL P. PUNJABI MD DDSREDLANDSCA $85.00 Jul 1 Jul 3 85C BAKERY CAFE USARANCHO CUCAMOCA $27.25 Jul 2 Jul 4 VONS #1914RANCHO CUCAMOCA $5.99 Jul 2 Jul 4 SQ *HANDEL'S RANCHO CUCAMRancho CucamoCA $20.50 Jul 5 Jul 7 SQ *HANDEL'S RANCHO CUCAMRancho CucamoCA $13.50 Jul 5 Jul 7 TRADER JOE S #217RANCHO CUCAMOCA $42.49 Jul 6 Jul 7 HOMEGOODS 275RCH CUCAMONGACA $81.31 Jul 6 Jul 8 FIEND INC.RANCHO CUCAMOCA $6.36 Jul 6 Jul 8 ROSS STORES #212RCH CUCAMONGACA $8.60 MOHIT AGGARWAL #6346: Total Transactions $937.32 \nMOHIT AGGARWAL #5171: Payments, Credits and Adjustments Trans Date Post Date Description Amount Jul 16 Jul 18 GROVE ADVANCED IMAGING CERCH CUCAMONGACA - $250.00 \n--- End of Page ---\n     \nAdditional Information on the next page \nVenture X Card | Visa Infinite ending in 6346 Jun 18, 2025 - Jul 18, 2025   |  31 days in Billing Cycle Page 3 of 5 \nTransactions (Continued) MOHIT AGGARWAL #5171: Transactions Trans Date Post Date Description Amount HIMANI SOOD #4313: Payments, Credits and Adjustments Trans Date Post Date Description Amount Jun 25 Jun 26 TARGET        00019588FONTANACA - $10.86 Jun 28 Jun 30 HOTELCOM73121554950063HOTELS.COMWA - $192.25 Jun 28 Jun 30 HOTELCOM73121554950063HOTELS.COMWA - $192.25 Jul 15 Jul 17 GROVE DIAGNOSTICRCH CUCAMONGACA - $108.29 HIMANI SOOD #4313: Transactions Trans Date Post Date Description Amount HIMANI SOOD #5453: Payments, Credits and Adjustments Trans Date Post Date Description Amount HIMANI SOOD #5453: Transactions Trans Date Post Date Description Amount Jun 19 Jun 20 AHAVA MED SPA PC909-204-7373CA $150.00 Jun 22 Jun 23 BJS RESTAURANTS 433RANCHO CUCAMOCA $84.97 Jun 24 Jun 26 RED TOMATOES FARMERRANCHO CUCAMOCA $35.36 Jun 25 Jun 27 SANJHA SAUDHAFONTANACA $44.75 Jun 26 Jun 28 STATERBROS073MONTCLAIRCA $55.00 Jun 27 Jun 27 IC* INSTACART*1102INSTACART.COMCA $48.72 Jun 27 Jun 30 SANJHA SAUDHAFONTANACA $38.56 Jun 28 Jun 30 THE VILLAGE SWEET SHOPPEBIG BEAR LAKECA $10.00 Jun 29 Jul 1 JOHN WAYNE AIRPORTSANTA ANACA $4.00 Jun 30 Jun 30 WORKING ADVANTAGE800-565-3712FL $957.58 Jun 30 Jul 1 USH PARKING RCUNIVERSAL CITCA $35.00 Jul 5 Jul 7 ONT AIRPT PRKING LOT 4ONTARIOCA $10.00 Jul 6 Jul 7 MENDOCINOFARMSORDER.MENDOCICA $23.19 Jul 6 Jul 7 KOHL'S #0602FONTANACA $3.25 Jul 9 Jul 10 CVS/PHARMACY #08589800-746-7287CA $7.31 Jul 10 Jul 11 AMAZON MKTPL*NL68Z08Y1Amzn.com/billWA $15.70 Jul 11 Jul 14 AMAZON MKTPL*NL36229U1Amzn.com/billWA $19.58 Jul 11 Jul 14 AMAZON MKTPL*NL2ZY29V1Amzn.com/billWA $21.74 \n--- End of Page ---\n     \nAdditional Information on the next page \nVenture X Card | Visa Infinite ending in 6346 Jun 18, 2025 - Jul 18, 2025   |  31 days in Billing Cycle Page 4 of 5 \nTransactions (Continued) Trans Date Post Date Description Amount Jul 11 Jul 14 AMAZON MKTPL*NR8VO2OA0Amzn.com/billWA $41.76 Jul 12 Jul 12 AMAZON RETA* NR36J3ZS0WWW.AMAZON.COWA $14.84 Jul 14 Jul 15 BONITA OBSTETRICS AND GYN909-3922002CA $49.27 HIMANI SOOD #5453: Total Transactions $1,670.58 \nTotal Transactions for This Period $2,607.90 Fees Trans Date Post Date Description Amount Total Fees for This Period $0.00 Interest Charged Interest Charge on Purchases $0.00 Interest Charge on Cash Advances $0.00 Interest Charge on Other Balances $0.00 Total Interest for This Period $0.00 Totals Year-to-Date Total Fees charged $0.00 Total Interest charged $0.00 \nInterest Charge Calculation Your Annual Percentage Rate (APR) is the annual interest rate on your account. Type of Balance Annual Percentage Rate (APR) Balance Subject to Interest Rate Interest Charged Purchases 25.24% P $0.00 $0.00 Cash Advances 28.24% P $0.00 $0.00 Variable APRs:  If you have a letter code displayed next to any of the above APRs, this means they are variable APRs. They may increase or decrease based on one of the following indices (reported in The Wall Street Journal) as described below. Code next to your  APR(s) How do we calculate your APR(s)? When your APR(s) will change PL Prime Rate + margin 3 month LIBOR + margin The first day of the Billing Cycles that end in Jan., April, July and Oct. DF Prime Rate + margin 1 month LIBOR + margin The first day of each Billing Cycle \n--- End of Page ---\n     Venture X Card | Visa Infinite ending in 6346 Jun 18, 2025 - Jul 18, 2025   |  31 days in Billing Cycle Page 5 of 5 \n\n--- End of Page ---\n"

" \n \n863300\nMOHIT AGGARWAL\n \n15237 BROOKSTONE ST\n \nFONTANA  CA  92336-4051\nCostco Anywhere Visa\n®\n Card by Citi\nMOHIT AGGARWAL\nMember Since 2014  Account number ending in: 8633 \nBilling Period: \n 07/02/25-08/01/25\nBilling Inquiries and Customer Service\nPO Box 790046 ST. LOUIS, MO 63179-0046\n1-855-378-6467, (TTY: 711)\nwww.citicards.com\nAUGUST  STATEMENT\nMinimum payment due:\n \n$41.00\nNew balance as of 08/01/25:\n \n$910.51\nPayment due date:\n \n08/27/25\nLate Payment Warning:\nIf we do not receive your Minimum Payment by the\n \ndate listed above, you may have to pay a late fee of up to $41 and your APRs\n \nmay be increased up to the Penalty APR of 29.99%.\nFor information about credit counseling services, call 1-877-337-8187 (TTY: 711). \nAccount Summary\nPrevious balance\n \n$1,041.44\nPayments\n-\n$1,041.44\nCredits\n-\n$36.28\nPurchases\n \n+$946.79\nCash advances\n \n+$0.00\nFees\n \n+$0.00\nInterest\n \n+$0.00\nNew balance\n \n$910.51\nCredit Limit\nCredit Limit\n \n$7,000\nIncludes $1,400.00 cash advance limit\nAvailable Credit Limit\n \n$6,089\nIncludes $1,400\n \navailable for cash advance\nCostco Cash Back\n \nRewards Summary\nas of 08/01/25\n$179.26\n» See page 3 for more information\n \nabout your rewards\nFor Payments, send check to: Citi Cards, PO BOX 60734, City of Industry CA, 91716-0734\nCostco Anywhere Visa\n®\n Card\nPO Box 790057\n \nSaint Louis, MO 63179-0057\nYour Monthly Statement\n \nis Enclosed\nPay your bill from virtually anywhere with the\n \nCiti Mobile\n®\n App and Citi\n®\n Online\nTo download:\n \nText \n'App15'\n to \nMyCiti (692484)\n \nor go to your device's app store.\n \nOr visit \nwww.citicards.com\n \n   \nMinimum payment due\n \n$41.00\nNew balance\n \n$910.51\nPayment due date\n \n08/27/25\nAmount enclosed: \n$\nAccount number ending in 8633 \nPlease make check payable to Citi Cards.\nCiti Cards\n \nPO BOX 60734\n \nCity of Industry CA 91716-0734\n  \n--- End of Page ---\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n           \n           \n863300\nCARDHOLDER SUMMARY\nMOHIT AGGARWAL\n \nCard ending in 8633\nNew Charges\n \n$451.75\nHIMANI SOOD\n \nCard ending in 1325\nNew Charges\n \n$495.04\nACCOUNT SUMMARY\nSale\n \nDate\nPost\n \nDate\nDescription\nAmount\nPayments, Credits and Adjustments\n07/22\nELECTRONIC PAYMENT-THANK YOU\n-\n$1,041.44\n06/30\n07/02\nHEADWAY                  NEW YORK     NY\n-\n$10.00\n07/10\n07/10\nHEADWAY                  NEW YORK     NY\n-\n$10.00\n07/16\n07/16\nCOSTCO WHSE #0627        FONTANA      CA\n-\n$16.28\nMOHIT AGGARWAL \nStandard Purchases\n07/05\n07/05\nHEADWAY                  HEADWAY.CO   NY\n $10.00 \n07/06\n07/06\nCOSTCO WHSE #0678        RANCHO CUCAMOCA\n $312.41 \n07/12\n07/12\nTRADER JOE S #217        RANCHO CUCAMOCA\n $66.64 \n07/12\n07/12\nSQ *HANDEL'S RANCHO CUCAMRancho\n \nCucamoCA\n $13.50 \n07/13\n07/13\nHEADWAY                  HEADWAY.CO   NY\n $10.00 \n07/13\n07/13\nDESI BAZAAR CASH AND CAR RANCHO\n \nCUCAMOCA\n $1.99 \n07/13\n07/13\nHOMEGOODS 275            RCH CUCAMONGACA\n $18.31 \n07/14\n07/14\nALDI 79061               FONTANA      CA\n $8.91 \n07/14\n07/14\nRING STANDARD PLAN       RING.COM     CA\n $9.99 \nHIMANI SOOD \nStandard Purchases\n07/03\n07/03\nCOSTCO GAS #0686         MONTCLAIR    CA\n $42.49 \n07/08\n07/08\nCOSTCO WHSE #0627        FONTANA      CA\n $2.71 \n07/08\n07/08\nCOSTCO WHSE #0627        FONTANA      CA\n $145.98 \n07/14\n07/14\nCOSTCO GAS #0686         MONTCLAIR    CA\n $40.09 \n07/16\n07/16\nCOSTCO WHSE #0627        FONTANA      CA\n $186.02 \n07/17\n07/17\nUBER   *EATS             HELP.UBER.COMCA\n $35.31 \n07/24\n07/24\nCOSTCO GAS #0478         SAN BERNARDINCA\n $42.44 \nFees Charged\nTOTAL FEES FOR THIS PERIOD\n$0.00\nInterest Charged\nTOTAL INTEREST FOR THIS PERIOD\n$0.00\n2025 totals year-to-date\nTotal fees charged in 2025\n \n$0.00\nTotal interest charged in 2025\n \n$0.00\nCostco Cash Back\n \nRewards Summary\nTotal Costco\n \nCash Back Rewards Balance:\n$179.26\nCostco Cash Back Rewards Summary\nCostco Cash Back Rewards balance as of\n \nlast statement   .................................... +$158.15\nEarned this period .................................. +$21.11\nTotal Costco Cash Back Rewards Balance\n \nYear to Date :\n \n$179.26\nCostco Cash Back Rewards\n \nEarned This Period\n5% on gas at Costco\n............................. +$6.24\n4% on other eligible gas\n \nand EV charging....................................... +$0.00\n5% and 4% earn is on a combined $7,000\n \nspend per year, 1% thereafter\n3% on restaurants.................................... +$1.06\n3% on eligible travel ............................... +$0.00\n2% on Costco and Costco.com\n \npurchases ................................................. +$12.61\n1% on all other purchases....................... +$1.20\nTotal Earned:\n \n$21.11\n» Visit \nCiti.com/Costco\n \nfor more information\nPage 3 of 4\nwww.citicards.com\nCustomer Service\n \n1-855-378-6467\n(TTY: 711)\nMOHIT AGGARWAL\n--- End of Page ---\n"

In [6]:
type(parsed_text)

str

In [7]:
# --- 1. Imports ---
# Standard library and Pydantic
import os
import json
from typing import List, Optional
from pydantic import BaseModel, Field

# Core AutoGen components with explicit paths
from autogen_agentchat.agents import AssistantAgent, UserProxyAgent
from autogen_core.tools import Workbench

# --- 2. Define the Desired JSON Structure ---
# This defines the schema for each individual transaction.
class Transaction(BaseModel):
    cardholder: str = Field(description="The name of the cardholder who made the transaction.")
    sale_date: Optional[str] = Field(description="The date the transaction was made, e.g., '07/15'")
    post_date: str = Field(description="The date the transaction was posted to the account, e.g., '07/16'")
    description: str = Field(description="The description of the transaction")
    transaction_amount: float = Field(description="The amount. Debits (purchases) must be negative, credits (payments) must be positive.")

# This is the main structure for the entire statement.
class StatementData(BaseModel):
    bank_name: str = Field(description="The name of the bank or card issuer, e.g., 'Costco Anywhere Visa Card by Citi'")
    transactions: List[Transaction] = Field(description="A list of all transactions from the statement")

# --- 3. Create the Python Function (The 'Tool') ---
def parse_bank_statement(bank_name: str, transactions: List[dict]) -> str:
    """
    Parses raw bank statement data into a structured JSON format using Pydantic models.
    
    Args:
        bank_name: The name of the bank or card issuer.
        transactions: A list of dictionaries, each representing a transaction.
                      Each dictionary must contain all fields for the Transaction model.
                      
    Returns:
        A JSON string representation of the structured data.
    """
    statement = StatementData(
        bank_name=bank_name,
        transactions=[Transaction(**t) for t in transactions]
    )
    return statement.model_dump_json(indent=2)

# --- 4. Configure AutoGen Agents and Environment ---
# Ensure your OPENAI_API_KEY is set as an environment variable
config_list = [
    {
        "model": "gpt-4o", 
        "api_key": os.getenv("OPENAI_API_KEY2")
    }
]

# The Workbench manages the tools and execution environment.
workbench = Workbench(work_dir="coding", tools=[parse_bank_statement])

# The UserProxyAgent acts as the executor.
user_proxy = UserProxyAgent(
    name="UserProxy",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=5,
    workbench=workbench,
)

# The AssistantAgent acts as the planner, deciding to use the tool.
parser_assistant = AssistantAgent(
    name="ParserAssistant",
    system_message="""You are an expert financial data extractor. Your task is to meticulously analyze the user's text and use the `parse_bank_statement` tool to convert it into a structured JSON format.

    CRITICAL INSTRUCTIONS:
    1.  **Cardholders:** The statement has multiple cardholders (e.g., MOHIT AGGARWAL, HIMANI SOOD). You MUST associate each transaction with the correct cardholder name it's listed under.
    2.  **Transaction Amounts:** You MUST correctly interpret the signs. 
        -   Purchases are DEBITS and must be represented as **NEGATIVE** numbers.
        -   Payments and Credits are CREDITS and must be represented as **POSITIVE** numbers. If a payment is shown as '-$1,041.44', its value is 1041.44.
    3.  **Data Focus:** Ignore all summary data, ads, addresses, and page markers. Focus ONLY on the detailed list of transactions.""",
    llm_config={"config_list": config_list},
    tools=[parse_bank_statement],
)

# --- 5. Your Input String is Loaded Here ---
pdf_text_string = parsed_text

# --- 6. Initiate the Conversation and Print the Final Output ---
user_proxy.initiate_chat(
    recipient=parser_assistant,
    message=f"Please parse the following bank statement text:\n\n{pdf_text_string}"
)

final_message = user_proxy.last_message(parser_assistant)
final_json_output = final_message.get("content", "")

print("\n--- Final Parsed JSON Output ---")
print(final_json_output)

TypeError: Workbench() takes no arguments

In [13]:
# Install (run once)

# ---------------------------
# Agent + Code-executor demo
# ---------------------------
import os, asyncio
from autogen_agentchat.agents import AssistantAgent, CodeExecutorAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.code_executors.local import LocalCommandLineCodeExecutor
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import TextMentionTermination, MaxMessageTermination
from autogen_agentchat.messages import TextMessage

async def run_parse_v0_7_2(statement_text: str):
    # 1) model client used by AssistantAgent
    model_client = OpenAIChatCompletionClient(
        model="gpt-4o", 
        api_key=os.getenv("OPENAI_API_KEY2")
    )

    # 2) assistant - this agent will generate parsing code
    assistant = AssistantAgent(
        name="assistant",
        model_client=model_client,
        system_message=(
            "You are an expert Python developer. Given a raw multi-line bank statement string, "
            "produce Python code inside triple-backtick ```python``` blocks that parses the statement "
            "into a JSON object with keys: account_info, transactions, rewards and also prints a pandas DataFrame "
            "for transactions. When done, print the JSON and then reply with the single word: TERMINATE"
        ),
        reflect_on_tool_use=True,
    )

    # 3) code executor component (local) — start it before using
    code_executor = LocalCommandLineCodeExecutor(work_dir="coding")
    await code_executor.start()

    # 4) code executor *agent* (do NOT pass llm_config)
    code_executor_agent = CodeExecutorAgent(
        name="code_executor",
        code_executor=code_executor,
        # optional: model_client=..., approval_func=..., supported_languages=...
        approval_func=None,
    )

    # 5) team: assistant -> code executor (round-robin). Terminate on 'TERMINATE' or after N messages
    termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(6)
    team = RoundRobinGroupChat(participants=[assistant, code_executor_agent], termination_condition=termination)

    # 6) the task: feed the raw statement and ask for JSON + DataFrame
    task_msg = TextMessage(
        content=(
            "Parse the bank statement below into JSON & a pandas DataFrame. "
            "Print the JSON first, then print the DataFrame. End your final response with TERMINATE.\n\n"
            f"{statement_text}"
        ),
        source="user",
    )

    # 7) run the team (synchronous top-level via asyncio)
    result = await team.run(task=task_msg)
    # final model text available as result.chat_message
    print("===== AGENT FINAL OUTPUT =====")
    print(result.chat_message.content)

    # cleanup
    await code_executor.stop()



In [14]:
# Usage: replace the string below with your extracted-statement string
YOUR_STATEMENT = parsed_text
asyncio.run(run_parse_v0_7_2(YOUR_STATEMENT))

RuntimeError: asyncio.run() cannot be called from a running event loop