In [None]:
import pathlib
import glob
import shutil
from pdf2image import convert_from_path
from PIL import Image
import base64
import json
import boto3
import os
from langchain_core.runnables import RunnableLambda

# Bedrock runtime client shared by every model call in this notebook.
# The region is pinned to us-east-1; edit the value below to target another region.
bedrock = boto3.client('bedrock-runtime', region_name='us-east-1')


In [None]:
def convert_pdf_to_images(pdf_path, output_dir):
    """Render every page of a PDF to a JPEG file inside ``output_dir``.

    The PDF itself is also copied into ``output_dir`` so the source document
    sits next to its page images.

    Args:
        pdf_path: Path of the PDF to convert.
        output_dir: Directory that receives the PDF copy and the page images.
            It is created (including missing parents) when it does not exist.

    Returns:
        list[str]: Paths of the written images in page order, named
        ``page_1.jpeg``, ``page_2.jpeg``, ...

    Note:
        File names carry only the page number, so converting a second PDF into
        the same directory overwrites the pages written for the first one.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Keep a copy of the PDF next to its pages. shutil.copy raises
    # SameFileError when the PDF already lives in output_dir (source and
    # destination are the same file); there is nothing to copy in that case.
    try:
        shutil.copy(pdf_path, output_dir)
    except shutil.SameFileError:
        pass

    # Rasterise the document at 300 DPI; the result is iterated page by page.
    pages = convert_from_path(pdf_path, dpi=300)

    # Save each page as a JPEG and remember where it was written.
    image_paths = []
    for page_number, page in enumerate(pages, start=1):
        output_path = os.path.join(output_dir, f'page_{page_number}.jpeg')
        page.save(output_path, 'JPEG')
        image_paths.append(output_path)

    return image_paths

directory = 'data'
output_dir = 'data'

# Collect every PDF in the input directory. os.listdir returns entries in
# arbitrary order, so the list is sorted to make the selection below
# deterministic across runs and machines.
pdf_files = sorted(
    os.path.join(directory, filename)
    for filename in os.listdir(directory)
    if filename.lower().endswith(".pdf")
)

# Fail with a clear message instead of a NameError on `pdf_path` further down.
if not pdf_files:
    raise FileNotFoundError(f"No PDF files found in '{directory}'")

# Page images are written as page_<n>.jpeg regardless of the source PDF, so
# converting several PDFs into the same output_dir would overwrite pages.
# Exactly one PDF is therefore converted: the last one in sorted order.
pdf_path = pdf_files[-1]
if len(pdf_files) > 1:
    print(f"Found {len(pdf_files)} PDF files in '{directory}'; converting only {pdf_path}")

image_paths = convert_pdf_to_images(pdf_path, output_dir)
print(f"PDF has been converted to images and saved in {output_dir}")
print("Image paths:", image_paths)


In [None]:
# Instruction text sent to the model together with each page image.
# It asks for a Markdown rendering of the page: headings, tables, addresses.
# The closing sentence uses a plain ASCII apostrophe so the text does not
# depend on how the notebook file is encoded.
prompt = """Extract and Format Information from the given Image:
 
You are an AI trained to extract and format information from images containing structured data, such as restaurant bills, hotel invoices, shipping invoices, financial reports, or various other forms. Your goal is to convert this information into a well-organized Markdown format. Follow these guidelines:
 
1. Extract Text:
	- Extract all text from the image.
	- Preserve the textual structure, including headings, subheadings, and paragraphs.
2. Extract Tables:
	- Identify and extract tabular data.
	- Convert tables into Markdown table format.
	- Ensure headers and rows are clearly separated and correctly formatted.
	- For purchase receipts, represent items in five columns:  item, quantity (including appropriate units such as pcs, kgs, lbs, liters), price per unit, discount/sales if applicable and total price.
	- Should double check if the extracted table is meeting the above requirements.
3. Address Extraction:
	- Accurately extract and format any addresses present in the images.
4. Organize Information:
	- Start with a heading for the overall document.
	- Use subheadings for different sections (e.g., "Monthly Overview", "Holdings", "Dividend Payment Detail").
	- Present extracted text under appropriate subheadings.
	- Insert Markdown tables for tabular data under the relevant sections.
5. Formatting:
	- Use Markdown syntax for headings (#, ##, ###, etc.).
	- Use Markdown syntax for tables (pipe | characters and dashes --- for table borders).
	- Ensure the final output is clean and readable.
Let's work this out in a step-by-step way to be sure we have the right answer.
"""

In [None]:
directory = 'data'


def _page_order(path):
    """Sort key placing ``<name>_<n>.jpeg`` files in numeric order of ``n``.

    Files whose name does not end in ``_<number>`` are placed after the
    numbered ones and ordered by path.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    suffix = stem.rpartition("_")[2]
    if suffix.isdecimal():
        return (0, int(suffix), path)
    return (1, 0, path)


# Collect every JPEG in the directory. os.listdir returns entries in arbitrary
# order, and a plain string sort would put page_10 before page_2, so the pages
# are sorted by page number. Batch results then line up with the page order.
image_files = sorted(
    (
        os.path.join(directory, filename)
        for filename in os.listdir(directory)
        if filename.endswith(".jpeg")
    ),
    key=_page_order,
)
    

In [None]:
def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded to a ``str`` using UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


In [None]:
def get_messages(image_b64_string, prompt_text=None):
    """Build the single-turn message list for one page image.

    The user message holds four content parts in this order: a ``<image>``
    text marker, the base64 JPEG, a ``</image>`` text marker, and the
    instruction text.

    Args:
        image_b64_string: Base64-encoded JPEG data as a string.
        prompt_text: Instruction text to send with the image. When omitted,
            the notebook-level ``prompt`` is used.

    Returns:
        list[dict]: A one-element list containing the user message.
    """
    # Fall back to the notebook-level prompt only when the caller gave none,
    # so the function can also be used without that global being defined.
    if prompt_text is None:
        prompt_text = prompt

    image_part = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": image_b64_string
        }
    }
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "<image>"},
                image_part,
                {"type": "text", "text": "</image>"},
                {"type": "text", "text": prompt_text}
            ]
        }
    ]
    return messages


In [None]:
def get_body(messages):
    """Serialise the request payload for the model call to a JSON string.

    The payload combines fixed generation settings (token limit, temperature,
    top-k, top-p, stop sequence) with the supplied ``messages``.
    """
    payload = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 4000,
        "temperature": 0.1,
        "top_k": 250,
        "top_p": 0.999,
        "stop_sequences": ["\n\nHuman"],
    }
    # Added last so the serialised key order stays: settings first, messages at the end.
    payload["messages"] = messages
    return json.dumps(payload)


In [None]:
def get_response(body, model_id='anthropic.claude-3-sonnet-20240229-v1:0', client=None):
    """Send a request body to the model and return its text and token usage.

    Args:
        body: JSON string to send as the request body.
        model_id: Identifier of the model to invoke. Defaults to Claude 3
            Sonnet; pass e.g. ``'anthropic.claude-3-haiku-20240307-v1:0'``
            to use another model.
        client: Object exposing ``invoke_model(modelId=..., contentType=...,
            accept=..., body=...)``. When omitted, the notebook-level
            ``bedrock`` client is used.

    Returns:
        dict: ``{'text': <text of the first content block>,
        'tokens': <the response's 'usage' entry>}``.
    """
    # Resolve the client lazily so the notebook-level `bedrock` is only
    # required when no client is passed in.
    if client is None:
        client = bedrock

    response = client.invoke_model(
        modelId=model_id,
        contentType='application/json',
        accept='application/json',
        body=body
    )
    # The response body is a readable stream holding a JSON document.
    response_body = json.loads(response.get('body').read())

    return {
        'text': response_body['content'][0]['text'],
        'tokens': response_body['usage'],
    }


In [None]:
def main_func(args):
    """Run the full pipeline for one image and return the model result.

    ``args`` is a dict with an ``'image_path'`` entry. The image is
    base64-encoded, wrapped in a message list, serialised to a request body
    and sent to the model; whatever ``get_response`` returns is passed back.
    """
    encoded_image = image_to_base64(args['image_path'])
    request_body = get_body(get_messages(encoded_image))
    return get_response(request_body)



In [None]:
# Wrap main_func in a RunnableLambda so it can be driven through
# chain.invoke (one input dict) and chain.batch (a list of input dicts).
chain = RunnableLambda(main_func) 

In [None]:
# Single-page run through the chain. The path is built with os.path.join so
# the separator is correct on every platform (a literal backslash only works
# on Windows).
# NOTE(review): no cell visible here creates 'merrill_images' — confirm the
# directory exists, or point this at one of the entries in image_files.
results = chain.invoke({"image_path": os.path.join('merrill_images', 'page_22.jpeg')})

In [None]:

# Build one input dict per discovered page image, then run the chain over all of them.
batch_inputs = [{"image_path": str(image_file)} for image_file in image_files]
results = chain.batch(batch_inputs)

# Individual Investment Account Monthly Statement

**ACCOUNT INFORMATION**
Regina P Smitheral  
Individual Investment Account - TOD*  
A1111111

**ACCOUNT HOLDER**
Regina P Smitheral
2 Jackson St
Shrubbery, PA 12345-4321

**Monthly Statement for July 1 - 31, 2021**

## I. Account Summary

## II. Holdings 

## III. Account Activity

**CONTACT WEALTHFRONT BROKERAGE LLC**
support@wealthfront.com
123.123.1234

**YOUR CUSTODIAN**
Wealthfront Brokerage LLC
261 Hamilton Ave
Palo Alto, CA 94301
Member SIPC

\* Transfer on death (TOD) means that the designated beneficiary(ies) will inherit this account if the current owner dies.<!-- Page 1 -->

---

# Wealthfront Individual Investment Account Report

## I. Account Summary

| Date       | Description     | Balance     |
|------------|-----------------|--------------|
| July 1, 2021 | Starting Balance | $10,473.55 |
| July 31, 2021 | Ending Balance   | $10,445.13 |

The provided image appears to be an account summary statement from Wealthfront for an individual investment account. The summary section shows the starting balance on July 1, 2021, as $10,473.55 and the ending balance on July 31, 2021, as $10,445.13.<!-- Page 2 -->

---

# Individual Investment Account Report

## Holdings as of July 31, 2021

### ETFs / STOCKS

| Security                             | Symbol | Shares | Share Price | Value     |
|--------------------------------------|--------|--------|-------------|------------|
| Schwab U.S. Broad Market ETF         | SCHB   | 35     | $106.1400   | $3,714.90 |
| Vanguard FTSE Developed Markets ETF  | VEA    | 59     | $51.7800    | $3,055.02 |
| Vanguard Dividend Appreciation       | VIG    | 6      | $159.7200   | $958.32   |
| Vanguard Tax-Exempt Bond ETF         | VTEB   | 13     | $55.5000    | $721.50   |
| Vanguard FTSE Emerging Markets ETF   | VWO    | 37     | $51.1100    | $1,891.07 |
| Total                                |        |        |             | $10,340.81|

### MONEY MARKET FUNDS

| Security                              | Symbol | Effective Annual Yield | Shares  | Share Price | Value   |
|----------------------------------------|--------|------------------------|---------|-------------|---------|
| RBC US Government Money Market Fund   | TIMXX  | 0.03%                  | 104.32  | $1.0000     | $104.32 |
| Total                                 |        |                        |         |             | $104.32 |

### CASH

(No cash holdings listed)

Page 3 of 10<!-- Page 3 -->

---

# Wealthfront Individual Investment Account Report

## Currency

| Currency | Value |
|-----------|-------|
| US Dollar | $0.00 |

## Total Holdings

| Total Holdings |
|-----------------|
|     $10,445.13 |

This report appears to be an investment account statement from Wealthfront for the period of July 1 - 31, 2021 for an individual named Regina P Smitheral. The only information provided is the account currency (US Dollar with a $0.00 balance) and the total holdings value of $10,445.13. No other details like account numbers, transactions, or holdings breakdown are visible in the given image excerpt.<!-- Page 4 -->

---

# Wealthfront Individual Investment Account Report

## III. Account Activity

### Deposits to Wealthfront Brokerage

| Date | Method | Status | Amount |
|------|--------|--------|--------|
| Total |        |        | $0.00  |

### Withdrawals from Wealthfront Brokerage  

| Date | Method | Status | Amount |
|------|--------|--------|--------|
| Total |        |        | $0.00  |

The image appears to be an excerpt from a financial report titled "Regina P Smitheral | Individual Investment Account | July 1 - 31, 2021" from Wealthfront. The section shown is "III. Account Activity", which contains two tables: one for "Deposits to Wealthfront Brokerage" and another for "Withdrawals from Wealthfront Brokerage". Both tables have columns for Date, Method, Status, and Amount. However, there are no specific transactions listed, and the Total rows show $0.00 for both deposits and withdrawals during the stated period.<!-- Page 5 -->

---

# Individual Investment Account Report

## TRADES

| Trade Date | Security                                | Symbol | Type | Shares | Share Price | Value |
|------------|------------------------------------------|--------|------|--------|-------------|-------|
| 7/1/2021   | RBC US Government Money Market Fund     | TIMMX  | Sell | 142.6  | $1.0000     | $142.60 |
| 7/2/2021   | RBC US Government Money Market Fund     | TIMMX  | Sell | 1.05   | $1.0000     | $1.05 |
| 7/6/2021   | RBC US Government Money Market Fund     | TIMMX  | Buy  | 0.01   | $1.0000     | $0.01 |
|            |                                          |        |      |        | (as of 6/30/2021) |    |
| 7/7/2021   | RBC US Government Money Market Fund     | TIMMX  | Buy  | 0.97   | $1.0000     | $0.97 |
| 7/19/2021  | Schwab Dow Jones U.S. Dividend 100 Index | SCHD  | Sell | 14     | $74.0800    | $1,037.12 |
| 7/19/2021  | Schwab International Equity ETF         | SCHF   | Sell | 76     | $38.3050    | $2,911.18 |
| 7/19/2021  | Vanguard FTSE Developed Markets ETF     | VEA    | Buy  | 59     | $50.0550    | $2,953.25 |
| 7/19/2021  | Vanguard Dividend Appreciation          | VIG    | Buy  | 6      | $154.9050   | $929.43 |
| 7/21/2021  | RBC US Government Money Market Fund     | TIMMX  | Buy  | 65.62  | $1.0000     | $65.62 |
| 7/22/2021  | iShares Core MSCI Emerging Markets ETF  | IEMG   | Sell | 30     | $65.0200    | $1,950.60 |
| 7/22/2021  | Vanguard FTSE Emerging Markets ETF      | VWO    | Buy  | 37     | $52.7150    | $1,950.46 |
| 7/26/2021  | RBC US Government Money Market Fund     | TIMMX  | Buy  | 0.14   | $1.0000     | $0.14 |

## DIVIDENDS

*No dividend information provided in the image.*<!-- Page 6 -->

---

# Individual Investment Account Report

## Holdings

| Date     | Type | Security                 | Symbol | Shares | Dividend Per Share | Price per Share | Taxable Value | Tax-Exempt Value | Total Value |
|----------|------|--------------------------|--------|--------|---------------------|-----------------|----------------|-------------------|-------------|
| 6/30/2021| Cash | RBC US Government Money Market Fund | TIMXX | -- | -- | -- | $0.01 | $0.00 | $0.01 |
| 7/7/2021 | Cash | Vanguard Tax-Exempt Bond ETF | VTEB | -- | -- | -- | $0.97 | $0.00 | $0.97 |
| Total |      |                          |        |        |                     |                 | $0.98          | $0.00             | $0.98       |

Note: Dividend income that is not subject to federal income tax, but may still be subject to state income tax or the Alternative Minimum Tax (AMT).<!-- Page 7 -->

---

# Wealthfront Individual Investment Account Statement

## Fees

| Charge Date | Description                           | Amount | Note                                                |
|-------------|----------------------------------------|--------|-----------------------------------------------------|
| 7/2/2021    | Wealthfront Advisory Fee For June 2021 | $1.05  | The fee on an annual basis is 0.25% of assets under management (excluding assets managed for free). |<!-- Page 8 -->

---

# Wealthfront Individual Investment Account Statement

## Disclosures

Investment advisory and investment management services are offered by Wealthfront Advisers LLC ("Wealthfront Advisers," the successor investment adviser to Wealthfront Inc.), an SEC-registered investment adviser. The Wealthfront Risk Parity Fund is managed by Wealthfront Strategies LLC ("Wealthfront Strategies," and formerly known as WFAS LLC), an SEC-registered investment adviser. Brokerage products and services are offered by Wealthfront Brokerage LLC ("Wealthfront Brokerage," and formerly known as Wealthfront Brokerage Corporation), member FINRA / SIPC. Wealthfront Strategies and Wealthfront Brokerage are both affiliates of Wealthfront Advisers. Wealthfront Brokerage holds your cash and securities in SEC-Clearing & Custody ("NSCC") NSCC / DTC, a Wealthfront Brokerage's clearing firm, which means NSCC processes all transactions for your account.

View Wealthfront Brokerage's most recent Statement of Financial Condition at
https://www.wealthfront.com/static/documents/WB_Financial_Statement.pdf. Please report any inaccuracies in your monthly statement to support@wealthfront.com. This statement is considered accurate if you do not report
inaccuracies within 10 business days after receipt of this statement. Any oral communications should be re-confirmed in writing to further protect your rights, including your rights under the Securities Investor Protection Act (SIPA).

### Money Market Fund
An investment in the Money Market Fund we offer is neither insured nor guaranteed by the Federal Deposit Insurance Corporation (FDIC) or any other government agency. There is no guarantee that the Money Market Fund will be
able to maintain its value. At any time, you can choose to liquidate your money market fund shares and have the proceeds either returned to your securities account or remitted to you.

### Tax Reporting
This monthly account statement should not be used for tax reporting purposes. Following each calendar year end, you will receive a Form 1099 for annual tax reporting (including taxable interest, dividends, capital gains, taxes
withheld, and sales proceeds). If you have a retirement account, contributions and distributions will be reported on Form 5498 or 1099-R respectively.

## Wealthfront Strategies
Wealthfront Strategies is the investment adviser to the Wealthfront Risk Parity Fund. Northern Lights Distributors, LLC ("NLD"), member FINRA and SIPC, serves as the principal distributor for the Fund. NLD is not an affiliate of
Wealthfront Strategies.

### Share Prices
The prices of securities have been obtained from quotation services and other independent sources if available. Prices are typically based on the closing price. If no price is available from a quotation service or other independent
source, the market price is shown as Not Available ("N/A"). The prices on your monthly statement are provided as general information, and we do not guarantee the accuracy of any security's price. Certain security types trade
differently than equities or may not trade on a liquid exchange. For additional information on bond holdings, please contact us at support@wealthfront.com.

### SIPC
Wealthfront Brokerage is a member of Securities Investor Protection Corporation (SIPC), which protects securities of its members up to $500,000 (including $250,000 for claims for cash) to the extent allowed by the Securities
Investor Protection Act of 1970. In the event Wealthfront Brokerage were to go out of business. Securities held at Wealthfront Brokerage are not FDIC-insured. Certain securities, such as GBTC and ETHE, may not be covered by
SIPC. An explanatory brochure is available on request at www.sipc.org or by contacting SIPC at 202-371-8300.

### Order Routing Details
Wealthfront Brokerage is happy to provide the identity of the firm to which your orders were routed over the previous six months upon request. Please send your request to support@wealthfront.com.

### Trade Confirmations (SEC Rule 10b-10)
All confirmations are generated on the trade date and are available for viewing on your Wealthfront Adviser's dashboard under "Taxes and Documents" on the following day. Money market fund purchases are included on your
monthly statements.<!-- Page 9 -->

---

# Wealthfront Individual Investment Account Statement

### Cash Balance
Your cash balance is the amount of funds you can withdraw. You have the right to receive any cash balance to which you are entitled, less any amounts you owe on any of your accounts (such as advisory fees or interest). Your cash balance is not segregated and may be used by Wealthfront Brokerage in its business, subject to the limitations of U.S. Securities Exchange Commission Rule 15c3-3. What this means is that although your cash balance is not segregated, a bank account for the exclusive benefit of Wealthfront Brokerage customers has a cash balance set aside to ensure that your funds are safe.

Page 10 of 10<!-- Page 10 -->

---

