In [1]:
import base64
import os

import requests
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [2]:
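# Uncomment and run once if the dependencies are missing
# (%pip installs into the environment of the running kernel):
# %pip install langchain
# %pip install openai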

In [3]:
# OpenAI API key. Read it from the OPENAI_API_KEY environment variable so the
# secret never has to be typed into (and saved with) the notebook. Falls back to
# an empty string when the variable is unset.
api_key = os.environ.get("OPENAI_API_KEY", "")

def encode_image(image_path):
    """Return the contents of the file at `image_path` as a base64 text string.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a `str`.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

# Sample image to analyse, given relative to the notebook's working directory
image_path = "data/bank_statement_example_2.JPG"

# Encode it once; later cells send this base64 string to the vision model
base64_image = encode_image(image_path)



In [4]:

# Set page layout to wide
# st.set_page_config(layout="wide")

def get_answer(image, text, model="gpt-4-vision-preview", max_tokens=300, timeout=60):
    """Ask an OpenAI chat model a question about a base64-encoded image.

    Posts `text` together with `image` (embedded as a JPEG data URL) to the
    chat-completions endpoint, authenticating with the module-level `api_key`.

    Args:
        image: Base64-encoded image data as a string.
        text: The question or instruction to send along with the image.
        model: Name of the model to query. Defaults to the model this notebook
            was written against.
            NOTE(review): preview models get retired - confirm it is still served.
        max_tokens: Upper bound on the number of tokens in the reply.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text of the first choice in the response. This function is
        best-effort and never raises: on failure it returns a string describing
        the problem (the API's own error message when one is present, otherwise
        the text of the exception).
    """
    try:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }

        payload = {
            "model": model,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": text
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image}"
                            }
                        }
                    ]
                }
            ],
            "max_tokens": max_tokens
        }

        # Without a timeout, requests waits forever on an unresponsive server.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        body = response.json()

        # Failed calls (bad key, unknown model, ...) return an "error" object
        # instead of "choices". Surface its message rather than letting the
        # lookup below fail with an uninformative KeyError.
        if isinstance(body, dict) and "error" in body:
            error = body["error"]
            if isinstance(error, dict):
                return str(error.get("message", error))
            return str(error)

        answer = body['choices'][0]['message']['content']
        return answer

    except Exception as e:
        # Deliberate best-effort: report the failure as text instead of raising.
        return str(e)


In [5]:
# Ask the vision model for a free-text description of the encoded image
question = "Extract info from the image"
output = get_answer(image=base64_image, text=question)
print(f"Output: {output}")

Output: This image contains a bank withdrawal slip from Bank of Baroda, which is a bank operating in India. From the details visible:

- Account Number: 734 8493 1303 9312
- The amount in words: Thirty thousand only
- The amount in figures: ₹ 30,000/-
- The signature is present at the bottom right, but I cannot disclose the identity associated with the signature.
- The date appears to be written possibly as 10/4/24, which might refer to 10th April 2024, but this should be double-checked as the year part is not very clear.

Furthermore, the slip has instructions written in Hindi and alerts the person to pay cash. The withdrawal slip also has some pre-printed notes indicating the requirement for accompanying the passbook with this form.


In [6]:
extracted_text = output

# Keep `{text}` as a real template placeholder instead of pre-filling it with an
# f-string. ChatPromptTemplate parses curly braces in the template, so model
# output containing `{` or `}` (e.g. JSON) would otherwise be misread as
# template variables and break formatting.
form_template = '''For the following text, extract the following information:
name: Extract the name of the account holder.
branch: Extract the name of the branch of the savings bank account.
account_number: Extract the account number from the image.
date: Extract the date of the transaction.
rupees: Extract the amount mentioned in the form in wordsa and figures.
text:  {text}'''

prompt_template = ChatPromptTemplate.from_template(form_template)
messages = prompt_template.format_messages(text=extracted_text)
# Show the fully rendered prompt that is sent to the model
print(messages[0].content)

chat = ChatOpenAI(temperature=0.0, openai_api_key=api_key)
# `invoke` replaces calling the model object directly, which is deprecated
response = chat.invoke(messages)
print(response.content)
print(type(response.content))
# response.content.get('name')
# - It will fail because the response is a string

For the following text, extract the following information:
name: Extract the name of the account holder.
branch: Extract the name of the branch of the savings bank account.
account_number: Extract the account number from the image.
date: Extract the date of the transaction.
rupees: Extract the amount mentioned in the form in wordsa and figures.
text:  This image contains a bank withdrawal slip from Bank of Baroda, which is a bank operating in India. From the details visible:

- Account Number: 734 8493 1303 9312
- The amount in words: Thirty thousand only
- The amount in figures: ₹ 30,000/-
- The signature is present at the bottom right, but I cannot disclose the identity associated with the signature.
- The date appears to be written possibly as 10/4/24, which might refer to 10th April 2024, but this should be double-checked as the year part is not very clear.

Furthermore, the slip has instructions written in Hindi and alerts the person to pay cash. The withdrawal slip also has some 

  warn_deprecated(
  warn_deprecated(


name: Not mentioned
branch: Bank of Baroda
account_number: 734 8493 1303 9312
date: Possibly 10th April 2024
rupees: Thirty thousand only, ₹ 30,000/-
<class 'str'>


In [7]:
# extracted_text = "extract text into this variable"
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from langchain.prompts import ChatPromptTemplate

# One ResponseSchema per field the model must return. These are the single
# source of truth for the field names and descriptions used below.
name_schema = ResponseSchema(name="person_name",
                             description="Extract the name of the account holder.")
branch_schema = ResponseSchema(name="branch",
                               description="Extract the name of the branch of the savings bank account.")
account_number_schema = ResponseSchema(name="account_number",
                                       description="Extract the account number from the image.")
date_schema = ResponseSchema(name="date",
                             description="Extract the date of the transaction.")
rupees_schema = ResponseSchema(name="rupees",
                               description="Extract the amount mentioned in the form in wordsa and figures.")
response_schemas = [name_schema,
                    branch_schema,
                    account_number_schema,
                    date_schema,
                    rupees_schema,
                    ]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
# print(format_instructions)

# Generate the "field: description" lines from the schemas so the prompt text
# cannot drift from what the parser expects. The rendered prompt is the same
# as the previous hand-written list.
field_instructions = "\n\n".join(
    f"{schema.name}: {schema.description}" for schema in response_schemas
)
form_template = '''\
For the following text, extract the following information:
{field_instructions}

text: {text}

format_instructions: {format_instructions}
'''
# print(form_template)

prompt = ChatPromptTemplate.from_template(template=form_template)
# print(prompt)
messages = prompt.format_messages(field_instructions=field_instructions,
                                  text=extracted_text,
                                  format_instructions=format_instructions)
# print(messages)
# print(messages[0].content)

# `chat` is the ChatOpenAI instance created in an earlier cell; `invoke`
# replaces calling the model object directly, which is deprecated.
response = chat.invoke(messages)
# print(response.content)

# Parse the model's reply into a dict keyed by the schema names
output_dict = output_parser.parse(response.content)
print(output_dict)
print(output_dict['person_name'])
print(type(output_dict))

{'person_name': 'Not available', 'branch': 'Bank of Baroda', 'account_number': '734 8493 1303 9312', 'date': '10/4/24', 'rupees': 'Thirty thousand only (₹ 30,000/-)'}
Not available
<class 'dict'>


In [8]:
# Individual fields of the parsed result are available by key
print(output_dict["person_name"])


Not available
