In [1]:
from langchain.prompts import PromptTemplate
from langchain.llms.bedrock import Bedrock
from langchain.chains import LLMChain
from langchain_community.document_loaders import PyPDFLoader
import boto3
import botocore

# Botocore client settings: generous timeouts (seconds) because a single
# long-form generation call can take a while, plus a small retry budget.
config = botocore.config.Config(
    retries=dict(max_attempts=3),
    connect_timeout=900,
    read_timeout=900,
)

# Bedrock runtime client; handed to the LangChain Bedrock wrapper later on.
bedrock_client = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
    config=config,
)

# Statement PDF to parse.
# NOTE(review): absolute, machine-specific path — consider a configurable
# data directory so the notebook runs on other machines.
path = r"C:\Users\Lenovo\Documents\Project-vs code\Amazon Transcribe\Jupiter\517792954-SBI-Card-Statement-0824-22-07-2021.pdf"

# Load the PDF; `docs` is the loader's result and is consumed by the chain cell.
loader = PyPDFLoader(path)
docs = loader.load()



In [16]:
# Prompt text for the statement-extraction chain. It contains exactly one
# placeholder, `{datas}`, which is filled with the statement content at run time.
# The text first lists the JSON field names the model should emit, then gives a
# one-line description per field, then the extraction instructions.
# NOTE(review): `cash_limit` and the per-transaction `date` appear in the field
# list but have no entry in the description section — confirm whether intended.
# NOTE(review): the literal must stay free of other `{`/`}` characters unless
# they are escaped, since it is used as a format-style template — confirm
# against the PromptTemplate format in use.
template = """
    Extract the text from the bank transcation statement and print the results in the JSON format only

    for example:(make below JSON format as the reference and print exactly like the below format)
        
    "user_id":
    "statement_id":
    "identity": 
    "bank":
    "credit_card_number":
    "name":
    "address":
    "payment_due_date":
    "total_dues":
    "min_amt_due":
    "finance_charges":
    "credit_limit":
    "avl_credit_limit":
    "cash_limit":
    "avl_cash_limit":
    "opening_balance":
    "payment_or_credits":
    "purchase_or_debits":
    "statement_date": 
    "points_earned" : 
    "total_points": 

        "transactions":
            
                "amount": 
                "transaction_type":
                "date": 
                "transaction_narration":
                "transaction_details": 
                "transaction_ref_number":

the above all the details are to be in JSON

        Description for the output format:
            
                statement_id: Unique statement id for each statement to be extracted
                user_id: Unique id for the user/name in the statement 
                identity: Dictionary of user details present in the statement
                bank: Name of the bank
                credit_card_number: Credit Card number
                name: Customer name
                address: Address of the customer
                payment_due_date: Due date of the credit card bill
                total_dues: Total Amount Due
                min_amt_due: Minimum Due Amount
                finance_charges: Amount of interest charged on the amount of money borrowed
                credit_limit: Credit Limit including cash
                avl_credit_limit: Available credit limit
                avl_cash_limit: Available cash limit
                opening_balance: Opening balance
                payment_or_credits: Total of the refunds and credits
                purchase_or_debits: Total of the purchase and debits
                statement_date: Date of the statement
                points_earned: Points earned in the statement cycle
                total_points: Total points earned till now
                transactions: List of transactions
                - amount: Transaction Amount
                - transaction_type: Type of transaction
                - transaction_narration: The transaction narration present in the statement
                - transaction_details: Any other details of the transaction (maybe extended to the next line and missed in the narration)
                - transaction_ref_number: Transaction reference number if available



so the i need to extract all the transcations and convert into JSON and print the accurate results for all the transcations for given document
        
        Instruction:
                - Must extract and convert into JSON for all the ttransactions datas, do not skip anything end to end transaction should be converted
                - Avoid mismatching of datas
{datas}
"""

In [17]:
# Wrap the prompt text; `datas` is the only placeholder the template declares.
qa_prompt = PromptTemplate(template=template, input_variables=["datas"])

# Claude v2.1 on Bedrock with a near-zero temperature for repeatable extraction.
# NOTE(review): max_tokens_to_sample=40000 looks higher than the output cap
# documented for this model family on Bedrock — confirm and lower if needed;
# a long transaction list may otherwise be cut off mid-JSON.
llm = Bedrock(
    model_id="anthropic.claude-v2:1",
    client=bedrock_client,
    model_kwargs={"temperature": 1e-10, "max_tokens_to_sample": 40000},
)
llm_chain = LLMChain(prompt=qa_prompt, llm=llm, verbose=False)

# Feed the model the statement text itself. Passing the `docs` list directly
# makes the template insert the list's repr (object wrappers, metadata and
# escaped newlines) rather than the page text, which wastes context and blurs
# the row layout the extraction relies on.
statement_text = "\n\n".join(doc.page_content for doc in docs)

result = llm_chain.run(datas=statement_text)
print(result)

 Here is the extracted text from the bank transaction statement converted to JSON format:

```json
{
  "user_id": "SUNIL KUMAR YADAV",
  "statement_id": "B21073266361",
  "identity": {
    "name": "SUNIL KUMAR YADAV"
  },
  "bank": "SBI", 
  "credit_card_number": "XXXX XXXX XXXX XX24",
  "name": "SUNIL KUMAR YADAV",
  "address": "UP/9/UTTAR PRADESH",
  "payment_due_date": "11 Aug 2021",
  "total_dues": "33,051.00",
  "min_amt_due": "5,717.00",
  "finance_charges": "2,327.62",
  "credit_limit": "2,00,000.00",
  "avl_credit_limit": "60,000.00", 
  "cash_limit": "60,000.00",
  "avl_cash_limit": "13,778.47",
  "opening_balance": "39,150.60",
  "payment_or_credits": "41,096.00",
  "purchase_or_debits": "13,396.71",
  "statement_date": "22 Jul 2021",
  "points_earned": "2706",
  "total_points": "5156",

  "transactions": [
    {
      "amount": "1,099.00",
      "transaction_type": "D",
      "date": "25 Jun 21",
      "transaction_narration": "FLIPKART PAYMENTS      GURGAON       IN",
     

In [None]:
# Revised prompt text for the statement-extraction chain; this assignment
# rebinds `template`, replacing whatever value the name held before.
# It contains exactly one placeholder, `{datas}`, for the statement content.
# NOTE(review): the cell that builds `qa_prompt` from `template` sits above this
# one in the notebook, so it has to be re-run for this version to take effect —
# confirm the intended execution order.
# NOTE(review): `cash_limit` and the per-transaction `date` appear in the field
# list but have no entry in the description section — confirm whether intended.
template = """
    Extract the text from the bank transcation statement and print the results in the JSON format only

    for example:(make below JSON format as the reference and print exactly like the below format)
        
    "user_id":
    "statement_id":
    "identity": 
    "bank":
    "credit_card_number":
    "name":
    "address":
    "payment_due_date":
    "total_dues":
    "min_amt_due":
    "finance_charges":
    "credit_limit":
    "avl_credit_limit":
    "cash_limit":
    "avl_cash_limit":
    "opening_balance":
    "payment_or_credits":
    "purchase_or_debits":
    "statement_date": 
    "points_earned" : 
    "total_points": 

        "transactions":
            
                "amount": 
                "transaction_type":
                "date": 
                "transaction_narration":
                "transaction_details": 
                "transaction_ref_number":

the above all the details are to be in JSON

        Description for the output format:
            
                statement_id: Unique statement id for each statement to be extracted
                user_id: Unique id for the user/name in the statement 
                identity: Dictionary of user details present in the statement
                bank: Name of the bank
                credit_card_number: Credit Card number
                name: Customer name
                address: Address of the customer
                payment_due_date: Due date of the credit card bill
                total_dues: Total Amount Due
                min_amt_due: Minimum Due Amount
                finance_charges: Amount of interest charged on the amount of money borrowed
                credit_limit: Credit Limit including cash
                avl_credit_limit: Available Credit Limit
                avl_cash_limit: Available Cash Limit
                opening_balance: Opening balance
                payment_or_credits: Payments Reversals & other Credits
                purchase_or_debits: Purchases & Other Debits
                statement_date: Date of the statement
                points_earned: Points earned in the statement cycle
                total_points: Total points earned till now
                transactions: List of transactions
                - amount: Transaction Amount
                - transaction_type: Type of transaction
                - transaction_narration: The transaction narration present in the statement
                - transaction_details: Any other details of the transaction (maybe extended to the next line and missed in the narration)
                - transaction_ref_number: Transaction reference number if available



so the i need to extract all the transcations and convert into JSON and print the accurate results for all the transcations for given document
        
        Instruction:
                - Must extract and convert into JSON for all the ttransactions datas, do not skip anything end to end transaction should be converted
{datas}
"""