In [3]:
from langchain_community.document_loaders import AmazonTextractPDFLoader
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.prompts import PromptTemplate
from langchain.llms.bedrock import Bedrock
from langchain.chains import LLMChain
import boto3
import botocore

# --- AWS client configuration -------------------------------------------
# Generous timeouts (seconds) plus a small retry budget for the Bedrock client.
config = botocore.config.Config(
    retries=dict(max_attempts=3),
    connect_timeout=900,
    read_timeout=900,
)

# Bedrock runtime client; it is handed to the LLM wrapper in a later cell.
bedrock_client = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
    config=config,
)

# --- Document loading via Amazon Textract -------------------------------
# NOTE(review): Textract is called in us-west-2 while Bedrock uses us-east-1;
# presumably the S3 bucket lives in us-west-2 -- confirm.
textract_client = boto3.client(service_name="textract", region_name="us-west-2")

# Request Textract's LAYOUT feature for the source PDF stored in S3.
textract_features = ["LAYOUT"]
file_path = "s3://my-s3-doc-loader/NEULAND 7668290[91].pdf"

loader = AmazonTextractPDFLoader(
    file_path,
    textract_features=textract_features,
    client=textract_client,
)

# `docs` is iterated in the next cell, where each item's `page_content` is read.
docs = loader.load()

In [4]:
# Stitch the text of every loaded document into one string, in order,
# with no separator inserted between consecutive items.
all_page_content = "".join(doc.page_content for doc in docs)

In [21]:
# Prompt text for the JSON-extraction step. It is wrapped in a PromptTemplate
# (input variable "datas") in the next cell, so the document text is substituted
# for the `{datas}` placeholder at the very end of the string.
#
# The "expected format" below lists the target keys using indentation only;
# `{datas}` is the only pair of curly braces in the whole string.
# NOTE(review): presumably the braces were left out because PromptTemplate would
# treat them as placeholders -- confirm before adding literal braces (they would
# likely need to be escaped as `{{` / `}}`).
#
# Parenthesised remarks after a key are per-field instructions to the model,
# not part of the desired output.
template = """
    Must convert datas into JSON format
    Example format:
        I need exactly like this below format(Expected format)
        
            "Customs_Station": "",
                "CHA_Details": 
                    "CHA_Code": "", 
                    "CHA_Name": "",
                "BE_Details": 
                    "BE_Number": "", 
                    "BE_Date": "", 
                    "BE_Type": "",
                "Importer_Details": 
                    "Importer_ID": "", 
                    "PAN": "", 
                    "AD_Code": "", 
                    "Name": "", 
                    "Address": "", 
                    "Payment_Method": "",
                "IGM_Details": 
                    "IGM_No": "", 
                    "IGM_Date": "", 
                    "Port_of_Loading": "", 
                    "Country_of_Origin": "", 
                    "Country_of_Consignment": "", 
                    "MBL_Details": 
                        "MBL_No": "", 
                        "MBL_Date": ""

                    "HBL_Details": 
                        "HBL_No": "", 
                        "HBL_Date": ""
                        
                    "Pkgs_Details": 
                        "No_of_Packages": "", (print only the number of packages)
                        "Package_Type": "" (print only the package type, identify the package type in the 'No_of_Packages')

                    "Gross_weight": 
                        "Gross_Wt": "", 
                        "Units": ""

                    "Marks&Nos": ""(only need to be printed in this line ,above to the Invoice_Details, take data from 'Marks&Nos')
                "Invoice_Details": 
                    "Invoice_No": "", 
                    "Invoice_Date": "", 
                    "Supplier_Name": "", (identify and print the supplier name)
                    "Supplier_Address": "", (identify and print the supplier address)
                    "POL_Customs_House": "", (take data only from 'Cust. House')
                    "Incoterms": "", (take datas from 'TOI')
                    "Invoice_Amount": 
                        "Value": "", 
                        "Currency": "", 
                    "Freight_Amount": 
                        "Value": "", 
                        "Currency": "", 
                    "Insurance_Amount": 
                        "Value": "", 
                        "Currency": ""

                "USD to INR Exchange Value": "",(mention only the INR value)
                "Item_Details": 
                    "Item_No":, (must mention the item number like 1,2..., take from item details)
                    "RITC": "", (take datas only from 'RITC' contains only number)
                    "Description": "", (take datas only from 'Description' , print fully)
                    "Quantity": "", 
                    "Unit_Price": "", 
                    "CTH": "", 
                    "Customs_Duty_Rate": "", 
                    "BCD_amount": "", 
                    "Unit": "", (take datas from 'Unit' it consists like KGS)
                    "Assessable_Value": "", 
                    "CETH": "", (take datas from 'CETH')
                    "Excise Duty Rate": "",
                    "Countervailing Duty Amount": "", 
                    "Educational Cess on CVDs": 
                        "% Rate": "",
                        "Amount": "",
                    "Sec & Higher Edu. Cess on CVD": 
                        "% Rate": "", 
                        "Amount": "",
                    "Customs Educational Cess": 
                        "% Rate": "", 
                        "Amount": "",
                    "Customs Sec & Higher Edu. Cess": 
                        "% Rate": "", 
                        "Amount": "",
                    "Social Welfare Surcharge": 
                        "% Rate": "", 
                        "Amount": "",
                    "IGST": 
                        "% Rate": "", 
                        "Amount": "",
                    "GST Cess": 
                        "% Rate": "", 
                        "Amount": ""

                "Duties": 
                    "TOTAL ASSESSABLE VALUE": "",(take value from 'Ass Val' in 'Item Details')
                    "Inv. Gross Total": "", 
                    "BE Gross Total": "", 
                    "Total Basic Customs Duty": "", 
                    "NCD Duty": "", 
                    "ANTID": "", 
                    "SAFEGUARD DUTY": "", 
                    "CVD": "", 
                    "Sch 2 Spl Excise Duty": "", 
                    "Cess": "", 
                    "GSIA": "", 
                    "TTA": "", 
                    "Edu. Cess CVD": "", 
                    "Customs Edu. Cess": "", 
                    "Health CVD": "", 
                    "Addl Duty - (Imports)": "", 
                    "SHE. Cess CVD": "", 
                    "SH Cust Edu. Cess": "", 
                    "Total_Duty_Payable": ""

                "Container_Details": 
                    "Seal_No": "", (contains only numbers)
                    "FCL/LCL": "", (contains only letters)
                    "Container_No": ""

                "GSTIN_Details": 
                    "Document_No": "", 
                    "Document_Type": "", 
                    "State_Code": "", 
                    "State_Name": "", 
                    "IGST_Assessable_Value": "", 
                    "IGST_Amount": "", 
                    "GST_Cess_Amount": ""

                "Licence_Details": 
                    "Invoice_No": "", (print data from 'Inv' from 'Licence Details' for example, 1,2... etc)
                    "Item_No": "", 
                    "Licence_No": "", 
                    "Licence_Date": "",(take from Licence Dt)
                    "Reg_No": "", 
                    "Reg_Date": "", (take from Reg.No Dt)
                    "Debit_Value": "", 
                    "Debit_Duty": "", 
                    "Debit_Date": "", 
                    "Debit_Quantity": ""


    Instructions:
        - Do not skip any datas while converting into JSON
        - Understand and create a JSON accurately
        - Strictly provide the results in above format only ,ensure that not to change the Expected format
        - Strictly Avoid mismatching of the datas
        - Mention all the dates accurately
        - Provide results with the exact above format only

    
{datas}
"""

In [22]:
# Wrap the extraction prompt; the concatenated document text fills `{datas}`.
qa_prompt = PromptTemplate(template=template, input_variables=["datas"])

# Claude v2.1 through the Bedrock text-completions interface.
# - temperature: a near-zero value keeps the extraction as deterministic as
#   the model allows.
# - max_tokens_to_sample: the documented maximum for this API is 4096. The
#   previous value (40000) was far above that ceiling, so it did not describe
#   the output budget that can actually be obtained.
llm = Bedrock(
    model_id="anthropic.claude-v2:1",
    client=bedrock_client,
    model_kwargs={
        "temperature": 1e-10,
        "max_tokens_to_sample": 4096,
    },
)

llm_chain = LLMChain(prompt=qa_prompt, llm=llm, verbose=False)

# Single model call over the whole document text.
# NOTE(review): the reply may contain prose and a Markdown code fence around
# the JSON (see the captured output), so it is not directly parseable as JSON.
result = llm_chain.run(datas=all_page_content)
print(result)

 Here is the converted JSON format for the given data:

```json
{
    "Customs_Station": "INMAA1",
    "CHA_Details": {
        "CHA_Code": "AADFA8603LCH004",
        "CHA_Name": "ANCHORAGE SHIPPING"
    },
    "BE_Details": {
        "BE_Number": "7668290",
        "BE_Date": "04/09/2023",
        "BE_Type": "N/H"
    },
    "Importer_Details": {
        "Importer_ID": "0988005077",
        "PAN": "AAACN9531EFT001",
        "AD_Code": "0510019",
        "Name": "NEULAND LABORATORIES LTD.",
        "Address": "11th Floor (5th Office Level), Phoen\nPlot No. 573A-III, Road No. 82, Jubi\nHYDERABAD\n500033",
        "Payment_Method": "Transaction"
    },
    "IGM_Details": {
        "IGM_No": "2353740",
        "IGM_Date": "02/09/2023",
        "Port_of_Loading": "Shanghai",
        "Country_of_Origin": "CHINA", 
        "Country_of_Consignment": "",
        "MBL_Details": {
            "MBL_No": "HDMUSHAZ52494100",
            "MBL_Date": "18/08/2023"
        },
        "HBL_Details": {
 