In [2]:
from langchain_community.document_loaders import AmazonTextractPDFLoader
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.prompts import PromptTemplate
from langchain.llms.bedrock import Bedrock
from langchain.chains import LLMChain
import boto3
import botocore

# --- AWS clients ------------------------------------------------------------
# Textract client used by the PDF loader below (pinned to us-west-2).
textract_client = boto3.client("textract", region_name="us-west-2")

# Bedrock runtime client (us-east-1). Both timeouts are set to 900 s and
# botocore is allowed up to 3 attempts per request.
config = botocore.config.Config(
    retries={"max_attempts": 3},
    connect_timeout=900,
    read_timeout=900,
)
bedrock_client = boto3.client(
    "bedrock-runtime",
    config=config,
    region_name="us-east-1",
)

# --- Document extraction ----------------------------------------------------
# Run Textract with the LAYOUT feature on the PDF stored in S3 and collect the
# resulting LangChain Document objects.
file_path = "s3://my-s3-doc-loader/2667551.pdf"
textract_features = ["LAYOUT"]
loader = AmazonTextractPDFLoader(
    file_path,
    textract_features=textract_features,
    client=textract_client,
)
docs = loader.load()

In [13]:
# Inspect the Document objects returned by loader.load().
docs

[Document(page_content='Indian Customs EDI System - Imports V1.5R001\r\nV1.5R001\r\n# BILL OF ENTRY FOR HOME CONSUMPTION\r\n[Custom Stn: INMAA1 CHA : AGGPP7274BCH002 [SUPREME C & F SERVICES ] BE No/Dt./cc/Typ: 2667551/20/03/2024/N/H Importer Details :0798000066 PAN : AAACL2937JFT001 AD Code : 0510010 LIFE STYLE INTERNATIONAL PRIVATE LIMITED 14 : CHENNAI IMPORT WAREHOUSE NO.31/1, POOCHIATHIPET VILLAGE OFF REDHILLS, THIRUVALLUVAR RD REDHILLS CHENNAI 600052 Payment Method : Transaction\r\nIGM No : 2371888/19/03/2024 Cntry of Orgn : CHINA BL No : SZXCB24007583 Date : 08/03/2024 No. of Pkgs. : 628 CTN Marks:AS PER BL & Nos\r\nPort of Loading : Shekou Cntry of Consgn.: H/BL No : RSJ2402518 Date : 08/03/2024 Gross Wt. : 4807.900 KGS\r\nInv No & Dt. : IV2403-0231 07/03/2024 BERGNER HK LIMITED Inv Val : 31087.68 USD TOI: FOB 48/F LANGHAM PLACE OFFICE TOWER 8 Freight : 61975.00 INR ARGYLE ST MONG KOK Insurance : 0.001200% 0.00 HONG KONG SVB Load (Ass) : Cust. House: HK SVB Load (Dty) : HSS Load 

In [None]:
# Merge the text of every loaded Document into one string.
# str.join assembles the result in a single pass instead of re-copying the
# growing string on each `+=`. Documents are joined with no separator, so the
# resulting text is identical to what the loop produced.
all_page_content = "".join(doc.page_content for doc in docs)

print(all_page_content)

In [10]:
# Show the repr of the combined text so escape sequences (e.g. \r\n) stay visible.
all_page_content

'Indian Customs EDI System - Imports V1.5R001\r\nV1.5R001\r\n# BILL OF ENTRY FOR HOME CONSUMPTION\r\n[Custom Stn: INMAA1 CHA : AGGPP7274BCH002 [SUPREME C & F SERVICES ] BE No/Dt./cc/Typ: 2667551/20/03/2024/N/H Importer Details :0798000066 PAN : AAACL2937JFT001 AD Code : 0510010 LIFE STYLE INTERNATIONAL PRIVATE LIMITED 14 : CHENNAI IMPORT WAREHOUSE NO.31/1, POOCHIATHIPET VILLAGE OFF REDHILLS, THIRUVALLUVAR RD REDHILLS CHENNAI 600052 Payment Method : Transaction\r\nIGM No : 2371888/19/03/2024 Cntry of Orgn : CHINA BL No : SZXCB24007583 Date : 08/03/2024 No. of Pkgs. : 628 CTN Marks:AS PER BL & Nos\r\nPort of Loading : Shekou Cntry of Consgn.: H/BL No : RSJ2402518 Date : 08/03/2024 Gross Wt. : 4807.900 KGS\r\nInv No & Dt. : IV2403-0231 07/03/2024 BERGNER HK LIMITED Inv Val : 31087.68 USD TOI: FOB 48/F LANGHAM PLACE OFFICE TOWER 8 Freight : 61975.00 INR ARGYLE ST MONG KOK Insurance : 0.001200% 0.00 HONG KONG SVB Load (Ass) : Cust. House: HK SVB Load (Dty) : HSS Load Rate: 0.00% Amount: 0.0

In [16]:
# Prompt text sent to the LLM: asks for the supplied text to be converted to
# JSON without dropping or mismatching values. `{datas}` is the single
# placeholder that PromptTemplate fills with the extracted document text.
# The literal is kept verbatim because any change to it changes the prompt.
template = """
    Must convert datas into JSON format

    Instructions:
        - Do not skip any datas while converting into JSON
        - I need all the datas to be in JSON format
        - Avoid mismatching of the datas

    
{datas}
"""

In [17]:
# Wrap the template so LangChain substitutes the extracted text for {datas}.
qa_prompt = PromptTemplate(template=template, input_variables=["datas"])

# Claude 2.1 via Bedrock.
# - temperature is effectively zero to keep the extraction as repeatable as
#   possible.
# - max_tokens_to_sample is set to 4096, the maximum Bedrock documents for the
#   Claude text-completions API; the previous value (40000) exceeded that
#   documented limit.
llm = Bedrock(
    model_id="anthropic.claude-v2:1",
    client=bedrock_client,
    model_kwargs={
        "temperature": 1e-10,
        "max_tokens_to_sample": 4096,
    },
)
llm_chain = LLMChain(prompt=qa_prompt, llm=llm, verbose=False)

# Send the full document text through the chain and print the model's reply.
result = llm_chain.run(datas=all_page_content)
print(result)

 Here is the JSON format of the given data:

```json
{
  "CustomsStation": "INMAA1",
  "CHA": "AGGPP7274BCH002",
  "CHAName": "SUPREME C & F SERVICES",
  "BENumber": "2667551",
  "BEDate": "20/03/2024",
  "BEType": "N/H",
  "ImporterDetails": {
    "ImporterCode": "0798000066",
    "PAN": "AAACL2937JFT001",
    "ADCode": "0510010"
  },
  "ImporterName": "LIFE STYLE INTERNATIONAL PRIVATE LIMITED", 
  "InvoiceDetails": {
    "Number": "IV2403-0231",
    "Date": "07/03/2024"
  },
  "ExporterName": "BERGNER HK LIMITED",
  "IGMNumber": "2371888",
  "IGMDate": "19/03/2024",
  "CountryOfOrigin": "CHINA",
  "BillOfLadingNumber": "SZXCB24007583",
  "BillOfLadingDate": "08/03/2024",
  "NumberOfPackages": 628,
  "ContainerMarks": "AS PER BL & Nos",
  "PortOfLoading": "Shekou",
  "CountryOfConsignment": "H/BL",
  "HouseBillOfLadingNumber": "RSJ2402518", 
  "HouseBillOfLadingDate": "08/03/2024",
  "GrossWeight": 4807.9,
  "InvoiceValueCurrency": "USD",
  "InvoiceValue": 31087.68,
  "TermsOfShipment