In [1]:
import os
import time
import shutil
import json
from langchain.llms import Ollama
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate

FILEPATH = "./Image to Pdf Converter/output.pdf"
LOCAL_MODEL = "llama3.1"

# Set up the language model without any memory or retrieval chain
llm = Ollama(
    base_url="http://localhost:11434",
    model=LOCAL_MODEL,
    verbose=True,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
)

# Load PDF and extract text (assume one-page certificate)
loader = PyPDFLoader(FILEPATH)
data = loader.load()
pdf_text = "".join([doc.page_content for doc in data])

extract_template = """You are an AI assistant that extracts precise information from a death certificate.
Do not assume answers if the certificate does not explicitly provide them.
Check for every minute detail in the provided certificate.
Return the extracted details in a structured list with one detail per line.
Context:
{context}

Extracted Details:"""

extract_prompt = PromptTemplate(
    input_variables=["context"],
    template=extract_template,
)

# Use only the PDF text to extract details
extracted_data_str = llm.invoke(extract_prompt.format(context=pdf_text)).strip()

print("\nExtracted Information:\n")
print(extracted_data_str)

qa_template = """You are an AI assistant. Based solely on the following extracted details:
{extracted_details}

For each question, provide an answer exactly as it is stated in the extracted details. Do not infer or assume any details that are not explicitly mentioned.
If the detail is not explicitly stated, simply answer "None".

Question: {question}
Answer:"""

qa_prompt = PromptTemplate(
    input_variables=["extracted_details", "question"],
    template=qa_template,
)

# Simplified keys for JSON storage and updated Residential Address question
questions_mapping = {
    "Name": "What is the name of the dead person?",
    "Gender": "What is the gender of the dead person?",
    "Age": "What is the age of the dead person?",
    "Spouse Name": "What is the wife's/husband's name of the dead person?",
    "Mother's Name": "What is the mother's name of the dead person?",
    "Father's Name": "What is the father's name of the dead person?",
    "Date of Birth": "What is the date of birth of the dead person?",
    "Date of Death": "What is the date of death of the dead person?",
    "Address at Time of Death": "What is the address of the dead person at the time of death? Answer only if address at the time of death is explicitly mentioned becuase permanent address might not be same as address at the time of death in some cases; otherwise, answer 'None'.",
    "Residential Address": "What is the dead person's residential address? Answer only if a distinct permanent or residential address is explicitly mentioned; otherwise, answer 'None'.",
    "Cause of Death": "What is the cause of death?",
    "Place of Death": "What is the place of death?",
}

answers_dict = {}

# Ask each question using only the extracted details as context
for key, question in questions_mapping.items():
    response = llm.invoke(
        qa_prompt.format(extracted_details=extracted_data_str, question=question)
    ).strip()
    
    # Normalize responses that indicate missing data
    if not response or any(term in response.lower() for term in ["none", "not mentioned", "no information"]):
        answers_dict[key] = "None"
    else:
        answers_dict[key] = response

# Save extracted answers to JSON
with open("answers.json", "w") as json_file:
    json.dump(answers_dict, json_file, indent=4)

print("\nExtracted answers saved to answers.json")

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


Here are the extracted details from the death certificate in a structured list with one detail per line:

1. **Name:** Jaswinder Kaur
2. **Permanent Address:** H no. 246-A Model town extension, Ludhiana (Pb)
3. **Father's Name:** Ratan Singh
4. **Nationality:** Indian
5. **Sex:** Female
6. **Date of Death:** 14/12/2023
7. **Registration No.:** 65
8. **Place of Death:** Ludhiana
9. **Date of Registration:** 13/02/2024
Extracted Information:

Here are the extracted details from the death certificate in a structured list with one detail per line:

1. **Name:** Jaswinder Kaur
2. **Permanent Address:** H no. 246-A Model town extension, Ludhiana (Pb)
3. **Father's Name:** Ratan Singh
4. **Nationality:** Indian
5. **Sex:** Female
6. **Date of Death:** 14/12/2023
7. **Registration No.:** 65
8. **Place of Death:** Ludhiana
9. **Date of Registration:** 13/02/2024
Jaswinder KaurFemaleNoneNone.None.Ratan SinghNone.14/12/2023None.H no. 246-A Model town extension, Ludhiana (Pb)NoneLudhiana
Extracted