#### Insurance Policy Extraction (Docx)

In [None]:
#%pip install openai
import json
import subprocess

import docx  # provided by the python-docx package
import openai
import pdfplumber


In [7]:
# Load DOCX into plain text

def load_docx_text(path):
    """Read a .docx file and return its paragraph text as one string.

    Only ``doc.paragraphs`` is consulted. Paragraphs whose text is empty or
    whitespace-only are dropped; the remaining paragraph texts are kept
    unmodified, in document order, and joined with a single newline.

    Args:
        path: Location of the document, handed directly to ``docx.Document``.

    Returns:
        str: The joined paragraph text (an empty string when nothing is kept).
    """
    document = docx.Document(path)

    kept_lines = []
    for paragraph in document.paragraphs:
        # Skip blank / whitespace-only paragraphs.
        if paragraph.text.strip():
            kept_lines.append(paragraph.text)

    return "\n".join(kept_lines)

In [8]:
# Run OPENAI GPT-5-mini
# ----------------------------------------------------
def extract_with_openai(policy_text):
    """Ask the OpenAI ``gpt-5-mini`` chat model to extract policy fields as JSON.

    The policy text is embedded in a fixed prompt and sent as a single user
    message. The reply is parsed leniently:

    1. the whole reply is parsed as JSON;
    2. failing that, the span from the first ``{`` to the last ``}`` is parsed
       (covers replies that wrap the JSON in prose or a code fence);
    3. failing that, the unparsed reply is returned under ``"raw_output"``.

    Args:
        policy_text: Plain text of the insurance policy.

    Returns:
        Whatever ``json.loads`` produced from the reply (normally a dict), or
        ``{"raw_output": <reply>}`` when no JSON could be recovered.

    Raises:
        Any exception raised by the OpenAI client is propagated unchanged.
    """
    client = openai.OpenAI()

    prompt = f"""
Extract the following details from this insurance policy and return ONLY valid JSON:

- Policy holder name
- Policy number
- Start date
- End date
- Premium amount
- Coverage summary

Policy text:
\"\"\"{policy_text}\"\"\"
"""

    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[{"role": "user", "content": prompt}]
    )

    raw_output = response.choices[0].message.content

    # The reply body is not guaranteed to be a string (it can be None);
    # hand it back untouched instead of relying on a caught TypeError.
    if not isinstance(raw_output, str):
        return {"raw_output": raw_output}

    # Only ValueError is caught (json.JSONDecodeError is a subclass, and
    # str.index/rindex raise it too) so that KeyboardInterrupt, SystemExit and
    # real programming errors are no longer silently swallowed.
    try:
        return json.loads(raw_output)
    except ValueError:
        pass

    # Fallback: the model returned text around the JSON object.
    try:
        start = raw_output.index("{")
        end = raw_output.rindex("}") + 1
        return json.loads(raw_output[start:end])
    except ValueError:
        return {"raw_output": raw_output}

In [9]:
# Run LOCAL LLAMA3 via OLLAMA
# ----------------------------------------------------
def extract_with_llama(policy_text):
    """Ask a local ``llama3`` model (via ``ollama run``) to extract policy fields.

    The policy text is embedded in a fixed prompt that is piped to the
    ``ollama run llama3`` subprocess on stdin. The model's stdout is parsed
    leniently:

    1. the whole output is parsed as JSON;
    2. failing that, the span from the first ``{`` to the last ``}`` is parsed
       (covers output that mixes prose with the JSON object);
    3. failing that, the unparsed output is returned under ``"raw_output"``.
       If the process also exited with a non-zero status, its stderr text is
       included under ``"error"`` so a failed run is distinguishable from a
       merely non-JSON answer.

    Args:
        policy_text: Plain text of the insurance policy.

    Returns:
        Whatever ``json.loads`` produced from the output (normally a dict), or
        a fallback dict with ``"raw_output"`` (and possibly ``"error"``).

    Raises:
        Exceptions from ``subprocess.run`` itself (for example when the
        ``ollama`` executable cannot be started) are propagated unchanged.
    """

    prompt = f"""
Extract the following details from this insurance policy and return ONLY valid JSON:

- Policy holder name
- Policy number
- Start date
- End date
- Premium amount
- Coverage summary

Policy text:
\"\"\"{policy_text}\"\"\"
"""

    result = subprocess.run(
        ["ollama", "run", "llama3"],
        input=prompt.encode("utf-8"),
        stdout=subprocess.PIPE,
        # Capture stderr too: otherwise the CLI's progress-spinner escape
        # sequences are written straight into the notebook cell output.
        stderr=subprocess.PIPE,
    )

    # errors="replace" keeps a stray invalid byte from aborting the extraction.
    raw_output = result.stdout.decode("utf-8", errors="replace").strip()

    # Only ValueError is caught (json.JSONDecodeError is a subclass, and
    # str.index/rindex raise it too) so that KeyboardInterrupt, SystemExit and
    # real programming errors are no longer silently swallowed.
    try:
        return json.loads(raw_output)
    except ValueError:
        pass

    # fallback if model outputs text + JSON
    try:
        start = raw_output.index("{")
        end = raw_output.rindex("}") + 1
        return json.loads(raw_output[start:end])
    except ValueError:
        fallback = {"raw_output": raw_output}
        if result.returncode != 0:
            stderr_bytes = result.stderr or b""
            fallback["error"] = stderr_bytes.decode("utf-8", errors="replace").strip()
        return fallback

In [10]:
# Save JSON easily
# ----------------------------------------------------
def save_json(filename, data):
    """Serialize ``data`` as JSON into ``filename``.

    The file is opened in text write mode (created or truncated) and the
    payload is written with a four-space indent.

    Args:
        filename: Destination path, passed to ``open``.
        data: Any value ``json.dump`` can serialize.
    """
    with open(filename, mode="w") as sink:
        json.dump(obj=data, fp=sink, indent=4)

In [12]:
def main(policy_path="data/Insurance_Policy.docx"):
    """Run both extractors on one policy document and save their results.

    Loads the document text with ``load_docx_text``, runs
    ``extract_with_llama`` and then ``extract_with_openai`` on it, and writes
    each result with ``save_json`` to ``policy_llama.json`` and
    ``policy_openai.json`` respectively, printing progress along the way.

    Args:
        policy_path: Path of the .docx policy to process. Defaults to the
            sample document so existing ``main()`` calls behave as before.
    """
    print("Loading insurance policy...")
    text = load_docx_text(policy_path)

    print("\nRunning LLaMA3 extraction...")
    llama_data = extract_with_llama(text)
    save_json("policy_llama.json", llama_data)

    print("Saved: policy_llama.json")

    print("\nRunning OpenAI GPT-5-mini extraction...")
    openai_data = extract_with_openai(text)
    save_json("policy_openai.json", openai_data)

    print("Saved: policy_openai.json")

    print("\n--- Extraction Complete ---")
    print("Both JSON files are ready.")


# Entry point: run the full extraction pipeline when this code is executed
# directly (the guard is false when the code is imported as a module).
if __name__ == "__main__":
    main()

Loading insurance policy...

Running LLaMA3 extraction...


[?2026h[?25l[1G⠙ [K[?25h[?2026l[?2026h[?25l[1G⠹ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠴ [K[?25h[?2026l[?2026h[?25l[1G⠦ [K[?25h[?2026l[?2026h[?25l[1G⠧ [K[?25h[?2026l[?2026h[?25l[1G⠇ [K[?25h[?2026l[?2026h[?25l[1G⠏ [K[?25h[?2026l[?2026h[?25l[1G⠋ [K[?25h[?2026l[?2026h[?25l[1G⠙ [K[?25h[?2026l[?2026h[?25l[1G⠙ [K[?25h[?2026l[?2026h[?25l[1G⠹ [K[?25h[?2026l[?2026h[?25l[1G⠼ [K[?25h[?2026l[?2026h[?25l[1G⠴ [K[?25h[?2026l[?2026h[?25l[1G⠦ [K[?25h[?2026l[?2026h[?25l[1G⠦ [K[?25h[?2026l[?2026h[?25l[1G⠇ [K[?25h[?2026l[?2026h[?25l[1G⠇ [K[?25h[?2026l[?2026h[?25l[1G⠋ [K[?25h[?2026l[?2026h[?25l[1G⠙ [K[?25h[?2026l[?2026h[?25l[1G⠹ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠸ [K[?25h[?2026l[?2026h[?25l[1G⠼ [K[?25h[?2026l[?2026h[?25l[1G⠴ [K[?25h[?2026l[?2026h[?25l[1G⠦ [K[?25h[?2026l

Saved: policy_llama.json

Running OpenAI GPT-5-mini extraction...
Saved: policy_openai.json

--- Extraction Complete ---
Both JSON files are ready.


In [None]:
##### Date fields and premium amount -- formats differed between the LLaMA3 and GPT-5-mini outputs
##### Coverage summary -- matched
##### Policy holder name, policy number -- matched