In [56]:
import os
from dotenv import load_dotenv
# loading variables from .env file
load_dotenv() # pass a path if it's not a .env in the current working directory 
 

# Define folder path
folder_path = "/Users/lavonda/Documents/RAG/fulldata"

# List all PDF files in the folder
pdf_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".pdf")]

# Print the list of PDF files to verify they are loaded correctly
print(f"Total PDFs found: {len(pdf_files)}")
print("First 5 PDF files (for verification):")
for file in pdf_files[:5]:  # Print only the first 5 files to avoid clutter
    print(file)

# Uncomment below line to print all files if needed
# print("\n".join(pdf_files))

Total PDFs found: 33
First 5 PDF files (for verification):
/Users/lavonda/Documents/RAG/fulldata/Aorta-follow-up.pdf
/Users/lavonda/Documents/RAG/fulldata/HCM.pdf
/Users/lavonda/Documents/RAG/fulldata/Tricuspid-Regurgitation-Focused-Imaging.pdf
/Users/lavonda/Documents/RAG/fulldata/Pericardiocentesis-follow-up.pdf
/Users/lavonda/Documents/RAG/fulldata/AV-Optimization.pdf


In [None]:
import nest_asyncio
import json
from llama_cloud_services import LlamaParse


# Required for async execution in Jupyter
nest_asyncio.apply()

# Initialize LlamaParse with JSON output
LLAMA_API_KEY = os.getenv("LLAMA_API_KEY")
parser = LlamaParse(
    api_key=LLAMA_API_KEY,  # Set API key
    result_type="json",  # Change to JSON format
    output_raw_json=True,  # Ensures JSON structure is preserved
    num_workers=4,  # Optimize for batch processing
    verbose=True,
    premium_mode=True,  
    language="en"  # Set language (default="en")
)

# Load all PDFs synchronously
json_data = parser.get_json_result(pdf_files)

# Save as JSON file
with open("output_data.json", "w", encoding="utf-8") as f:
    json.dump(json_data, f, indent=4)  # Pretty print JSON

print("Extracted JSON data saved to output_data.json")

# Print a preview of the first document's JSON
print("Extracted JSON preview:")
print(json.dumps(json_data[0], indent=4)[:500])  # Show first 500 characters

Parsing files:   0%|          | 0/33 [00:00<?, ?it/s]

Started parsing the file under job_id f78dc916-62fb-4dda-a3ac-5403543d988e
Started parsing the file under job_id b0c0f75c-0dff-47c1-a232-ab2489351c58
Started parsing the file under job_id fddd67a8-864d-4907-82a9-cd2939b99542
Started parsing the file under job_id 77fc9210-d85c-48d9-84cb-e75b7cd50006


KeyboardInterrupt: 

Parsing files:   6%|▌         | 2/33 [00:05<01:09,  2.25s/it]

Started parsing the file under job_id 76978a7b-035c-49eb-b11d-983454911029
Started parsing the file under job_id 9f487e91-0d6d-49ef-8423-be8a3d4fe617
Started parsing the file under job_id 44d09f49-282d-4140-ac53-2cde62b9ab46


Parsing files:  12%|█▏        | 4/33 [00:06<00:31,  1.08s/it]

Started parsing the file under job_id 0859514d-9cd7-4e55-b807-05cba25ae6f4


Parsing files:  18%|█▊        | 6/33 [00:09<00:35,  1.31s/it]

Started parsing the file under job_id e422e8a9-0bc2-492c-b996-0c55db02c226


Parsing files:  24%|██▍       | 8/33 [00:10<00:20,  1.21it/s]

Started parsing the file under job_id 1047613b-311a-4ba3-aa09-490ae99833a7
Started parsing the file under job_id fe1cad11-6965-4266-9a21-7ab2a8e5d437
Started parsing the file under job_id bfcb29ab-2b05-46e5-8de2-ead38c4183fa


Parsing files:  33%|███▎      | 11/33 [00:13<00:19,  1.16it/s]

Started parsing the file under job_id 40e96f24-6061-493c-acb4-55ab4291fb62
Started parsing the file under job_id dd8a70b3-d8fd-4447-aba5-65af60217ae0


Parsing files:  36%|███▋      | 12/33 [00:14<00:17,  1.19it/s]

Started parsing the file under job_id f5415416-c239-4025-9314-f4db7468d0dc
Started parsing the file under job_id 36d7c022-ec52-4440-85dd-7d873ea0f568


Parsing files:  45%|████▌     | 15/33 [00:18<00:15,  1.18it/s]

Started parsing the file under job_id d8d43d75-8c1b-4659-86a2-159361a39534
Started parsing the file under job_id a90bc7e6-9cc1-4c49-9bfc-c723bc117826
Started parsing the file under job_id 189c0161-db47-4935-bda9-f5ba181916f4


Parsing files:  48%|████▊     | 16/33 [00:18<00:12,  1.33it/s]

Started parsing the file under job_id adcba549-78ab-4c38-a854-9239379806a9


Parsing files:  52%|█████▏    | 17/33 [00:21<00:24,  1.51s/it]

Started parsing the file under job_id fe4f35a0-52f9-4261-a432-0ddfc7b307cd


Parsing files:  58%|█████▊    | 19/33 [00:23<00:16,  1.15s/it]

Started parsing the file under job_id 36984ea4-8d95-4c5c-a9c2-6c414c52fb0c
Started parsing the file under job_id 27e5803a-6f86-40ea-a748-a3e8c1546afd


Parsing files:  64%|██████▎   | 21/33 [00:25<00:14,  1.19s/it]

Started parsing the file under job_id 7f4493fa-0501-4e6c-8fa4-526bec03c8d7
Started parsing the file under job_id cb24e6d7-4bf8-410c-af9b-5c2e2ae8b2ef


Parsing files:  67%|██████▋   | 22/33 [00:27<00:12,  1.16s/it]

Started parsing the file under job_id 1b5052cb-0314-4341-b01d-27d5f83f52bf


Parsing files:  70%|██████▉   | 23/33 [00:27<00:10,  1.08s/it]

Started parsing the file under job_id c44078f2-b0d9-485f-a997-d90083172a3a


Parsing files:  73%|███████▎  | 24/33 [00:29<00:11,  1.23s/it]

Started parsing the file under job_id 543c4e4b-775d-47bc-9595-d460dfa31f21


Parsing files:  76%|███████▌  | 25/33 [00:30<00:08,  1.05s/it]

Started parsing the file under job_id 76d66050-c71c-4798-8c86-7541722df0bc


Parsing files:  79%|███████▉  | 26/33 [00:30<00:06,  1.00it/s]

Started parsing the file under job_id 7ead572c-75f9-41b3-a733-001bbfcc761f


Parsing files:  82%|████████▏ | 27/33 [00:32<00:06,  1.07s/it]

Started parsing the file under job_id 0f7cbb88-51cd-40fe-886b-91782b58bbf5


Parsing files:  85%|████████▍ | 28/33 [00:33<00:05,  1.11s/it]

Started parsing the file under job_id 6f64e5c6-6533-480f-bc73-0f8864fc7e62


Parsing files:  88%|████████▊ | 29/33 [00:34<00:04,  1.03s/it]

Started parsing the file under job_id 7ec7677a-8eac-4573-862d-b6ea30f213b8


Parsing files: 100%|██████████| 33/33 [00:38<00:00,  1.16s/it]


In [54]:
import json
from neo4j import GraphDatabase

# ✅ Use your Neo4j connection details
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USER = os.getenv('NEO4J_USER')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')


# ✅ Initialize the connection
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# ✅ Load JSON Data
with open("output_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# ✅ Function to insert documents into Neo4j
def insert_data(tx, doc):
    query = """
    MERGE (d:Document {file_path: $file_path})
    SET d.num_pages = size($pages)
    WITH d
    UNWIND $pages AS page
    MERGE (p:Page {document: d.file_path, page_number: page.page})
    SET p.text = page.text, p.md = page.md
    MERGE (d)-[:HAS_PAGE]->(p)
    """
    tx.run(query, file_path=doc["file_path"], pages=doc["pages"])

# ✅ Insert Data into Neo4j
with driver.session() as session:
    for doc in data:
        session.execute_write(insert_data, doc)  # Use execute_write instead of write_transaction

print("✅ JSON data successfully imported into Neo4j!")

# ✅ Close connection
driver.close()


✅ JSON data successfully imported into Neo4j!


In [60]:
import openai

# 🔹 Your OpenAI API Key
openai.api_key = os.getenv('OPENAI_API_KEY')

# 🔹 Function to get an answer from GPT-4
def ask_gpt4(context, question):
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are an AI assistant that answers questions based on the provided medical documents."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
        ],
        temperature=0.7
    )
    return response["choices"][0]["message"]["content"]

# 🔹 Example: Retrieve context from Neo4j and ask GPT-4
question = "What are the latest treatments for heart disease?"
search_results = search_documents("heart disease")

# 🔹 Combine relevant text into context
context_text = "\n\n".join([result["p.text"] for result in search_results])

# 🔹 Get an AI-generated answer
answer = ask_gpt4(context_text, question)

print("🤖 Answer:", answer)


  with driver.session() as session:


APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
