In [1]:
import os
import requests

from openai import OpenAI
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

True

## Load the data from the PDF

In [19]:
# Path of the study PDF to analyse. Swap in the commented-out path below to
# run the notebook against the other sample study instead.
# data_file = "./data/test_study_1.pdf"
data_file = "./data/test_study_3.pdf"

In [20]:
# Create a PDF loader for the selected file.
doc = PyPDFLoader(data_file)

In [21]:
# Run the loader; the result is what the text splitter consumes in a later cell.
loaded_doc = doc.load()

In [22]:
# Splitter configuration, kept in one place so the chunking parameters are easy to tune.
splitter_options = {"chunk_size": 1500, "chunk_overlap": 800}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded PDF documents into chunks ready for embedding.
splits = text_splitter.split_documents(loaded_doc)

In [23]:
# Embed the chunks with OpenAIEmbeddings and build a FAISS vector store from them.
db = FAISS.from_documents(splits, OpenAIEmbeddings())

## Set up GPT-4o

In [24]:
client = OpenAI()


def get_completion(message):
    """Send ``message`` to the chat model as a single user turn and print the reply.

    Args:
        message: Text used as the content of the one user message.

    Returns:
        None. The content of the first returned choice is printed, not returned.
    """
    user_turn = {"role": "user", "content": message}
    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        messages=[user_turn],
    )
    reply = response.choices[0].message
    print(reply.content)

## Search the document

In [25]:
# Question used to retrieve the most relevant chunks from the vector store.
query = "What are median lifespan and maximum lifespan in the treatment group vs the control group?"
docs = db.similarity_search(query, k=2)

# Join every returned chunk instead of indexing docs[0] / docs[1] directly:
# the search can return fewer than k chunks (e.g. a very short PDF), which
# would otherwise raise IndexError. With exactly two hits the result is the
# same string as before, and the cell keeps working if k is changed.
prompt_context = "\n\n".join(d.page_content for d in docs)

In [26]:
# Prompt sent to the model. {prompt_context} is filled in with the retrieved
# chunks via str.format; the triple backticks delimit the study text.
prompt_template = """In the following you will find a study on longevity, separated by ```. 
Please extract both the median and the maximum lifespan increase or decrease of the treatment group vs. the control group. 
If you don't know the answer, then say that you don't know.

Return the result in json format, where "median_lifespan_change" and "maximum_lifespan_change" are both keys.

```
{prompt_context}
```
"""

In [27]:
# Fill the template with the retrieved context and print the model's answer.
get_completion(prompt_template.format(prompt_context=prompt_context))

```json
{
  "median_lifespan_change": "I don't know",
  "maximum_lifespan_change": "I don't know"
}
```
