In [None]:
import os
from pathlib import Path

# Directory that should contain the "vectorstore" folder.
# `__file__` is only defined when this code runs as a script/module; inside a
# notebook kernel it does not exist, so fall back to the working directory
# instead of failing with NameError.
try:
    base_dir = Path(__file__).parent
except NameError:
    base_dir = Path.cwd()

vectorstore_path = base_dir / "vectorstore"

# Path of the vector store relative to the working directory.
# os.path.relpath is used instead of Path.relative_to because the latter raises
# ValueError whenever the target is not located underneath the working directory.
rel = Path(os.path.relpath(vectorstore_path, Path.cwd()))
rel

In [29]:
# Display the kernel's current working directory (IPython automagic for `%pwd`).
pwd

'/Users/rlm/Desktop/Code/langchain-main/langchain/templates/rag-biomedical/rag_biomedical'

In [34]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
# Chroma vector store for the "rag-biomedical" collection.
# The persist directory is a relative path, so it is resolved against the
# kernel's current working directory; texts are embedded with OpenAIEmbeddings.
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    collection_name="rag-biomedical",
    persist_directory="./vectorstore",
)

In [35]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

# Metadata attributes described to the self-query retriever.
# Each entry is (attribute name, description); every attribute is declared
# with type "string".
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type="string")
    for field_name, field_description in (
        (
            "NCTId",
            "The unique identifier assigned to each clinical trial when registered on ClinicalTrials.gov. ",
        ),
        (
            "StudyType",
            "The nature of the study, indicating whether participants receive specific interventions or are merely observed for specific outcomes.",
        ),
        (
            "PhaseList",
            "This pertains to the phase of the study in drug trials.",
        ),
    )
]

# Completion model created with temperature 0.
llm = OpenAI(temperature=0)

# One-line description of what the stored documents contain.
document_content_description = "Information about clinical trial on ClinicalTrials.gov"

# Self-query retriever built from the LLM, the vector store, the document
# description and the metadata attribute definitions. Arguments are passed
# by keyword so each one's role is visible at the call site.
retriever_self_query = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)

In [37]:
# Optional check of the self-query retriever on its own (uncomment to run):
# retriever_self_query.get_relevant_documents("What is the focus of trial NCT00000102?")

In [38]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough

# Prompt for the RAG chain: the answer must rely only on the supplied context.
# Placeholders: {context} and {question}.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# Chat model with default settings; it produces the final answer.
model = ChatOpenAI()

# RAG pipeline: the incoming question is fed both to the self-query retriever
# (filling "context") and passed through unchanged (filling "question"); the
# resulting mapping goes through the prompt, the chat model, and finally a
# string output parser.
chain = (
    RunnableParallel(
        {
            "context": retriever_self_query,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | model
    | StrOutputParser()
)

In [39]:
# Run the RAG chain end to end on a sample question about a single trial;
# the returned string is shown as the cell output.
chain.invoke("What is the focus of trial NCT00000102?")

'The focus of trial NCT00000102 is to evaluate the effects of extended release nifedipine (Procardia XL), a blood pressure medication, on the hypothalamic-pituitary-adrenal axis in patients with congenital adrenal hyperplasia. The trial aims to assess both acute and chronic effects of nifedipine and evaluate its potential to decrease the dosage of glucocorticoid medication needed to suppress the HPA axis in order to reduce the deleterious effects of glucocorticoid treatment in CAH.'