In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
# Directory handed to Chroma as its persistence location. The path is
# relative, so it resolves against the kernel's working directory.
persist_directory = "all_docs_zotero/chroma/"

# Embedding client created with no explicit arguments.
embedding = OpenAIEmbeddings()

# Vector store bound to the persistence directory and embedding client above.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

In [42]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain_core.messages import AIMessage, HumanMessage

In [4]:
# Metadata attributes described to the self-query retriever.
# Each row is (name, description, type) and becomes one AttributeInfo,
# in the order listed.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in (
        (
            "Publication Year",
            "The year that the paper was published.",
            "integer",
        ),
        (
            "Date Added",
            "The year that the paper was added to the collection.",
            "integer",
        ),
        (
            "Author",
            "Authors of the paper, it could be couple of people.",
            "string",
        ),
        (
            "Title",
            "Title of the paper that the paper is about.",
            "string",
        ),
    )
]

In [30]:
# Short description of what the stored documents contain; passed to the
# self-query retriever together with the metadata field descriptions.
document_content_description = "Brain Heart Interconnectome (BHI) research papers"

# One chat model instance is reused for the self-query retriever, the
# document filter, and the later prompt chains.
llm = ChatOpenAI(temperature=0)

# Self-query retriever over the vector store. `enable_limit` is not passed,
# so the library default applies.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
)

# Wrap the retriever so that retrieved documents pass through an
# LLM-based filter before being returned.
_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=_filter,
)

In [36]:
# Imported in this cell because the prompt classes are used here; without it
# the cell raises NameError on a fresh kernel (Restart & Run All).
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt that condenses a multi-turn conversation into one standalone search
# query: the conversation history is inserted first, followed by the
# instruction to produce only the query text.
query_transform_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="messages"),
        (
            "user",
            # Adjacent literals are concatenated by the parser. A backslash
            # continuation inside a single literal would embed the next line's
            # indentation into the prompt text.
            "Given the above conversation, generate a search query to look up "
            "in order to get information relevant to the conversation. "
            "Only respond with the query, nothing else.",
        ),
    ]
)

In [38]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableBranch

# Retrieval entry point that branches on the number of messages in the input.
query_transforming_retriever_chain = RunnableBranch(
    (
        # Exactly one message: send that message's text straight to the retriever.
        lambda state: len(state.get("messages", [])) == 1,
        (lambda state: state["messages"][-1].content) | compression_retriever,
    ),
    # Any other case: have the LLM rewrite the conversation into a search
    # query, parse the reply to a plain string, then retrieve with it.
    query_transform_prompt | llm | StrOutputParser() | compression_retriever,
).with_config(run_name="chat_retriever_chain")

In [49]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instructions for the answering step. `{context}` is the slot where
# the retrieved documents are inserted. The text is spelled out piece by piece
# so that every newline and space in the final string is visible.
SYSTEM_TEMPLATE = (
    "\n"
    "Answer the user's questions based on the below context. \n"
    "If the context doesn't contain any relevant information "
    "to the question, don't make something up and just say \"I don't know\":\n"
    "\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
)

# Answering prompt: the system instructions (which carry the {context} slot)
# come first, followed by the conversation messages.
question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYSTEM_TEMPLATE),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Chain that feeds the retrieved documents and the conversation to the LLM
# through the prompt above.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=question_answering_prompt,
)

# RunnablePassthrough is not imported by any other cell in this notebook;
# importing it here keeps the cell runnable on a fresh kernel.
from langchain_core.runnables import RunnablePassthrough

# End-to-end chain. The input dict is passed through unchanged while two keys
# are added in sequence:
#   1. "context" - documents returned by the query-transforming retriever
#   2. "answer"  - output of the document chain, which by then can read
#                  both "messages" and "context"
conversational_retrieval_chain = RunnablePassthrough.assign(
    context=query_transforming_retriever_chain,
).assign(
    answer=document_chain,
)

In [54]:
# Single-turn example: the conversation holds one human message, so the
# retrieval branch for a one-message input is taken.
conversational_retrieval_chain.invoke(
    dict(messages=[HumanMessage(content="What can you tell me about consort?")])
)

{'messages': [HumanMessage(content='What can you tell me about consort?')],
 'context': [Document(page_content='able results.\n22We believe that the CONSO\nRT Statement, and its extensions, is a key tool throughwhich adequate reporting can be achieved. More jour-\nnals should endorse the CONSORT Statement, and,\nmost importantly, they should do more to ensure\nadherence. For example, journals could incorporate\nthe checklist and flow diagram into their review pro-\ncesses and indicate this requirement in their published\ninstructions to authors.\n28Without wide endorsement,\nthe CONSORT Statement cannot fully yield the ben-\nefits it was intended to produce.We are grateful to Sophie Moher and Sui Yan Wong for their help in\nreviewing articles and assessing journal instructions to authors.Contributors: SH was involved in the design, implementation, and\nanalysis of the study, and in writing, and commenting on, drafts on the\nfinal manuscript. SD, L-MY, A-WC, and DGA were involved in the

In [55]:
# Multi-turn example: the earlier question and answer are replayed ahead of a
# vague follow-up, so the conversation is first rewritten into a search query.
conversational_retrieval_chain.invoke(
    dict(
        messages=[
            HumanMessage(content="What can you tell me about consort?"),
            AIMessage(
                content="CONSORT stands for Consolidated Standards of Reporting Trials. It is a key tool for achieving adequate reporting in research. Journals are encouraged to endorse the CONSORT Statement and ensure adherence by incorporating the checklist and flow diagram into their review processes and instructions to authors. Endorsement by journals can lead to better reporting of randomized controlled trials (RCTs)."
            ),
            HumanMessage(content="Tell me more!"),
        ]
    )
)

{'messages': [HumanMessage(content='What can you tell me about consort?'),
  AIMessage(content='CONSORT stands for Consolidated Standards of Reporting Trials. It is a key tool for achieving adequate reporting in research. Journals are encouraged to endorse the CONSORT Statement and ensure adherence by incorporating the checklist and flow diagram into their review processes and instructions to authors. Endorsement by journals can lead to better reporting of randomized controlled trials (RCTs).'),
  HumanMessage(content='Tell me more!')],
 'context': [Document(page_content='CONSORT statement for quality of reports on randomized controlled trial\nabstracts from four high-impact general medical journals. Trials 2012;\n13:77.\n5 Begg C, Cho M, Eastwood S, et al. Improving the quality of reporting of\nrandomized controlled trials: the CONSORT statement. JAMA 1996;\n276:637–639.6 Moher D, Hopewell S, Schulz KF, et al. CONSORT 2010 explanation and\nelaboration: updated guidelines for reporting