In [2]:
import os 

from langchain.chains import create_history_aware_retriever 
from langchain_core.prompts import MessagesPlaceholder

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_openai import ChatOpenAI
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory


  from tqdm.autonotebook import tqdm


In [6]:
PINECONE_INDEX_NAME = "quickstart"
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]

In [3]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=OPENAI_API_KEY)
llm = ChatOpenAI(model="gpt-3.5-turbo")

# pc = Pinecone(api_key=pinecone_api_key)

vectorstore = PineconeVectorStore.from_existing_index(
    PINECONE_INDEX_NAME, embeddings
)

retriever = vectorstore.as_retriever()


In [4]:
# Check
vectorstore.similarity_search("õppekavade arv Tartu ülikoolis")

[Document(page_content='<LINK>https://ut.ee/et/avaleht</LINK>Tartumaa EST 0 Sotsiaalteaduste valdkonna emeriitdotsendid Töötajate kontaktandmed Lossi 36 51003 Tartu linn , Tartu linn , Tartumaa EST 0 Meditsiiniteaduste valdkond Meditsiiniteaduste valdkonna dekanaat https : //meditsiiniteadused.ut.ee Töötajate kontaktandmed +372 737 5326 med @ ut.ee Ravila 19 50411 Tartu linn , Tartu linn , Tartumaa EST Bio- ja siirdemeditsiini instituut https : //biomeditsiin.ut.ee Töötajate kontaktandmed +372 737 4210 bsmi @ ut.ee Biomeedikum , Ravila 19 50411 Tartu linn , Tartu linn , Tartumaa EST Farmaatsia instituut https : //farmaatsia.ut.ee Töötajate kontaktandmed +372 737 5286 pharmacy @ ut.ee Nooruse 1 50411 Tartu linn , Tartu linn , Tartumaa EST Hambaarstiteaduse instituut https : //hambaarstiteadus.ut.ee Töötajate kontaktandmed +372 731 9856 L. Puusepa 1a 50406 Tartu linn , Tartu linn , Tartumaa EST Kliinilise meditsiini instituut https : //kliinilinemeditsiin.ut.ee Töötajate kontaktandmed +3

In [5]:
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is"
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

In [6]:
system_prompt = (
    "Context information is below.\n"
    "---------------------\n"
    "{context}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge,"
    "Please answer the question, please answer user question"
    #"Please answer the question: {query}\n"
    #"In language {language}"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [7]:
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [8]:
conversational_rag_chain.invoke(
    {"input": "Who is Jaan Aru?"},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

'Jaan Aru is an associate professor at the Institute of Computer Science at the University of Tartu in Estonia. He studied psychology in Berlin, completed his PhD at the Max Planck Institute for Brain Research, and obtained a Marie Skłodowska-Curie fellowship for post-doctoral research with Matthew Larkum in Berlin. Jaan Aru has a background in AI, psychology, and neuroscience, and he is known for his research at the intersection of AI, deep learning, neuroscience, physics, and psychology. He has also published popular scientific books and received awards for science communication.'

In [9]:
from langchain_core.messages import AIMessage

for message in store["abc123"].messages:
    if isinstance(message, AIMessage):
        prefix = "AI"
    else:
        prefix = "User"

    print(f"{prefix}: {message.content}\n")

User: Who is Jaan Aru?

AI: Jaan Aru is an associate professor at the Institute of Computer Science at the University of Tartu in Estonia. He studied psychology in Berlin, completed his PhD at the Max Planck Institute for Brain Research, and obtained a Marie Skłodowska-Curie fellowship for post-doctoral research with Matthew Larkum in Berlin. Jaan Aru has a background in AI, psychology, and neuroscience, and he is known for his research at the intersection of AI, deep learning, neuroscience, physics, and psychology. He has also published popular scientific books and received awards for science communication.



In [60]:
conversational_rag_chain.invoke(
    {"input": "Which research group does he work at?"},
    config={
        "configurable": {"session_id": "abc123"}
    },
)["answer"]



'Jaan Aru works at the Natural and Artificial Intelligence Lab at the University of Tartu.'

In [62]:
conversational_rag_chain.invoke(
    {"input": "What his email?"},
    config={
        "configurable": {"session_id": "abc123"}
    },
)["answer"]


"Jaan Aru's email is jaan.aru@gmail.com."

In [27]:
import uuid
str(uuid.uuid4())


'51864a0f-ac94-4ebb-9a1b-cdc97e980dcc'

In [19]:
res = conversational_rag_chain.astream(
    {"input": "Which research group does he work at?"},
    config={
        "configurable": {"session_id": "abc123"}
    },
)


In [22]:

async for chunk in res:
    print(chunk)

In [10]:
model_name = "gpt-3.5-turbo"
embedding_model_name = "text-embedding-3-small"
pinecone_index_name = "quickstart"
store = {}
session_id = "abc"

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

llm = ChatOpenAI(model=model_name)
embeddings = OpenAIEmbeddings(model=embedding_model_name, api_key=OPENAI_API_KEY)
       
vectorstore = PineconeVectorStore.from_existing_index(
    pinecone_index_name, embeddings
    )

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
       
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is"
)
       
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
       
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)
       
system_prompt = (
    "Context information is below.\n"
    "---------------------\n"
    "{context}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge,"
    "Please answer the question in the following language: {language}"
)
       
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
       
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

       
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
        
question = "Kes on Jaan Aru?"
language = "English" 

conversational_rag_chain.stream({"input": question, "language": language},
                                            config={"configurable": {"session_id": session_id
                                                                    }}, 
                                            )


<generator object RunnableBindingBase.stream at 0x00000184B1772840>

In [12]:
rag_chain.invoke({"input": question, "language": language},
                                            config={"configurable": {"session_id": session_id
                                                                    }})

KeyError: "Input to ChatPromptTemplate is missing variables {'chat_history'}.  Expected: ['chat_history', 'context', 'input', 'language'] Received: ['input', 'language', 'context']"