In [None]:
# !pip install langchain-chroma

# !pip install sentence-transformers

# !pip install -qU langchain-mistralai

# ASTRA_DB_APPLICATION_TOKEN
# ASTRA_DB_API_ENDPOINT
# HF_TOKEN
# MISTRALAI_API_KEY
# GROQ_API_KEY

!pip install "langchain-astradb>=0.1.0"

import os
from langchain import hub
import bs4
from langchain.agents import AgentExecutor, create_react_agent
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_core.messages import HumanMessage
from langchain_core.messages.ai import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import Tool
from langchain_groq import ChatGroq
from langchain_mistralai import MistralAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_astradb import AstraDBVectorStore
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader

# Read the Mistral key from the environment (variable name as listed in the
# notes at the top of this file). The bare name `api_key` was never defined,
# so constructing the embeddings client used to fail with NameError.
# A missing variable now fails with an explicit KeyError naming it.
api_key = os.environ["MISTRALAI_API_KEY"]
embeddings = MistralAIEmbeddings(api_key=api_key)

# Notebook inspection: show the current working directory.
os.getcwd()

# Load every file matching the glob below from the corpus directory,
# reading each one with TextLoader.
loader = DirectoryLoader(
    "/Users/pranayyb/Documents/AIML/GenAI/Agents",
    glob="./*.txt",
    loader_cls=TextLoader,
)
document = loader.load()

# Notebook inspection: the loaded documents.
document

# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
text = text_splitter.split_documents(document)

# Notebook inspection: peek at one chunk. Index 170 only exists when the
# corpus produced at least 171 chunks.
text[170]

# chunk_embeddings = [embeddings.embed_query(chunk.page_content) for chunk in text]

!pip install langchain_huggingface

from langchain_huggingface import HuggingFaceEmbeddings
# Rebind `embeddings` to a sentence-transformers model; from here on this is
# the embedding model the rest of the script uses.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed all chunks in one batched call. `embed_documents` is the interface
# method intended for document text and avoids one model invocation per chunk.
chunk_embeddings = embeddings.embed_documents(
    [chunk.page_content for chunk in text]
)

# Notebook inspection: one vector per chunk.
len(chunk_embeddings)

# Pair each chunk's text with its embedding vector. `text` and
# `chunk_embeddings` are parallel sequences (one vector was computed per
# chunk), so zip walks them in lockstep without manual indexing.
documents = [
    {"text": chunk.page_content, "$vector": vector}
    for chunk, vector in zip(text, chunk_embeddings)
]


# Notebook inspection: the assembled records.
documents

# Astra DB connection settings come from the environment (variable names as
# listed in the notes at the top of this file). These two names were never
# defined before, so building the vector store raised NameError.
# A missing variable now fails with an explicit KeyError naming it.
ASTRA_DB_API_ENDPOINT = os.environ["ASTRA_DB_API_ENDPOINT"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]

# Vector store handle for the "dateher2" collection, using the current
# `embeddings` object as its embedding function.
collection = AstraDBVectorStore(
    embedding=embeddings,
    collection_name="dateher2",
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
)

# Notebook inspection: the vector store object.
collection

from langchain.schema import Document
import uuid
# Turn each {"text", "$vector"} record into a Document with a fresh random
# UUID as its id. Only the text is carried over; the "$vector" entry is not
# passed on. Note that this rebinds `documents` from dicts to Document objects.
documents = [
    Document(
        id=str(uuid.uuid4()),
        page_content=record["text"],
    )
    for record in documents
]

# Write the documents into the vector store.
collection.add_documents(documents=documents)

# Notebook inspection: the Document objects that were just added.
documents

# Smoke-test retrieval: fetch the single closest match (k=1) for a sample query.
results = collection.similarity_search(
    query="How to update profile",
    k=1,
)

# Print each hit's text followed by its metadata in brackets.
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

# Chat model used throughout this script: for query rewriting here, and for
# answering and the agent further down.
# NOTE(review): confirm this model id is still offered by Groq.
llm = ChatGroq(model="mixtral-8x7b-32768")

# Instruction for the rewriting step: turn the latest user message into a
# standalone question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

# Message order: system instruction, then prior turns, then the new input.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Retriever over the vector store, with default search settings.
retriever = collection.as_retriever()

# Retriever wrapper built from the model, the base retriever, and the
# rewriting prompt above.
history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    contextualize_q_prompt,
)

# qa_system_prompt = (
#     "You are an assistant for question-answering tasks. Use "
#     "the following pieces of retrieved context to answer the "
#     "question. If you don't know the answer, just say that you "
#     "don't know. Use three sentences maximum and keep the answer "
#     "concise."
#     "\n\n"
#     "{context}"
# )

# System prompt for the answering step. The trailing "{context}" placeholder
# is the slot for the retrieved documents.
qa_system_prompt = (
    "You are a conversational assistant. Use the retrieved context and "
    "the conversation history to answer questions and personalize responses. "
    "If the user shares their name, remember it and use it appropriately."
    "\n\n"
    "{context}"
)


# Message order: system prompt (with context), then prior turns, then the new input.
qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Answering chain: the model driven by the QA prompt.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Full RAG pipeline: history-aware retrieval feeding the answering chain.
rag_chain = create_retrieval_chain(
    history_aware_retriever,
    question_answer_chain,
)

# LangSmith tracing configuration for this run.
#
# LANGCHAIN_ENDPOINT and LANGCHAIN_API_KEY are deliberately NOT assigned here.
# Writing blank strings into os.environ overwrote any real values exported in
# the shell and left tracing switched on with unusable credentials. Export
# those two variables in the environment instead.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "rag-agent"

# Handle of the prompt fetched from the LangChain hub; it is passed to
# create_react_agent further down.
REACT_PROMPT_HANDLE = "hwchase17/react"
react_docstore_prompt = hub.pull(REACT_PROMPT_HANDLE)

def search_wikipedia(query):
    """Return a two-sentence Wikipedia summary for *query*.

    The ``wikipedia`` package is imported lazily, so it is only required when
    this tool is actually called; a missing package still raises ImportError.

    Args:
        query: Search text passed to ``wikipedia.summary``.

    Returns:
        The summary text, or a fixed fallback message when the lookup fails.
    """
    from wikipedia import summary

    try:
        return summary(query, sentences=2)
    except Exception:
        # Best-effort tool: any lookup failure becomes a fallback message for
        # the agent. Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return "I couldn't find any information on that."

def _answer_with_rag(question, **kwargs):
    """Run the RAG chain for *question* and return only its answer text.

    The chain result is a dict that also carries the input, the chat history
    and the retrieved documents. Handing that whole dict back as the tool
    output would put all of it into the agent's observation, so only the
    "answer" entry is returned.

    NOTE(review): the agent appears to call tools with just the query string,
    so "chat_history" probably always falls back to [] here. Confirm whether
    the conversation history should be wired in.
    """
    result = rag_chain.invoke(
        {
            "input": question,
            "chat_history": kwargs.get("chat_history", []),
        }
    )
    return result["answer"]


# Tools offered to the agent: the RAG chain over the indexed documents, and a
# Wikipedia lookup for questions the context does not cover.
tools = [
    Tool(
        name="Answer Question through RAG",
        func=_answer_with_rag,
        description="useful for when you need to answer questions about the context",
    ),
    Tool(
        name="wikipedia",
        func=search_wikipedia,
        description="useful when you cannot find answers about the context",
    ),
]

# ReAct agent: the chat model plus the tool list, driven by the hub prompt.
agent = create_react_agent(llm=llm, tools=tools, prompt=react_docstore_prompt)

# Executor settings: handle_parsing_errors and verbose are both switched on.
executor_options = {
    "handle_parsing_errors": True,
    "verbose": True,
}
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    **executor_options,
)

from langchain.schema import HumanMessage, AIMessage

# Interactive chat loop. Each turn is sent to the agent together with the
# running history, and both sides of the exchange are appended afterwards.
chat_history = []
while True:
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
        print()
        break
    if not query:
        # Blank line: ask again instead of spending an agent call on it.
        continue
    if query.lower() == "exit":
        break
    response = agent_executor.invoke(
        {"input": query, "chat_history": chat_history}
    )
    answer = response["output"]
    print(f"LoveGuru: {answer}")
    chat_history.append(HumanMessage(content=query))
    chat_history.append(AIMessage(content=answer))

