In [36]:
import streamlit as st
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from llm import llm
import os
from sentence_transformers import SentenceTransformer
import pinecone
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI


load_dotenv()

True

In [37]:
# Azure OpenAI connection settings, read from the process environment after
# load_dotenv() has run in the first cell. A variable that is not set yields None.
AZURE_OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME')
AZURE_ENDPOINT = os.getenv('AZURE_ENDPOINT')
API_VERSION = os.getenv('API_VERSION_LLM')  # the env var name carries an _LLM suffix

# Chat model used by the chain built further down.
# NOTE(review): this rebinds `llm`, which the first cell also brings in via
# `from llm import llm` — confirm which of the two is meant to be used.
llm = AzureChatOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version=API_VERSION,
    azure_endpoint=AZURE_ENDPOINT,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT_NAME,
    temperature=0,   # fixed sampling temperature of zero
    streaming=True,
)

In [38]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Human-turn template: instructions, the retrieved context, then the question.
# Expects the input variables `context` and `question`.
RAG_TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

<context>
{context}
</context>

Answer the following question:

{question}"""

# The chain is wrapped with history_messages_key="history", so earlier turns are
# supplied under the `history` input key. Without a matching slot in the prompt
# those messages were silently dropped and the model never saw the conversation.
# The placeholder is optional so the prompt still formats when no history is given.
prompt_template = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="history", optional=True),
        ("human", RAG_TEMPLATE),
    ]
)

In [39]:
class ChatAgent:
    """Holds the chat state: message history, embedding model, Pinecone index and chain.

    NOTE(review): in this view `setup_chain` and the other `self`-taking
    functions are defined in later cells at module level rather than inside
    this class body — confirm they are attached to ChatAgent before it is
    instantiated, since `__init__` calls `self.setup_chain()`.
    """

    def __init__(self):
        """Create the history store, embedding model, Pinecone index handle and chain."""
        pinecone_api_key = os.environ.get('PINECONE_API_KEY')

        # Chat messages are kept under the "chat_history" key.
        self.history = StreamlitChatMessageHistory(key="chat_history")

        # Sentence Transformer used to embed user questions.
        self.embedding_model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

        # The Pinecone client must be initialised before the index handle is created.
        pinecone.init(api_key=pinecone_api_key)
        self.index = pinecone.Index('test-git')

        self.chain = self.setup_chain()

In [40]:
def setup_chain(self):
    """Wrap the prompt-to-LLM pipeline in a message-history-aware runnable.

    Returns:
        A RunnableWithMessageHistory that reads the user turn from the
        "question" input key and supplies stored messages under "history".
    """
    def _session_history(session_id):
        # Every session id resolves to the same history object on this instance.
        return self.history

    pipeline = prompt_template | llm
    return RunnableWithMessageHistory(
        pipeline,
        _session_history,
        input_messages_key="question",
        history_messages_key="history",
    )

In [41]:
def display_messages(self):
    """Render every stored chat message, seeding a greeting when history is empty."""
    history = self.history

    # An empty history gets an opening assistant message before rendering.
    if not history.messages:
        history.add_ai_message("How can I assist you?")

    for message in history.messages:
        bubble = st.chat_message(message.type)
        bubble.write(message.content)


In [29]:
def format_retrieved_content_for_prompt(self, search_results):
    """Turn the matches of the first query result into one prompt-ready string.

    Args:
        search_results: Mapping indexed as
            search_results["results"][0]["matches"]; each match carries a
            "metadata" mapping with "title", "url" and "text".

    Returns:
        One "Title/URL/Text" paragraph per match, concatenated in order
        (empty string when there are no matches).
    """
    def _render(match):
        metadata = match["metadata"]
        title, url, text = metadata["title"], metadata["url"], metadata["text"]
        return f"Title: {title}\nURL: {url}\nText: {text}\n\n"

    return "".join(_render(match) for match in search_results["results"][0]["matches"])


In [44]:
def retrieve_documents(self, question, top_k=5):
    """Retrieve documents from Pinecone using a question embedding.

    Args:
        question: Text to embed with self.embedding_model.
        top_k: Number of matches requested from the index (defaults to 5,
            the previously hard-coded value).

    Returns:
        Whatever self.index.query returns for the single embedded query.
    """
    query_embedding = self.embedding_model.encode(question).tolist()
    # The embedding is sent as a one-element batch via `queries=`; other code in
    # this notebook reads the response as search_results["results"][0]["matches"].
    return self.index.query(
        queries=[query_embedding],
        top_k=top_k,
        include_metadata=True,
    )

In [31]:
def get_answer_from_llm(self, context, question):
    """Generate an answer using the history-aware chain.

    Args:
        context: Retrieved text inserted into the prompt's context section.
        question: The user's question.

    Returns:
        The `content` attribute of the chain's response.
    """
    # A history-wrapped chain whose history factory takes `session_id` refuses to
    # run unless that key is present in config["configurable"]. The factory in
    # setup_chain ignores the value, so a fixed placeholder id is sufficient.
    run_config = {"configurable": {"session_id": "any"}}
    response = self.chain.invoke(
        {"context": context, "question": question},
        config=run_config,
    )
    return response.content

In [32]:
def start_conversation(self):
    """Run one chat turn: show history, read input, answer it and list sources."""
    self.display_messages()

    user_question = st.chat_input(placeholder="Ask me anything!")
    if not user_question:
        # Nothing was submitted, so there is nothing to answer.
        return

    st.chat_message("human").write(user_question)

    # Look up supporting documents and flatten them into prompt context.
    search_results = self.retrieve_documents(user_question)
    context = self.format_retrieved_content_for_prompt(search_results)

    # Ask the model and show its reply.
    answer = self.get_answer_from_llm(context, user_question)
    st.chat_message("ai").write(answer)

    # List each retrieved source below the answer as a bold markdown link.
    st.write("#### Sources:")
    for match in search_results["results"][0]["matches"]:
        metadata = match["metadata"]
        title, url = metadata["title"], metadata["url"]
        st.write(f"**[{title}]({url})**")
