In [18]:
# imports

import os
import glob
import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv

In [2]:
# imports for langchain
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_ollama.llms import OllamaLLM
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
# OpenAI-compatible client pointed at a server on localhost:11434.
# The api_key value is presumably just a placeholder for a local server — confirm.
# NOTE(review): no other cell visible here uses `openai`; confirm it is still needed.
openai = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

# Model names. MODEL_COMMAND_R7B is the one passed to OllamaLLM further down.
MODEL_COMMAND_R = 'command-r'
MODEL_COMMAND_R7B = 'command-r7b'

In [4]:
# Directory passed as persist_directory to the Chroma vector store.
db_name = "vector_db"

In [5]:
# Read in documents using LangChain's loaders.
# Each sub-folder of RAG-docs is treated as one document type: every Markdown
# file below it is loaded and tagged with that folder's name.

# glob("RAG-docs/*") also matches plain files sitting directly in RAG-docs.
# DirectoryLoader needs a directory, so keep only directories, and sort them
# so the load order (and therefore the chunk order) is the same on every run.
folders = sorted(path for path in glob.glob("RAG-docs/*") if os.path.isdir(path))
text_loader_kwargs = {'encoding': 'utf-8'}

documents = []
for folder in folders:
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    for doc in loader.load():
        # Remember which folder the document came from
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

In [6]:
# Break every loaded document into overlapping chunks
# (chunk_size=1000, with chunk_overlap=200 shared between neighbours)
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

In [7]:
# Report which document types made it into the chunks.
# A set has no fixed iteration order, so sort before joining to print the
# same line on every run.
doc_types = {chunk.metadata['doc_type'] for chunk in chunks}
print(f"Document types found: {', '.join(sorted(doc_types))}")

Document types found: q-and-a


In [8]:
# Load environment variables in a file called .env
# override=True lets values from the file replace variables that are already set.
# NOTE(review): the recorded output of this cell is False, which suggests
# '../.env' was not found or set nothing — confirm the path is right.

load_dotenv('../.env',override=True)

False

In [9]:
# Embedding model handed to Chroma for building and reopening the vector store.
# NOTE(review): the recorded output echoes this line, which looks like the tail
# of a warning — presumably a deprecation notice for this import path; confirm.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [10]:
# Start from a clean slate: when a persisted store is already on disk,
# open it and drop its collection before rebuilding

if os.path.exists(db_name):
    stale_store = Chroma(persist_directory=db_name, embedding_function=embeddings)
    stale_store.delete_collection()

In [11]:
# Embed every chunk and persist the result as a Chroma vector store under db_name

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)
# _collection is an underscore-prefixed attribute of the store, read here only to report its size
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 20 documents


In [12]:
# Define the LLM
llm = OllamaLLM(model=MODEL_COMMAND_R7B)

# Define the retriever: return the top 5 chunks for each query
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Define a custom system message.
# The prompt text is in Portuguese. It asks for precise answers (with code when
# the documents contain it), a friendly customer-service tone, no invented
# information when there is no relevant context, and replies always in
# Portuguese even when the question is in English.
# {context} is the slot that receives the retrieved document text.
system_message = SystemMessagePromptTemplate.from_template(
    """
    Dê respostas precisas e com código quando for disponível nos documentos.
    Atenda de forma simpática e personalizada, como um bom atendente de clientes.
    Se não souber a resposta, diga isso. Não invente informações se não tiver contexto relevante.
    Você responde em português. Mesmo que a pergunta seja feita em inglês, você SEMPRE responde em português.
                
    Aqui está o contexto relevante para ajudar a responder: {context}
    """
)

# Define the human message template
human_message = HumanMessagePromptTemplate.from_template("{question}")

# Create a chat prompt template with both the system and human messages
custom_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# The conversation memory and the ConversationalRetrievalChain are created in
# the next cell from llm, retriever and custom_prompt. Building them here too,
# with identical arguments, only produced objects that were replaced at once.

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [13]:
# Fresh conversation memory for the chat
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Wire the LLM, the retriever over the vector store, the memory and the custom
# prompt into one conversational retrieval chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": custom_prompt},
)

In [14]:
def chat(question, history):
    """Answer one chat message by running it through the conversation chain.

    Args:
        question: The user's latest message; sent to the chain under the
            "question" key.
        history: Prior turns supplied by the caller. Not used here.
            NOTE(review): presumably the chain's own memory carries the
            conversation instead — confirm.

    Returns:
        The value stored under "answer" in the chain's result.
    """
    return conversation_chain.invoke({"question": question})["answer"]

In [17]:
# Launch Gradio:
# Keep the ChatInterface itself in `view`. Chaining .launch() onto the
# constructor stored launch()'s return value instead, which left no handle
# for calling view.close() later.

view = gr.ChatInterface(chat, type="messages")
view.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.
