# Enterprise Knowledge Assistant (Local LLaMA + RAG)

In [None]:
!pip install langchain chromadb pypdf pymupdf pillow gradio numpy langchain-ollama langchain-community

In [None]:
import os
import numpy as np
import fitz  # PyMuPDF
from PIL import Image as PILImage

import gradio as gr

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

from langchain_ollama import ChatOllama, OllamaEmbeddings

from langchain_core.prompts import PromptTemplate
try:
    from langchain.chains import RetrievalQA
except ImportError:
    from langchain_classic.chains import RetrievalQA

In [None]:
# Local LLaMA model served through Ollama.
LLM_MODEL_NAME = "llama3:8b"  # adjust if you use a different model name/tag

# Model used to embed document chunks and queries. It defaults to the chat
# model, so behaviour is unchanged unless this constant is edited.
# NOTE(review): a dedicated embedding model is presumably a better fit for
# retrieval than a chat model - confirm before switching. Any persisted vector
# store has to be rebuilt after changing this value, because stored vectors
# and query vectors must come from the same model.
EMBEDDING_MODEL_NAME = LLM_MODEL_NAME

# Chat model that generates the answers.
llm = ChatOllama(
    model=LLM_MODEL_NAME,
    temperature=0.2
)

# Local embeddings, also served through Ollama.
embeddings = OllamaEmbeddings(
    model=EMBEDDING_MODEL_NAME
)

print("‚úÖ LLM e embeddings locais configurados.")

In [None]:
# Root folders used by the notebook: one for the input documents, one for the
# persisted vector stores.
BASE_DIR = "data"
DB_DIR = "db"

# Display name of each category -> sub-folder name used under both roots.
CATEGORIES = {
    "Informa√ß√µes Internas": "internas",
    "Pe√ßas / Material": "pecas",
    "Manuais": "manuais",
    "Outros": "outros",
}

# Make sure both root folders exist before anything reads from or writes to them.
for _root_folder in (BASE_DIR, DB_DIR):
    os.makedirs(_root_folder, exist_ok=True)

print("Categorias dispon√≠veis:", list(CATEGORIES))

In [None]:
def create_vector_db_for_category(category_name):
    """Build the persisted Chroma vector store for one document category.

    Loads every PDF found in ``BASE_DIR/<folder_key>``, tags each loaded
    document with its file name and category, splits the text into
    overlapping chunks and embeds them into a Chroma store persisted under
    ``DB_DIR/<folder_key>``.

    Args:
        category_name: A key of ``CATEGORIES`` (the category display name).

    Returns:
        None. Progress and warnings are printed.

    Raises:
        KeyError: If ``category_name`` is not a key of ``CATEGORIES``.
    """
    folder_key = CATEGORIES[category_name]
    pdf_folder = os.path.join(BASE_DIR, folder_key)
    db_folder = os.path.join(DB_DIR, folder_key)

    if not os.path.isdir(pdf_folder):
        print(f"‚ö†Ô∏è Pasta n√£o encontrada para categoria '{category_name}': {pdf_folder}")
        return

    # os.listdir returns entries in arbitrary order; sort so the indexing
    # order is reproducible between runs.
    pdf_files = sorted(
        f for f in os.listdir(pdf_folder) if f.lower().endswith(".pdf")
    )

    if not pdf_files:
        print(f"‚ö†Ô∏è Nenhum PDF encontrado em {pdf_folder}")
        return

    all_docs = []

    for pdf in pdf_files:
        docs = PyPDFLoader(os.path.join(pdf_folder, pdf)).load()

        # Store the bare file name (not the full path) so answers can cite it.
        for d in docs:
            d.metadata["source"] = pdf
            d.metadata["category"] = category_name

        all_docs.extend(docs)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", ".", "!", "?", ","]
    )

    chunks = splitter.split_documents(all_docs)

    # PDFs without extractable text (e.g. scanned images) yield no chunks;
    # stop here instead of handing an empty list to the vector store.
    if not chunks:
        print(f"‚ö†Ô∏è Sem texto para indexar em {pdf_folder}")
        return

    # Create the store folder only once there is something to index, so that
    # an empty folder is never mistaken for an existing vector store.
    os.makedirs(db_folder, exist_ok=True)

    # NOTE(review): this is called with the same persist_directory on every
    # run; re-running presumably appends the same chunks again - confirm, and
    # clear the existing collection first if duplicates are observed.
    Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=db_folder
    )

    print(f"‚úÖ Base vetorial criada para categoria: {category_name} ({len(chunks)} chunks)")

In [None]:
# Build the vector store of every configured category, one after the other.
# Iterating the dict directly yields its keys (the category display names).
for cat in CATEGORIES:
    create_vector_db_for_category(cat)

print("‚úÖ Todas as bases vetoriais foram processadas (se havia PDFs).")

In [None]:
def load_vector_store(category_name):
    """Open the persisted Chroma store that belongs to a category.

    Args:
        category_name: A key of ``CATEGORIES`` (the category display name).

    Returns:
        A ``Chroma`` instance bound to ``DB_DIR/<folder_key>`` and to the
        module-level ``embeddings`` object.

    Raises:
        KeyError: If ``category_name`` is not a key of ``CATEGORIES``.
        ValueError: If the category's store folder does not exist.
    """
    db_folder = os.path.join(DB_DIR, CATEGORIES[category_name])

    # Happy path first: the folder is there, so hand back the store.
    if os.path.isdir(db_folder):
        return Chroma(
            persist_directory=db_folder,
            embedding_function=embeddings,
        )

    raise ValueError(f"Base vetorial n√£o encontrada para categoria '{category_name}' em {db_folder}")

In [None]:
# Sentence the prompt instructs the model to answer with, verbatim, when the
# answer is not clear from the supplied documents.
REJECTION_PHRASE = "N√£o encontro essa informa√ß√£o nos documentos dispon√≠veis"

# Prompt text sent to the model. It contains four placeholders:
# {rejection_phrase}, {category}, {context} and {question}.
prompt_template = """
√âs um assistente inteligente especializado em gest√£o de conhecimento empresarial.

Regras:
- Responde APENAS com base na informa√ß√£o dos documentos fornecidos.
- Se a resposta n√£o estiver clara nos documentos, responde exatamente: "{rejection_phrase}".
- Usa linguagem profissional, corporativa e direta.
- Sempre que fizer sentido, organiza a resposta em pontos ou par√°grafos curtos.

Categoria selecionada: {category}

Contexto (excerto dos documentos internos):
{context}

Pergunta do utilizador:
{question}

Resposta:
"""

# Template object declaring the four placeholders used in prompt_template.
qa_prompt = PromptTemplate(
    input_variables=["context", "question", "category", "rejection_phrase"],
    template=prompt_template
)

In [None]:
def create_rag_chain_for_category(category_name):
    """Assemble the retrieval + answer chain for one category.

    Args:
        category_name: A key of ``CATEGORIES`` (the category display name).

    Returns:
        A ``RetrievalQA`` chain built on the category's vector store that
        also returns the documents it retrieved.

    Raises:
        Whatever ``load_vector_store`` raises for this category.
    """
    # Retrieve the 5 best-matching chunks for each query.
    top_k_retriever = load_vector_store(category_name).as_retriever(
        search_kwargs={"k": 5},
    )

    # Pre-fill the placeholders that are fixed for this category, leaving
    # only the context and the question to be supplied per query.
    category_prompt = qa_prompt.partial(
        category=category_name,
        rejection_phrase=REJECTION_PHRASE,
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=top_k_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": category_prompt},
    )

In [None]:
def answer_question(category_name, user_question):
    """Answer a question using the RAG chain of the selected category.

    Args:
        category_name: Key of ``CATEGORIES`` selecting which store to query.
        user_question: Free-text question typed by the user.

    Returns:
        The model's answer followed by the distinct source files of the
        retrieved documents, or a user-facing message when the question is
        blank or when building/running the chain fails. Failures are
        returned as text rather than raised.
    """
    # Treat None, "" and whitespace-only input alike: there is nothing to ask.
    question = (user_question or "").strip()
    if not question:
        return "Por favor, insira uma pergunta."

    try:
        qa_chain = create_rag_chain_for_category(category_name)
    except Exception as e:
        return f"Erro ao preparar o motor de pesquisa para a categoria '{category_name}': {e}"

    try:
        result = qa_chain.invoke({"query": question})
        answer_text = result["result"]
        source_docs = result.get("source_documents", [])

        # Append the sources at the end of the answer.
        if source_docs:
            # dict.fromkeys removes duplicates while keeping first-seen
            # order, so the list is stable from one call to the next
            # (a set would not guarantee any order).
            unique_sources = dict.fromkeys(
                doc.metadata.get("source", "desconhecido") for doc in source_docs
            )
            answer_text += "\n\nFontes consultadas:" + "".join(
                f"\n- {src}" for src in unique_sources
            )

        return answer_text

    except Exception as e:
        return f"Ocorreu um erro ao consultar o modelo local: {e}"

In [None]:
def gradio_answer(category, question):
    """Forward the selected category and the typed question to ``answer_question``.

    Args:
        category: Category display name.
        question: Question text.

    Returns:
        Whatever ``answer_question`` returns for these inputs.
    """
    response = answer_question(category, question)
    return response

# Gradio interface: a category dropdown and a question box feed
# gradio_answer; its return value is shown in the answer box.
with gr.Blocks(title="ü§ñ Enterprise Knowledge Assistant") as demo:
    gr.Markdown("""
    # ü§ñ Enterprise Knowledge Assistant (Local Secure AI)
    Assistente inteligente para consulta de documenta√ß√£o interna.
    Seguran√ßa total: todas as respostas s√£o geradas localmente (LLaMA 3) sem envio de dados para a cloud.
    """)

    category_names = list(CATEGORIES)

    with gr.Row():
        category_input = gr.Dropdown(
            choices=category_names,
            # Default to the first configured category rather than repeating
            # its name as a literal, so renaming a key in CATEGORIES cannot
            # leave the dropdown with a default that is not among its choices.
            value=category_names[0] if category_names else None,
            label="Categoria de documentos"
        )
        question_input = gr.Textbox(
            lines=4,
            label="Pergunta",
            placeholder="Ex.: Qual √© a pol√≠tica de f√©rias? / Como √© o procedimento de seguran√ßa X?"
        )

    submit_btn = gr.Button("Obter resposta", variant="primary")

    answer_output = gr.Textbox(
        lines=12,
        label="Resposta do assistente"
    )

    # Clicking the button sends both inputs to gradio_answer.
    submit_btn.click(
        fn=gradio_answer,
        inputs=[category_input, question_input],
        outputs=[answer_output]
    )

demo.launch()