# RAG for ColaborEJA

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from transformers import BitsAndBytesConfig
import torch
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

# Hugging Face checkpoint used for both the causal LM and its tokenizer.
MODEL_ID = "rhaymison/Mistral-portuguese-luana-7b"

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
nb_4bit_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map={"": 0} maps the root module (and so the whole model) to device 0.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=nb_4bit_config,
    device_map={"": 0},
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Switch to inference mode; as the cell's last expression this also displays
# the model architecture.
model.eval()


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Mist

In [3]:
# Decoding settings forwarded to the text-generation pipeline.
generation_kwargs = {
    "use_cache": True,
    "do_sample": True,
    "temperature": 0.7,
    "top_p": 0.95,
    "repetition_penalty": 1.2,
    "max_new_tokens": 2000,
}

# Wrap the already-loaded model and tokenizer in a transformers pipeline.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map='auto',
    **generation_kwargs,
)

# Expose the pipeline through LangChain's LLM interface (`llm.invoke(...)`).
llm = HuggingFacePipeline(pipeline=pipe)

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=pipe)


In [4]:
# Prompt template (generic question-answering variant).
# Placeholders: {question} is the user question, {context} the retrieved text.
# NOTE(review): the next cell assigns PROMPT_TEMPLATE again, so on a
# top-to-bottom run this value is overwritten before anything reads it.
PROMPT_TEMPLATE = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer
the question. If you don't know the answer, say that you
don't know. DON'T MAKE UP ANYTHING.
Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""


In [5]:
# Prompt template (digital-literacy / Youth and Adult Education variant).
# Placeholders: {question} is the user question, {context} the retrieved text.
# The trailing "Answer:" marker is what the question cell later splits on to
# isolate the generated answer.
# NOTE(review): this rebinds PROMPT_TEMPLATE from the previous cell; whichever
# of the two cells ran last in the kernel decides the prompt actually used.
PROMPT_TEMPLATE = """
You are a highly sophisticated artificial intelligence specialized in digital literacy,
with a focus on Youth and Adult Education, assisting teachers in creating lesson plans
that meet the needs of their students.
Use the contextual excerpts provided to answer the question and develop a lesson plan to assist the teacher.
Be respectful and pay attention to Brazilian Portuguese spelling.
If you do not know the answer, simply say that you do not know. DO NOT MAKE ANYTHING UP.
Use four sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""

In [6]:
def get_text_splitter():
    """Return the recursive character splitter used to chunk documents.

    Chunks are at most 1500 characters (measured with ``len``) with a
    500-character overlap; the splitter tries paragraph breaks, then line
    breaks, then spaces.
    """
    splitter_options = {
        "chunk_size": 1500,
        "chunk_overlap": 500,
        "length_function": len,
        "separators": ["\n\n", "\n", " "],
    }
    return RecursiveCharacterTextSplitter(**splitter_options)

In [7]:
def get_embeddings_function():
    """Return a HuggingFaceEmbeddings instance for the all-MiniLM-L6-v2 model."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Shared embedding model instance; later cells read this module-level name
# when building the vector store and the embedding-distance evaluator.
embedding_function = get_embeddings_function()

  embeddings = HuggingFaceEmbeddings(


In [8]:
import uuid

def create_vectorstore(chunks, embedding_function, vectorstore_path):
    """Build a persistent Chroma store from ``chunks``, skipping duplicate texts.

    Every chunk receives a deterministic id: the UUIDv5 (DNS namespace) of its
    ``page_content``. Chunks whose text was already seen are dropped, keeping
    the first occurrence.

    Args:
        chunks: Iterable of documents exposing a ``page_content`` string.
        embedding_function: Embedding object passed to Chroma as ``embedding``.
        vectorstore_path: Directory in which Chroma persists the collection.

    Returns:
        The Chroma vector store created from the de-duplicated chunks.
    """
    # ids and documents are collected in lockstep so that ids[i] always
    # belongs to documents[i]. Passing list(set_of_ids) would hand Chroma the
    # ids in arbitrary set order and attach them to the wrong documents.
    seen_ids = set()
    unique_ids = []
    unique_chunks = []
    for chunk in chunks:
        chunk_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, chunk.page_content))
        if chunk_id in seen_ids:
            continue
        seen_ids.add(chunk_id)
        unique_ids.append(chunk_id)
        unique_chunks.append(chunk)

    # Create a new Chroma database from the documents
    vectorstore = Chroma.from_documents(documents=unique_chunks,
                                        ids=unique_ids,
                                        embedding=embedding_function,
                                        persist_directory=vectorstore_path)

    # This notebook's own run emits a warning on the explicit persist() call.
    # Keep calling it where it exists, but tolerate integrations that no
    # longer expose it (presumably because they persist automatically).
    if hasattr(vectorstore, "persist"):
        vectorstore.persist()

    return vectorstore

In [9]:
from langchain.evaluation import load_evaluator

# Sanity check of the embedding model: distance between two related words.
evaluator = load_evaluator("embedding_distance", embeddings=embedding_function)
evaluator.evaluate_strings(prediction="cat", reference="Animal")

{'score': 0.32553651341390677}

In [10]:
def embed_pdf_file(pdf_file):
    """Load one PDF, split it into chunks and index it in the vector store.

    Args:
        pdf_file: Path of the PDF file to ingest.

    Returns:
        The vector store returned by ``create_vectorstore`` for the file's
        chunks, persisted under ``"vectorstore_diretivas"``.
    """
    # No `load_pdf` helper is defined in this notebook, so the PDF is read
    # with PyPDFLoader in layout mode, as the batch-ingestion cell does.
    pages = PyPDFLoader(pdf_file, extraction_mode="layout").load()
    chunks = get_text_splitter().split_documents(pages)
    # Create vectorstore
    vectorstore = create_vectorstore(chunks=chunks,
                                     embedding_function=embedding_function,
                                     vectorstore_path="vectorstore_diretivas")
    return vectorstore

In [11]:
def get_retriver(question, vectorstore):
    """Return the chunks a similarity retriever finds for ``question``.

    A retriever is created from ``vectorstore`` with
    ``search_type="similarity"`` and invoked once with the question.
    """
    return vectorstore.as_retriever(search_type="similarity").invoke(question)

In [12]:
def generate(question, vectorstore):
    """Answer ``question`` with the LLM, grounded on retrieved chunks.

    The retrieved chunks' texts are joined with a ``---`` separator, inserted
    into ``PROMPT_TEMPLATE`` together with the question, and the formatted
    prompt is sent to ``llm``. Returns whatever ``llm.invoke`` returns.
    """
    retrieved = get_retriver(question=question, vectorstore=vectorstore)

    # Join the retrieved chunk texts into a single context string.
    separator = "\n\n---\n\n"
    context_text = separator.join(doc.page_content for doc in retrieved)

    # Fill the template and query the model.
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text,
        question=question,
    )
    return llm.invoke(prompt)


In [None]:
# Source PDFs to index. Forward slashes are used on purpose: the previous
# backslash-separated literals contained invalid escape sequences
# ("\R", "\q", "\E") and only resolved to real paths on Windows.
pdfs_file = ["diretivas/REquestoes_prinpicais.pdf",
             "diretivas/questoes_alternativas.pdf",
             "diretivas/Exemplo de plano de aula.pdf"]

# Load every page of every PDF, using PyPDFLoader's "layout" extraction mode.
all_docs = []
for pdf in pdfs_file:
    loader = PyPDFLoader(pdf, extraction_mode="layout")
    pages = loader.load()
    all_docs.extend(pages)

# Create the text splitter once
text_splitter = get_text_splitter()

# Split the documents into chunks
chunks = text_splitter.split_documents(all_docs)

# Create the persistent vector store with all chunks
vectorstore = create_vectorstore(
    chunks=chunks,
    embedding_function=embedding_function,
    vectorstore_path="vectorstore_diretivas"
)

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4
  vectorstore.persist()


In [17]:
# Question text sent to the RAG helper (whitespace kept exactly as authored).
mhp_question = """Na tabela 1, a pergunta 'Quais sensores são necessários para aprender o código de comunicação? 
O participante consegue usar esses sensores?'
                  esta relacionada a qual componente do MHP?
                    """
answer = generate(mhp_question, vectorstore)

# Show only the text after the last "Answer:" marker.
print(answer.rsplit("Answer:", 1)[-1])

 A pergunta está relacionada ao componente "Efficacia" no modelo de habilidades multifacetadas (MHP), pois implica a capacidade de uma parte de trabalhar de forma eficaz na tarefa. Os sensors citados em questões como "Qual sensor é necessário para aprender o código de comunicação?" ou "Quais informações de conscientização o participante deve armazenar?" são necessárias para desempenhar a função esperada dentro do modelo. Assim, a pergunta aborda diretamente o atributo de qualidade relevante e ajuda o participante a criar seu plano de aula.


# Tentando uma outra abordagem
## Usando LangGraph

In [None]:

from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_core.documents import Document

# Chat prompt built once from PROMPT_TEMPLATE and shared by the graph's
# generate step.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
# Define state for application
class State(TypedDict):
    # User question; read by both the retrieve and generate steps.
    question: str
    # Documents returned by the retrieve step.
    context: List[Document]
    # LLM output written by the generate step.
    answer: str

    # Define application steps
def retrieve(state: State):
    """Graph step: similarity-search the module-level ``vectorstore``.

    Reads ``state["question"]`` and returns a partial state update holding
    the matching documents under ``"context"``.
    """
    question = state["question"]
    return {"context": vectorstore.similarity_search(question)}


def generate(state: State):
    """Graph step: build the prompt from the state and query ``llm``.

    The page contents of ``state["context"]`` are joined with blank lines,
    inserted into ``prompt_template`` with ``state["question"]``, and the LLM
    response is returned as a partial state update under ``"answer"``.

    NOTE(review): this definition rebinds the name ``generate``, replacing the
    earlier ``generate(question, vectorstore)`` helper in the kernel namespace.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_inputs = {"question": state["question"], "context": context_text}
    prompt_value = prompt_template.invoke(prompt_inputs)
    return {"answer": llm.invoke(prompt_value)}

# Wire the two steps in order (retrieve, then generate), make `retrieve` the
# entry point, and compile the runnable graph.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
# Initial graph state: only the question is supplied (whitespace kept exactly
# as authored); the graph steps fill in the context and the answer.
graph_input = {"question": """Quais são as questões de acessibilidade que um jovem com autismo pode encontrar? 
                    Quais as possíveis soluções?
                    Faça um novo plano de aula baseado nas questões anteriores e usando o Plano de Aula como exemplo.
                    """}
response = graph.invoke(graph_input)
print(response["answer"])

Human: 
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer
the question. If you don't know the answer, say that you
don't know. DON'T MAKE UP ANYTHING.
Use three sentences maximum and keep the answer concise.
Question: Quais são as questões de acessibilidade que um jovem com autismo pode encontrar? 
                    Quais as possíveis soluções?
                    Faça um novo plano de aula baseado nas questões anteriores e usando o Plano de Aula como exemplo.
                     
Context: Tabela 5: Alternativas a serem exploradas se um desafio de comunicação for identificado.
Questão Alternativas
Quais sensores são necessários para aprender o código de comunicação? 
O participante consegue usar esses sensores?- É possível tornar esse código disponível por meio de outros sensores?
- Existe algum conversor automático que traduz o material de aprendizado 
para outros sensores?
- Há pessoas disponíveis para fazer essa conversão?
