In [None]:
# google/gemma-7b-it # meta-llama/Meta-Llama-3-8B-Instruct # google/gemma-2-27b-it
#sudo apt-get update
#sudo apt-get install libmagic1
# pip install transformers==4.46.3
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# pip install bitsandbytes
# pip install accelerate
# pip install sentence-transformers
# pip install langchain
# pip install -U langchain-community
# pip install unstructured
# pip install "unstructured[pdf]"
# pip install langchain-huggingface

In [24]:
import torch
print("GPU disponível:", torch.cuda.is_available())
print("Nome da GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "Nenhuma GPU detectada")

GPU disponível: True
Nome da GPU: NVIDIA GeForce RTX 3090


In [13]:
from langchain.vectorstores import FAISS
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np

# 1. Carregando documentos PDF
loader = DirectoryLoader("pdfs/", glob="**/*.pdf")
documents = loader.load()

# 2. Dividindo os documentos
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n", " ", ""]
)
docs = text_splitter.split_documents(documents)

# 3. Criando embeddings com HuggingFace
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/gtr-t5-large")

# Gerar embeddings para os documentos em formato NumPy
docs_texts = [doc.page_content for doc in docs]
docs_embeddings = np.array([embeddings.embed_query(text) for text in docs_texts]).astype('float32')

# 4. Criar índice FAISS manualmente
import faiss
dimension = docs_embeddings.shape[1]  # Dimensão das embeddings
faiss_index = faiss.IndexFlatL2(dimension)  # Usar L2 (distância euclidiana)
faiss_index.add(docs_embeddings)  # Adicionar as embeddings ao índice

# 5. Consulta
query_text = "How can I clean the equipment?"
query_embedding = np.array(embeddings.embed_query(query_text)).astype('float32')  # Garantir formato correto

# 6. Buscar as k embeddings mais próximas
k = 5
distances, indices = faiss_index.search(query_embedding.reshape(1, -1), k)

# 7. Recuperar os textos correspondentes e exibir os resultados
print("Top 5 textos com maior similaridade por embeddings:")
for i, (dist, idx) in enumerate(zip(distances[0], indices[0]), start=1):
    if idx == -1:  # Caso não haja mais resultados
        continue
    print(f"{i}. Texto: {docs_texts[idx][:100]}...")  # Mostrar os primeiros 100 caracteres
    print(f"   Distância: {dist:.4f}")

# 8. Concatenar os textos recuperados em um único contexto
top_texts = [docs_texts[idx] for idx in indices[0] if idx != -1]
context = "<Next> \n".join(top_texts)

# Salvar o contexto concatenado em um arquivo (opcional)
with open("context_rag.txt", "w", encoding="utf-8") as file:
    file.write(context)

print(f"\nContexto concatenado gerado com {len(context)} caracteres.")
print(context)  # Mostra os primeiros 500 caracteres para validação



Top 5 textos com maior similaridade por embeddings:
1. Texto: Please take this product to the collection point for recycling of electrical and electronic equipmen...
   Distância: 0.6202
2. Texto: Cleaning and maintenance (stand)

Please use a clean dry cloth for cleaning and wiping.

Disclaimers...
   Distância: 0.7689
3. Texto: relocate the receiving antenna. —Increase the separation between the equipment and receiver. —Connec...
   Distância: 0.7915
4. Texto: This equipment has been tested and found to comply with the limits for a Class B digital device, pur...
   Distância: 0.7953
5. Texto: turned on.

This product must use a socket with a rated power supply of 110V~240V and a rated curren...
   Distância: 0.8415

Contexto concatenado gerado com 4238 caracteres.
Please take this product to the collection point for recycling of electrical and electronic equipment in accordance with local regulations. Do not dispose of this product together with normal household waste. By recycling w

In [10]:
context

"Please take this product to the collection point for recycling of electrical and electronic equipment in accordance with local regulations. Do not dispose of this product together with normal household waste. By recycling waste equipment, you can make an important contribution to environmental protection.\n\n24\n\nCleaning and maintenance (camera)\n\nPlease use a clean dry cloth for cleaning and wiping.\n\nDo not use strong solvents such as thinners, benzene-containing solvents or other chemical solvents next to or on this\n\nproduct.\n\nThis product only features splash-proof water protection. Please do not expose the product to water splashes, showers, or\n\nany contact with sea water, salt water, chlorinated water or beverages and other liquids. Improper use will void your\n\nwarranty.\n\nBefore cleaning your product, please ensure power is turned off, and the power cord from the AC adapter is disconnected.\n\nCleaning and maintenance (stand)\n\nPlease use a clean dry cloth for cle

In [73]:
query_text

'How can I clean the equipment?'

In [74]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
#bnb.config.bnb_4bit_compute_dtype = torch.float16

# Configuração de quantização
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Diretório onde o modelo será salvo/carregado
local_cache_dir = "/models"
# Carregar o tokenizer e o modelo com o diretório especificado
tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it") # , cache_dir=local_cache_dir
model_llm = AutoModelForCausalLM.from_pretrained(
    "google/gemma-7b-it",
    quantization_config=quantization_config) # cache_dir=local_cache_dir

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.41it/s]


In [1]:
import time
import torch

# Escolha o dispositivo (CPU ou GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Entrada de exemplo
chat = [
    { "role": "user", "content": f"Based on the instructions manual of the equipment: <context>{context}</contex>, look for the answer for the question:<question>{query_text}</question>" },
]

# Medir o tempo de execução
start_time = time.time()

# Crie o prompt
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")

# Mova os inputs para o dispositivo apropriado
inputs = inputs.to(device)
model_llm = model_llm.to(device)  # Certifique-se de que o modelo também está no dispositivo correto

# Geração de texto
outputs = model_llm.generate(input_ids=inputs, max_new_tokens=150)

# Exibir o resultado
print(tokenizer.decode(outputs[0]))

end_time = time.time()
print(f"Tempo de execução: {end_time - start_time:.2f} segundos")



NameError: name 'context' is not defined

In [76]:
query_text

'How can I clean the equipment?'

In [77]:
import plotly.express as px
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
import plotly.io as pio

# Configurar o renderer
pio.renderers.default = "iframe"

# 1. Garantir que as embeddings dos documentos e da query estejam no formato correto
docs_embeddings_np = docs_embeddings  # Renomear para manter consistência
query_embedding_np = query_embedding.reshape(1, -1)  # Garantir formato correto

# 2. Combinar as embeddings dos documentos e a embedding da query
all_embeddings = np.vstack([docs_embeddings_np, query_embedding_np]).astype('float32')

# 3. Aplicar t-SNE para reduzir para 2 dimensões
tsne = TSNE(n_components=2, random_state=42, perplexity=10, n_iter=500)
embeddings_2d = tsne.fit_transform(all_embeddings)

# 4. Criar lista de textos (documentos + query)
texts = [doc.page_content for doc in docs]  # Documentos
query_text_wrapped = "QUERY: " + query_text  # Texto específico para a query
texts.append(query_text_wrapped)  # Garantir que a query seja a última entrada

# 5. Criar DataFrame para visualização
data = pd.DataFrame({
    "x": embeddings_2d[:, 0],  # Primeiro componente t-SNE
    "y": embeddings_2d[:, 1],  # Segundo componente t-SNE
    "Text": texts,             # Textos dos documentos e da query
    "Type": ["Document"] * len(docs) + ["Query"]  # Tipo: Document ou Query
})

# 6. Formatar os tooltips para limitar a largura
def format_tooltip(text, max_length=50):
    """Quebra o texto em múltiplas linhas com um limite de caracteres."""
    return '<br>'.join([text[i:i+max_length] for i in range(0, len(text), max_length)])

data["FormattedText"] = data["Text"].apply(lambda x: format_tooltip(x))

# 7. Criar gráfico interativo com Plotly
fig = px.scatter(
    data,
    x="x",
    y="y",
    color="Type",
    hover_data={"FormattedText": True, "x": False, "y": False},  # Tooltips formatados
    title="t-SNE dos Documentos e Query Embeddings",
    labels={"x": "t-SNE Component 1", "y": "t-SNE Component 2"}
)

# Melhorar a aparência dos pontos
fig.update_traces(marker=dict(size=10, opacity=0.8, line=dict(width=1, color='DarkSlateGrey')))

# 8. Salvar como HTML
fig.write_html("tsne_plot_tooltip.html")
print("Gráfico salvo como 'tsne_plot_tooltip.html'. Abra no navegador para visualizar.")




Gráfico salvo como 'tsne_plot_tooltip.html'. Abra no navegador para visualizar.



'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7.

