In [None]:
# google/gemma-7b-it # meta-llama/Meta-Llama-3-8B-Instruct # google/gemma-2-27b-it
#sudo apt-get update
#sudo apt-get install libmagic1
# pip install -r requirements.txt
# pip install transformers==4.46.3
# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# pip install bitsandbytes
# pip install accelerate
# pip install sentence-transformers
# pip install langchain
# pip install -U langchain-community
# pip install unstructured
# pip install "unstructured[pdf]"
# pip install langchain-huggingface

In [1]:
import torch
# Query CUDA availability once and report the device name (or a placeholder)
gpu_available = torch.cuda.is_available()
print("GPU disponível:", gpu_available)
if gpu_available:
    gpu_name = torch.cuda.get_device_name(0)
else:
    gpu_name = "Nenhuma GPU detectada"
print("Nome da GPU:", gpu_name)

GPU disponível: True
Nome da GPU: NVIDIA GeForce RTX 3090


In [2]:
create_embeddings = False  # Set to False to load from disk; True rebuilds the embeddings and index

# Paths where the FAISS index, the embeddings and the chunk texts are saved
index_path = "faiss_index.bin"
embeddings_path = "embeddings.npy"
texts_path = "texts.npy"



In [3]:
from langchain.vectorstores import FAISS
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
import faiss

# Module-level embedding model; it is passed to the retrieval functions further down
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/gtr-t5-large")

def create_and_save_embeddings(embedding_model=None):
    """
    Build the chunk embeddings and the FAISS index from the PDFs and persist them.

    Loads every PDF under ``pdfs/``, splits the documents into overlapping chunks,
    embeds the chunks in a single batched call and writes the embeddings, the chunk
    texts and the FAISS index to ``embeddings_path``, ``texts_path`` and ``index_path``.

    Args:
    - embedding_model: Optional object exposing ``embed_documents(list[str])``.
      Defaults to the module-level ``embeddings`` so the large model is not
      instantiated a second time.

    Returns:
    - tuple: ``(docs_embeddings, docs_texts, faiss_index)`` where ``docs_embeddings``
      is a float32 array, ``docs_texts`` a list of chunk strings and ``faiss_index``
      the populated ``IndexFlatL2``.

    Raises:
    - ValueError: If no text chunk could be extracted (nothing is written to disk).
    """
    if embedding_model is None:
        # Reuse the already-loaded model instead of loading the weights again
        embedding_model = embeddings

    # Load the PDF documents
    loader = DirectoryLoader("pdfs/", glob="**/*.pdf")
    documents = loader.load()

    # Split the documents into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n", " ", ""]
    )
    docs = text_splitter.split_documents(documents)

    docs_texts = [doc.page_content for doc in docs]
    if not docs_texts:
        # Fail before touching the cache files; an empty array has no embedding dimension
        raise ValueError("No text chunks were extracted from 'pdfs/'; nothing to index.")

    # Embed all chunks in one batched call (document-side API) as a float32 matrix
    docs_embeddings = np.asarray(embedding_model.embed_documents(docs_texts), dtype='float32')

    # Persist embeddings and texts
    np.save(embeddings_path, docs_embeddings)
    np.save(texts_path, docs_texts)

    # Build the FAISS index (flat, L2 / Euclidean distance)
    dimension = docs_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(docs_embeddings)

    # Persist the FAISS index
    faiss.write_index(faiss_index, index_path)

    return docs_embeddings, docs_texts, faiss_index

def load_embeddings_and_index():
    """
    Load the cached embeddings, chunk texts and FAISS index from disk.

    Reads ``embeddings_path``, ``texts_path`` and ``index_path``.

    Returns:
    - tuple: ``(docs_embeddings, docs_texts, faiss_index)``; ``docs_texts`` is returned
      as a plain list so both the "create" and the "load" path hand back the same type.

    Raises:
    - ValueError: If the three cached artefacts do not describe the same number of
      chunks (e.g. one file is stale), which would otherwise yield wrong or
      out-of-range retrievals later.
    """
    # Load embeddings and texts from disk
    docs_embeddings = np.load(embeddings_path).astype('float32')
    # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects.
    # Only load files written by this notebook; never point texts_path at untrusted data.
    docs_texts = np.load(texts_path, allow_pickle=True).tolist()

    # Load the FAISS index from disk
    faiss_index = faiss.read_index(index_path)

    if not (len(docs_texts) == len(docs_embeddings) == faiss_index.ntotal):
        raise ValueError(
            "Cached files are inconsistent: "
            f"{len(docs_texts)} texts, {len(docs_embeddings)} embeddings, "
            f"{faiss_index.ntotal} index entries. Re-run with create_embeddings = True."
        )

    return docs_embeddings, docs_texts, faiss_index

# Rebuild everything from the PDFs when requested, otherwise reuse the files on disk
_build_or_load = create_and_save_embeddings if create_embeddings else load_embeddings_and_index
docs_embeddings, docs_texts, faiss_index = _build_or_load()




  from .autonotebook import tqdm as notebook_tqdm
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]


In [9]:
import transformers
import torch

# Checkpoint identifier of the chat model
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Text-generation pipeline: bfloat16 weights, automatic device placement
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
)

Downloading shards: 100%|██████████| 4/4 [06:23<00:00, 95.77s/it] 
Loading checkpoint shards: 100%|██████████| 4/4 [00:01<00:00,  2.12it/s]
Some parameters are on the meta device because they were offloaded to the cpu.


In [92]:
import numpy as np
import torch
import re


# Select the device (GPU when CUDA is available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialise the global conversation history
conversation_history = []

# Função para buscar contexto no RAG
def fetch_context_from_rag(query_texts, faiss_index, embeddings, docs_texts, k=3):
    """
    Retrieve the most relevant chunks for a list of query texts using FAISS.

    Each query is embedded and searched separately; the hits are merged in retrieval
    order. A chunk retrieved by more than one query is included only once, so the
    context does not grow with repeated passages when successive questions overlap.

    Args:
    - query_texts (list | str): User queries. A single string is treated as one query.
    - faiss_index: Index exposing ``search(vectors, k) -> (distances, indices)``.
    - embeddings: Embedding model exposing ``embed_query(text)``.
    - docs_texts (list): Chunk texts, positionally aligned with the index.
    - k (int): Number of nearest chunks to retrieve per query.

    Returns:
    - str: The unique retrieved chunks joined with the "<Next> \\n" delimiter
      (empty string when nothing is retrieved).
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(query_texts, str):
        query_texts = [query_texts]

    seen_indices = set()   # chunk ids already placed in the context
    retrieved_texts = []   # unique chunks, in first-retrieved order

    for query_text in query_texts:
        # Embed the query as a float32 row vector, the layout search() is called with
        query_embedding = np.asarray(embeddings.embed_query(query_text), dtype='float32')
        _, indices = faiss_index.search(query_embedding.reshape(1, -1), k)

        for idx in indices[0]:
            idx = int(idx)
            # -1 marks "no result" (fewer than k entries available)
            if idx == -1 or idx in seen_indices:
                continue
            seen_indices.add(idx)
            retrieved_texts.append(docs_texts[idx])

    # Join all retrieved chunks with the "<Next>" delimiter
    return "<Next> \n".join(retrieved_texts)




# Initialise the global history outside the function
# (this re-assigns the list already created earlier in this cell)
conversation_history = []

# Função principal para interação com o modelo
def interact_with_model(query_text, faiss_index, embeddings, docs_texts, pipeline,
                        max_new_tokens=256, history_limit=6, verbose=True):
    """
    Answer a user question with the chat pipeline, using RAG context and the
    global conversation history.

    The retrieval queries are every user turn still in the history plus the current
    question. The prompt is: system message (instructions + retrieved context),
    the stored history, then the current question. The global
    ``conversation_history`` is only updated after the pipeline call succeeds, so a
    failed generation does not leave a user turn without a reply in the history.

    Args:
    - query_text (str): The user's question.
    - faiss_index: FAISS index used for retrieval.
    - embeddings: Embedding model used to embed the queries.
    - docs_texts (list): Chunk texts aligned with the FAISS index.
    - pipeline: Chat text-generation pipeline; called with the message list.
    - max_new_tokens (int): Generation budget forwarded to the pipeline.
    - history_limit (int): Maximum number of messages kept in the history.
    - verbose (bool): Print the interaction marker and the full prompt.

    Returns:
    - dict: The assistant message (last entry of the pipeline's "generated_text"),
      e.g. {"role": "assistant", "content": "..."}.
    """
    global conversation_history

    # Report whether this is the first turn of the conversation
    first_interaction = len(conversation_history) == 0
    if verbose:
        if first_interaction:
            print("FI: Primeira interação detectada")
        else:
            print("SI: Interação subsequente detectada")

    user_message = {"role": "user", "content": query_text}

    # Retrieve context for all remembered user questions plus the current one
    user_queries = [entry['content'] for entry in conversation_history if entry['role'] == 'user']
    user_queries.append(query_text)
    context = fetch_context_from_rag(user_queries, faiss_index, embeddings, docs_texts)

    system_message = {
        "role": "system",
        "content": (
            "You are a helpful assistant that is supposed to help customers to adjust their equipment. "
            "Please look at the context extracted from the manual of the equipment, and use it in case it helps "
            f"to answer the customer's question <context>{context}</context>"
        ),
    }

    # System prompt, then prior turns, then the current question
    messages = [system_message, *conversation_history, user_message]

    if verbose:
        print(messages)

    outputs = pipeline(
        messages,
        max_new_tokens=max_new_tokens,
    )
    # The pipeline returns the whole chat; the last message is the new assistant reply
    response = outputs[0]["generated_text"][-1]

    # Commit the completed exchange and keep only the most recent messages
    updated_history = conversation_history + [user_message, response]
    conversation_history = updated_history[-history_limit:] if history_limit > 0 else []

    return response





In [93]:
# Question sent to the assistant; the returned assistant message is kept in `resposta`
query_text = "Can you explain in another way ?"
resposta = interact_with_model(
    query_text=query_text,
    faiss_index=faiss_index,
    embeddings=embeddings,
    docs_texts=docs_texts,
    pipeline=pipeline,
)


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


FI: Primeira interação detectada
[{'role': 'system', 'content': "You are a helpful assistant that is supposed to help customers to adjust their equipment. Please look at the context extracted from the manual of the equipment, and use it in case it helps to answer the customer's question <context>Do not cover this product or cables with towels and blankets.\n\nWhen the product is in operation, it will dissipate normal heat produced to the surface of the product. Please do not leave\n\nthe product on your knees or any part of your body for an extended period of time to avoid possible physical discomfort due\n\nto high temperature.\n\nPlease avoid placing this product near magnetic materials (such as speakers and TVs).\n\nDo not use or expose this product in an environment that could potentially leak flammable gas, or expose the product to rain\n\nor humid conditions.\n\nAvoid using the modem to connect to the Internet under thunder and lightning to avoid long-distance electric shock.\n\n

In [94]:
# Display the assistant message returned by the last call
resposta

{'role': 'assistant',
 'content': 'Based on the information provided, it seems like the manual is for a device that is likely a modem or a router, given the mention of network signal and internet connection. However, there is no clear indication of what the device actually is.\n\nTo provide a clear and concise explanation, I\'ll break down the key points from the manual:\n\n1. **Safety Precautions**:\n   - Keep the device away from towels, blankets, and any part of your body to avoid overheating.\n   - Avoid placing the device near magnetic materials, flammable gases, or humid conditions.\n   - Don\'t use the device during thunderstorms or near fire sources.\n   - Don\'t use the device during flights, as it may interfere with navigation systems.\n\n2. **Cleaning and Maintenance**:\n   - Use a clean, dry cloth to wipe the device.\n\n3. **Warranty and Disclaimer**:\n   - The product manual is provided "as-is" without warranties.\n   - You agree to bear all risks arising from using the ma

In [91]:
# Display the current global conversation history
conversation_history

[{'role': 'user',
  'content': 'What can i do In the event of poor wireless signal ?'},
 {'role': 'assistant',
  'content': "According to the manual, if you're experiencing poor wireless signal, you can try the following:\n\n1. Increase the separation between the equipment (pixsee Smart Baby Camera) and the receiver (mobile device).\n2. Connect the equipment into an outlet on a circuit different from that to which the receiver is connected.\n3. Consult the dealer or an experienced radio/TV technician for help.\n\nAdditionally, the manual also mentions that environmental factors and certain objects can affect the quality of wireless radio communications, such as:\n\n- Large objects containing metal (e.g. refrigerators, mirrors, blades, cabinets, metal doors or reinforced concrete)\n- Other solid objects such as wall structures or other electronics that transmit wireless signals (e.g. televisions, calculators, wireless phones, fluorescent lamps, or dimmer switches)\n\nTry moving the equi

In [77]:
import plotly.express as px
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
import plotly.io as pio

# Configure the Plotly renderer
pio.renderers.default = "iframe"

# 1. Document embeddings come from the array built/loaded earlier. The query embedding
#    is computed here explicitly: the `query_embedding` variable used inside
#    fetch_context_from_rag is local to that function and is not visible in this cell.
docs_embeddings_np = docs_embeddings
query_embedding_np = np.asarray(embeddings.embed_query(query_text), dtype='float32').reshape(1, -1)

# 2. Stack the document embeddings and the query embedding (query is the last row)
all_embeddings = np.vstack([docs_embeddings_np, query_embedding_np]).astype('float32')

# 3. Reduce to 2 dimensions with t-SNE.
#    scikit-learn 1.5 renamed `n_iter` to `max_iter`; pick whichever the installed version accepts.
tsne_iter_param = "max_iter" if "max_iter" in TSNE().get_params() else "n_iter"
tsne = TSNE(
    n_components=2,
    random_state=42,
    # perplexity must be smaller than the number of samples
    perplexity=min(10, len(all_embeddings) - 1),
    **{tsne_iter_param: 500},
)
embeddings_2d = tsne.fit_transform(all_embeddings)

# 4. Build the list of texts (documents + query). `docs_texts` is used because the
#    split `docs` objects only exist inside create_and_save_embeddings.
texts = [str(text) for text in docs_texts]
query_text_wrapped = "QUERY: " + query_text  # Label that identifies the query point
texts.append(query_text_wrapped)  # The query must be the last entry, matching all_embeddings

# 5. Build the DataFrame used for plotting
data = pd.DataFrame({
    "x": embeddings_2d[:, 0],  # First t-SNE component
    "y": embeddings_2d[:, 1],  # Second t-SNE component
    "Text": texts,             # Document and query texts
    "Type": ["Document"] * len(docs_texts) + ["Query"]  # Point type: Document or Query
})

# 6. Formatar os tooltips para limitar a largura
def format_tooltip(text, max_length=50):
    """Split the text into consecutive pieces of at most max_length characters, joined by '<br>'."""
    pieces = []
    for start in range(0, len(text), max_length):
        pieces.append(text[start:start + max_length])
    return '<br>'.join(pieces)

# Wrap every text so the hover tooltips stay narrow
data["FormattedText"] = data["Text"].apply(format_tooltip)

# 7. Build the interactive Plotly scatter plot
hover_columns = {"FormattedText": True, "x": False, "y": False}  # Show only the wrapped text
axis_labels = {"x": "t-SNE Component 1", "y": "t-SNE Component 2"}
fig = px.scatter(
    data,
    x="x",
    y="y",
    color="Type",
    hover_data=hover_columns,
    title="t-SNE dos Documentos e Query Embeddings",
    labels=axis_labels,
)

# Marker styling: larger, slightly transparent points with a thin outline
marker_style = dict(size=10, opacity=0.8, line=dict(width=1, color='DarkSlateGrey'))
fig.update_traces(marker=marker_style)

# 8. Export the figure as a standalone HTML file
output_html = "tsne_plot_tooltip.html"
fig.write_html(output_html)
print("Gráfico salvo como 'tsne_plot_tooltip.html'. Abra no navegador para visualizar.")




Gráfico salvo como 'tsne_plot_tooltip.html'. Abra no navegador para visualizar.



'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7.

