<a href="https://colab.research.google.com/github/rmonterof/LLM-s-Python/blob/main/Unidad14_14_Crea_tu_chatbot_con_datos_personalizados.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Paso 1: Instalar las librerías necesarias
!pip install -U transformers faiss-cpu datasets gradio sentence-transformers

# Importar las librerías necesarias
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, pipeline
import faiss
import torch
import gradio as gr

# Load the causal language model used to generate answers
gpt_model_name = "gpt2"  # Model used to generate the FAISS-grounded answers
tokenizer = AutoTokenizer.from_pretrained(gpt_model_name)
llm_model = AutoModelForCausalLM.from_pretrained(gpt_model_name)

# Use the end-of-sequence token as the padding token so the tokenizer can be
# called with padding=True
tokenizer.pad_token = tokenizer.eos_token

# Load the embedding model used to turn queries and documents into vectors
embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
embed_model = AutoModel.from_pretrained(embed_model_name)

# Detect whether a GPU is available: 0 when CUDA is available, otherwise -1.
# NOTE(review): nothing in the visible code reads `device` or moves a model
# to it — confirm whether GPU execution was intended.
device = 0 if torch.cuda.is_available() else -1

# Convert text into an embedding vector with the embedding model
def get_embedding(text):
    """Return a float32 NumPy embedding for ``text``.

    The text is tokenized with ``embed_tokenizer`` (padding and truncation
    enabled), run through ``embed_model`` without gradient tracking, and the
    last hidden state is mean-pooled over the token axis.

    The pooling is weighted by the attention mask so that padding tokens,
    which appear when several texts of different lengths are encoded
    together, do not dilute the average. For a single string every token is
    real, so the result matches a plain mean over the tokens.

    Args:
        text: A string (or a list of strings) to embed.

    Returns:
        A float32 ``numpy.ndarray``. Singleton dimensions are squeezed out,
        so a single string yields a 1-D vector.
    """
    inputs = embed_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = embed_model(**inputs)
    hidden = outputs.last_hidden_state
    # 1 for real tokens, 0 for padding; broadcast over the hidden dimension
    mask = inputs["attention_mask"].unsqueeze(-1).type_as(hidden)
    summed = (hidden * mask).sum(dim=1)
    # Clamp so a row without any real token cannot divide by zero
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    embedding = (summed / token_counts).squeeze()
    return embedding.cpu().numpy().astype('float32')

# Read the embedding dimension from a probe embedding produced by the model
test_embedding = get_embedding("test")
dimension = test_embedding.shape[0]

# Create the FAISS L2 (flat) index with the dimension obtained above
index = faiss.IndexFlatL2(dimension)

# Sample documents used as the retrieval corpus. Their positions in this list
# are the ids used to look documents up after a FAISS search.
sample_documents = [
    "Artificial intelligence (AI) is the field of study that involves creating machines or software that can perform tasks typically requiring human intelligence. These tasks include reasoning, learning, problem-solving, perception, and language understanding. AI aims to simulate cognitive functions, enabling machines to interact with their environment, adapt to new information, and make decisions. AI can be classified into two main types: narrow AI and general AI. Narrow AI, which is the current state of AI, is designed to perform specific tasks, such as speech recognition or image classification, often outperforming humans at these tasks. General AI, on the other hand, would represent a level of intelligence that can perform any intellectual task that a human can, though this level of AI is still theoretical and has not been achieved",
    "Machine learning (ML) is a subset of AI focused on enabling machines to learn from data without being explicitly programmed. Unlike traditional programming, where specific instructions are provided for each task, ML models identify patterns within large datasets and make predictions or decisions based on this data. Machine learning can be broadly divided into three types: supervised learning, unsupervised learning, and reinforcement learning",
    "Deep learning (DL) is a branch of machine learning inspired by the structure and function of the human brain, particularly through artificial neural networks. In deep learning, data is processed through multiple layers of neural networks to progressively extract higher-level features. These layers of neurons work in sequence, each layer interpreting the data with increasing complexity. Deep learning has shown exceptional performance in areas like image and speech recognition, natural language processing, and game playing"
]

# Generate an embedding for each document and add it to FAISS, in list order
for doc in sample_documents:
    embedding = get_embedding(doc)
    index.add(embedding.reshape(1, -1))  # Add each embedding to the FAISS index as a single row

# Search the FAISS index for the documents closest to a query embedding
def search_faiss(query_embedding, k=3):
    """Return the ids and distances of the nearest indexed documents.

    Args:
        query_embedding: NumPy vector for the query; it is reshaped to a
            single row before being passed to ``index.search``.
        k: Maximum number of neighbours to request.

    Returns:
        A tuple ``(indices, distances)`` of 1-D NumPy arrays, in the order
        returned by FAISS. Fewer than ``k`` entries are returned when the
        index holds fewer than ``k`` vectors.
    """
    distances, indices = index.search(query_embedding.reshape(1, -1), k)
    # FAISS fills the slots it cannot satisfy with the id -1. Drop them so a
    # caller never uses -1 as a list position (which would silently select
    # the last document).
    found = indices[0] >= 0
    return indices[0][found], distances[0][found]

# Build the prompt given to the LLM from the query and the retrieved documents
def create_prompt(query, retrieved_docs):
    """Assemble the question, a numbered document list and the answer cue.

    Args:
        query: The user's question.
        retrieved_docs: Iterable of document texts, numbered from 1 in the
            order given.

    Returns:
        The prompt string, ending with ``"Answer:"``.
    """
    sections = [f"Question: {query}\n\nRelevant Documents:\n"]
    for number, text in enumerate(retrieved_docs, start=1):
        sections.append(f"{number}. {text}\n")
    sections.append("\nAnswer:")
    return "".join(sections)

# Generate the LLM answer for a FAISS-grounded prompt
def generate_llm_response(prompt):
    """Generate up to 50 new tokens for ``prompt`` and return only the answer.

    Args:
        prompt: Full prompt text (question plus retrieved documents).

    Returns:
        The newly generated text, without the echoed prompt and with
        surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=50,
            # Sampling must be enabled explicitly; otherwise decoding is
            # greedy and temperature/top_k/top_p have no effect.
            do_sample=True,
            temperature=0.8,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            # Passing the pad id avoids the per-call "Setting `pad_token_id`"
            # warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    # A causal LM returns the prompt tokens followed by the new ones; decode
    # only the continuation so the answer does not repeat the whole prompt.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()

# Full FAISS-grounded chatbot step: embed, retrieve, prompt, generate
def faiss_based_response(user_query):
    """Answer ``user_query`` using the documents retrieved from FAISS.

    Args:
        user_query: The user's question.

    Returns:
        The text produced by ``generate_llm_response`` for the built prompt.
    """
    doc_ids, _distances = search_faiss(get_embedding(user_query))
    context_docs = []
    for doc_id in doc_ids:
        context_docs.append(sample_documents[doc_id])
    return generate_llm_response(create_prompt(user_query, context_docs))

# General answer path: let GPT-2 continue open-ended questions
def general_model_response(user_query):
    """Generate up to 50 new tokens continuing ``user_query``.

    Args:
        user_query: The user's question, used directly as the prompt.

    Returns:
        The decoded model output (the query followed by its continuation),
        with special tokens removed.
    """
    inputs = tokenizer(user_query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=50,
            # Sampling must be enabled explicitly; otherwise decoding is
            # greedy and temperature/top_k/top_p have no effect.
            do_sample=True,
            temperature=0.8,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            # Passing the pad id avoids the per-call "Setting `pad_token_id`"
            # warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Route the query to the FAISS-grounded path or to the general model
def get_response(user_query):
    """Return a labelled answer for ``user_query``.

    Queries that mention one of the AI-related keywords (case-insensitive
    substring match) are answered through ``faiss_based_response``; every
    other query goes to ``general_model_response``.

    Args:
        user_query: The text typed by the user.

    Returns:
        The answer prefixed with a label naming the path that produced it.
    """
    ai_related_keywords = ["artificial intelligence", "machine learning", "deep learning", "neural networks"]
    normalized_query = user_query.lower()

    mentions_ai = False
    for keyword in ai_related_keywords:
        if keyword in normalized_query:
            mentions_ai = True
            break

    if not mentions_ai:
        general_response = general_model_response(user_query)
        return f"**Respuesta del modelo general:** {general_response}"

    faiss_response = faiss_based_response(user_query)
    return f"**Respuesta de nuestro LLM:** {faiss_response}"

# Create the Gradio interface: one text input routed through get_response,
# one text output
interface = gr.Interface(
    fn=get_response,
    inputs="text",
    outputs="text",
    title="Chatbot de Búsqueda Semántica con Respuesta Dual",
    description="Haz una pregunta y el chatbot responderá con una respuesta basada en documentos relevantes de FAISS o usará un modelo general para temas más amplios.",
)

# Launch the Gradio interface with options suited to Colab.
# share=True publishes a temporary public URL; debug=True keeps the cell
# running so errors and logs stay visible (see the captured output below).
interface.launch(share=True, debug=True)




Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://9d443900e8aaafa131.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://9d443900e8aaafa131.gradio.live




In [None]:
# Paso 1: Instalar las librerías necesarias
!pip install -U transformers faiss-cpu sentence-transformers torch evaluate

# Importar las librerías necesarias
import faiss
import torch
import evaluate
import numpy as np
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

# Model configuration: load GPT-2 and its tokenizer, and switch the model to
# evaluation mode.
# NOTE(review): `tokenizer` and `llm_model` are not used by the retrieval
# evaluation code visible in this cell — confirm whether they are needed here.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm_model = AutoModelForCausalLM.from_pretrained(model_name)
llm_model.eval()

# Embedding model used to turn queries and documents into vectors
embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
embed_model = AutoModel.from_pretrained(embed_model_name)

# Turn text into an embedding vector
def get_embedding(text):
    """Return a float32 NumPy embedding for ``text``.

    The text is tokenized with ``embed_tokenizer`` (padding and truncation
    enabled), run through ``embed_model`` without gradient tracking, and the
    last hidden state is mean-pooled over the token axis.

    The pooling is weighted by the attention mask so that padding tokens,
    which appear when several texts of different lengths are encoded
    together, do not dilute the average. For a single string every token is
    real, so the result matches a plain mean over the tokens.

    Args:
        text: A string (or a list of strings) to embed.

    Returns:
        A float32 ``numpy.ndarray``. Singleton dimensions are squeezed out,
        so a single string yields a 1-D vector.
    """
    inputs = embed_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = embed_model(**inputs)
    hidden = outputs.last_hidden_state
    # 1 for real tokens, 0 for padding; broadcast over the hidden dimension
    mask = inputs["attention_mask"].unsqueeze(-1).type_as(hidden)
    summed = (hidden * mask).sum(dim=1)
    # Clamp so a row without any real token cannot divide by zero
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    embedding = (summed / token_counts).squeeze()
    return embedding.cpu().numpy().astype('float32')

# Create the FAISS index and fill it with the example documents.
# The dimension is read from a probe embedding instead of being hard-coded,
# so the index stays consistent with whatever embedding model is configured.
dimension = get_embedding("test").shape[0]
index = faiss.IndexFlatL2(dimension)

# Example corpus; list positions are the ids returned by FAISS searches
documents = [
    "La inteligencia artificial es el futuro de la tecnología.",
    "El aprendizaje automático es una rama de la inteligencia artificial.",
    "Las redes neuronales son un pilar fundamental del aprendizaje profundo."
]

# Add one embedding per document, in list order
for doc in documents:
    embedding = get_embedding(doc)
    index.add(embedding.reshape(1, -1))

# Load the "recall" metric from the evaluate library.
# NOTE(review): the error captured in this cell's output shows that this
# metric expects one int32 value per prediction and per reference, not lists
# of ranked document indices.
recall_metric = evaluate.load("recall")

# Manual NDCG implementation
def dcg_at_k(scores, k):
    """Return the discounted cumulative gain of the first ``k`` scores.

    The score at zero-based rank ``r`` is divided by ``log2(r + 2)``.
    """
    return sum(score / np.log2(rank + 2) for rank, score in enumerate(scores[:k]))

def ndcg_at_k(predicted_indices, relevant_indices, k=3):
    """Return NDCG@k for a ranked list of document ids with binary relevance.

    Args:
        predicted_indices: Document ids in ranked order (best first).
        relevant_indices: Ids of the documents considered relevant.
        k: Cut-off rank.

    Returns:
        DCG of the ranking divided by the DCG of an ideal ranking, or 0 when
        there is no relevant document (or ``k`` is 0).
    """
    relevant = set(relevant_indices)
    # Binary gain: 1 when the retrieved document is relevant, else 0
    scores = [1 if idx in relevant else 0 for idx in predicted_indices[:k]]
    # The ideal ranking puts every relevant document first, including the
    # ones that were not retrieved; sorting only the retrieved scores would
    # hide missing relevant documents and overstate the result.
    ideal_scores = [1] * min(k, len(relevant))
    dcg = dcg_at_k(scores, k)
    idcg = dcg_at_k(ideal_scores, k)
    return dcg / idcg if idcg > 0 else 0

# Search FAISS for a query and evaluate the ranking
def search_and_evaluate(query, relevant_indices, k=3):
    """Retrieve the top-``k`` documents for ``query`` and score the ranking.

    Recall@k is computed directly from the index lists. The ``evaluate``
    "recall" metric cannot be used for this: it rejects ranked index lists
    with the ValueError shown in this cell's captured output.

    Args:
        query: Query text to embed and search for.
        relevant_indices: Ids of the documents expected to be relevant.
        k: Number of neighbours to retrieve and cut-off for both metrics.

    Returns:
        A tuple ``(predicted_indices, recall_at_k, ndcg)`` where
        ``predicted_indices`` is the flat list of retrieved ids.
    """
    query_embedding = get_embedding(query)
    _, indices = index.search(query_embedding.reshape(1, -1), k)

    # Flatten the (1, k) result into a plain list of ids
    predicted_indices = indices[0].tolist()

    # Recall@k: relevant documents found in the top k, divided by the number
    # that could have been found there
    relevant = set(relevant_indices)
    hits = len(relevant.intersection(predicted_indices[:k]))
    denominator = min(k, len(relevant))
    recall_at_k = hits / denominator if denominator > 0 else 0.0

    # NDCG@k
    ndcg = ndcg_at_k(predicted_indices, relevant_indices, k)

    return predicted_indices, recall_at_k, ndcg

# Example: run one query through the retriever and evaluate the ranking
query = "¿Qué sabes sobre el aprendizaje profundo?"
relevant_indices = [1, 2]  # Indices of the documents expected to be relevant for this query

predicted_indices, recall_at_k, ndcg = search_and_evaluate(query, relevant_indices, k=3)

# Print the evaluation results
print(f"Documentos recuperados para la consulta: {predicted_indices}")
print(f"Recall@3: {recall_at_k:.2f}")
print(f"NDCG@3: {ndcg:.2f}")







ValueError: Predictions and/or references don't match the expected format.
Expected format: {'predictions': Value(dtype='int32', id=None), 'references': Value(dtype='int32', id=None)},
Input predictions: [[2, 1, 0]],
Input references: [[1, 2]]

In [None]:
# Paso 1: Instalar las librerías necesarias
!pip install -U transformers faiss-cpu datasets gradio sentence-transformers

# Importar las librerías necesarias
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
import faiss
import torch
import gradio as gr

# Load the causal language model used to generate answers
gpt_model_name = "gpt2"  # Model used to generate the FAISS-grounded answers
tokenizer = AutoTokenizer.from_pretrained(gpt_model_name)
llm_model = AutoModelForCausalLM.from_pretrained(gpt_model_name)
# Use the end-of-sequence token as the padding token so the tokenizer can be
# called with padding=True
tokenizer.pad_token = tokenizer.eos_token

# Load the embedding model used to turn queries and documents into vectors
embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_name)
embed_model = AutoModel.from_pretrained(embed_model_name)

# Convert text into an embedding vector with the embedding model
def get_embedding(text):
    """Return a float32 NumPy embedding for ``text``.

    The text is tokenized with ``embed_tokenizer`` (padding and truncation
    enabled), run through ``embed_model`` without gradient tracking, and the
    last hidden state is mean-pooled over the token axis.

    The pooling is weighted by the attention mask so that padding tokens,
    which appear when several texts of different lengths are encoded
    together, do not dilute the average. For a single string every token is
    real, so the result matches a plain mean over the tokens.

    Args:
        text: A string (or a list of strings) to embed.

    Returns:
        A float32 ``numpy.ndarray``. Singleton dimensions are squeezed out,
        so a single string yields a 1-D vector.
    """
    inputs = embed_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = embed_model(**inputs)
    hidden = outputs.last_hidden_state
    # 1 for real tokens, 0 for padding; broadcast over the hidden dimension
    mask = inputs["attention_mask"].unsqueeze(-1).type_as(hidden)
    summed = (hidden * mask).sum(dim=1)
    # Clamp so a row without any real token cannot divide by zero
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    embedding = (summed / token_counts).squeeze()
    return embedding.cpu().numpy().astype('float32')

# Create the FAISS index for the sample documents.
# The dimension is read from a probe embedding produced by the model.
test_embedding = get_embedding("test")
dimension = test_embedding.shape[0]
index = faiss.IndexFlatL2(dimension)
# Sample corpus; list positions are the ids used to look documents up after
# a FAISS search.
sample_documents = [
    "Artificial intelligence is the simulation of human intelligence by machines.",
    "Machine learning is a subset of artificial intelligence focused on training models.",
    "Deep learning is a branch of machine learning using neural networks."
]

# Embed every document and add it to the index, in list order
for doc in sample_documents:
    embedding = get_embedding(doc)
    index.add(embedding.reshape(1, -1))  # Add each embedding to the FAISS index as a single row

# Search the FAISS index for the documents closest to a query embedding
def search_faiss(query_embedding, k=3):
    """Return the ids and distances of the nearest indexed documents.

    Args:
        query_embedding: NumPy vector for the query; it is reshaped to a
            single row before being passed to ``index.search``.
        k: Maximum number of neighbours to request.

    Returns:
        A tuple ``(indices, distances)`` of 1-D NumPy arrays, in the order
        returned by FAISS. Fewer than ``k`` entries are returned when the
        index holds fewer than ``k`` vectors.
    """
    distances, indices = index.search(query_embedding.reshape(1, -1), k)
    # FAISS fills the slots it cannot satisfy with the id -1. Drop them so a
    # caller never uses -1 as a list position (which would silently select
    # the last document).
    found = indices[0] >= 0
    return indices[0][found], distances[0][found]

# Metric helpers
def recall_at_k(predicted_indices, relevant_indices, k=3):
    """Return Recall@k for a ranked list of document ids.

    The number of relevant documents found in the top ``k`` is divided by
    ``min(k, number of distinct relevant documents)``, i.e. by the most hits
    the top ``k`` could contain.

    Args:
        predicted_indices: Document ids in ranked order (best first).
        relevant_indices: Ids of the documents considered relevant.
        k: Cut-off rank.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when there is nothing to find
        (no relevant documents, or ``k`` is 0) instead of dividing by zero.
    """
    relevant = set(relevant_indices)
    denominator = min(k, len(relevant))
    if denominator <= 0:
        return 0.0
    relevant_retrieved = sum(1 for idx in predicted_indices[:k] if idx in relevant)
    return relevant_retrieved / denominator

def ndcg_at_k(predicted_indices, relevant_indices, k=3):
    """Return NDCG@k for a ranked list of document ids with binary relevance.

    Args:
        predicted_indices: Document ids in ranked order (best first).
        relevant_indices: Ids of the documents considered relevant.
        k: Cut-off rank.

    Returns:
        DCG of the ranking divided by the DCG of an ideal ranking, or 0 when
        there is no relevant document (or ``k`` is 0).
    """
    # Local import: this cell's import list does not include numpy, so the
    # standard library is used for the logarithm instead of ``np``.
    import math

    def dcg(scores, k):
        # The score at zero-based rank r is discounted by log2(r + 2)
        return sum(score / math.log2(rank + 2) for rank, score in enumerate(scores[:k]))

    relevant = set(relevant_indices)
    # Binary gain: 1 when the retrieved document is relevant, else 0
    scores = [1 if idx in relevant else 0 for idx in predicted_indices[:k]]
    # The ideal ranking puts every relevant document first, including the
    # ones that were not retrieved; sorting only the retrieved scores would
    # hide missing relevant documents and overstate the result.
    ideal_scores = [1] * min(k, len(relevant))
    dcg_val = dcg(scores, k)
    idcg_val = dcg(ideal_scores, k)
    return dcg_val / idcg_val if idcg_val > 0 else 0

# Build the prompt given to the LLM from the query and the retrieved documents
def create_prompt(query, retrieved_docs):
    """Assemble the question, a numbered document list and the answer cue.

    Args:
        query: The user's question.
        retrieved_docs: Iterable of document texts, numbered from 1 in the
            order given.

    Returns:
        The prompt string, ending with ``"Answer:"``.
    """
    header = f"Question: {query}\n\nRelevant Documents:\n"
    doc_lines = [f"{i+1}. {doc}\n" for i, doc in enumerate(retrieved_docs)]
    return header + "".join(doc_lines) + "\nAnswer:"

# Generate the LLM answer for a FAISS-grounded prompt
def generate_llm_response(prompt):
    """Generate up to 50 new tokens for ``prompt`` and return only the answer.

    Args:
        prompt: Full prompt text (question plus retrieved documents).

    Returns:
        The newly generated text, without the echoed prompt and with
        surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=50,
            # Sampling must be enabled explicitly; otherwise decoding is
            # greedy and temperature/top_k/top_p have no effect.
            do_sample=True,
            temperature=0.8,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            # Passing the pad id avoids the per-call "Setting `pad_token_id`"
            # warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    # A causal LM returns the prompt tokens followed by the new ones; decode
    # only the continuation so the answer does not repeat the whole prompt.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response.strip()

# Full FAISS-grounded chatbot step: answer the query and report retrieval metrics
def faiss_based_response(user_query):
    """Answer ``user_query`` from retrieved documents and score the retrieval.

    Args:
        user_query: The user's question.

    Returns:
        A tuple ``(response, recall_text, ndcg_text)``: the generated answer
        and two preformatted metric strings for Recall@3 and NDCG@3.
    """
    doc_ids, _distances = search_faiss(get_embedding(user_query))
    context_docs = [sample_documents[doc_id] for doc_id in doc_ids]
    response = generate_llm_response(create_prompt(user_query, context_docs))

    # Metrics are computed on the retrieved ids. Every sample document is
    # listed as relevant here (adjust per use case).
    expected_ids = [0, 1, 2]
    recall = recall_at_k(doc_ids, expected_ids, k=3)
    ndcg = ndcg_at_k(doc_ids, expected_ids, k=3)

    recall_text = f"Recall@3: {recall:.2f}"
    ndcg_text = f"NDCG@3: {ndcg:.2f}"
    return response, recall_text, ndcg_text

# Route the query to the FAISS-grounded path or to the general model and
# append the metric lines
def get_response(user_query):
    """Return a labelled answer for ``user_query`` followed by metric lines.

    Queries that mention one of the AI-related keywords (case-insensitive
    substring match) are answered through ``faiss_based_response``; every
    other query is continued directly by GPT-2.

    Args:
        user_query: The text typed by the user.

    Returns:
        The answer prefixed with a label naming the path that produced it,
        followed by the Recall@3 and NDCG@3 lines.
    """
    ai_related_keywords = ["artificial intelligence", "machine learning", "deep learning", "neural networks"]
    normalized_query = user_query.lower()

    # Use FAISS when a keyword is found
    if any(keyword in normalized_query for keyword in ai_related_keywords):
        faiss_response, recall, ndcg = faiss_based_response(user_query)
        return f"**Respuesta del LLM basado en FAISS:** {faiss_response}\n\n{recall}\n{ndcg}"

    # Otherwise generate the answer with the general model (GPT-2)
    inputs = tokenizer(user_query, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=50,
            # Sampling must be enabled explicitly; otherwise decoding is
            # greedy and temperature/top_k/top_p have no effect.
            do_sample=True,
            temperature=0.8,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            # Passing the pad id avoids the per-call "Setting `pad_token_id`"
            # warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    general_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Simulated metrics for the general model: the whole corpus is treated as
    # the retrieved set. The metric helpers compare document *indices*, so
    # positions are passed here; passing the document strings can never match
    # the integer ids and always yields 0.
    # NOTE(review): no retrieval happens in this branch, so these values do
    # not measure the quality of the generated answer.
    general_retrieved_indices = list(range(len(sample_documents)))
    general_relevant_indices = [0, 1, 2]  # Indices of the relevant documents

    recall = recall_at_k(general_retrieved_indices, general_relevant_indices, k=3)
    ndcg = ndcg_at_k(general_retrieved_indices, general_relevant_indices, k=3)

    return f"**Respuesta del modelo general:** {general_response}\n\nRecall@3: {recall:.2f}\nNDCG@3: {ndcg:.2f}"

# Create the Gradio interface: one text input routed through get_response,
# one text output
interface = gr.Interface(
    fn=get_response,
    inputs="text",
    outputs="text",
    title="Chatbot de Búsqueda Semántica con Métricas de Evaluación",
    description="Este chatbot responde a preguntas sobre IA, aprendizaje automático y aprendizaje profundo usando documentos relevantes, y muestra métricas de Recall@3 y NDCG@3 para evaluar la calidad de la respuesta."
)

# Launch the Gradio interface with options suited to Colab.
# share=True publishes a temporary public URL; debug=True keeps the cell
# running so errors and logs stay visible (see the captured output below).
interface.launch(share=True, debug=True)




Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://ea1878ac77c1c7fd73.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
