In [27]:
# ---------- IMPORTS ----------
import os
import subprocess
import tempfile

from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader, WebBaseLoader, WikipediaLoader
from langchain_core.messages import HumanMessage, SystemMessage

from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langchain_community.chat_models import ChatOllama

import ipywidgets as widgets
from ipywidgets import HBox, VBox, Layout, Output, Button, Dropdown, Text, Textarea, IntSlider, FloatSlider, Checkbox, FileUpload
from IPython.display import display, clear_output


In [28]:
# ---------- API KEYS ----------
def key_preview(env_name, visible=6):
    """Return a short preview of an environment variable for sanity checks.

    Args:
        env_name: Name of the environment variable to inspect.
        visible: Number of leading characters to reveal.

    Returns:
        The first ``visible`` characters of the value, or the placeholder
        ``"<no definida>"`` when the variable is unset or empty (slicing the
        ``None`` returned by ``os.getenv`` would raise ``TypeError``).
    """
    value = os.getenv(env_name)
    if not value:
        return "<no definida>"
    return value[:visible]


# Only a short prefix is printed so the full secret never reaches the output.
for _env_name in ("OPENAI_API_KEY", "GOOGLE_API_KEY", "GROQ_API_KEY"):
    print(f"{_env_name}:", key_preview(_env_name))

# ---------- GLOBAL STATE ----------
loaded_docs = []           # documents currently loaded
conversation_messages = [] # conversation history


OPENAI_API_KEY: sk-pro
GOOGLE_API_KEY: AIzaSy
GROQ_API_KEY: gsk_KZ


In [29]:
# ---------- PROVEEDOR Y MODELOS DINÁMICOS ----------

# Provider selector; its value drives which model names are offered below.
provider_dropdown = Dropdown(
    options=["openai", "google", "groq", "ollama"],
    value="openai",
    description="Proveedor"
)

# Starts empty; filled in by update_model_options for the chosen provider.
model_dropdown = Dropdown(
    options=[],
    description="Modelo"
)

# Static model lists for the hosted APIs ("ollama" is discovered at runtime).
# The strings are passed verbatim as the `model` argument of each chat client,
# so they must be identifiers the provider API accepts.
# NOTE(review): provider catalogs change over time — verify these IDs against
# each provider's current model list before relying on them.
provider_models = {
    "openai": ["gpt-4o-mini", "gpt-4o", "gpt-3.5-turbo"],
    "google": ["gemini-2.5-flash", "gemini-2.5-pro"],
    "groq": ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "groq/compound"]
}

def list_ollama_models(timeout=10):
    """Return the names of the models reported by ``ollama list``.

    This is a best-effort probe: any failure to run the CLI yields an empty
    list instead of an exception, so the UI can show a placeholder.

    Args:
        timeout: Maximum number of seconds to wait for the CLI. Without a
            limit, a hung ``ollama`` process would block the widget callback
            indefinitely.

    Returns:
        A list with the first whitespace-separated column of every non-blank
        output row after the header; ``[]`` when the command is missing,
        times out, exits non-zero, or produces undecodable output.
    """
    try:
        result = subprocess.run(
            ["ollama", "list"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: binary not found / not executable.
        # SubprocessError: includes TimeoutExpired.
        # ValueError: includes UnicodeDecodeError raised while decoding output.
        return []

    if result.returncode != 0:
        return []

    rows = result.stdout.strip().splitlines()[1:]  # skip the header row
    return [row.split()[0] for row in rows if row.strip()]

def update_model_options(change):
    """Repopulate ``model_dropdown`` for the provider given in ``change['new']``.

    For "ollama" the choices come from ``list_ollama_models()``; for any other
    provider they come from ``provider_models``. When no model is available a
    single placeholder entry is shown instead. The first entry is selected.
    """
    selected = change['new']

    if selected == "ollama":
        choices = list_ollama_models()
        placeholder = "<No hay modelos locales>"
    else:
        choices = provider_models.get(selected, [])
        placeholder = "<Sin modelos disponibles>"

    # Fall back to the placeholder so the dropdown is never left empty.
    if not choices:
        choices = [placeholder]

    model_dropdown.options = choices
    model_dropdown.value = choices[0]

# Refresh the model list whenever the provider dropdown's value changes.
provider_dropdown.observe(update_model_options, names='value')
# Populate the model list once for the provider selected at start-up.
update_model_options({'new': provider_dropdown.value})

display(provider_dropdown, model_dropdown)


Dropdown(description='Proveedor', options=('openai', 'google', 'groq', 'ollama'), value='openai')

Dropdown(description='Modelo', options=('gpt-4o-mini', 'gpt-4o', 'gpt-3.5-turbo'), value='gpt-4o-mini')

In [30]:
# ---------- PARÁMETROS DEL MODELO ----------
# Sampling temperature read by get_model().
temperature_slider = FloatSlider(
    description="Temp",
    value=0.7,
    min=0,
    max=1,
    step=0.05,
)
# Nucleus-sampling value read by get_model().
top_p_slider = FloatSlider(
    description="Top-p",
    value=0.9,
    min=0,
    max=1,
    step=0.05,
)
# Token limit read by get_model().
max_tokens_slider = IntSlider(
    description="Tokens",
    value=512,
    min=64,
    max=4096,
    step=64,
)


In [31]:
# ---------- WIDGETS DOCUMENTOS ----------
# Document sources: local upload, Wikipedia search term, or web page URL.
file_uploader = FileUpload(
    accept=".txt,.pdf",
    multiple=True,
)
wiki_query = Text(description="Wikipedia")
web_url = Text(description="Web URL")

# Splitting options consumed by process_docs().
chunk_size_slider = IntSlider(
    description="Chunk size",
    value=1000,
    min=200,
    max=4000,
)
chunk_overlap_slider = IntSlider(
    description="Overlap",
    value=200,
    min=0,
    max=1000,
)
summary_checkbox = Checkbox(
    description="Resumir docs",
    value=False,
)

# Output area where the document functions print their status messages.
doc_status = Output()


In [32]:
# ---------- FUNCIONES DE DOCUMENTOS ----------
def load_local():
    """Load every file currently held by ``file_uploader`` into ``loaded_docs``.

    Each uploaded item is expected to expose ``'name'`` and ``'content'`` keys.
    Its bytes are written to a private temporary directory (removed again once
    loading finishes) and parsed with ``PyPDFLoader`` for ``.pdf`` names or
    ``TextLoader`` (UTF-8) otherwise.

    On success ``loaded_docs`` is replaced with the parsed documents. On any
    failure the error is printed to ``doc_status`` and ``loaded_docs`` is left
    untouched; without this, exceptions raised inside a button callback would
    never reach the user.
    """
    global loaded_docs
    uploads = file_uploader.value
    if not uploads:
        with doc_status:
            clear_output()
            print("Sube un archivo primero.")
        return

    all_docs = []
    try:
        # A fresh private directory avoids clobbering arbitrary files in a
        # shared /tmp and works on platforms where /tmp does not exist.
        with tempfile.TemporaryDirectory(prefix="chat_docs_") as tmp_dir:
            for item in uploads:
                # Keep only the final path component: the name comes from the
                # client and must not be able to escape tmp_dir.
                fname = os.path.basename(item['name']) or "upload"
                path = os.path.join(tmp_dir, fname)
                with open(path, "wb") as f:
                    f.write(item['content'])

                if fname.lower().endswith(".pdf"):
                    loader = PyPDFLoader(path)
                else:
                    loader = TextLoader(path, encoding="utf-8")

                # Loading happens here, before the directory is removed.
                all_docs.extend(loader.load())
    except Exception as e:
        with doc_status:
            clear_output()
            print(f"Error al cargar archivo(s): {e}")
        return

    loaded_docs = all_docs
    with doc_status:
        clear_output()
        print(f"Archivo(s) cargado(s): {len(uploads)} | Fragmentos: {len(loaded_docs)}")

def load_wikipedia(query):
    """Replace ``loaded_docs`` with up to two Wikipedia documents for ``query``.

    Args:
        query: Search term; surrounding whitespace is ignored. ``None`` or a
            blank string only prints a hint to ``doc_status``.

    Failures while constructing or running the loader are printed to
    ``doc_status`` and leave ``loaded_docs`` unchanged, because exceptions
    raised inside a button callback would otherwise not be shown to the user.
    """
    global loaded_docs
    term = (query or "").strip()
    if not term:
        with doc_status:
            clear_output()
            print("Escribe un término para Wikipedia.")
        return

    try:
        docs = WikipediaLoader(query=term, load_max_docs=2).load()
    except Exception as e:
        with doc_status:
            clear_output()
            print(f"Error al cargar desde Wikipedia: {e}")
        return

    loaded_docs = docs
    with doc_status:
        clear_output()
        print(f"Cargados {len(loaded_docs)} documentos desde Wikipedia.")

def load_web(url):
    """Replace ``loaded_docs`` with the content loaded from ``url``.

    Args:
        url: Address of the page to load; surrounding whitespace is ignored.
            ``None`` or a blank string only prints a hint to ``doc_status``.

    Failures while constructing or running the loader are printed to
    ``doc_status`` and leave ``loaded_docs`` unchanged, because exceptions
    raised inside a button callback would otherwise not be shown to the user.
    """
    global loaded_docs
    target = (url or "").strip()
    if not target:
        with doc_status:
            clear_output()
            print("Escribe una URL válida.")
        return

    try:
        docs = WebBaseLoader(target).load()
    except Exception as e:
        with doc_status:
            clear_output()
            print(f"Error al cargar {target}: {e}")
        return

    loaded_docs = docs
    with doc_status:
        clear_output()
        print(f"Cargado documento desde {target}.")

def process_docs():
    """Split ``loaded_docs`` into chunks and optionally summarize them.

    Chunk size and overlap come from the corresponding sliders. When the
    summary checkbox is ticked, the first five chunks are each summarized with
    the model returned by ``get_model()`` and those summaries replace the
    chunks; if summarizing fails, the plain chunks are kept and the error is
    reported. The outcome is printed to ``doc_status``.
    """
    global loaded_docs
    if not loaded_docs:
        with doc_status:
            clear_output()
            print("Primero carga un documento.")
        return

    chunk_size = chunk_size_slider.value
    chunk_overlap = chunk_overlap_slider.value
    # The slider ranges allow overlap > chunk size, which the splitter rejects
    # with an exception; report it instead of failing inside the callback.
    if chunk_overlap > chunk_size:
        with doc_status:
            clear_output()
            print(f"El overlap ({chunk_overlap}) no puede ser mayor que el chunk size ({chunk_size}).")
        return

    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap
        )
        docs = splitter.split_documents(loaded_docs)
    except Exception as e:
        with doc_status:
            clear_output()
            print(f"Error al dividir documentos: {e}")
        return

    if summary_checkbox.value:
        try:
            llm = get_model()
            summaries = []
            # Only the first five chunks are summarized to bound the number
            # of model calls.
            for d in docs[:5]:
                resp = llm.invoke([HumanMessage(content=f"Resume el siguiente texto:\n\n{d.page_content}")])
                # Carry the chunk's metadata over so the summary keeps its origin.
                summaries.append(Document(page_content=resp.content, metadata=dict(d.metadata)))
            docs = summaries
            msg = f"Documentos resumidos a {len(docs)} fragmentos."
        except Exception as e:
            msg = f"Error al resumir: {e}"
    else:
        msg = f"Documento dividido en {len(docs)} fragmentos."

    loaded_docs = docs
    with doc_status:
        clear_output()
        print(msg)

def reset_docs():
    """Drop every loaded document and report it in ``doc_status``."""
    global loaded_docs
    loaded_docs = list()
    with doc_status:
        clear_output()
        print("Documentos limpiados.")


In [33]:
# ---------- CHAT Y MODELOS ----------
def get_model():
    """Build a chat model client from the current widget selections.

    Reads the provider, model name, temperature, top-p and token limit from
    their widgets and forwards all of them to the provider-specific client.

    Returns:
        A ``ChatOpenAI``, ``ChatGoogleGenerativeAI``, ``ChatGroq`` or
        ``ChatOllama`` instance, depending on the selected provider.

    Raises:
        ValueError: If the selected provider is not one of the four above.
    """
    provider = provider_dropdown.value
    model = model_dropdown.value
    temp = temperature_slider.value
    top_p = top_p_slider.value
    max_tokens = max_tokens_slider.value

    if provider == "openai":
        return ChatOpenAI(model=model, temperature=temp, top_p=top_p, max_tokens=max_tokens)
    elif provider == "google":
        # This client names its token limit `max_output_tokens`.
        return ChatGoogleGenerativeAI(
            model=model, temperature=temp, top_p=top_p, max_output_tokens=max_tokens
        )
    elif provider == "groq":
        # top_p is forwarded through model_kwargs rather than as a direct argument.
        return ChatGroq(
            model=model, temperature=temp, max_tokens=max_tokens,
            model_kwargs={"top_p": top_p}
        )
    elif provider == "ollama":
        # Ollama names its generation limit `num_predict`.
        return ChatOllama(model=model, temperature=temp, top_p=top_p, num_predict=max_tokens)
    else:
        raise ValueError("Proveedor desconocido")

# Chat controls: message box, send/clear buttons and the transcript area.
chat_input = Textarea(
    placeholder="Escribe tu mensaje...",
    layout=Layout(width="600px", height="100px"),
)
send_btn = Button(
    description="Enviar",
    layout=Layout(width="120px"),
)
clear_btn = Button(
    description="Limpiar",
    layout=Layout(width="120px"),
)
chat_out = Output()

def on_send(_):
    """Send the text in ``chat_input`` to the selected model and print the reply.

    The prompt is built as: an optional system message containing the first
    three loaded documents, then the stored conversation, then the new user
    message. The stored conversation only ever receives the user message and
    the model reply, and only after a successful call, so:

    * the document context is not duplicated into the history on every turn
      and always reflects the documents loaded at send time;
    * a failed call leaves the history unchanged and can simply be retried.

    Args:
        _: The clicked button (unused).
    """
    user_msg = chat_input.value.strip()
    if not user_msg:
        return

    human = HumanMessage(content=user_msg)

    # Build the prompt on a copy so the stored history is not mutated.
    prompt = list(conversation_messages)
    context_text = "\n\n".join(d.page_content for d in loaded_docs[:3]) if loaded_docs else ""
    if context_text:
        prompt.insert(0, SystemMessage(content=f"Usa este contexto para responder:\n{context_text}"))
    prompt.append(human)

    try:
        # Model construction is inside the try as well: it can fail too
        # (e.g. missing credentials) and must be reported to the user.
        llm = get_model()
        resp = llm.invoke(prompt)
    except Exception as e:
        with chat_out:
            print("Error en la llamada al modelo:", str(e))
        return

    conversation_messages.extend([human, resp])
    with chat_out:
        print(f"Usuario: {user_msg}")
        print(f"Asistente: {resp.content}\n")

def on_clear(_):
    """Forget the conversation history and wipe the chat transcript.

    Args:
        _: The clicked button (unused).
    """
    global conversation_messages
    conversation_messages = list()
    with chat_out:
        clear_output()
        print("Historial limpiado.")

# Register the chat handlers as click callbacks of their buttons.
send_btn.on_click(on_send)
clear_btn.on_click(on_clear)


In [34]:
# ---------- BOTONES DOCUMENTOS ----------
def _doc_button(label, action):
    """Create a 150px-wide button that calls ``action()`` when clicked."""
    button = Button(description=label, layout=Layout(width="150px"))
    button.on_click(lambda _: action())
    return button


# The actions are lambdas so that function names and widget values are looked
# up at click time, not when the button is created.
btn_load_file = _doc_button("Cargar archivo", lambda: load_local())
btn_load_wiki = _doc_button("Cargar Wikipedia", lambda: load_wikipedia(wiki_query.value))
btn_load_web = _doc_button("Cargar Web", lambda: load_web(web_url.value))
btn_process = _doc_button("Procesar documento", lambda: process_docs())
btn_cancel = _doc_button("Cancelar", lambda: reset_docs())


In [37]:
# ---------- UI FINAL ----------
def _centered():
    """Return a new Layout that centers children on both axes."""
    return Layout(justify_content="center", align_items="center")


# Rows of the interface, top to bottom.
ui_rows = [
    # Model selection and generation parameters
    provider_dropdown,
    model_dropdown,
    HBox([temperature_slider, top_p_slider, max_tokens_slider], layout=_centered()),

    # Chat input and its buttons
    VBox([chat_input], layout=_centered()),
    HBox([send_btn, clear_btn], layout=_centered()),

    # Document sources
    HBox([file_uploader, btn_load_file], layout=_centered()),
    HBox([wiki_query, btn_load_wiki], layout=_centered()),
    HBox([web_url, btn_load_web], layout=_centered()),

    # Document processing
    HBox([chunk_size_slider, chunk_overlap_slider, summary_checkbox], layout=_centered()),
    HBox([btn_process, btn_cancel], layout=_centered()),

    # Status and transcript areas
    doc_status,
    chat_out,
]

ui = VBox(ui_rows, layout=Layout(align_items="center"))

display(ui)


VBox(children=(Dropdown(description='Proveedor', index=3, options=('openai', 'google', 'groq', 'ollama'), valu…