# In [8]:
import os
import json
from typing import List, Dict, Any
import ipywidgets as widgets
from IPython.display import display, clear_output
from langchain.schema import SystemMessage, HumanMessage
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document


# Optional LLM back-ends. Each flag records whether the corresponding
# LangChain wrapper could be imported, so the client factories can raise a
# readable error instead of failing with a NameError later on.
try:
    from langchain.llms import Ollama
except Exception:
    _HAS_OLLAMA = False
else:
    _HAS_OLLAMA = True

try:
    from langchain import OpenAI
except Exception:
    _HAS_OPENAI = False
else:
    _HAS_OPENAI = True



# Labels offered in the model dropdown, in display order.
MODEL_OPTIONS = [
    "Ollama (qwen3:8b)",
    "OpenAI (gpt-4o-mini) - requires API key",
    "Groq (placeholder)",
    "Gemini (placeholder)",
]

# The first option is the one selected when the UI is first shown.
DEFAULT_MODEL = MODEL_OPTIONS[0]


def create_ollama_client(model_name: str = "qwen3:8b", temperature: float = 0.0, top_p: float = 1.0, top_k: int = None):
    """Build a LangChain ``Ollama`` LLM client with the given sampling settings.

    Args:
        model_name: Name of the Ollama model to use.
        temperature: Sampling temperature forwarded to the wrapper.
        top_p: Nucleus-sampling threshold forwarded to the wrapper.
        top_k: Top-k cutoff forwarded to the wrapper; when ``None`` the
            wrapper's own default is left untouched.

    Returns:
        The configured ``Ollama`` client.

    Raises:
        RuntimeError: If the LangChain ``Ollama`` wrapper could not be imported.
    """
    if not _HAS_OLLAMA:
        raise RuntimeError("LangChain no tiene el wrapper 'Ollama' disponible. Instala langchain>=0.0 y la dependencia correspondiente o usa el cliente 'ollama' nativo.")

    # Forward every sampling knob the caller supplied; previously top_p and
    # top_k were accepted but silently dropped.
    sampling = {"temperature": temperature, "top_p": top_p}
    if top_k is not None:
        sampling["top_k"] = top_k
    return Ollama(model=model_name, **sampling)


def get_llm_from_selection(selection: str, temperature: float, top_p: float, top_k: int):
    """Create the LLM client that corresponds to a dropdown label.

    Args:
        selection: Label chosen in the model dropdown, e.g. ``"Ollama (qwen3:8b)"``.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        top_k: Top-k cutoff (only forwarded to Ollama).

    Returns:
        A LangChain LLM client for the selected provider.

    Raises:
        RuntimeError: If the wrapper for the selected provider is not installed.
        NotImplementedError: If the label belongs to a provider that has no
            integration here.
    """
    if selection.startswith("Ollama"):
        # Labels look like "Ollama (<model>)": take the text between the
        # parentheses, falling back to the default model if there is none.
        model_token = selection.partition("(")[2].partition(")")[0].strip() or "qwen3:8b"
        return create_ollama_client(model_name=model_token, temperature=temperature, top_p=top_p, top_k=top_k)

    if selection.startswith("OpenAI"):
        if not _HAS_OPENAI:
            raise RuntimeError("LangChain OpenAI wrapper no disponible. Instala openai y langchain.")
        # top_k is not forwarded here: only temperature and top_p are passed
        # to the OpenAI wrapper.
        return OpenAI(temperature=temperature, top_p=top_p)

    raise NotImplementedError(f"La integración para {selection} no está implementada en este notebook. Agrega las credenciales y el wrapper correspondiente.")




def load_local_file(file_upload_widget) -> List[Document]:
    """Convert the files held by a ``FileUpload`` widget into documents.

    Both shapes of ``FileUpload.value`` are accepted: a dict mapping each
    filename to an info dict with a ``'content'`` entry, and a sequence of
    info dicts that carry ``'name'`` and ``'content'``.

    PDFs are parsed with ``PyPDFLoader`` (one document per element it
    returns); if that fails, the raw bytes are decoded as text instead. Every
    other file is decoded as UTF-8, ignoring undecodable bytes. All returned
    documents carry the uploaded filename as their ``'source'`` metadata.

    Args:
        file_upload_widget: Object whose ``value`` holds the uploaded files.

    Returns:
        The list of loaded documents (empty when nothing was uploaded).
    """
    import tempfile  # local import: only needed for the PDF branch below

    uploads = file_upload_widget.value
    if isinstance(uploads, dict):
        entries = [(name, info['content']) for name, info in uploads.items()]
    else:
        entries = [(info['name'], info['content']) for info in uploads]

    docs = []
    for filename, content in entries:
        data = bytes(content)  # content may be bytes or a memoryview

        if filename.lower().endswith('.pdf'):
            # PyPDFLoader needs a path on disk. Use a private temporary
            # directory with a fixed file name so the user-supplied name never
            # becomes part of a path, and so the file is removed afterwards.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = os.path.join(tmp_dir, "upload.pdf")
                with open(tmp_path, "wb") as f:
                    f.write(data)
                try:
                    pages = PyPDFLoader(tmp_path).load()
                except Exception:
                    pages = None  # fall back to a raw text read below

            if pages is None:
                raw = data.decode(errors='ignore')
                docs.append(Document(page_content=raw, metadata={'source': filename}))
            else:
                # Report the uploaded name rather than the throwaway temp path.
                for page in pages:
                    page.metadata['source'] = filename
                docs.extend(pages)
        else:
            raw = data.decode('utf-8', errors='ignore')
            # Normalize line endings the way a text-mode file read would.
            raw = raw.replace('\r\n', '\n').replace('\r', '\n')
            docs.append(Document(page_content=raw, metadata={'source': filename}))
    return docs


def load_wikipedia(query: str, lang: str = 'es') -> List[Document]:
    """Fetch a Wikipedia page and wrap its text in a single document.

    Args:
        query: Title or search term passed to ``wikipedia.page``.
        lang: Language code passed to ``wikipedia.set_lang``.

    Returns:
        A one-element list whose document holds the page content and has
        ``'wikipedia:<query>'`` as its ``'source'`` metadata.

    Raises:
        RuntimeError: If anything fails, including the ``wikipedia`` package
            not being installed. The original exception is attached as the
            cause.
    """
    try:
        # Imported lazily so the package is only required when this is used.
        import wikipedia
        wikipedia.set_lang(lang)
        page = wikipedia.page(query)
        text = page.content
        return [Document(page_content=text, metadata={'source': f'wikipedia:{query}'})]
    except Exception as e:
        # Chain explicitly so the traceback shows the underlying failure as
        # the direct cause.
        raise RuntimeError(f"Error cargando Wikipedia: {e}") from e





def split_documents(docs: List[Document], chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
    """Split every document into chunks with a recursive character splitter.

    Each chunk becomes a new document that copies the parent's metadata and
    adds a ``'chunk'`` key with the chunk's index within that parent (the
    index restarts at 0 for every input document).

    Args:
        docs: Documents to split.
        chunk_size: ``chunk_size`` given to the splitter.
        chunk_overlap: ``chunk_overlap`` given to the splitter.

    Returns:
        The flat list of chunk documents, in input order.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return [
        Document(page_content=piece, metadata={**doc.metadata, 'chunk': index})
        for doc in docs
        for index, piece in enumerate(splitter.split_text(doc.page_content))
    ]


def filter_documents(docs: List[Document], keyword: str) -> List[Document]:
    """Keep only the documents whose text contains ``keyword``.

    The match is a case-insensitive substring test. An empty keyword disables
    filtering and returns the input list itself.
    """
    if keyword:
        needle = keyword.lower()
        return [doc for doc in docs if needle in doc.page_content.lower()]
    return docs


def summarize_documents_with_llm(docs: List[Document], llm, method: str = 'map_reduce') -> str:
    """Summarize documents with a LangChain summarize chain.

    Args:
        docs: Documents to summarize.
        llm: LLM client handed to ``load_summarize_chain``.
        method: Chain type passed as ``chain_type``. Previously this argument
            was ignored and ``"map_reduce"`` was always used; the default
            keeps that behavior.

    Returns:
        The summary produced by the chain, or ``"(no hay documentos)"`` when
        ``docs`` is empty.
    """
    if not docs:
        return "(no hay documentos)"
    chain = load_summarize_chain(llm=llm, chain_type=method)
    return chain.run(docs)




# System-role prompt template: fixed Spanish instructions with no variables.
# NOTE(review): not referenced anywhere else in the visible code — confirm it
# is still needed.
system_template = PromptTemplate.from_template("""
Eres un asistente experto que responde en español. Sigue las instrucciones del usuario.
""")

# Human-role prompt template; its only placeholder is `user_input`.
# NOTE(review): also unreferenced in the visible code.
human_template = PromptTemplate.from_template("""
{user_input}
""")


def build_messages(system_text: str, user_text: str) -> List:
    """Wrap the system and user texts as chat messages.

    Returns:
        A two-element list: the ``SystemMessage`` followed by the
        ``HumanMessage``.
    """
    system_message = SystemMessage(content=system_text)
    human_message = HumanMessage(content=user_text)
    return [system_message, human_message]



# --- Model selection and sampling controls ---------------------------------
model_dropdown = widgets.Dropdown(
    options=MODEL_OPTIONS,
    value=DEFAULT_MODEL,
    description='Modelo:',
)
temperature_slider = widgets.FloatSlider(
    value=0.7,
    min=0.0,
    max=1.0,
    step=0.01,
    description='Temp:',
)
top_p_slider = widgets.FloatSlider(
    value=1.0,
    min=0.0,
    max=1.0,
    step=0.01,
    description='Top-p:',
)
top_k_int = widgets.IntText(value=50, description='Top-k:')
context_size = widgets.IntSlider(
    value=2048,
    min=256,
    max=65536,
    step=256,
    description='Context:',
)

# --- Context sources: local uploads and Wikipedia ---------------------------
file_uploader = widgets.FileUpload(
    accept='*',
    multiple=True,
    description='Subir archivos',
)

wiki_text = widgets.Text(value='', description='Wiki search:')
wiki_button = widgets.Button(description='Cargar Wikipedia')

# --- Document transformations (chunking and keyword filter) -----------------
chunk_size_widget = widgets.IntSlider(
    value=1000,
    min=200,
    max=5000,
    step=100,
    description='Chunk size:',
)
chunk_overlap_widget = widgets.IntSlider(
    value=200,
    min=0,
    max=1000,
    step=50,
    description='Overlap:',
)
filter_keyword = widgets.Text(value='', description='Filtrar por:')

# --- Prompt inputs and send button ------------------------------------------
system_textarea = widgets.Textarea(
    value='Eres un asistente útil y conciso en español.',
    description='Sistema:',
)
user_prompt = widgets.Textarea(
    value='Escribe tu pregunta aquí...',
    description='yo:',
)
send_button = widgets.Button(description='Enviar al LLM')

# All handler output (status messages and LLM responses) is printed here.
output_area = widgets.Output()

# Documents accumulated from uploads and Wikipedia; the handlers append to it.
_loaded_documents: List[Document] = []




def on_wiki_button_clicked(b):
    """Load the Wikipedia page named in ``wiki_text`` into the document store.

    Clears the output area, then either reports the number of documents added
    to ``_loaded_documents``, asks for a search term when the box is blank, or
    prints the error raised while loading.

    Args:
        b: The clicked button (unused).
    """
    with output_area:
        clear_output()
        term = wiki_text.value.strip()
        if term:
            try:
                wiki_docs = load_wikipedia(term)
                _loaded_documents.extend(wiki_docs)
                print(f"Cargado Wikipedia: {term} -> {len(wiki_docs)} documento(s)")
            except Exception as err:
                print("Error cargando Wikipedia:", err)
        else:
            print("Ingresa un término para buscar en Wikipedia")


wiki_button.on_click(on_wiki_button_clicked)


def on_file_upload_change(change):
    """Load newly uploaded files into the document store.

    Runs whenever the uploader's ``value`` changes. Clears the output area,
    and if the uploader holds any files, loads them, appends them to
    ``_loaded_documents`` and prints the counts (or the error raised).

    Args:
        change: The trait-change notification (unused).
    """
    with output_area:
        clear_output()
        if not file_uploader.value:
            return
        try:
            uploaded_docs = load_local_file(file_uploader)
            _loaded_documents.extend(uploaded_docs)
            print(f"Cargados {len(uploaded_docs)} documentos desde el uploader. Total en memoria: {len(_loaded_documents)}")
        except Exception as err:
            print("Error leyendo archivos:", err)


file_uploader.observe(on_file_upload_change, names='value')


def on_send_button_clicked(b):
    """Build the prompt from the UI state and send it to the selected LLM.

    Steps, all reported in ``output_area``:

    1. Create the LLM client from the model dropdown and sampling widgets.
    2. Split and filter a copy of ``_loaded_documents``. If more than six
       chunks remain, summarize the first ten and put the summary first.
    3. Concatenate system text, user text and the first 500 characters of
       every chunk into one prompt and call the LLM with it.
    4. If that call raises ``TypeError``, retry with chat messages instead.

    Every failure is printed rather than raised, so the handler never leaves a
    traceback in the output area.

    Args:
        b: The clicked button (unused).
    """
    with output_area:
        clear_output()
        print("Preparando request...")
        # Prepare the LLM client.
        try:
            llm = get_llm_from_selection(
                model_dropdown.value,
                temperature_slider.value,
                top_p_slider.value,
                top_k_int.value,
            )
        except NotImplementedError as nie:
            print(nie)
            return
        except Exception as e:
            print("Error creando cliente LLM:", e)
            return

        # Work on a copy so the stored documents stay unsplit for later sends.
        docs = list(_loaded_documents)
        if docs:
            try:
                docs = split_documents(
                    docs,
                    chunk_size=chunk_size_widget.value,
                    chunk_overlap=chunk_overlap_widget.value,
                )
                docs = filter_documents(docs, filter_keyword.value)
            except Exception as e:
                # The sliders allow combinations the splitter may reject
                # (e.g. an overlap larger than the chunk size); report the
                # problem instead of letting it escape the handler.
                print("Error preparando documentos:", e)
                return
            print(f"Documentos preparados: {len(docs)} chunks")
            # Summarize when there are many chunks.
            if len(docs) > 6:
                try:
                    print("Generando resumen de contexto...")
                    summary = summarize_documents_with_llm(docs[:10], llm)
                    docs.insert(0, Document(page_content=summary, metadata={'source': 'auto_summary'}))
                except Exception as e:
                    # Best effort: continue without the summary.
                    print("Resumen falló:", e)

        sys_msg = system_textarea.value
        human_msg = user_prompt.value
        messages = build_messages(sys_msg, human_msg)

        try:
            # Each chunk contributes at most its first 500 characters.
            context = "".join(
                f"[source:{d.metadata.get('source','unknown')}] {d.page_content[:500]}\n---\n"
                for d in docs
            )
            prompt_full = f"SYSTEM:\n{sys_msg}\n\nUSER:\n{human_msg}\n\nCONTEXT:\n{context}"
            print("Enviando prompt al LLM...")
            resp = llm(prompt_full)
            print("--- RESPUESTA ---")
            print(resp)
        except TypeError:
            # The client rejected a plain string: retry with chat messages
            # (note that this path does not include the document context).
            try:
                resp = llm(messages)
                print(resp)
            except Exception as e:
                print("Error invocando LLM con messages:", e)
        except Exception as e:
            print("Error llamando al LLM:", e)


send_button.on_click(on_send_button_clicked)


# Left column: model settings, context sources and document transformations.
left = widgets.VBox(
    children=[
        model_dropdown,
        widgets.HBox(children=[temperature_slider, top_p_slider, top_k_int]),
        context_size,
        widgets.Label("Cargar contexto:"),
        file_uploader,
        widgets.HBox(children=[wiki_text, wiki_button]),
        widgets.Label("Transformaciones:"),
        chunk_size_widget,
        chunk_overlap_widget,
        filter_keyword,
    ]
)

# Right column: prompts, send button and the shared output area.
right = widgets.VBox(
    children=[
        system_textarea,
        user_prompt,
        send_button,
        output_area,
    ]
)

# Two-column layout spanning the full cell width.
ui = widgets.HBox(
    children=[left, right],
    layout=widgets.Layout(width='100%'),
)

print("Interfaz lista. Ejecuta las celdas y usa los widgets para interactuar.")
display(ui)





# Interfaz lista. Ejecuta las celdas y usa los widgets para interactuar.
#
#
# HBox(children=(VBox(children=(Dropdown(description='Modelo:', options=('Ollama (qwen3:8b)', 'OpenAI (gpt-4o-mi…