In [None]:
# %pip install pinecone
# %pip install groq
# %pip install sentence-transformers
# %pip install --upgrade langchain
# %pip install pypdf
# %pip install langchain-pinecone
# %pip install langchain-groq
# %pip install --quiet --upgrade langchain-text-splitters langchain-community
# %pip install langgraph

In [None]:
import os
from typing_extensions import List, TypedDict

from sentence_transformers import SentenceTransformer

from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain import hub
from langgraph.graph import START, StateGraph
from langchain_core.documents import Document
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate

from pinecone import Pinecone, ServerlessSpec

import streamlit as st

In [4]:
# Device on which the sentence-embedding model is loaded.
device = "cpu"

## Extracción de PDF a texto

In [36]:
def read_doc(directory):
    """Load every PDF file found in ``directory``.

    Args:
        directory: Path handed to ``PyPDFDirectoryLoader``.

    Returns:
        The result of the loader's ``load()`` call (the loaded documents).
    """
    return PyPDFDirectoryLoader(directory).load()

# Load the PDFs located in the current working directory.
doc=read_doc("./")

In [37]:
# Show the loaded documents as the cell output.
doc

[Document(metadata={'source': 'CV_LeandroSaraco.pdf', 'page': 0}, page_content='Contact\nEmail: leandrosaraco@gmail.com\nLeandro Saraco\nSoftware Engineer - Artificial Intelligence\nSoftware Engineer with 6+ years of experience developing software for  a semiconductor\ncompany. Combining my electronic and programming skills to provide high quality  EDA (Electronic\nDesign Automation) software that significantly reduce manual efforts during the design of an\nintegrated circuit. I am used to work with teams worldwide, in any timezone.\nExperience\nEnhanced verification infrastructure by building custom python scripts for regressions.\nAutomated testcases using coverage-driven verification with randomization.\nExperience building CNNs and RNNs for automatic signal classification.\nDeveloped microservices with REST APIs using FastAPI for efficient communication.\nBuilt software to auto detect anomalies on simulation log files using neural networks..\nAnalysis and failure detection of induc

## Conversión de texto a embeddings

- Si usamos LLama quizás convenga usar https://huggingface.co/jinaai/jina-embeddings-v3
- Otra opción con Pinecone: https://docs.pinecone.io/guides/inference/generate-embeddings

In [38]:
# Sentence-embedding model; wrapped further down so the vector store can use it.
embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)

## Carga de documentos en la base de datos

Separación de documentos en chunks

In [39]:
def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split documents into smaller, overlapping chunks.

    Args:
        docs: Documents to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        The chunks produced by the splitter's ``split_documents`` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

# Chunk the loaded PDFs with chunk_size=500 and chunk_overlap=50.
documents=chunk_data(docs=doc,chunk_size=500, chunk_overlap=50)

# Show the resulting chunks as the cell output.
documents

[Document(metadata={'source': 'CV_LeandroSaraco.pdf', 'page': 0}, page_content='Contact\nEmail: leandrosaraco@gmail.com\nLeandro Saraco\nSoftware Engineer - Artificial Intelligence\nSoftware Engineer with 6+ years of experience developing software for  a semiconductor\ncompany. Combining my electronic and programming skills to provide high quality  EDA (Electronic\nDesign Automation) software that significantly reduce manual efforts during the design of an\nintegrated circuit. I am used to work with teams worldwide, in any timezone.\nExperience'),
 Document(metadata={'source': 'CV_LeandroSaraco.pdf', 'page': 0}, page_content='Experience\nEnhanced verification infrastructure by building custom python scripts for regressions.\nAutomated testcases using coverage-driven verification with randomization.\nExperience building CNNs and RNNs for automatic signal classification.\nDeveloped microservices with REST APIs using FastAPI for efficient communication.\nBuilt software to auto detect anom

Conexión con base de datos de Pinecone y creación de un índice.

In [35]:
## CONNECT WITH PINECONE DATABASE
PINECONE_API_KEY=os.getenv("PINECONE_API_KEY")

# Connect to Pinecone. Cloud and region fall back to AWS / us-east-1 when the
# corresponding environment variables are unset or empty.
pc=Pinecone(api_key=PINECONE_API_KEY)
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)
index_name = 'cvs-ceia'

# Always start from an empty index: drop any previous copy first.
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)
    print("index {} borrado".format(index_name))

# The index was just dropped above, so it is normally absent at this point;
# the guard only avoids a create call while the name is still listed.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        index_name,
        dimension=384,  # must match the embedding size of all-MiniLM-L6-v2
        metric='cosine',
        spec=spec
    )
    # Report success only after the create call has returned without raising.
    print("index creado con el nombre: {}".format(index_name))
else:
    print("el index con el nombre {} ya estaba creado".format(index_name))

index cvs-ceia borrado
index creado con el nombre: cvs-ceia


Se crea un wrapper para los embeddings generados por el Sentence Transformer que sea compatible con el formato que requiere PineconeVectorStore

In [40]:

class SentenceTransformerEmbeddings:
    """Adapter exposing a sentence-embedding model through the two methods a
    vector store calls on its embedding object: ``embed_documents`` and
    ``embed_query``.

    Both methods return plain Python lists: one vector per document for
    ``embed_documents`` and a single flat vector for ``embed_query``.
    """

    def __init__(self, model):
        # model: any object with an ``encode(texts, convert_to_tensor=...)``
        # method whose result supports ``.tolist()``.
        self.model = model

    def embed_documents(self, texts):
        """Embed a list of documents.

        Args:
            texts: List of strings to embed.

        Returns:
            A list with one list of floats per input text.
        """
        # .tolist() turns the encoder's array result into nested Python lists.
        return self.model.encode(texts, convert_to_tensor=False).tolist()

    def embed_query(self, text):
        """Embed a single query.

        Args:
            text: The query string.

        Returns:
            A flat list of floats (one vector, not a list of vectors).
        """
        # encode() receives a one-element batch; take row 0 so the caller gets
        # a single vector instead of a nested [[...]] list.
        return self.model.encode([text], convert_to_tensor=False)[0].tolist()

# Build the wrapper instance that is passed to PineconeVectorStore
embedding_wrapper = SentenceTransformerEmbeddings(embed_model)


Ahora es posible hacer el "upsert" (insertar o actualizar datos)

In [41]:
## Upsert
# Namespace passed to the vector store for both the upload and the later queries.
namespace = "espacio"

# Create the vector store from the chunks, using the embedding wrapper,
# targeting the index and namespace defined above.
docsearch = PineconeVectorStore.from_documents(
    documents=documents,
    index_name=index_name,
    embedding=embedding_wrapper,
    namespace=namespace
)
print(f"Upsert de los embeddings al índice '{index_name}'")

Upsert de los embeddings al índice 'cvs-ceia'


## Búsqueda de documentos en la base de datos

- Se realiza una prueba utilizando similarity search.

In [42]:
# Vector-store handle on the same index and namespace; used for the
# similarity searches that follow.
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embedding_wrapper,
    namespace=namespace,
)

In [43]:
# Quick check: ask the vector store for the 2 chunks most similar to the query.
query = "Contact information of Leandro"
vectorstore.similarity_search(query, k=2)

[]

## Juntando LLM con el contexto

- Basado en https://python.langchain.com/docs/tutorials/rag/

### Instanciación del LLM

In [31]:
# Groq-hosted chat model that generates the answers.
llm = ChatGroq(model="llama3-8b-8192")

- Se define un prompt estándar.
- Se crea un wrapper para hacer retrieve a partir de los datos almacenados en Pinecone.
- Se crea un wrapper para introducir el contexto y llamar al LLM.

In [69]:
# A ready-made template from the hub could be used instead:
# prompt = hub.pull("rlm/rag-prompt")


# Custom prompt template. The template text has three placeholders --
# {history}, {context} and {question} -- and all three are declared in
# input_variables so the declaration matches the template.
prompt = PromptTemplate(
    input_variables=["context", "question", "history"],
    template="""
    You are a knowledge assistant. Based on the context below, provide a concise and accurate answer to the user's query.
    Be brief, maximum 100 words.
    When applicable, the output should be in items (using "-" to start an item).

    Conversation History:
    {history}
    ---
    Context:
    {context}
    ---
    Question: {question}
    Answer:
    """
)


# State schema shared by the graph nodes
class State(TypedDict):
    question: str  # user question; read by retrieve and generate
    context: List[Document]  # documents written by retrieve, read by generate
    answer: str  # LLM reply text written by generate
    history: List[str]  # earlier "Q: ... A: ..." entries; generate appends to it


# Retrieval node
def retrieve(state: State):
    """Fetch the two stored chunks most similar to the question.

    Args:
        state: Graph state; only ``question`` is read.

    Returns:
        A state update of the form ``{"context": <search results>}``.
    """
    question = state["question"]
    matches = vectorstore.similarity_search(question, k=2)
    return {"context": matches}


def generate(state: State):
    """Answer the question with the LLM, using retrieved context and history.

    Args:
        state: Graph state. ``question`` and ``context`` are required;
            ``history`` is optional and treated as empty when it is missing
            or ``None``.

    Returns:
        A state update with ``answer`` (the LLM reply text) and ``history``
        (the history list including the new "Q: ... A: ..." entry).

    Side effects:
        When the caller supplied a ``history`` list, the new entry is appended
        to that same list object, so code holding a reference to it also sees
        the update.
    """
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])

    # A state without "history" would raise KeyError on direct indexing;
    # fall back to an empty history instead.
    history_log = state.get("history")
    if history_log is None:
        history_log = []

    # Flatten the history into a single string for the prompt
    history = "\n".join(history_log)

    # Fill the prompt with the question, the retrieved context and the history
    messages = prompt.invoke({
        "question": state["question"],
        "context": docs_content,
        "history": history
    })

    response = llm.invoke(messages)

    # Record this turn so later calls can see it
    history_log.append(f"Q: {state['question']} A: {response.content}")

    # Return the history as well so the update is part of the graph state even
    # when the list was created here.
    return {"answer": response.content, "history": history_log}

# Wire the graph (START -> retrieve -> generate) and compile it
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

Se prueba haciéndole una pregunta sobre mí

In [71]:
# Ask about the CV; the seeded history entry carries an instruction for the model.
response = graph.invoke({"question": "Tell me about Leandro's work experience","history":["When you name Leandro Saraco you must abbreviate it as Leandro S."]})
print(response["answer"])

Leandro S. has 6+ years of experience as a Software Engineer at a semiconductor company. His role involves developing EDA software to reduce manual efforts during integrated circuit design. He has worked with teams worldwide, handling global collaborations across different time zones.


## Chatbot usando streamlit

> Ver archivo chatbot_rag.py

In [None]:
## NOTE: THIS CODE MUST LIVE INSIDE A .py FILE TO BE RUN BY STREAMLIT

# Seed the graph state kept in the Streamlit session the first time the
# script runs for that session
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = {
        "question": "",
        "context": [],
        "answer": "",
        "history": [],  # Empty history
    }


def generate_response(input_text):
    """Run one chat turn through the graph and return the answer text.

    The question is written into the state dict kept in the Streamlit session,
    and that dict is passed whole to ``graph.invoke``. The graph nodes handle
    the retrieval from the vector store and the appending of the turn to the
    history.

    Args:
        input_text: The user's question.

    Returns:
        The ``"answer"`` entry of the graph's result.
    """
    session_state = st.session_state.conversation_history
    session_state["question"] = input_text
    return graph.invoke(session_state)["answer"]



# Streamlit UI setup
st.title("Chatbot con LLaMA 3")
st.subheader("¡Hazme una pregunta!")

# Text box for the user's question (empty by default)
user_input = st.text_input("Usuario:", "")

# Only query the graph once the user has typed something
if user_input:
    response = generate_response(user_input)
    st.write(f"**Chatbot**: {response}")

2024-11-26 17:43:27.015 
  command:

    streamlit run /home/idk/git/ceia/LLM_fiuba/.venv/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
