In [1]:
from dotenv import load_dotenv

# Pull API keys and other settings from a local .env file into the process
# environment. load_dotenv() reports whether anything was loaded; say so in
# both cases so a missing file is noticed here rather than as an
# authentication error in a later cell.
if load_dotenv():
    print("Dotenv loaded successfully")
else:
    print("Warning: no .env values were loaded; relying on variables already set in the environment")

Dotenv loaded successfully


In [2]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

# Embedding model used for both indexing and querying.
# Tip: NVIDIAEmbeddings.get_available_models() lists the alternatives.
# NOTE(review): truncate="END" presumably trims over-long inputs from the end
# instead of raising an error -- confirm against the NVIDIA endpoint docs.
embedder = NVIDIAEmbeddings(model="nvidia/nv-embed-v1", truncate="END")

# Chat model that answers questions (and is reused later as the ragas judge).
# Tip: ChatNVIDIA.get_available_models() lists the alternatives.
instruct_llm = ChatNVIDIA(model="meta/llama-3.3-70b-instruct")

In [3]:
from langchain_community.document_loaders import ArxivLoader

# arXiv identifiers of the papers to index.
ARXIV_PAPER_IDS = [
    "1706.03762",  # Attention Is All You Need
    "1810.04805",  # BERT Paper
]


def strip_references(text, marker="References"):
    """Return `text` cut off where the bibliography starts.

    A `marker` standing alone on its own line (the section heading) is
    preferred, so that an inline mention of the word earlier in the body does
    not truncate the paper. If no such heading exists, fall back to the first
    occurrence of `marker` anywhere; if the marker is absent, return `text`
    unchanged.
    """
    heading_at = text.find(f"\n{marker}\n")
    if heading_at != -1:
        # Keep the newline that precedes the heading, drop the heading itself.
        return text[:heading_at + 1]
    first_at = text.find(marker)
    return text if first_at == -1 else text[:first_at]


# One list of Documents per paper (the shape later cells index into).
documents = []
for paper_id in ARXIV_PAPER_IDS:
    loaded = ArxivLoader(query=paper_id).load()
    if not loaded:
        # Fail with a message that names the paper instead of a bare
        # IndexError on loaded[0] below.
        raise ValueError(f"ArxivLoader returned no documents for query {paper_id!r}")
    # Only the first Document of each result is trimmed.
    loaded[0].page_content = strip_references(loaded[0].page_content)
    documents.append(loaded)

In [4]:
# Inspect what was loaded: one list of Document objects per arXiv query.
documents

[[Document(metadata={'Published': '2023-08-02', 'Title': 'Attention Is All You Need', 'Authors': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin', 'Summary': 'The dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks in an encoder-decoder configuration. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer, based\nsolely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to be\nsuperior in quality while being more parallelizable and requiring significantly\nless time to train. Our model achieves 28.4 BLEU on the WMT 2014\nEnglish-to-German translation task, improving over the existing best results,\nincluding ensembles by over 2 BLEU. On the WMT 2014 English-to-French\nt

In [5]:
# Text of the second paper (BERT) as left by the reference-trimming step.
documents[1][0].page_content

'BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin\nMing-Wei Chang\nKenton Lee\nKristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout}@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT, which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be ﬁne-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspeciﬁc architecture modiﬁcations.\nBERT is conceptually simple and empirically\npowerful.\nIt obtains new state-of-the-art re-\n

In [8]:
from langchain_community.vectorstores import FAISS
from faiss import IndexFlatL2
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunker: pieces of at most 1000 characters with 100 characters of overlap,
# trying the separators in the order listed.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", ";", ",", " "],
    chunk_size=1000,
    chunk_overlap=100,
)

# Embed a throwaway string once to learn the embedding width, then create an
# empty store with a flat L2 (Euclidean distance) index of that width.
embed_dims = len(embedder.embed_query("test"))
vectorstore = FAISS(
    embedding_function=embedder,
    index=IndexFlatL2(embed_dims),
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    normalize_L2=False,
)

print("Chunking Documents")
# One list of chunks per paper, in the same order as `documents`.
splitted_documents = list(map(text_splitter.split_documents, documents))

# One temporary store per paper; each is folded into `vectorstore` below.
vecstores = [
    FAISS.from_documents(paper_chunks, embedder)
    for paper_chunks in splitted_documents
]

print("Merging Vectorstores")
for vecstore in vecstores:
    vectorstore.merge_from(vecstore)

Chunking Documents
Merging Vectorstores


In [9]:
# Sanity check: length in characters of the fourth chunk of the first paper,
# to compare against the splitter's chunk_size.
len(splitted_documents[0][3].page_content)

966

In [None]:
# Smoke test: retrieve chunks for a sample question from the in-memory store.
vectorstore.as_retriever().invoke("What is BERT?")

[Document(id='16a0f46a-2ab7-489a-aea6-cb5f6239fa2f', metadata={'Published': '2023-08-02', 'Title': 'Attention Is All You Need', 'Authors': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin', 'Summary': 'The dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks in an encoder-decoder configuration. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer, based\nsolely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to be\nsuperior in quality while being more parallelizable and requiring significantly\nless time to train. Our model achieves 28.4 BLEU on the WMT 2014\nEnglish-to-German translation task, improving over the existing best results,\nincluding ensembles by over 2 

In [11]:
# Persist the merged vector store to the local "vector_index" path so it can
# be reloaded without re-embedding the papers.
vectorstore.save_local("vector_index")

In [12]:
# Load the vector store back from disk, replacing the in-memory `vectorstore`.
# SECURITY: allow_dangerous_deserialization=True is an explicit opt-in.
# NOTE(review): the saved store is presumably unpickled on load, and pickle can
# execute arbitrary code -- only load an index directory you created yourself
# (here, the one written by save_local), never one from an untrusted source.
vectorstore = FAISS.load_local("vector_index", embedder, allow_dangerous_deserialization=True)

In [None]:
# Same smoke test against the reloaded store, to compare with the results
# obtained before saving.
vectorstore.as_retriever().invoke("What is BERT?")

[Document(id='16a0f46a-2ab7-489a-aea6-cb5f6239fa2f', metadata={'Published': '2023-08-02', 'Title': 'Attention Is All You Need', 'Authors': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser, Illia Polosukhin', 'Summary': 'The dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks in an encoder-decoder configuration. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer, based\nsolely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to be\nsuperior in quality while being more parallelizable and requiring significantly\nless time to train. Our model achieves 28.4 BLEU on the WMT 2014\nEnglish-to-German translation task, improving over the existing best results,\nincluding ensembles by over 2 

In [14]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.document_transformers import LongContextReorder
from langchain_core.runnables import RunnableLambda

# Wrap LongContextReorder.transform_documents as a Runnable so it can be piped
# between the retriever and docs2str in the chain.
# NOTE(review): presumably this reorders chunks so the most relevant ones sit
# at the start and end of the context -- confirm against the LangChain docs.
long_reorder = RunnableLambda(LongContextReorder().transform_documents)

def docs2str(docs, title="Document"):
    """Render retrieved chunks as a single context string, one chunk per line.

    Every chunk is written as "[Quote from <name>] <text>" followed by a
    newline. <name> is the chunk's 'Title' metadata entry, or `title` when the
    chunk has no such entry; the prefix is omitted when the name is empty.
    Objects without a `page_content` attribute are rendered with str().
    """
    def render(chunk):
        source = getattr(chunk, 'metadata', {}).get('Title', title)
        prefix = f"[Quote from {source}] " if source else ""
        return prefix + getattr(chunk, 'page_content', str(chunk)) + "\n"

    return "".join(map(render, docs))

# Prompt for grounded answering. The fragments below are adjacent string
# literals that Python joins with nothing in between, so each fragment must
# carry its own leading space or newline; without one, two instructions run
# together ("...with a greeting.Answer the question...").
context_prompt = ChatPromptTemplate.from_template(
    "If the question is a greeting, respond with a greeting."
    " Answer the question using only the context."
    "\n\nRetrieved Context: {context}"
    "\n\nUser Question: {question}"
    "\nAnswer the user conversationally. User is not aware of context."
)

# The raw user question feeds both prompt variables: it is sent through
# retriever -> reorder -> docs2str to build {context}, and passed through
# unchanged as {question}.
prompt_inputs = {
    'context': vectorstore.as_retriever() | long_reorder | docs2str,
    'question': (lambda question: question),
}

# Fill the prompt, call the chat model, and reduce its message to plain text.
chain = prompt_inputs | context_prompt | instruct_llm | StrOutputParser()

In [15]:
def chat_gen(message, history=None, return_buffer=True):
    """Stream the chain's answer to `message`.

    Args:
        message: The user's question; passed unchanged to `chain.stream`.
        history: Earlier chat turns as supplied by the chat UI. Accepted so
            the function matches the chat-callback signature; not used, since
            each question is answered independently. Defaults to None rather
            than a shared mutable list.
        return_buffer: When True, yield the full text accumulated so far after
            every token (what a UI that re-renders the whole message needs);
            when False, yield each token on its own.

    Yields:
        str: The accumulated answer or the latest token, per `return_buffer`.
    """
    buffer = ""

    for token in chain.stream(message):
        buffer += token
        yield buffer if return_buffer else token

In [23]:
import gradio as gr

# Greeting shown as the assistant's first message in the chat window.
initial_msg = (
    "Hello! I am a document chat agent here to help the user!"
    " I have access to the following Papers: [Attention Is All You Need, BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding]"
    "\n\nHow can I help you?"
)

# NOTE(review): the recorded output shows a warning pointing at this line --
# possibly a deprecation of the pair-style history format; confirm against the
# installed Gradio version before changing it.
chatbot = gr.Chatbot(value = [[None, initial_msg]])
demo = gr.ChatInterface(chat_gen, chatbot=chatbot).queue()

# SECURITY: share=True publishes the app on a public gradio.live URL; anyone
# with the link can send queries that consume your API quota. Use share=False
# unless a public link is really needed.
try:
    demo.launch(debug=True, share=True, show_api=False)
finally:
    # Always release the server and tunnel, whether launch() returned
    # normally or was aborted; any exception still propagates unchanged.
    demo.close()

  chatbot = gr.Chatbot(value = [[None, initial_msg]])


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://7d1ec2828fde12979f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://7d1ec2828fde12979f.gradio.live
Closing server running on port: 7860


## Evaluar precisión de un modelo de LLM

In [26]:
# Fetch the chunks the retriever returns for the evaluation question and keep
# only their text, which is the input the evaluation sample needs.
documentos = vectorstore.as_retriever().invoke("What is BERT?")
retrived_contexts = [hit.page_content for hit in documentos]
print(retrived_contexts)

['BERT: Pre-training of Deep Bidirectional Transformers for\nLanguage Understanding\nJacob Devlin\nMing-Wei Chang\nKenton Lee\nKristina Toutanova\nGoogle AI Language\n{jacobdevlin,mingweichang,kentonl,kristout}@google.com\nAbstract\nWe introduce a new language representa-\ntion model called BERT, which stands for\nBidirectional Encoder Representations from\nTransformers. Unlike recent language repre-\nsentation models (Peters et al., 2018a; Rad-\nford et al., 2018), BERT is designed to pre-\ntrain deep bidirectional representations from\nunlabeled text by jointly conditioning on both\nleft and right context in all layers. As a re-\nsult, the pre-trained BERT model can be ﬁne-\ntuned with just one additional output layer\nto create state-of-the-art models for a wide\nrange of tasks, such as question answering and\nlanguage inference, without substantial task-\nspeciﬁc architecture modiﬁcations.\nBERT is conceptually simple and empirically\npowerful.\nIt obtains new state-of-the-art re-'

In [None]:
# Run the full RAG chain once (non-streaming) and keep the answer text so it
# can be scored in the evaluation below.
response = chain.invoke("What is BERT?")

"Hello. BERT stands for Bidirectional Encoder Representations from Transformers. It's a language representation model developed by Google that's used for natural language processing tasks. In simple terms, BERT is a powerful tool that helps computers understand human language better. How can I help you with BERT today?"

In [None]:
from ragas import SingleTurnSample
from ragas.metrics import LLMContextPrecisionWithoutReference

# Context-precision metric judged by an LLM. The judge here is instruct_llm,
# the same model that generated `response`.
# NOTE(review): per its name this variant needs no reference answer and
# presumably scores the retrieved contexts against the response -- confirm
# against the ragas docs.
context_precision = LLMContextPrecisionWithoutReference(llm=instruct_llm)

# A single question/answer turn together with the chunks retrieved for it.
sample = SingleTurnSample(
    user_input="What is BERT?",
    response=response,
    retrieved_contexts=retrived_contexts,
)


# Top-level await: relies on the notebook kernel supporting it. The awaited
# score is the cell's displayed result.
await context_precision.single_turn_ascore(sample)

0.7499999999625