In [2]:
import ollama
from langchain_community.embeddings import OllamaEmbeddings

ollama.pull("mxbai-embed-large")  # https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
ollama_embeddings = OllamaEmbeddings(model="mxbai-embed-large",
                                     embed_instruction="Represent this sentence for searching relevant passages: ",
                                     query_instruction="Represent this sentence for searching relevant passages: ")

In [3]:
import pymupdf
from langchain_community.document_loaders import PyMuPDFLoader

pymupdf.TEXT_PRESERVE_LIGATURES = False
pymupdf.TEXT_DEHYPHENATE = True

attention_pdf = PyMuPDFLoader("attention.pdf")
transformer_pdf = PyMuPDFLoader("transformer++.pdf")
distributed_pdf = PyMuPDFLoader("distributed-repr.pdf")

In [4]:
from langchain_qdrant import Qdrant

api_key = "8182bd49c9b34c76d6b5526827c2374dac5bcbe425026b1a4b50aa8ccdcae8c7"
url = "http://localhost:6333"

# noinspection PyTypeChecker
qdrant = Qdrant.from_existing_collection(
    url=url,
    path=None,
    embedding=ollama_embeddings,
    api_key=api_key,
    collection_name="pdfs"
)
qdrant_receiver = qdrant.as_retriever()



In [5]:
found_docs = qdrant_receiver.invoke("What is attention mechanism?")

In [6]:
def add_to_qdrant():
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from qdrant_client import models

    # delete all
    qdrant.delete(models.Filter())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
    loaded = attention_pdf.load() + transformer_pdf.load() + distributed_pdf.load()
    all_split_docs = text_splitter.split_documents(loaded)
    # add to qdrant
    qdrant.add_documents(all_split_docs)

In [7]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_community.llms import Ollama
from langchain import hub

llm = Ollama(model="llama3")


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


prompt = hub.pull("rlm/rag-prompt-llama")

llm_chain = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain = RunnableParallel(
    {"context": qdrant_receiver, "question": RunnablePassthrough()}
).assign(answer=llm_chain)

In [8]:
output = dict()
curr_key = None
for chunk in rag_chain.stream("What it self-attention mechanism in transformers?"):
    for key in chunk:
        if key not in output:
            output[key] = chunk[key]
        else:
            output[key] += chunk[key]
        if key != curr_key:
            if isinstance(chunk[key], list):
                if not chunk[key]:
                    print(f"\n\n{key}: []", end="", flush=True)
                else:
                    print(f"\n\n{key}: [{type(chunk[key][0])}] len {len(chunk[key])}", end="", flush=True)
            else:
                print(f"\n\n{key}: {chunk[key]}", end="", flush=True)
        else:
            print(chunk[key], end="", flush=True)
        curr_key = key



question: What it self-attention mechanism in transformers?

context: [<class 'langchain_core.documents.base.Document'>] len 4

answer: [/INST] The self-attention mechanism in transformers allows each token in a sequence to attend to every other token and weigh their importance, helping the model understand relationships between tokens. This is different from traditional recurrent neural networks (RNNs) that only consider sequential information. By allowing tokens to attend to each other, self-attention enables the model to capture long-range dependencies and context. [/INST]

In [9]:
output

{'question': 'What it self-attention mechanism in transformers?',
 'context': [Document(page_content='We employ three types of regularization during training:\n7', metadata={'author': '', 'creationDate': 'D:20240410211143Z', 'creator': 'LaTeX with hyperref', 'file_path': 'attention.pdf', 'format': 'PDF 1.5', 'keywords': '', 'modDate': 'D:20240410211143Z', 'page': 6, 'producer': 'pdfTeX-1.40.25', 'source': 'attention.pdf', 'subject': '', 'title': '', 'total_pages': 15, 'trapped': '', '_id': 'afa40854-8d2e-43cb-baed-90eae89b5aab', '_collection_name': 'pdfs'}),
  Document(page_content='arXiv:2003.04974v1  [cs.CL]  2 Mar 2020', metadata={'author': '', 'creationDate': 'D:20200402090304Z', 'creator': 'LaTeX with hyperref package', 'file_path': 'transformer++.pdf', 'format': 'PDF 1.5', 'keywords': '', 'modDate': 'D:20200402090304Z', 'page': 0, 'producer': 'pdfTeX-1.40.17', 'source': 'transformer++.pdf', 'subject': '', 'title': '', 'total_pages': 7, 'trapped': '', '_id': '36a7b9c8-5fe5-4bb9-9b