In [89]:
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.schema import Document
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
import os
import faiss
import pathlib
import json

In [81]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

In [44]:
DATA_PATH = pathlib.Path.cwd().parent / "data/arxiv_papers.json"

with open(DATA_PATH, "r") as f:
    arxiv_papers = json.load(f)

In [45]:
arxiv_papers[0]

{'id': 'http://arxiv.org/abs/2501.13881v1',
 'guidislink': True,
 'link': 'http://arxiv.org/abs/2501.13881v1',
 'updated': '2025-01-23T17:56:07Z',
 'updated_parsed': [2025, 1, 23, 17, 56, 7, 3, 23, 0],
 'published': '2025-01-23T17:56:07Z',
 'published_parsed': [2025, 1, 23, 17, 56, 7, 3, 23, 0],
 'title': 'The machine learning platform for developers of large systems',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'http://export.arxiv.org/api/query?search_query=abs:RAG&start=0&max_results=1000&sortBy=lastUpdatedDate&sortOrder=descending',
  'value': 'The machine learning platform for developers of large systems'},
 'summary': 'The machine learning system in the form of Retrieval Augmented Generation\n(RAG) has developed steadily since about 2021. RAG could be observed as a\nversion of the knowledge transfer. In the studied case, the large computing\nsystems are observed as the application point of RAG, which includes large\nlanguage model (LLM), as a partner for

In [51]:
docs = []
for paper in arxiv_papers:
    title = paper["title"]
    abstract = paper["summary"]
    link = paper["link"]
    paper_content = f"Title: {title}\nAbstract: {abstract}"
    paper_content = paper_content.lower()

    docs.append(Document(page_content=paper_content,
                         metadata={"link": link}))

In [52]:
docs[0]

Document(metadata={'link': 'http://arxiv.org/abs/2501.13881v1'}, page_content='title: the machine learning platform for developers of large systems\nabstract: the machine learning system in the form of retrieval augmented generation\n(rag) has developed steadily since about 2021. rag could be observed as a\nversion of the knowledge transfer. in the studied case, the large computing\nsystems are observed as the application point of rag, which includes large\nlanguage model (llm), as a partner for the developing team. such an approach\nhas advantages during the development process and further in exploitation time.')

In [53]:
# Load a pre-trained embedding model
embedding_model = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

In [None]:
index = faiss.IndexFlatL2(len(embedding_model.embed_query("Hello LLM")))

In [None]:
vector_store = FAISS(
    embedding_function=embedding_model,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [55]:
vector_store.add_documents(docs)

['555e8837-697b-4db1-9468-59c808963eb6',
 '9d00b276-2f9e-463a-b991-2a7b165e6ec3',
 '3dde76df-4bc7-485e-81c8-d85e6516b320',
 '99c0d5e3-edff-4968-9b95-4f88a245b0b1',
 '774b4fc7-171b-478b-93d4-5f49c9eb22ce',
 'c9b3d448-2662-4f7c-ad32-968923c6e104',
 '5327666e-14b7-4bbd-bda6-3fbdb9248d88',
 'e843454f-6211-4cf6-9ae8-ff818e14f0c9',
 '917be31f-b33e-4c33-9fe6-63745104b935',
 'b90f9aac-b1ee-48ad-9b73-8ddb020ad3db',
 'ae5c566c-8df8-45aa-8b0e-93bd38d7abbe',
 'f0a3f5dc-4c4c-4d8a-b391-fbd87da86f05',
 '3190921c-ba57-441a-9c5c-f14f4cc0c4b8',
 '5c9fdd1d-967e-462d-be12-f6c44faa57fe',
 'd64227b9-d23b-414e-93a9-92ca3b7d4c65',
 'd32a57a0-f439-449f-8ec5-3a2d672b2882',
 'ea11655c-7dbd-4a9e-a6a4-a14f582e28ab',
 'dcae81f1-ff69-4c78-a85b-a4c4999bffea',
 '8ce62ed0-ede3-41b4-ac8a-f359ad270a91',
 '98d8161c-4c25-4883-af68-b3647956177a',
 'c13007ff-a1c9-4ea2-955e-709449542e79',
 'f88991c0-9c7e-41e5-b13e-4a5c965837da',
 '73303c3c-7b5d-45ba-92bc-8b1611eb5390',
 '1bf4746b-cc39-4e23-a8c0-e8ed1255d871',
 '5b1194d5-dfb3-

In [57]:
query = "Is there any LLM paper for autonomous driving?"

In [58]:
vector_store.similarity_search_with_relevance_scores(query)

[(Document(id='549b9eef-0e8d-401a-b9ce-930a92dd8962', metadata={'link': 'http://arxiv.org/abs/2501.03535v2'}, page_content='title: senserag: constructing environmental knowledge bases with proactive\n  querying for llm-based autonomous driving\nabstract: this study addresses the critical need for enhanced situational awareness in\nautonomous driving (ad) by leveraging the contextual reasoning capabilities of\nlarge language models (llms). unlike traditional perception systems that rely\non rigid, label-based annotations, it integrates real-time, multimodal sensor\ndata into a unified, llms-readable knowledge base, enabling llms to dynamically\nunderstand and respond to complex driving environments. to overcome the\ninherent latency and modality limitations of llms, a proactive\nretrieval-augmented generation (rag) is designed for ad, combined with a\nchain-of-thought prompting mechanism, ensuring rapid and context-rich\nunderstanding. experimental results using real-world vehicle-to-ev

In [59]:
retriever = vector_store.as_retriever()

In [84]:
# Define a prompt template with placeholders
prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template="""
        You are an intelligent assistant that helps answer questions about research papers. 
        The user will provide a question related to the papers you know about. 
        Use the information in the papers to respond in a concise and informative manner. 
        The question is: {query}

        Here is some information about the papers:
        {context}

        Provide a response based on the context above.
    """
)

In [90]:
llm = HuggingFaceHub(repo_id="tiiuae/falcon-7b-instruct")

In [None]:
qa_chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",  # or "map_reduce", "refine", etc. based on your needs
    retriever=retriever,  # Pass your retriever here
    prompt=prompt_template
)

# qa = RetrievalQA.from_chain_type(
#     llm=llm, chain_type="stuff",  retriever=retriever, prompt=prompt_template)

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  load_qa_chain(llm=llm, prompt=prompt_template,


ValidationError: 1 validation error for StuffDocumentsChain
retriever
  Extra inputs are not permitted [type=extra_forbidden, input_value=VectorStoreRetriever(tags...6A30>, search_kwargs={}), input_type=VectorStoreRetriever]
    For further information visit https://errors.pydantic.dev/2.10/v/extra_forbidden

In [None]:
query = "How is LLM used in autonomous driving?"
response = qa_chain.run({"query":query})
response

{'query': 'How is LLM used in autonomous driving?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\ntitle: senserag: constructing environmental knowledge bases with proactive\n  querying for llm-based autonomous driving\nabstract: this study addresses the critical need for enhanced situational awareness in\nautonomous driving (ad) by leveraging the contextual reasoning capabilities of\nlarge language models (llms). unlike traditional perception systems that rely\non rigid, label-based annotations, it integrates real-time, multimodal sensor\ndata into a unified, llms-readable knowledge base, enabling llms to dynamically\nunderstand and respond to complex driving environments. to overcome the\ninherent latency and modality limitations of llms, a proactive\nretrieval-augmented generation (rag) is designed for ad, combined with a\nchain-of-thought prompting mech

In [82]:
response['result']

"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\ntitle: senserag: constructing environmental knowledge bases with proactive\n  querying for llm-based autonomous driving\nabstract: this study addresses the critical need for enhanced situational awareness in\nautonomous driving (ad) by leveraging the contextual reasoning capabilities of\nlarge language models (llms). unlike traditional perception systems that rely\non rigid, label-based annotations, it integrates real-time, multimodal sensor\ndata into a unified, llms-readable knowledge base, enabling llms to dynamically\nunderstand and respond to complex driving environments. to overcome the\ninherent latency and modality limitations of llms, a proactive\nretrieval-augmented generation (rag) is designed for ad, combined with a\nchain-of-thought prompting mechanism, ensuring rapid and context-rich\nunderstanding. experime