In [2]:
from langchain.document_loaders import ArxivLoader

# Build the arXiv loader first, then fetch: at most 5 documents are loaded
# for the "Retrieval Augmented Generation" query.
arxiv_loader = ArxivLoader(
    query="Retrieval Augmented Generation",
    load_max_docs=5,
)
docs = arxiv_loader.load()

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings; lengths are measured with Python's built-in len(),
# i.e. in characters.
splitter_options = {
    "chunk_size": 1000,      # character length of a chunk
    "chunk_overlap": 200,    # character length of the overlap between chunks
    "length_function": len,  # function used to measure length
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded documents into chunks for embedding.
split_chunks = text_splitter.split_documents(docs)

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model configuration: run the sentence-transformers checkpoint on
# CPU and leave the produced embeddings un-normalized.
model_name = 'sentence-transformers/all-mpnet-base-v2'
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)




In [5]:
from langchain.vectorstores import FAISS

# Embed every chunk with the Hugging Face embeddings and index the vectors in FAISS.
faiss_vectorstore = FAISS.from_documents(documents=split_chunks, embedding=hf_embeddings)

In [10]:
import os
from getpass import getpass

from langchain.llms import HuggingFaceHub

# SECURITY: never hardcode an API token in a notebook -- it ends up in version
# control and in every shared copy. Any token that was ever committed here
# must be treated as leaked and revoked in the Hugging Face account settings.
# Read the token from the environment instead, and fall back to an interactive
# prompt (input is not echoed and is not stored in the notebook).
huggingfacehub_api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass(
    "Hugging Face API token: "
)

# Hosted Mistral-7B-Instruct model accessed through the Hugging Face Hub.
llm = HuggingFaceHub(
    repo_id='mistralai/Mistral-7B-Instruct-v0.1',
    huggingfacehub_api_token=huggingfacehub_api_token,
)

In [11]:
from langchain.chains import RetrievalQA

# Number of chunks the retriever is asked to return per query.
k = 4

# Expose the FAISS index as a retriever, then wire it together with the LLM
# into a question-answering chain.
retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": k})
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
  

In [12]:
# Calling the chain object directly emits a deprecation warning (visible in
# this cell's recorded output); `invoke` is the supported entry point and
# returns the same {'query': ..., 'result': ...} dict.
qa.invoke({"query": "What is Retrieval Augmented Generation?"})

  warn_deprecated(


{'query': 'What is Retrieval Augmented Generation?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nboost the quality of ﬁnal generation. To this end,\nCai et al. (2021) propose to unify the memory\nretriever and its downstream generation model\ninto a learnable whole. Such memory retrieval is\nend-to-end optimized for task-speciﬁc objectives.\n2.4\nIntegration\nData Augmentation\nThere are several ways to\nintegrate the retrieved external memory in gener-\nation. One straightforward way is data augmen-\ntation, which constructs some augmented inputs\nby concatenating spans from {⟨xr, yr⟩} with the\noriginal input x. By training on the augmented\ninputs, a generation model implicitly leans how\nto integrate the retrieved information. Despite the\nsimplicity, this kind of methods works efﬁciently\nin lots of tasks (Song et al., 2016; Weston et al.,\n2018; Bu

In [13]:
# Use `invoke` rather than calling the chain object directly (the direct-call
# form is deprecated in recent LangChain releases); the returned dict still
# carries the 'query' and 'result' keys.
qa.invoke({"query": "What process is used to update the model's weights?"})

{'query': "What process is used to update the model's weights?",
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nGuangjing Wang, Kai Zhang, Cheng Ji, Qiben Yan,\nLifang He, Hao Peng, Jianxin Li, Jia Wu, Ziwei Liu,\nPengtao Xie, Caiming Xiong, Jian Pei, Philip S. Yu,\nand Lichao Sun. 2023. A comprehensive survey on\npretrained foundation models: A history from bert to\nchatgpt. arXiv preprint arXiv:2302.09419.\nA\nExperimental Settings\nA.1\nTraining Hyperparameters\nWe take the ANCE initialized from T5Base3 (Xiong\net al., 2021; Ge et al., 2023) and Contriever4 (Izac-\nard et al., 2021)’s hyperparameters in the\naugmentation-adapted training. Specifically, we fix\nbatch size as 8, learning rate as 5e-6, and epochs as\n6 for ANCE while taking batch size as 8, learning\nrate as 1e-5, and epochs as 3 for Contriever. We\nchoose their best checkpoints based on th