# Setup

Install all needed dependencies.

In [None]:
# Install/upgrade the notebook's dependencies (transformers, datasets and
# faiss-cpu) through the IPython shell escape; --quiet suppresses pip's output.
!pip install --quiet --upgrade transformers datasets faiss-cpu

# Hugging Face RAG

In [None]:
# Hugging Face Hub checkpoint names used by the rest of the notebook.

# Checkpoint from which the RAG tokenizer, retriever and generator are loaded.
generator_model = "facebook/rag-sequence-nq"

# Checkpoint for the DPR context encoder (and its tokenizer) that embeds chunks.
encoder_model = "facebook/dpr-ctx_encoder-multiset-base"

Prepare the chunk dataset: every line of the source text becomes one chunk.

In [None]:
import urllib.request
from datasets import Dataset

link = "https://huggingface.co/ngxson/demo_simple_rag_py/raw/main/cat-facts.txt"
dataset_list = []

# Retrieve knowledge from the provided link; every non-empty line becomes a
# separate chunk. The context manager closes the HTTP response as soon as the
# body has been consumed instead of leaving the connection open.
with urllib.request.urlopen(link) as response:
    for raw_line in response:
        # Strip the trailing newline so it is not stored as part of the chunk
        # text, and skip blank lines so no empty chunks end up in the dataset.
        text = raw_line.decode("utf-8").strip()
        if text:
            dataset_list.append({"text": text, "title": "cats"})

print(f'Loaded {len(dataset_list)} entries')

# One row per chunk, with a "text" column and a constant "title" column.
dataset = Dataset.from_list(dataset_list)

Encode the dataset chunks into embeddings (vector representations) and append the embeddings to the dataset.

Add a FAISS index for similarity search.

In [None]:
from transformers import (
    DPRContextEncoder,
    DPRContextEncoderTokenizerFast,
)
import torch


# The notebook only runs inference: switch autograd off globally so encoder
# outputs can be converted with .numpy() without detaching them first.
torch.set_grad_enabled(False)

ctx_encoder = DPRContextEncoder.from_pretrained(encoder_model)
ctx_tokenizer = DPRContextEncoderTokenizerFast.from_pretrained(encoder_model)


def _embed_chunk(example):
    """Return ``{"embeddings": vector}`` for a single dataset row.

    The row's "text" field is tokenized and passed through the DPR context
    encoder; the resulting vector is returned as a NumPy array so that
    ``Dataset.map`` can store it in a new "embeddings" column.
    """
    # Truncate to the encoder's position limit: without truncation a chunk
    # longer than that limit would make the encoder call fail.
    encoded = ctx_tokenizer(
        example["text"],
        truncation=True,
        max_length=ctx_encoder.config.max_position_embeddings,
        return_tensors="pt",
    )
    # First [0]: first output of the encoder; second [0]: the only row of the
    # batch, because exactly one text is encoded per call.
    return {"embeddings": ctx_encoder(**encoded)[0][0].numpy()}


ds_with_embeddings = dataset.map(_embed_chunk)
# Build the similarity-search index over the freshly added column.
ds_with_embeddings.add_faiss_index(column='embeddings')


**Specify user query here**

In [None]:
# Question that is tokenized and passed to the RAG model in the generation
# cell; edit this string to ask something else.
input_query = "what is the name of the tiniest cat"

Generate a response to the user query using context retrieved from the dataset.

In [None]:
import torch
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration

tokenizer = RagTokenizer.from_pretrained(generator_model)

# Construct retriever to return relevant context from dataset
retriever = RagRetriever.from_pretrained(
    generator_model, index_name="custom", indexed_dataset=ds_with_embeddings
)

model = RagSequenceForGeneration.from_pretrained(generator_model, retriever=retriever)

# Run on the first GPU when CUDA is available; otherwise stay on the CPU so
# the cell does not fail on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Call the tokenizer directly instead of the deprecated prepare_seq2seq_batch;
# padding/truncation mirror what that helper applied internally.
input_dict = tokenizer(
    input_query, return_tensors="pt", padding="longest", truncation=True
).to(device)

generated = model.generate(input_ids=input_dict["input_ids"])
# Decode the first (and only) generated sequence back into text.
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])

# Cleaning Up

Delete the model and its inputs, then release the cached GPU memory.

In [None]:
import gc

import torch


# Drop every notebook-level name that still references tensors created on the
# model's device: the model itself, the tokenized query and the generated ids.
del model, input_dict, generated
# Collect unreachable objects first so their tensors are actually freed, then
# ask PyTorch to release the cached CUDA memory blocks.
gc.collect()
torch.cuda.empty_cache()