# Open Source RAG - Leveraging Hugging Face Endpoints through LangChain


# Task 1: Install required libraries

In [2]:
pip install -qU langchain-huggingface langchain-community faiss-cpu

Note: you may need to restart the kernel to use updated packages.


# Task 2: Set Environment Variables

In [3]:
import getpass
import os

# Ask for the Hugging Face token interactively (the input is not echoed) and
# publish it through the HF_TOKEN environment variable for the cells below.
os.environ.update({"HF_TOKEN": getpass.getpass("HuggingFace Write Token: ")})

# Task 3: Creating LangChain components powered by the endpoints

## HuggingFaceEndpoint for LLM

In [4]:
# URL of the Hugging Face Inference Endpoint that serves the LLM; it is passed
# as `endpoint_url` when the LLM client is created. Replace with your own endpoint.
YOUR_LLM_ENDPOINT_URL = "https://hafvk9pb2bx15vzu.us-east-1.aws.endpoints.huggingface.cloud"

In [6]:
# Import from the dedicated `langchain-huggingface` partner package (installed
# in Task 1); the `langchain_community.llms` version of this class is deprecated.
from langchain_huggingface import HuggingFaceEndpoint

# LLM client bound to the inference endpoint. The sampling settings below are
# sent with every generation request.
hf_llm = HuggingFaceEndpoint(
    endpoint_url=YOUR_LLM_ENDPOINT_URL,
    max_new_tokens=512,  # upper bound on generated tokens per call
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,  # close to greedy decoding
    repetition_penalty=1.03,
    huggingfacehub_api_token=os.environ["HF_TOKEN"],
)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /Users/waseem/.cache/huggingface/token
Login successful


In [7]:
# Smoke-test the endpoint with a plain prompt (no chat template is applied here).
hf_llm.invoke("Hello, how are you?")

" I hope you're having a great day! I just wanted to say that I'm really enjoying your blog and the information you share. It's always great to learn something new and I appreciate the effort you put into creating such a valuable resource. Keep up the good work! Best regards, [Your Name]\nI hope this message is helpful. Let me know if you have any questions or need further assistance. Thank you for your time and consideration. Best regards, [Your Name]\nI hope this message is helpful. Let me know if you have any questions or need further assistance. Thank you for your time and consideration. Best regards, [Your Name]\nI hope this message is helpful. Let me know if you have any questions or need further assistance. Thank you for your time and consideration. Best regards, [Your Name]\nI hope this message is helpful. Let me know if you have any questions or need further assistance. Thank you for your time and consideration. Best regards, [Your Name]\nI hope this message is helpful. Let me

## Now we can add a RAG-style prompt using Llama 3 Instruct's prompt templating!

In [8]:
from langchain_core.prompts import PromptTemplate

# Prompt made of a system turn, a user turn carrying the {query} and {context}
# placeholders, and an opened assistant turn for the model to complete.
# Each turn is delimited by the <|start_header_id|> / <|end_header_id|> /
# <|eot_id|> markers. The backslash right after the opening quotes keeps the
# string from starting with a newline.
RAG_PROMPT_TEMPLATE = """\
<|start_header_id|>system<|end_header_id|>
You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>

<|start_header_id|>user<|end_header_id|>
User Query:
{query}

Context:
{context}<|eot_id|>

<|start_header_id|>assistant<|end_header_id|>
"""

# LangChain prompt object; {query} and {context} are filled in at invoke time.
rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

In [9]:
# Compose prompt and LLM into one runnable: the formatted prompt is piped to the endpoint.
rag_chain = rag_prompt | hf_llm

In [11]:
# Sanity check with a hand-written context, before any retriever is involved.
rag_chain.invoke({"query" : "How old is Carl?", "context" : "Carl is a sweet dude, he's 40."})

'According to the context, Carl is 40 years old.'

## HuggingFaceEndpointEmbeddings for embeddings

In [13]:
from langchain_huggingface import HuggingFaceEndpointEmbeddings

# URL of the Hugging Face Inference Endpoint that serves the embedding model.
YOUR_EMBED_MODEL_URL = "https://ojuuf3ohxq99ad1c.us-east-1.aws.endpoints.huggingface.cloud"

# Embeddings client bound to that endpoint, authenticated with the same token
# as the LLM client.
hf_embeddings = HuggingFaceEndpointEmbeddings(
    huggingfacehub_api_token=os.environ["HF_TOKEN"],
    task="feature-extraction",
    model=YOUR_EMBED_MODEL_URL,
)

In [14]:
import numpy as np
from numpy.linalg import norm

def cosine_similarity(phrase_1, phrase_2, embeddings=None):
  """Return the cosine similarity between the embeddings of two phrases.

  Args:
    phrase_1: First text to embed.
    phrase_2: Second text to embed.
    embeddings: Object exposing ``embed_documents(list_of_texts)``. When
      omitted, the notebook-level ``hf_embeddings`` client is used.

  Returns:
    The cosine of the angle between the two embedding vectors, as a float.
  """
  embedder = hf_embeddings if embeddings is None else embeddings
  # Embed both phrases in one request instead of two separate round trips.
  vec_1, vec_2 = embedder.embed_documents([phrase_1, phrase_2])
  return float(np.dot(vec_1, vec_2) / (norm(vec_1) * norm(vec_2)))

In [15]:
# Two sentences with closely related meaning.
cosine_similarity("I love my fluffy dog!", "I adore this furry puppy!")


0.8903063446222079

In [16]:
# Two unrelated sentences. NOTE(review): the recorded score is still fairly
# high, so these scores are probably best compared relative to each other.
cosine_similarity("I love my fluffy dog!", "Eating pizza is the worst! Yuck!")


0.743020791930313

# Task 4: Preparing Data!

In [18]:
!git clone https://github.com/dbredvick/paul-graham-to-kindle.git

Cloning into 'paul-graham-to-kindle'...
remote: Enumerating objects: 36, done.
remote: Counting objects: 100% (36/36), done.
remote: Compressing objects: 100% (33/33), done.
remote: Total 36 (delta 3), reused 31 (delta 1), pack-reused 0
Receiving objects: 100% (36/36), 2.35 MiB | 331.00 KiB/s, done.
Resolving deltas: 100% (3/3), done.


In [19]:
from langchain_community.document_loaders import TextLoader

# Read the essays file from the cloned repository. The encoding is pinned so
# the load does not depend on the platform's default text encoding.
document_loader = TextLoader(
    "./paul-graham-to-kindle/paul_graham_essays.txt",
    encoding="utf-8",
)
documents = document_loader.load()

In [20]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded text into chunks of at most 1000 characters, with a
# 30-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=30,
    chunk_size=1000,
)
split_documents = text_splitter.split_documents(documents)

# Show how many chunks were produced.
len(split_documents)

4265

In [21]:
from langchain_community.vectorstores import FAISS

# Number of chunks embedded per request. NOTE(review): presumably chosen to
# stay within the embedding endpoint's per-request batch limit; confirm
# against the endpoint configuration.
EMBED_BATCH_SIZE = 32

doc_batches = [
    split_documents[start:start + EMBED_BATCH_SIZE]
    for start in range(0, len(split_documents), EMBED_BATCH_SIZE)
]
if not doc_batches:
  # Fail here with a clear message instead of leaving `vectorstore` undefined.
  raise ValueError("split_documents is empty; there is nothing to index.")

# The first batch creates the index; the remaining batches are appended to it.
vectorstore = FAISS.from_documents(doc_batches[0], hf_embeddings)
for doc_batch in doc_batches[1:]:
  vectorstore.add_documents(doc_batch)

In [22]:
# Expose the FAISS index as a retriever, using the default retriever settings.
hf_retriever = vectorstore.as_retriever()

# Task 5: Simple LCEL RAG Chain

In [24]:
from operator import itemgetter
# Import from `langchain_core` (already used for PromptTemplate) rather than
# the legacy `langchain.schema.*` paths.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def _format_docs(docs):
  """Join the text of the retrieved documents into one context string."""
  return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: look up documents for the query, format them as plain text,
# fill the prompt, call the LLM and return the answer as a string.
lcel_rag_chain = (
    {
        "context": itemgetter("query") | hf_retriever | _format_docs,
        "query": itemgetter("query"),
    }
    | rag_prompt
    | hf_llm
    | StrOutputParser()
)


In [25]:
# Ask a question; the chain retrieves the context from the vector store itself.
lcel_rag_chain.invoke({"query" : "What is the best part of Silicon Valley?"})


'Based on the provided context, it seems that Paul Graham, the author, is discussing the shortcomings of Silicon Valley and suggesting ways to improve it. He mentions that the best part of Silicon Valley is not the physical buildings, but the people who make it Silicon Valley.\n\nHowever, he also criticizes the current state of Silicon Valley, saying that it\'s not very interesting and that the strip development is demoralizing. He suggests that the area needs improvement in terms of public transportation and that it should be designed to prioritize walking, cycling, and taking the train over driving.\n\nSo, the "best part" of Silicon Valley, according to Paul Graham, is the people, but the area itself has a lot of room for improvement.'