In [None]:
# !pip install -U langchain langchain-community langchainhub openai chromadb bs4

In [None]:
# !pip install -U gpt4all

In [None]:
# pip install tiktoken

In [None]:
# %pip install gpt4all > /dev/null

In [None]:
# !pip install huggingface_hub

In [None]:
# !pip install faiss-gpu

In [None]:
# %pip install transformers --quiet

In [39]:
# required for ChatLiteLLM
# !pip install google-generativeai 
# pip install litellm

In [1]:
from getpass import getpass
import os
from langchain_community.llms import HuggingFaceHub
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.messages import AIMessage, HumanMessage

## Setting up HuggingFace Hub
https://python.langchain.com/docs/integrations/llms/huggingface_hub

Get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token

In [2]:
HUGGINGFACEHUB_API_TOKEN = getpass()

········


In [3]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

### Testing with a LLM
To make sure that the HF API works

In [4]:
question = "Who won the FIFA World Cup in the year 1994? "

template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])

In [5]:
prompt

PromptTemplate(input_variables=['question'], template="Question: {question}\n\nAnswer: Let's think step by step.")

In [6]:
repo_id = "google/flan-t5-xxl"

llm = HuggingFaceHub(
    repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_length": 64}
)
llm_chain = LLMChain(prompt=prompt, llm=llm)

print(llm_chain.run(question))



The 1994 FIFA World Cup was held in France. France won the 1994 FIFA World Cup. The answer: France.


## Setting up for Q&A
https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa

### Load

USSD GIS Data page

In [7]:
# Only keep post title, headers, and content from the full HTML.
bs4_strainer = bs4.SoupStrainer(class_=("s-lib-box-content"))
loader = WebBaseLoader(
    web_paths=("https://ucsd.libguides.com/gis/gisdata",),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

In [8]:
len(docs[0].page_content) # number of str char

5321

In [9]:
# print(docs[0].page_content)

Now we have the GIS data page saved as `docs`

### Split

We’ll split our documents into chunks of 1000 characters with 200 characters of overlap between chunks. The overlap helps mitigate the possibility of separating a statement from important context related to it

In [10]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

In [11]:
len(all_splits) # total number of splitted doc

10

In [12]:
len(all_splits[0].page_content)

39

In [13]:
all_splits[1]

Document(page_content="UC San Diego affiliate can browse the Library's geospatial data holdings that are hosted on the X drive in our Data & GIS Lab. You must be connected to UCSD VPN in order to browse. This is for BROWSING ONLY. You CANNOT DOWNLOAD any files. You must visit the Lab to access files until we are able to publish the data into an online portal (currently in development)\nThe data is organized geographically, roughly by continent, with topical data arrangement inside each folder.\nRemember, if you are looking for data on a smaller area of geography, be sure to check the folder with the larger geography first.\xa0 An example would be if you are looking for data for only one of the states in the United States, you would want to look in the United States Data folder as well as the North America data folder.", metadata={'source': 'https://ucsd.libguides.com/gis/gisdata', 'start_index': 42})

### Store
https://python.langchain.com/docs/integrations/text_embedding/huggingfacehub

In [14]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import Chroma, FAISS

embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
)
# vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)
vectorstore = FAISS.from_documents(documents=all_splits, embedding=embeddings)


In [15]:
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x7f93a66f41c0>

### Retrieve relevant documents

Options:
1. Chroma (example from LangChain)
2. FAISS (Facebook AI Similarity Search) - using this because it has score function


In [20]:
# Retrieve relevant documents given a query, i.e. question
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})

In [21]:
query = "What is X drive?"

In [22]:
retrieved_docs = retriever.get_relevant_documents(query)

In [23]:
len(retrieved_docs) # should have the same number as `k` specified above

2

In [24]:
print(retrieved_docs[1].page_content)

UC San Diego affiliate can browse the Library's geospatial data holdings that are hosted on the X drive in our Data & GIS Lab. You must be connected to UCSD VPN in order to browse. This is for BROWSING ONLY. You CANNOT DOWNLOAD any files. You must visit the Lab to access files until we are able to publish the data into an online portal (currently in development)
The data is organized geographically, roughly by continent, with topical data arrangement inside each folder.
Remember, if you are looking for data on a smaller area of geography, be sure to check the folder with the larger geography first.  An example would be if you are looking for data for only one of the states in the United States, you would want to look in the United States Data folder as well as the North America data folder.


#### To view scores

Note that **the returned distance score is L2 distance. Therefore, a lower score is better.**

In [25]:
vectorstore.similarity_search_with_score(query)

[(Document(page_content='Browse the GIS data holdings on X drive', metadata={'source': 'https://ucsd.libguides.com/gis/gisdata', 'start_index': 1}),
  0.84911835),
 (Document(page_content="UC San Diego affiliate can browse the Library's geospatial data holdings that are hosted on the X drive in our Data & GIS Lab. You must be connected to UCSD VPN in order to browse. This is for BROWSING ONLY. You CANNOT DOWNLOAD any files. You must visit the Lab to access files until we are able to publish the data into an online portal (currently in development)\nThe data is organized geographically, roughly by continent, with topical data arrangement inside each folder.\nRemember, if you are looking for data on a smaller area of geography, be sure to check the folder with the larger geography first.\xa0 An example would be if you are looking for data for only one of the states in the United States, you would want to look in the United States Data folder as well as the North America data folder.", me

### Generate

#### Using HuggingFace LLMs

Note: 
- Use [`HuggingFacePipeline`](https://python.langchain.com/docs/integrations/llms/huggingface_pipelines) to use [LCEL chain](https://python.langchain.com/docs/expression_language/why).

- Using `HuggingFaceHub` requires another way of creating chain, see [here](https://python.langchain.com/docs/integrations/llms/huggingface_hub)

In [26]:
repo_id = "google/flan-t5-xxl"

llm = HuggingFaceHub(
    repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 1000}
)



In [28]:
llm.invoke("What is UCSD")

'University of California, San Diego'

In [29]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser())

In [30]:
rag_chain.invoke("what is X drive")

"X drive is a drive where UC San Diego affiliate can browse the Library's geospatial data holdings."

In [31]:
from langchain.chains import LLMChain

template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""

prompt = PromptTemplate(template=template, input_variables=["question", "context"])

llm_chain = LLMChain(prompt=prompt, llm=llm)

In [32]:
prompt

PromptTemplate(input_variables=['context', 'question'], template='Use the following pieces of context to answer the question at the end.\nIf you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\nUse three sentences maximum and keep the answer as concise as possible.\nAlways say "thanks for asking!" at the end of the answer.\n\n{context}\n\nQuestion: {question}\n\nHelpful Answer:')

In [33]:
print(llm_chain.run({"question": "What is X drive at UC San Diego Library? Explain what is in it?", 
                     "context": retriever | format_docs}))

X drive is a hard drive in the computer system that contains the operating system and other software that is required to use the library's computers and network.


#### Thoughts
Notice how it doesn't say "thanks for asking" at the end of the answer, this is because the model is meant for question-answering tasks. Feeding in context is not guaranteed to work as well because it largely depends on the data this model was trained on. So we either:
1. Train/finetune this question-answering model
2. Use another type of LLM model that generate responses from context but not actually "answering" - perhaps this is called a chat model?

Some more useful information https://stackoverflow.com/questions/76963864/low-score-and-wrong-answer-for-flan-t5-xxl-question-answering-task

In [34]:
retriever | format_docs

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceInferenceAPIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f93a66f41c0>, search_kwargs={'k': 2})
| RunnableLambda(format_docs)

#### Using chat models
https://python.langchain.com/docs/integrations/chat/litellm

In [46]:
repo_id = "HuggingFaceH4/zephyr-7b-beta"

llm = HuggingFaceHub(
    repo_id=repo_id, 
    task="text-generation",
    model_kwargs={"temperature": 0.1, "max_length": 1000}
)



In [47]:
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

The context is: {context}

Question: {question}
"""

prompt = PromptTemplate(template=template, input_variables=["question", "context"])

llm_chain = LLMChain(prompt=prompt, llm=llm)

In [48]:
print(llm_chain.run({"question": "What is X drive?", 
                     "context": retriever | format_docs}))


Answer: X drive refers to a network storage device commonly used in organizations to share files among employees. It allows users to access and save files from any computer connected to the network. Thanks for asking!


In [49]:
prompt = hub.pull("rlm/rag-prompt")
example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()
example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]

In [50]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [51]:
for chunk in rag_chain.stream("What is X drive?"):
    print(chunk, end="", flush=True)

 X drive is a storage location for UC San Diego's geospatial data holdings, accessible through the Library's Data & GIS Lab. It can only be browsed while connected to UCSD VPN, and downloading is not allowed. The data is organized by continent and topic within each folder. It's recommended to check larger geographic folders for data within smaller areas.

Confirmed that Retriever + Chat Model works!!

## With Chat History

In [None]:
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [None]:
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

contextualize_q_chain = qa_prompt | llm | StrOutputParser()

def contextualized_question(input: dict):
    if input.get("chat_history"):
        return contextualize_q_chain
    else:
        return input["question"]


rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )
    | qa_prompt
    | llm
)

In [None]:
chat_history = []

question = "What is X drive?"
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=question), ai_msg])

second_question = "What can I find in the X drive?"
rag_chain.invoke({"question": second_question, "chat_history": chat_history})

### Adding chat history memory
https://python.langchain.com/docs/expression_language/how_to/message_history

For building web app:
1. https://www.youtube.com/watch?v=rxOeOD98AEU
2. https://www.youtube.com/watch?v=O6BB08Zo2uk
3. https://www.youtube.com/watch?v=-XirZSq6Wcs