# LangChain RAG 201

Tentative techniques to explore:

1. MultiQueryRetriever
2. Contextual Compression
3. Ensemble Retrievers
4. Self-querying Retrievers
5. Time weighted vector store retrievers

In [None]:
%pip install --q langchain-community
%pip install --q tiktoken
%pip install --q chromadb
%pip install --q langchain
#################################
# Required for PaperSpace Gradient
# %pip install --q pysqlite3-binary
# %pip install --q typing-inspect==0.8.0 typing_extensions==4.5.0
# %pip install --q pydantic==1.10.8

In [None]:
# Print the version of the `python` executable found on the shell PATH.
!python --version
# Report NVIDIA GPU/driver status; presumably used to confirm a GPU is
# available to Ollama -- TODO confirm.
!nvidia-smi

### Configure SQLite

In [None]:
import sys

# Swap the standard-library sqlite3 module for pysqlite3 when it is available.
# The pysqlite3-binary install is optional (it is commented out in the install
# cell and only needed on PaperSpace Gradient), so fall back to the stock
# sqlite3 module instead of failing with ModuleNotFoundError.
try:
    __import__("pysqlite3")
except ImportError:
    pass  # pysqlite3 is not installed: keep using the built-in sqlite3.
else:
    # Later `import sqlite3` statements will now resolve to pysqlite3.
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

In [None]:
# One-time Ollama setup, kept as comments: the `%>` lines are shell commands
# to run manually in a terminal, not from this notebook. They install Ollama,
# start its server, and pull the models referenced in the experiment
# configuration cell.
# %> curl -fsSL https://ollama.com/install.sh | sh
# %> ollama serve
# %> ollama pull gemma:7b-instruct
# %> ollama pull nomic-embed-text
# %> ollama pull mistral:instruct
# %> ollama pull mixtral:instruct

# List the models Ollama has available locally.
!ollama list

## Experiment Configuration

In [2]:
# --- Models -----------------------------------------------------------
# Generation model: leave exactly one LLM_MODEL line uncommented.
# LLM_MODEL = "gemma:7b-instruct"
# LLM_MODEL = "mistral:instruct"
LLM_MODEL = "mixtral:instruct"
# Model used to embed documents and queries.
EMBEDDING_MODEL = "nomic-embed-text"

# --- Generation / tracing ---------------------------------------------
TEMPERATURE = 0.9
ENABLE_TRACING = False

# --- Document splitting -----------------------------------------------
# Chunk size is chosen per model family:
#   Gemma:            DOCUMENT_CHUNK_SIZE = 5000
#   Mistral/Mixtral:  DOCUMENT_CHUNK_SIZE = 7500
DOCUMENT_CHUNK_SIZE = 7500
CHUNK_OVERLAP = 100

### Test LLM generation

In [3]:
# Import Ollama from langchain_community: the `langchain.llms` path is a
# deprecated re-export of the same class.
from langchain_community.llms import Ollama
# Optional token streaming to stdout (also enable `callback_manager` below):
# from langchain.callbacks.manager import CallbackManager
# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Completion-style client for the configured Ollama model.
llm = Ollama(
    model=LLM_MODEL,
    #callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    temperature=TEMPERATURE,
)

# Smoke test: `.invoke` replaces the deprecated direct call `llm(...)`.
llm.invoke("Who are you?")

  warn_deprecated(


' I am a Large Language Model trained by Mistral AI. I was designed to generate human-like text based on the input that I receive. My purpose is to provide assistance, answer questions, and engage in conversation with users.'

In [None]:
# Optional: LangSmith API keys
import os
import getpass

os.environ["LANGCHAIN_TRACING_V2"] = str(ENABLE_TRACING)
if ENABLE_TRACING:
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LANGCHAIN_API_KEY")

### Embedding

In [None]:
from langchain_community.embeddings import OllamaEmbeddings

# Sample input for a quick embedding smoke test.
text = "Embed this text"

# Embedding client for the model named in the experiment configuration.
embeddings_nomic = OllamaEmbeddings(model=EMBEDDING_MODEL)

# Embed the sample and display the length of the returned vector.
embed = embeddings_nomic.embed_query(text)
len(embed)

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Source pages for the knowledge base.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# `docs` keeps one list of loaded documents per URL; `doc_list` is the same
# content flattened into a single list, in URL order.
docs = []
doc_list = []
for url in urls:
    loaded = WebBaseLoader(url).load()
    docs.append(loaded)
    doc_list.extend(loaded)

## Splitting

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Splitter whose chunk size and overlap are measured with a tiktoken encoder,
# using the values from the experiment configuration.
splitter_options = {
    "chunk_size": DOCUMENT_CHUNK_SIZE,
    "chunk_overlap": CHUNK_OVERLAP,
}
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(**splitter_options)

# Break every loaded document into chunks.
doc_splits = text_splitter.split_documents(doc_list)

In [None]:
import tiktoken

# Report the token count of each chunk. A single encoding lookup is enough:
# the original assigned `encoding` twice and the first value was never used.
# "cl100k_base" is the encoding tiktoken maps "gpt-3.5-turbo" to, so the
# counts are unchanged.
# NOTE(review): the Ollama models presumably use their own tokenizers, so
# treat these counts as estimates -- confirm if exact sizes matter.
encoding = tiktoken.get_encoding("cl100k_base")
for d in doc_splits:
    print(f"The document is {len(encoding.encode(d.page_content))} tokens")

## Index

In [None]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import Chroma

# Embed the chunks with the nomic embedding client and store them in a Chroma
# collection named "rag-chroma".
vector_store = Chroma.from_documents(
    doc_splits,
    embeddings_nomic,
    collection_name="rag-chroma",
)

# Retriever view over the store, created with default settings.
retriever = vector_store.as_retriever()

In [None]:
# Sanity-check retrieval. `invoke` is the Runnable entry point, replacing the
# deprecated `get_relevant_documents`.
retriever.invoke("What is task decomposition")

### Build the RAG chain

In [None]:
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

# Prompt that restricts the answer to the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chat model for answering. Pass the configured TEMPERATURE so this model
# follows the experiment configuration, as the test LLM does.
model_local = ChatOllama(model=LLM_MODEL, temperature=TEMPERATURE)

# RAG pipeline: the retriever fills {context} from the input question, and
# RunnablePassthrough forwards that question unchanged into {question}. The
# prompt then goes to the chat model and the reply is parsed to a string.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model_local
    | StrOutputParser()
)


## Prompt Queries

In [None]:
# Run the full RAG chain on a sample question; the answer is displayed as the
# cell result.
question = "What is Task Decomposition?"
chain.invoke(question)