# Retrieval-augmented generation with LangChain

## References: 
1. [LangChain docs - question-answering](https://python.langchain.com/docs/use_cases/question_answering/)
1. [Medium/the-techlife - Using HF and Cohere models](https://medium.com/the-techlife/using-huggingface-openai-and-cohere-models-with-langchain-db57af14ac5b)
1. [Lightning AI - Comparing different language models for question-answering](https://lightning.ai/pages/community/community-discussions/the-ultimate-battle-of-language-models-lit-llama-vs-gpt3.5-vs-bloom-vs/)

In [6]:
!pip install langchain
# !pip install openai
!pip install sentence_transformers
# !pip install chromadb

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting langchain
  Downloading langchain-0.0.265-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting langsmith<0.1.0,>=0.0.11 (from langchain)
  Downloading langsmith-0.0.22-py3-none-any.whl (32 kB)
Collecting openapi-schema-pydantic<2.0,>=1.2 (from langchain)
  Downloading openapi_schema_pydantic-1.2.4-py3-none-any.whl (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openapi-schema-pydantic, langsmith, langchain
Successfully installed langchain-0.0.265 langsmith-0.0.22 openap

In [2]:
from langchain import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import WebBaseLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import FAISS, Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain import HuggingFacePipeline
from sentence_transformers import SentenceTransformer



In [None]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

#Sentences are encoded by calling model.encode()
emb1 = model.encode("This is a red cat with a hat.")
emb2 = model.encode("Have you seen my red cat?")

cos_sim = util.cos_sim(emb1, emb2)
print("Cosine-Similarity:", cos_sim)

template = PromptTemplate(input_variables=["input"], template="{input}")
chain = LLMChain(llm=model, verbose=True, prompt=template)
chain("What is the meaning of life?")

In [None]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline

model= pipeline(model="google/flan-t5-large") #'text2text-generation'
model.save_pretrained("~/flan-t5-large")
llm = HuggingFacePipeline.from_model_id(model_id="~/flan-t5-large", task="text2text-generation", model_kwargs={"temperature":9})

In [11]:
template = PromptTemplate(input_variables=["input"], template="{input}")
chain = LLMChain(llm=llm, verbose=True, prompt=template)
chain("What is the meaning of life?")



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWhat is the meaning of life?[0m

[1m> Finished chain.[0m


{'input': 'What is the meaning of life?', 'text': 'love'}

In [1]:
# https://github.com/AndreasFischer1985/code-snippets/blob/master/py/LangChain_HuggingFace_examples.py
from transformers import pipeline
model= pipeline(model="sentence-transformers/all-MiniLM-L6-v2") 
model.save_pretrained("~/all-MiniLM-L6-v2")


caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


Downloading (…)lve/main/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

In [39]:
HUGGING_FACE_API_KEY = "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"

model = HuggingFacePipeline.from_model_id(
    model_id="sentence-transformers/all-MiniLM-L6-v2",
    task="text-generation",
    model_kwargs={"temperature": 0, "max_length": 512},
)

loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
embeddings = (
    HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2', 
        encode_kwargs={"max_new_tokens": 512}
    )
)

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of BertLMHeadModel were not initialized from the model checkpoint at sentence-transformers/all-MiniLM-L6-v2 and are newly initialized: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [40]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={'max_new_tokens': 512}, multi_process=False)

In [41]:
index = (
    VectorstoreIndexCreator(
        embedding=embeddings, 
        vectorstore_cls=Chroma,
        text_splitter=CharacterTextSplitter(chunk_size=100,chunk_overlap=0)
    ).from_loaders([loader])
)

In [25]:
model

HuggingFacePipeline(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7f41118b1990>, model_id='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'temperature': 0, 'max_length': 512}, pipeline_kwargs={})

In [26]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)

In [27]:
index

VectorStoreIndexWrapper(vectorstore=<langchain.vectorstores.chroma.Chroma object at 0x7f4111ae6f50>)

In [34]:
index.query("Compare chain of though vs tree of thought methods", llm=model)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2452 > 512). Running this sequence through the model will result in indexing errors
Input length of input_ids is 2452, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.
