<a href="https://colab.research.google.com/github/Fahad-Aslam/LangChain/blob/main/LangChain_Chains.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain

In [None]:
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.indexes.vectorstore import VectorstoreIndexCreator
from langchain.document_loaders import TextLoader

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so the notebook can read files stored there.
drive.mount('/content/drive')

# Load the source text file from Drive into `documents` via a TextLoader.
docs_loader = TextLoader('/content/drive/MyDrive/docs.txt')
documents = docs_loader.load()


# Loading the model

In [None]:
!pip install huggingface_hub
!pip install langchain

In [None]:
# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token

from getpass import getpass

# Ask for the token without echoing it. The explicit label replaces getpass's
# generic "Password: " prompt, and strip() removes whitespace or a trailing
# newline that is easily picked up when pasting the token.
HUGGINGFACEHUB_API_TOKEN = getpass("Hugging Face API token: ").strip()

In [None]:
import os

# Publish the token through the process environment under the
# HUGGINGFACEHUB_API_TOKEN variable.
os.environ.update(HUGGINGFACEHUB_API_TOKEN=HUGGINGFACEHUB_API_TOKEN)

In [None]:
from langchain import HuggingFaceHub
from langchain import PromptTemplate, LLMChain

In [None]:
# Hugging Face Hub repository of the model used as the LLM.
# Alternative kept for reference: "google/flan-t5-xxl"
repo_id = "tiiuae/falcon-7b-instruct"

In [None]:
# LLM served through the Hugging Face Hub wrapper for the repository in `repo_id`.
#
# Only generation settings that are consistent with each other are sent:
#   * "max_new_tokens" bounds the length of the generated answer. The former
#     "max_length": 300 contradicted it (300 total vs. 400 new tokens) and was removed.
#   * "num_return_sequences": 300 asked for hundreds of completions per prompt
#     although the chains below work with one answer per call; it was removed.
#   * "batch_size" is not a text-generation parameter and was removed.
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs={
        "temperature": 0.5,
        "max_new_tokens": 400,
        # Optional sampling/search controls, intentionally left disabled:
        #  "top_k": 50,
        #  "top_p": .95,
        #  "do_sample": True,
        #  "early_stopping": False,
        #  "num_beams": 1,
        #  "no_repeat_ngram_size": 3
    },
)

# Embedding the document and splitting it into chunks of up to 1000 characters

In [None]:
!pip install sentence_transformers
!pip install chromadb==0.3.29


In [None]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain import VectorDBQA


In [None]:
# Sentence-embedding model used to vectorize the document chunks.
# Other model names tried earlier:
#   "sentence-transformers/all-MiniLM-L6-v2", "tiiuae/falcon-7b-instruct"
embedding_model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
embeddings = SentenceTransformerEmbeddings(model_name=embedding_model_name)

In [None]:
# Cut the loaded documents into non-overlapping chunks with a chunk_size of 1000.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)



In [None]:
# Embed the chunks into a Chroma vector store, then expose the store through
# the retriever interface used by the cells below.
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    documents=texts,
)
retriever = vectorstore.as_retriever()



In [None]:
# Ask the retriever for the chunks it considers relevant to the question.
query = "what is finma"
docs = retriever.get_relevant_documents(query=query)

In [None]:
# Show a short, numbered preview of each retrieved chunk instead of dumping the
# raw list repr, which is hard to read once chunks approach 1000 characters.
for rank, doc in enumerate(docs, start=1):
    print(f"[{rank}] {doc.metadata} :: {doc.page_content[:200]!r}")

# Loading QA chain

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# 1 Quick Start

In [None]:
# Quick start: build a "stuff" QA chain and run it on the retrieved documents.
query = "what is finma"
chain = load_qa_chain(llm=llm, chain_type="stuff")
chain.run(question=query, input_documents=docs)

# 2 The stuff Chain

In [None]:
# QA chain of type "stuff", built on the hosted LLM.
chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
)

In [None]:
query = "how does cobra ensures liquidity for banks"
# `docs` was retrieved for an earlier, different question ("what is finma").
# Retrieve context for THIS question so the chain answers from relevant
# passages. A separate name is used so `docs` stays untouched for later cells.
stuff_docs = retriever.get_relevant_documents(query)
chain({"input_documents": stuff_docs, "question": query}, return_only_outputs=True)

### Custom Prompts

In [None]:
# Custom prompt for the "stuff" chain: same instructions as a plain QA prompt,
# but the model is asked to reply in Urdu. {context} receives the documents,
# {question} the user question.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer in URDU:"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
# `docs` was retrieved for "what is finma", while `query` may hold a different
# question by now. Fetch context for the current question so the prompt is
# filled with passages that actually relate to it.
prompt_docs = retriever.get_relevant_documents(query)
chain({"input_documents": prompt_docs, "question": query}, return_only_outputs=True)

# 3 The map_reduce Chain

In [None]:
# QA chain of type "map_reduce", built on the hosted LLM.
chain = load_qa_chain(
    llm=llm,
    chain_type="map_reduce",
)

In [None]:
# Run the current chain on the retrieved documents; only the outputs are returned.
query = "What is finma?"
map_reduce_inputs = {"input_documents": docs, "question": query}
chain(map_reduce_inputs, return_only_outputs=True)

### Intermediate Steps

In [None]:
# Same "map_reduce" chain, this time with return_map_steps switched on.
chain = load_qa_chain(
    llm=llm,
    chain_type="map_reduce",
    return_map_steps=True,
)

In [None]:
# Invoke the chain with the current documents and question; outputs only.
map_steps_inputs = {"input_documents": docs, "question": query}
chain(map_steps_inputs, return_only_outputs=True)

### Custom Prompts

In [None]:
# Prompt applied to each document portion: extract the relevant text, in Italian.
question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question.
Return any relevant text translated into italian.
{context}
Question: {question}
Relevant text, if any, in Italian:"""
QUESTION_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=question_prompt_template,
)

# Prompt that merges the extracted parts ({summaries}) into one final answer.
combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer italian.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.

QUESTION: {question}
=========
{summaries}
=========
Answer in Italian:"""
COMBINE_PROMPT = PromptTemplate(
    input_variables=["summaries", "question"],
    template=combine_prompt_template,
)

# Build the map_reduce chain with both custom prompts and run it.
chain = load_qa_chain(
    llm,
    chain_type="map_reduce",
    return_map_steps=True,
    question_prompt=QUESTION_PROMPT,
    combine_prompt=COMBINE_PROMPT,
)
custom_map_reduce_inputs = {"input_documents": docs, "question": query}
chain(custom_map_reduce_inputs, return_only_outputs=True)

# 4 The refine Chain

In [None]:
# QA chain of type "refine", built on the hosted LLM.
chain = load_qa_chain(
    llm=llm,
    chain_type="refine",
)

In [None]:
# Run the current chain on the retrieved documents; only the outputs are returned.
query = "What is finma in swiss"
refine_inputs = {"input_documents": docs, "question": query}
chain(refine_inputs, return_only_outputs=True)

### Intermediate Steps

In [None]:
# Same "refine" chain, this time with return_refine_steps switched on.
chain = load_qa_chain(
    llm=llm,
    chain_type="refine",
    return_refine_steps=True,
)

In [None]:
# Invoke the chain with the current documents and question; outputs only.
refine_steps_inputs = {"input_documents": docs, "question": query}
chain(refine_steps_inputs, return_only_outputs=True)

### Custom Prompts

In [None]:
# Prompt used on every refinement step: shows the question, the answer so far
# ({existing_answer}) and a new piece of context ({context_str}), and asks for
# an improved answer in Italian.
refine_prompt_template = (
    "The original question is as follows: {question}\n"
    "We have provided an existing answer: {existing_answer}\n"
    # Trailing space added: the adjacent literals are concatenated, and without
    # it the prompt read "...existing answer(only if needed)...".
    "We have the opportunity to refine the existing answer "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{context_str}\n"
    "------------\n"
    "Given the new context, refine the original answer to better "
    "answer the question. "
    "If the context isn't useful, return the original answer. Reply in Italian."
)
refine_prompt = PromptTemplate(
    input_variables=["question", "existing_answer", "context_str"],
    template=refine_prompt_template,
)


# Prompt used for the first document: answer from the given context only, in Italian.
initial_qa_template = (
    "Context information is below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the question: {question}\nYour answer should be in Italian.\n"
)
initial_qa_prompt = PromptTemplate(
    input_variables=["context_str", "question"], template=initial_qa_template
)
# Build the refine chain with both custom prompts and run it, returning the
# intermediate refine steps along with the final answer.
chain = load_qa_chain(llm, chain_type="refine", return_refine_steps=True,
                     question_prompt=initial_qa_prompt, refine_prompt=refine_prompt)
chain({"input_documents": docs, "question": query}, return_only_outputs=True)

# 5 The map-rerank Chain

In [None]:
# QA chain of type "map_rerank" with return_intermediate_steps switched on.
chain = load_qa_chain(
    llm=llm,
    chain_type="map_rerank",
    return_intermediate_steps=True,
)

In [None]:
# Run the current chain and keep its outputs in `results` for the cells below.
query = "what is not finma"
rerank_inputs = {"input_documents": docs, "question": query}
results = chain(rerank_inputs, return_only_outputs=True)

In [None]:
# Display the "output_text" entry of the outputs returned by the map_rerank chain call.
results["output_text"]

### Intermediate steps

In [None]:
# Display the "intermediate_steps" entry of the outputs; the chain was built with
# return_intermediate_steps=True.
results["intermediate_steps"]

### Custom Prompts

In [None]:
from langchain.output_parsers import RegexParser

# Splits the model reply into the answer line and its numeric score.
# The score group accepts digits only, so trailing text such as "/100" or a
# comment after the number no longer ends up in the value used for ranking.
output_parser = RegexParser(
    regex=r"(.*?)\nScore: (\d+)",
    output_keys=["answer", "score"],
)

# Prompt for the map_rerank chain: answer in Italian and append a 0-100 score
# in the exact "Score: <number>" format the parser above expects. A stray "/"
# after the score line was removed because it invited replies like "85/".
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:

Question: [question here]
Helpful Answer In Italian: [answer here]
Score: [score between 0 and 100]


Begin!

Context:
---------
{context}
---------
Question: {question}
Helpful Answer In Italian:"""
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
    output_parser=output_parser,
)

chain = load_qa_chain(llm, chain_type="map_rerank", return_intermediate_steps=True, prompt=PROMPT)
# The previous question ("What did the president say about Justice Breyer")
# was unrelated to the indexed document. Ask about the topic `docs` was
# retrieved for ("what is finma") so the chain has something to answer and score.
query = "What is finma?"
chain({"input_documents": docs, "question": query}, return_only_outputs=True)

# 6 Document QA with sources

In [None]:
# retriever = FAISS.from_documents(texts, llm).as_retriever()
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)
query = "What is a candy"
docs = docsearch.similarity_search(query)
print(docs)

In [None]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

# "stuff" QA chain variant loaded from the qa_with_sources module.
chain = load_qa_with_sources_chain(llm, chain_type="stuff")
query = "What is a cyclone"
# `docs` was searched for a different question ("What is a candy"). Search the
# index for THIS question so the chain receives matching context. A separate
# name is used so `docs` itself is left unchanged.
source_docs = docsearch.similarity_search(query)
chain({"input_documents": source_docs, "question": query}, return_only_outputs=True)