# Chat

Recall the overall workflow for retrieval augmented generation (RAG):

We discussed `Document Loading` and `Splitting` as well as `Storage` and `Retrieval`.

We then showed how `Retrieval` can be used for output generation in Q+A using the `RetrievalQA` chain.

In [None]:
!pip install openai langchain "langchain[docarray]" pypdf chromadb tiktoken

In [None]:
import os
import openai

# import panel as pn  # GUI
# pn.extension()

# from dotenv import load_dotenv, find_dotenv
# _ = load_dotenv(find_dotenv()) # read local .env file

# Never embed the secret in the notebook: read it from the environment instead.
# Set the variable before starting Jupyter (e.g. `export OPENAI_API_KEY=...`),
# or load it from a local .env file with the commented-out `load_dotenv` call above.
key = 'OPENAI_API_KEY'
api_key = os.environ.get(key)  # None when the variable is not set
if not api_key:
    # Warn early so the failure is not first seen deep inside a LangChain call.
    print(f"Warning: environment variable {key} is not set; OpenAI calls are expected to fail.")

In [None]:
#import os
#os.environ["LANGCHAIN_TRACING_V2"] = "true"
#os.environ["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"
#os.environ["LANGCHAIN_API_KEY"] = "ls__*"

If you wish to experiment on `LangChain plus platform`:

 * Go to [langchain plus platform](https://www.langchain.plus/) and sign up
 * Create an api key from your account's settings
 * Use this API key in the commented-out cell above (uncomment it first)

In [None]:
# Name of the OpenAI chat model used by every LLM instance in this notebook.
llm_name = "gpt-3.5-turbo"
print(llm_name)  # echo the choice so it is visible in the cell output

## LLM Chat: Memoryless

In [None]:
from langchain.chat_models import ChatOpenAI
# Chat model shared by the chains in the following cells (temperature fixed at 0).
llm = ChatOpenAI(
    model_name=llm_name,
    temperature=0,
    api_key=api_key,
)

In [None]:
from langchain.prompts import PromptTemplate
# Llama-style instruction template with four placeholders:
# {sys_msg}, {history}, {input} and {primer}.
full_prompt = PromptTemplate.from_template(
    "<s>[INST]<<SYS>>{sys_msg}<</SYS>>\n\nContext:\n{history}\n\nHuman: {input}\n[/INST] {primer}"
)


In [None]:
# Pre-fill everything except {input}: an empty history and primer plus a fixed
# system message, so the resulting prompt only needs the user's text.
prompt = full_prompt.partial(
    history="",
    primer="",
    sys_msg=(
        "You are a helpful, respectful and honest AI assistant."
        "\nAlways answer as helpfully as possible, while being safe."
        "\nPlease be brief and efficient unless asked to elaborate, and follow the conversation flow."
        "\nYour answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content."
        "\nEnsure that your responses are socially unbiased and positive in nature."
        "\nIf a question does not make sense or is not factually coherent, explain why instead of answering something incorrect."
        "\nIf you don't know the answer to a question, please don't share false information."
        "\nIf the user asks for a format to output, please follow it as closely as possible."
    ),
)

# Show the fully rendered prompt for a sample user message.
print(prompt.format(input="Help me with my homework"))

In [None]:
from langchain.chains import LLMChain

# Plain LLM chain: each call renders `prompt` (whose history is fixed to "")
# and sends it to the model; verbose=True is passed so the run is logged.
chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=True,
)
print(chain.run(input="Hello World"))

In [None]:
# Follow-up through the same chain. The prompt's {history} slot was fixed to ""
# above, so the rendered prompt carries no record of the previous question.
chain.run(input="What was the first thing I asked you?")

## LLM Chat: Memory

In [None]:
from langchain.chains import ConversationChain

# Work on a copy so the memoryless `prompt` stays untouched, then declare
# `history` as an input variable again (it had been pre-filled with "" via
# `partial`) so the conversation chain can supply it on each call.
hist_prompt = prompt.copy()
hist_prompt.input_variables = ['input', 'history']

conv_chain = ConversationChain(
    llm=llm,
    prompt=hist_prompt,
    verbose=True,
)
conv_chain.run(input="Hello World")

In [None]:
# Same follow-up question as in the memoryless section, now sent through the ConversationChain.
conv_chain.run(input="What was the first thing I asked you?")

In [None]:
# Display the memory object attached to the conversation chain.
conv_chain.memory

### RAG + LLM

In [None]:
from langchain.document_loaders import PyPDFLoader

# Load PDF
# One loader per PDF. Lecture01 is listed twice on purpose to simulate
# messy, duplicated source data.
loaders = [
    PyPDFLoader("sample_docs/MachineLearning-Lecture01.pdf"),
    PyPDFLoader("sample_docs/MachineLearning-Lecture01.pdf"),
    PyPDFLoader("sample_docs/MachineLearning-Lecture02.pdf"),
    PyPDFLoader("sample_docs/MachineLearning-Lecture03.pdf"),
]

# Flatten the per-file results into one list, keeping loader order.
docs = [page for pdf_loader in loaders for page in pdf_loader.load()]

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitter configured for 1500-character chunks with a 150-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)

In [None]:
# Split the loaded documents into chunks and show how many chunks were produced.
splits = text_splitter.split_documents(docs)
len(splits)

## Embeddings

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding model used to vectorize the chunks for the vector store below.
embedding = OpenAIEmbeddings(api_key=api_key)

## Vectorstores

In [None]:
from langchain.vectorstores import Chroma
# Index the split chunks in a Chroma vector store using the embedding model above.
vectordb = Chroma.from_documents(documents=splits, embedding=embedding)

### similarity

In [None]:
# Plain similarity search: fetch the 3 closest chunks for the question and print them.
# NOTE(review): this rebinds `docs`, which until now held the loaded PDF pages;
# any later cell that reads `docs` sees these search hits instead.
question = "What are major topics for this class?"
docs = vectordb.similarity_search(question, k=3)
print(docs[0], "\n\n", docs[1], "\n\n", docs[2])

### mmr   : max_marginal_relevance

In [None]:
# Same question, this time with max-marginal-relevance search (k=3).
# NOTE(review): `docs` is rebound to the search hits here as well, so it no
# longer refers to the loaded PDF pages after this cell runs.
question = "What are major topics for this class?"
docs = vectordb.max_marginal_relevance_search(question, k=3)
len(docs)

In [None]:
# Print the three MMR hits, separated by blank lines.
print(docs[0], "\n\n", docs[1], "\n\n", docs[2])

In [None]:
from langchain.chat_models import ChatOpenAI
# Re-create the chat model for the retrieval section and send a short prompt
# to check that it responds.
llm = ChatOpenAI(
    model_name=llm_name,
    temperature=0,
    api_key=api_key,
)
llm.predict("Hello world!")

## RetrievalQA 1: Similarity

In [None]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Run chain
from langchain.chains import RetrievalQA
question = "Is probability a class topic?"
# `return_source_documents` and `chain_type_kwargs` are options of the QA chain,
# not of the retriever. They used to sit inside the `as_retriever(...)` call
# (misplaced parenthesis), so the custom prompt never reached the chain.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(
        search_type='similarity',
        search_kwargs={'k': 3},
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)


# Ask the question and display only the answer text.
result = qa_chain({"query": question})
result["result"]

## RetrievalQA 2: mmr : max_marginal_relevance

In [None]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Run chain
from langchain.chains import RetrievalQA
question = "Is probability a class topic?"
# `return_source_documents` and `chain_type_kwargs` are options of the QA chain,
# not of the retriever. They used to sit inside the `as_retriever(...)` call
# (misplaced parenthesis), so the custom prompt never reached the chain.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(
        search_type='mmr',
        search_kwargs={'k': 3},
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)


# Ask the question and display only the answer text.
result = qa_chain({"query": question})
result["result"]

### Memory

In [None]:
from langchain.memory import ConversationBufferMemory
# Conversation buffer exposed to chains under the key "chat_history",
# returning the stored turns as message objects (return_messages=True).
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

### ConversationalRetrievalChain 1

In [None]:
from langchain.chains import ConversationalRetrievalChain
# Conversational retrieval chain over the Chroma store, using the retriever's
# default search settings and the shared `memory` buffer.
retriever = vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

### ConversationalRetrievalChain 2

In [None]:
from langchain.chains import ConversationalRetrievalChain
# Same chain as the previous cell, but the retriever now uses MMR search with k=3.
# This rebinds both `retriever` and `qa`.
retriever = vectordb.as_retriever(
    search_type='mmr',
    search_kwargs={'k': 3},
)
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [None]:
# First turn of the conversation; `qa` is whichever chain was built last
# (the MMR variant when the cells are run in order).
question = "Is probability a class topic?"
result = qa({"question": question})

In [None]:
# Display the answer text from the chain's result.
result['answer']

In [None]:
# Follow-up turn: "those" only makes sense with the previous exchange, which
# the chain is expected to get from its `memory` object.
question = "why are those prerequesites needed?"
result = qa({"question": question})

In [None]:
# Display the answer to the follow-up question.
result['answer']

# Create a chatbot on same documents

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader

The chatbot code has been updated a bit since filming. The GUI appearance also varies depending on the platform it is running on.

In [None]:
k = 4
chain_type = "stuff"

# The similarity/MMR demo cells rebind `docs` to a handful of search hits, so
# re-read the PDFs through the original loaders; otherwise the chatbot would
# index only those hits instead of the full documents.
source_docs = [page for pdf_loader in loaders for page in pdf_loader.load()]

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
splits = text_splitter.split_documents(source_docs)

# define embedding
embeddings = OpenAIEmbeddings(api_key=api_key)
# create vector database from data
db = DocArrayInMemorySearch.from_documents(splits, embeddings)
# define retriever
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
# Give the chatbot its own memory; reusing `memory` would carry over the turns
# recorded by the earlier ConversationalRetrievalChain demo.
chatbot_memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)
# create a chatbot chain
qa = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model_name=llm_name, temperature=0, api_key=api_key),
#    chain_type=chain_type,
    retriever=retriever,
    memory=chatbot_memory,
)

In [None]:
# First question to the chatbot chain built in the previous cell.
question = "Is probability a class topic?"
result = qa({"question": question})

In [None]:
# Display the chatbot's answer text.
result['answer']

In [None]:
# Follow-up question that refers back to the previous chatbot answer.
question = "why are those prerequesites needed?"
result = qa({"question": question})

In [None]:
# Display the chatbot's answer to the follow-up question.
result['answer']