See documentation [here](https://python.langchain.com/docs/integrations/providers/vectara/vectara_chat#get_chat_history-function).

In [2]:
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Vectara
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import DirectoryLoader
from langchain.vectorstores import Chroma
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI

In [3]:
os.environ['OPENAI_API_KEY'] = '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'

In [4]:
loader = DirectoryLoader(
    '/Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes', 
    glob="**/*.pdf")

In [5]:
documents = loader.load()

In [6]:
embeddings = OpenAIEmbeddings()

In [7]:
vectorstore = Chroma.from_documents(documents, embeddings)

In [8]:
from langchain.memory import ConversationBufferMemory

In [11]:
memory = ConversationSummaryBufferMemory(llm=llm, input_key='question', output_key='answer')

In [10]:
llm = ChatOpenAI(temperature=0, model="gpt-4")

In [12]:
retriever = vectorstore.as_retriever()

In [13]:
d = retriever.get_relevant_documents(
    "What did Andrew do?"
)

In [14]:
qa = ConversationalRetrievalChain.from_llm(llm, retriever, memory=memory)

In [15]:
query = "What did Andrew do?"

In [16]:
result = qa({"question": query})

ValueError: Missing some input keys: {'chat_history'}

In [17]:
result['answer']

NameError: name 'result' is not defined

In [18]:
result

NameError: name 'result' is not defined

In [19]:
query = "Did he work for Deloitte?"

In [20]:
result = qa({"question": query})

ValueError: Missing some input keys: {'chat_history'}

In [21]:
result["answer"]

NameError: name 'result' is not defined

## Return Source Documents

In [22]:
qa = ConversationalRetrievalChain.from_llm(llm, 
                                           retriever, 
                                           memory=memory,
                                           return_source_documents=True
                                          )

In [23]:
chat_history = []

In [24]:
query = "What are Andrew's hobbies?"

In [25]:
result = qa({"question": query, "chat_history": chat_history})

In [26]:
result['answer']

"Andrew's interests include eBay Selling, Take-Stock in Children, UF Club Hockey, Non-fiction and Biographical Books, Udemy, Hockey, Traveling, and Music."

In [27]:
result['source_documents'][0].dict()['metadata']['source']

'/Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Consulting_Resume.pdf'

## Pass in chat history
Less cool way than using a memory object imo. Does the same as above, only explicitly using chat history. See the link [here](https://python.langchain.com/docs/integrations/providers/vectara/vectara_chat#pass-in-chat-history).

## Conversation retrieval chain with `map_reduce`

You can do a chain instead of an LLM. LLMs are interchangeable with chains in a way.

LLMs generate text but don't provide orchestration, while chains provide that orchestration and can incorporate LLMs as needed. The two work together, with chains coordinating and LLMs generating. But LLMs can't fully replace the functionality of chains.

In [28]:
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

In [29]:
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

In [30]:
doc_chain = load_qa_chain(llm, chain_type="map_reduce")

In [31]:
chain = ConversationalRetrievalChain(
    retriever=retriever,
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
)

In [32]:
chat_history = []

In [33]:
query = "What did Andrew do?"

In [34]:
%%time
result = chain({"question": query, "chat_history": chat_history})
print('with gpt-4')

with gpt-4
CPU times: user 120 ms, sys: 15.5 ms, total: 136 ms
Wall time: 1min 9s


In [35]:
result["answer"]

'Andrew Arthur Kelly has a Master’s of Science in International Business with a concentration in Finance from the University of Florida, Hough Graduate School of Business and a Bachelor of Science in Business Administration, Major in Finance from the University of Florida, Warrington College of Business. He has worked as a Summer Analyst at Citi Security Services and as a Business Development Intern at Drumcondra Credit Union. He also held leadership roles as the Florida Blue Key Outreach Committee Chair and Chairman of Student Government Productions. He was a Financial Analyst for the Heavener School of Business National Case Competition Team. His technical skills include Microsoft VBA and Power BI.'

## Conversational retrieval chain Q&A with sources

This could be improved by reviewing the documentation [here](https://api.python.langchain.com/en/latest/chains/langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain.html) and mayber [here](https://api.python.langchain.com/en/latest/chains/langchain.chains.qa_with_sources.loading.load_qa_with_sources_chain.html).

In [36]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

In [37]:
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

In [38]:
doc_chain = load_qa_with_sources_chain(llm, chain_type="map_reduce")

In [39]:
chain = ConversationalRetrievalChain(
    retriever=vectorstore.as_retriever(),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
)

In [40]:
chat_history = []

In [41]:
query = "What are Andrew's hobbies?"

In [57]:
result = chain({"question": query, "chat_history": chat_history})

In [43]:
result["answer"]

"Andrew's hobbies include eBay selling, participating in Take-Stock in Children, playing UF Club Hockey, reading non-fiction and biographical books, using Udemy, traveling, and listening to music.\nSOURCES: /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Consulting_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Accenture_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Finance_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Resume.pdf"

In [45]:
type(result)

dict

In [46]:
result.keys()

dict_keys(['question', 'chat_history', 'answer'])

In [47]:
result['chat_history']

[]

In [58]:
result = list(result["answer"].split("\n"))

In [59]:
result

['Andrew studied a Master’s of Science in International Business with a concentration in Finance and a Bachelor of Science in Business Administration with a major in Finance at the University of Florida.',
 'SOURCES: /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Consulting_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Accenture_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_MIB_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Resume.pdf']

In [62]:
import re

In [63]:
pattern = r"/([^/]+)$"

In [68]:
result[1].split(",")[1]

' /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Accenture_Resume.pdf'

In [70]:
type(result[1].split(",")[1])

str

In [71]:
text = result[1].split(",")[1]

In [74]:
print(re.search(pattern, text).group(1))

Kelly_Andrew_Accenture_Resume.pdf


In [48]:
query = "What did Andrew study?"

In [49]:
%%time
result = chain({"question": query, "chat_history": chat_history})

CPU times: user 130 ms, sys: 18 ms, total: 148 ms
Wall time: 1min 8s


In [50]:
result['chat_history']

[]

In [1]:
result["answer"]

NameError: name 'result' is not defined

In [None]:
from openai import Model

In [None]:
models = Model.list()

In [None]:
for model in models["data"]:
    print(model["id"])