In [47]:
import getpass
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.vectorstores import Chroma
from langchain.vectorstores import Vectara

In [5]:
# Never commit an API key to the notebook. Reuse the key already exported in
# the environment and only prompt (without echoing the input) when it is missing.
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# in the notebook history and should be revoked.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

In [10]:
# Folder that holds the resume PDFs. Export RESUME_DIR to point the notebook at
# another location; the default keeps the original local Dropbox folder so
# existing runs behave exactly as before.
RESUME_DIR = os.environ.get(
    'RESUME_DIR',
    '/Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes',
)

# The recursive glob selects every PDF below RESUME_DIR.
loader = DirectoryLoader(RESUME_DIR, glob="**/*.pdf")

In [11]:
# Read every file matched by the loader into a list of documents.
documents = loader.load()

# Stop here with a clear message if nothing was loaded (wrong folder, no PDFs),
# instead of failing later inside the embedding / vector-store cells.
if not documents:
    raise ValueError(
        "No documents were loaded; check the loader directory and glob pattern."
    )

In [12]:
# OpenAI embeddings client with default settings; it is handed to Chroma in
# the next cell to embed the loaded documents.
embeddings = OpenAIEmbeddings()

In [13]:
# Build the Chroma vector store from the loaded documents, using the
# embeddings client created above.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [14]:
from langchain.memory import ConversationBufferMemory

In [33]:
# The summarising memory needs an LLM. Create it in this cell so that a fresh
# kernel running top to bottom does not hit a NameError on `llm`.
llm = ChatOpenAI(temperature=0, model="gpt-4")

# memory_key='chat_history' matches the input key ConversationalRetrievalChain
# expects, so the chain can be called with only {"question": ...}.
# return_messages=True makes the memory return message objects, the format the
# chain's history formatter accepts.
# input_key / output_key tell the memory which fields to record when the chain
# has several inputs or outputs (e.g. with return_source_documents=True).
memory = ConversationSummaryBufferMemory(
    llm=llm,
    memory_key='chat_history',
    return_messages=True,
    input_key='question',
    output_key='answer',
)

In [48]:
# GPT-4 chat model at temperature 0; the chains in the following cells use it.
llm = ChatOpenAI(
    model="gpt-4",
    temperature=0,
)

In [14]:
# Expose the vector store as a retriever with default settings; it is used
# for the direct lookup and the chains in the cells below.
retriever = vectorstore.as_retriever()

In [18]:
# Query the retriever directly (no LLM involved) with a sample question.
d = retriever.get_relevant_documents("What did Andrew do?")

In [19]:
# Conversational QA chain over the retriever, with the conversation memory attached.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [20]:
# First question put to the conversational chain.
query = "What did Andrew do?"

In [21]:
# Run the chain on the current question; the returned dict is inspected below.
result = qa(dict(question=query))

In [23]:
# Show only the generated answer text.
result['answer']

' Andrew Arthur Kelly is a graduate of the University of Florida, Hough Graduate School of Business and the Warrington College of Business. He has professional experience as a Citi Summer Analyst and a Drumcondra Credit Union Business Development Intern. He has also held leadership positions with Florida Blue Key Outreach Committee and Student Government Productions. He has technical skills in Microsoft VBA and interests in eBay Selling, Take-Stock in Children, UF Club Hockey, Non-fiction and Biographical Books.'

In [22]:
# Display the whole result dict returned by the chain call above.
result

{'question': 'What did Andrew do?',
 'chat_history': [HumanMessage(content='What did Andrew do?'),
  AIMessage(content=' Andrew Arthur Kelly is a graduate of the University of Florida, Hough Graduate School of Business and the Warrington College of Business. He has professional experience as a Citi Summer Analyst and a Drumcondra Credit Union Business Development Intern. He has also held leadership positions with Florida Blue Key Outreach Committee and Student Government Productions. He has technical skills in Microsoft VBA and interests in eBay Selling, Take-Stock in Children, UF Club Hockey, Non-fiction and Biographical Books.')],
 'answer': ' Andrew Arthur Kelly is a graduate of the University of Florida, Hough Graduate School of Business and the Warrington College of Business. He has professional experience as a Citi Summer Analyst and a Drumcondra Credit Union Business Development Intern. He has also held leadership positions with Florida Blue Key Outreach Committee and Student Go

In [24]:
# Follow-up question; it relies on the earlier turn to know who "he" is.
query = "Did he work for Deloitte?"

In [25]:
# Ask the follow-up through the same chain.
result = qa(dict(question=query))

In [26]:
# Show the answer to the follow-up question.
result["answer"]

' No, Andrew Arthur Kelly did not work for Deloitte.'

## Return Source Documents

In [34]:
# Rebuild the chain so that each result also carries the retrieved source documents.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
)

In [35]:
# Empty chat history that is passed explicitly to the chain call below.
chat_history = []

In [36]:
# Question for the chain that returns source documents.
query = "What are Andrew's hobbies?"

In [37]:
# Call the chain with both the question and the explicit chat history.
result = qa(dict(question=query, chat_history=chat_history))

In [38]:
# Show only the generated answer text.
result['answer']

" Andrew's hobbies include eBay selling, non-fiction and biographical books, Udemy, hockey, traveling, and music."

In [60]:
# Path of the file the first returned source document came from. Read the
# document's `metadata` attribute directly instead of serialising the whole
# object with `.dict()` just to pull out a single field.
result['source_documents'][0].metadata['source']

'/Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Consulting_Resume.pdf'

## Pass in chat history
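In my opinion this is less convenient than using a memory object. It achieves the same result as above, but the chat history is passed explicitly with each call instead of being tracked by a memory object. See the [LangChain documentation on passing in chat history](https://python.langchain.com/docs/integrations/providers/vectara/vectara_chat#pass-in-chat-history).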

## Conversational retrieval chain with `map_reduce`

You can build the retrieval chain from explicit component chains instead of handing it a single LLM. In that sense, LLMs and chains are interchangeable to a degree.

LLMs generate text but don't provide orchestration, while chains provide that orchestration and can incorporate LLMs as needed. The two work together, with chains coordinating and LLMs generating. But LLMs can't fully replace the functionality of chains.

In [1]:
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

In [49]:
# LLM chain driven by CONDENSE_QUESTION_PROMPT; it serves as the question
# generator of the retrieval chain assembled below.
question_generator = LLMChain(
    prompt=CONDENSE_QUESTION_PROMPT,
    llm=llm,
)

In [50]:
# Question-answering chain over documents, using the "map_reduce" chain type.
doc_chain = load_qa_chain(
    llm=llm,
    chain_type="map_reduce",
)

In [51]:
# Assemble the retrieval chain from its parts: the document-combining chain,
# the question generator and the shared retriever.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=retriever,
)

In [52]:
# Start from an empty chat history; this chain has no memory object attached,
# so the history is passed explicitly with each call.
chat_history = []

In [53]:
# Question for the map_reduce retrieval chain.
query = "What did Andrew do?"

In [54]:
%%time
result = chain({"question": query, "chat_history": chat_history})
print('with gpt-4')



with gpt-4
CPU times: user 131 ms, sys: 18.4 ms, total: 149 ms
Wall time: 1min 3s


In [55]:
# Show the answer produced by the map_reduce chain.
result["answer"]

'Andrew Arthur Kelly has a Master’s of Science in International Business with a concentration in Finance from the University of Florida, Hough Graduate School of Business and a Bachelor of Science in Business Administration, Major in Finance from the University of Florida, Warrington College of Business. He has worked as a Summer Analyst at Citi Security Services where he programmed macros using VBA to streamline workstreams and coordinated global data collection. He also developed a strategy for a local-marketing budget. He was a Business Development Intern at Drumcondra Credit Union where he developed strategies for acquiring clientele and improving operational efficiency. He also presented research and analysis to the board of directors. He held leadership roles at Florida Blue Key Outreach Committee and Student Government Productions. He was also a Financial Analyst at Heavener School of Business National Case Competition Team.'

## Conversational retrieval chain Q&A with sources

In [27]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

In [28]:
# Question-generator chain for the with-sources variant, again driven by
# CONDENSE_QUESTION_PROMPT.
question_generator = LLMChain(
    prompt=CONDENSE_QUESTION_PROMPT,
    llm=llm,
)

In [29]:
# Document chain that answers with sources, using the "map_reduce" chain type.
doc_chain = load_qa_with_sources_chain(
    llm=llm,
    chain_type="map_reduce",
)

In [30]:
# Retrieval chain whose document-combining step is the with-sources chain;
# a retriever is created from the vector store for this chain.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectorstore.as_retriever(),
)

In [31]:
# Reset the explicit chat history for the with-sources chain.
chat_history = []

In [32]:
# First question for the with-sources chain.
query = "What are Andrew's hobbies?"

In [33]:
# Run the with-sources chain on the question and the explicit chat history.
result = chain(dict(question=query, chat_history=chat_history))

In [34]:
# Show the answer; as the output below shows, the text ends with a "SOURCES:" list.
result["answer"]

" Andrew's hobbies include eBay Selling, Non-fiction and Biographical Books, Udemy, Hockey, Traveling, and Music.\nSOURCES: /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Accenture_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Consulting_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Finance_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Resume.pdf"

In [35]:
# Second question for the with-sources chain.
query = "What did Andrew study?"

In [44]:
%%time
result = chain({"question": query, "chat_history": chat_history})

CPU times: user 56.8 ms, sys: 10.1 ms, total: 66.9 ms
Wall time: 6.39 s


In [45]:
# Show the answer to the second question, including its "SOURCES:" list.
result["answer"]

" Andrew studied a Master's of Science in International Business with a concentration/specialization in Finance and a Bachelor of Science in Business Administration with a major in Finance. \nSOURCES: /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Accenture_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Consulting_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_MIB_Resume.pdf, /Users/andkelly/Library/CloudStorage/Dropbox/Applications Folder/Resumes/Kelly_Andrew_Finance_Resume.pdf"

In [38]:
from openai import Model

In [39]:
# Fetch the model listing through the OpenAI client; the ids are printed in the next cell.
models = Model.list()

In [42]:
# Print the id of every entry in the model listing, one per line.
for listed_model in models["data"]:
    print(listed_model["id"])

text-search-babbage-doc-001
gpt-3.5-turbo-16k-0613
curie-search-query
gpt-3.5-turbo-16k
text-search-babbage-query-001
babbage
babbage-search-query
text-babbage-001
fanw-json-eval
whisper-1
text-similarity-davinci-001
gpt-4
davinci
davinci-similarity
code-davinci-edit-001
curie-similarity
babbage-search-document
curie-instruct-beta
text-search-ada-doc-001
davinci-instruct-beta
text-similarity-babbage-001
text-search-davinci-doc-001
gpt-4-0314
babbage-similarity
davinci-search-query
text-similarity-curie-001
text-davinci-001
text-search-davinci-query-001
ada-search-document
ada-code-search-code
babbage-002
gpt-4-0613
davinci-002
davinci-search-document
curie-search-document
babbage-code-search-code
text-search-ada-query-001
code-search-ada-text-001
babbage-code-search-text
code-search-babbage-code-001
ada-search-query
ada-code-search-text
text-search-curie-query-001
text-davinci-002
text-embedding-ada-002
text-davinci-edit-001
code-search-babbage-text-001
gpt-3.5-turbo-instruct-0914
ada
