# Document Loading

In [54]:
import os
import openai
import sys
# sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

# openai.api_key  = os.environ['OPENAI_API_KEY']

In [55]:
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader("docs/dm.pdf")
pages = loader.load()

In [56]:
len(pages)

11

In [57]:
page = pages[0]

In [58]:
print(page.page_content[0:500])

A Typology of Decision-Making Tasks for Visualization
Camelia D. Brumar, Sam Molnar, Gabriel Appleby, Kristi Potter, and Remco Chang
CHOOSE
f1f2f1f2f1f2
f1f2f1f2update
comparison
criteriaoptions
f1f2
f1f2f3criteriaCREATE
f1f2f1f2f1f2
f1f2f3f1f2f3f1f2f3f1f2f1f2f1f2
f1f2f1f2update
scoring
criteria
Assesses
options and
outputs a subset
deemed optimal
or best. ACTIVATE
Evaluates
the inputs,
and only those
that meet or exceed
a threshold are returned.Represents
decisions on
assembling,
synthesizing,



In [59]:
page.metadata

{'source': 'docs/dm.pdf', 'page': 0}

# Document Splitting

## Character Splitter

In [60]:
from langchain.text_splitter import CharacterTextSplitter

In [61]:
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=2000,
    chunk_overlap=300,
    length_function=len
)

In [62]:
docs = text_splitter.split_documents(pages)

In [63]:
len(docs)

50

In [64]:
docs[10]

Document(page_content='tions with a direct decision-making focus. We added to that list the\npapers with “decision” mentioned in titles or abstracts to be consistent\nwith [48], adding up to 74 papers.To refine our selection, we prioritized papers featuring specific do-\nmain applications, and eliminated generic systems lacking evaluations\nwith domain experts. This guaranteed that the chosen tools were uti-\nlized by domain users with actual decisions and requirements that\ncould be validated. For instance, papers solely based on hypothetical\ncase studies were excluded (e.g., Podium [63], Zooids [42], etc.). This\nfiltering process resulted in a final corpus of 69 papers that directly\ncontribute to our study’s objectives.\n3.2 Coding Process\nOur coding process followed an inductive approach to derive the codes,\ni.e. the design goals for our typology, from the survey corpus outlined\nin the preceding section. The process involved a team-based approach\nwith two coders, who are the 

## Token Splitter

In [65]:
from langchain.text_splitter import TokenTextSplitter

In [66]:
text_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=25)

In [67]:
docs = text_splitter.split_documents(pages)

In [68]:
len(docs)

260

In [69]:
docs[0]

Document(page_content='A Typology of Decision-Making Tasks for Visualization\nCamelia D. Brumar, Sam Molnar, Gabriel Appleby, Kristi Potter, and Remco Chang\nCHOOSE\nf1f2f1f2f1f2\nf1f2f1f2update\ncomparison\ncriteriaoptions\nf1f2\nf1f2f3criteriaCREATE\nf1f2f1f2f1f', metadata={'source': 'docs/dm.pdf', 'page': 0})

# Embeddings
Let's take our splits and embed them. Using SBERT.net's sentence transformer instead of using the one that the course uses because I don't have an openai api key.

In [70]:
# from langchain.embeddings.openai import OpenAIEmbeddings
# embedding = OpenAIEmbeddings()

from sentence_transformers import SentenceTransformer

# 1. Load a pretrained Sentence Transformer model
embedding = SentenceTransformer("all-MiniLM-L6-v2")

# The sentences to encode
# sentences = [
#     "The weather is lovely today.",
#     "It's so sunny outside!",
#     "He drove to the stadium.",
# ]

# sentence = "I'm learning how to use Sentence Transformers."

# 2. Calculate embeddings by calling model.encode()
# embeddings = embedding_model.encode(sentences)
# embedding = embedding_model.encode(sentence)
# print(embedding.shape)
# print(type(embedding))
# print(embedding.tolist())



# [3, 384]

# 3. Calculate the embedding similarities
# similarities = embedding_model.similarity(embeddings, embeddings)
# print(similarities)
# tensor([[1.0000, 0.6660, 0.1046],
#         [0.6660, 1.0000, 0.1411],
#         [0.1046, 0.1411, 1.0000]])




Actually the SBERT.net one didn't work with the langchain vector store. I found some free ones available here: https://python.langchain.com/v0.1/docs/integrations/text_embedding/

In [71]:
# example from here: https://python.langchain.com/v0.1/docs/integrations/text_embedding/bge_huggingface/
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

## Vectorstores

In [72]:
from langchain.vectorstores import Chroma

In [73]:
persist_directory = 'docs/chroma/'
!rm -rf docs/chroma  # remove old database files if any

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [74]:
vectordb = Chroma.from_documents( # had an error previuously, downgraded to chromadb version 0.4.3 using command: pip install chromadb==0.4.3. See https://github.com/zylon-ai/private-gpt/issues/1012
    documents=docs,
    embedding=hf,
    persist_directory=persist_directory
)

In [75]:
print(vectordb._collection.count())

260


## Similarity Search

In [76]:
question = "what are the three types of decisions in the typology of decision-making tasks?"

In [77]:
docs_ss = vectordb.similarity_search(question,k=3)

In [78]:
len(docs_ss)

3

In [79]:
docs_ss[0].page_content

' real-world visualization\nsystems.\n4.1 Decision-Making Tasks\nOur typology consists of three tasks derived from the scientific\nliterature [27, 28] : CHOOSE, ACTIV ATE, and CREATE. Each task is\na function that represents a specific and distinct decision problem. The\ntype of the inputs to these functions does not change the core process\nof the decision task. Some of the key differences between the tasks are\nthe unique transformations of'

# Retrieval
We don't want to retrieve only the splits that are the most relevant to the question. We would also like to retrieve a diverse set of splits. We can use Maximum marginal relevance (MMR) for that.

Another retrieval method is LLM Aided Retrieval, it takes into consideration the semantic part of the question, and also the metadata part of the question. An LLM can be used to identify the metadata part and use it as a filter for our vectorstore.

## Addressing Diversity: Maximum marginal relevance

In [80]:
docs_mmr = vectordb.max_marginal_relevance_search(question,k=3)

In [81]:
docs_mmr[2].page_content

', the result of our validation can be divided into three broad\ncategories: the correctness of the typology, the utility of the typol-\nogy, and how the typology compared to existing task taxonomies and\ntypologies. The correctness of the typology can be further broken\ndown into the consideration of completeness and expressiveness. In\nour interview study, participants found that the typology is complete\nregarding the three decision tasks and that they could successfully\nexpress'

## Addressing Specificity: working with metadata

In [82]:
# GONNA SKIP THIS FOR NOW SINCE THERE IS NO NEED FOR IT IN MY SIMPLE EXAMPLE WITH JUST MY PDF FILE

## Additional tricks: compression
Another approach for improving the quality of retrieved docs is compression.

Information most relevant to a query may be buried in a document with a lot of irrelevant text. 

Passing that full document through your application can lead to more expensive LLM calls and poorer responses.

Contextual compression is meant to fix this. 

In [83]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [84]:
def pretty_print_docs(docs):
    print(f"\n{'-' * 100}\n".join([f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]))

In [85]:
# Wrap our vectorstore
# llm = OpenAI(temperature=0, model="gpt-3.5-turbo-instruct")

from langchain_community.llms import Ollama

# To Load Local models through Ollama
llm = Ollama(model="llama3:8b", temperature=0.0) # setting temperature to 0.0 to get deterministic results, with low variability and gives us highest fidelity and reliable answers

compressor = LLMChainExtractor.from_llm(llm)

In [86]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_type="mmr") # or "similarity" or "mmr" or "combined" or "combined_mmr" or "combined_similarity" <--- this is the search type, recommended by Copilot
)

In [87]:
compressed_docs = compression_retriever.get_relevant_documents(question)
pretty_print_docs(compressed_docs)

Document 1:

Extracted relevant parts:

...three tasks derived from the scientific literature : CHOOSE, ACTIV ATE, and CREATE...

These three types of decisions are relevant to answer the question.
----------------------------------------------------------------------------------------------------
Document 2:

Extracted relevant part:

4 T YPOLOGY OF DECISION -MAKING TASKS
As we outlined in the previous section, we created a typology that reflects and captures the ubiquity of decision-making while allowing for flexibility in the execution...

This is relevant to answer the question about the three types of decisions in the typology of decision-making tasks.


# Question Answering

In [88]:
from langchain.chains import RetrievalQA # RetrievalQA is a class that combines a retriever and a compressor to answer questions, it's a retrieval step-backed chat

In [89]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever() # I think this is a similarity based search/retriever
)

In [90]:
result = qa_chain({"query": question})
result['result']

'According to the provided context, the three types of decisions in the typology of decision-making tasks are:\n\n1. CHOOSE\n2. ACTIVATE\n3. CREATE\n\nThese tasks represent specific and distinct decision problems that can be composed or decomposed into other tasks to capture complex decision-making structures.'

## Prompt that takes in the question and the documents and passes them to the LLM

In [91]:
from langchain.prompts import PromptTemplate

# Build prompt / prompt template
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [92]:
# Run chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [93]:
result = qa_chain({"query": question})
result["result"]

'The three types of decisions in the typology of decision-making tasks are CHOOSE, ACTIVATE, and CREATE. These tasks can be composed or decomposed into other tasks to represent complex decision-making problems. Thanks for asking!'

In [94]:
result["source_documents"][0]

Document(page_content=' real-world visualization\nsystems.\n4.1 Decision-Making Tasks\nOur typology consists of three tasks derived from the scientific\nliterature [27, 28] : CHOOSE, ACTIV ATE, and CREATE. Each task is\na function that represents a specific and distinct decision problem. The\ntype of the inputs to these functions does not change the core process\nof the decision task. Some of the key differences between the tasks are\nthe unique transformations of', metadata={'page': 3, 'source': 'docs/dm.pdf'})

## Other RetrievalQA chain types
### MapReduce

In [95]:
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    chain_type="map_reduce" # others are refine, and map_rerank
)

In [96]:
# result = qa_chain_mr({"query": question}) # takes a while to run

In [97]:
# result["result"] # the result is not satisfactory, as the llm doesn't find the answer in the paper using this chain type.

# Chat (adding memory to keep track of previous questions and answers)
## Memory

In [98]:
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True # returns the chat history as a list of messages as opposed to a string, more details on memory in the previous class from this guy
)

## ConversationalRetrievalChain

In [99]:
from langchain.chains import ConversationalRetrievalChain
retriever=vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm( # adds something more on top of the retrieval chain, not just memory. It adds a step that takes the history and the new question and condenses it into a standalone question that's passed to the vectorstore to look up relevant documents.               
    llm,
    retriever=retriever,
    memory=memory
)

In [100]:
question = "What are the types of decisions in the typology of decision-making tasks?"
result = qa({"question": question})
result['answer']

'According to the provided context, the typology of decision-making tasks consists of three tasks:\n\n1. CHOOSE\n2. ACTIVATE\n3. CREATE\n\nThese tasks represent specific and distinct decision problems that can be composed or decomposed into other tasks, allowing for flexibility in the execution of decisions.'

In [101]:
question = "What are the differences between them?"
result = qa({"question": question})
result['answer']

"According to the provided context, the key differences between CHOOSE, ACTIVATE, and CREATE types of decisions are not explicitly stated. The text only describes each task briefly without highlighting their unique characteristics or differences.\n\nTherefore, I don't know the answer to this question as it is not explicitly mentioned in the given context."