In [51]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
import sys
sys.path.append("../src")

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import CohereEmbeddings

In [2]:
def pretty_print_docs(docs, max_chars=100):
    """Print a short preview of each document, separated by a dashed rule.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string
            (e.g. the documents returned by a retriever).
        max_chars: Maximum number of leading characters of ``page_content``
            shown per document. Defaults to 100, the previously hard-coded
            limit; pass ``None`` to print every document in full.

    Returns:
        None. The formatted preview is written to standard output.
    """
    separator = f"\n{'-' * 100}\n"
    previews = (
        f"Document {position}:\n\n{doc.page_content[:max_chars]}"
        for position, doc in enumerate(docs, start=1)
    )
    print(separator.join(previews))

* Load documents

# Embedding function


In [3]:
# Embedding function handed to the FAISS index when it is loaded below.
# NOTE(review): presumably this must be the same model the saved index was built with — confirm.
embeddings_function = CohereEmbeddings(model="embed-english-light-v3.0")

# FAISS Index

In [4]:
# Relative path of the FAISS index previously saved to disk.
FAISS_INDEX_PATH = "../app/scenario/faiss_index"

# Load the persisted index, passing the embedding function defined earlier.
# NOTE(review): recent LangChain releases reportedly refuse to load the pickled
# docstore unless `allow_dangerous_deserialization=True` is passed — confirm
# against the installed version and only enable it for index files you trust.
index_load = FAISS.load_local(FAISS_INDEX_PATH, embeddings_function)

# Count the stored vectors through the public FAISS index attribute instead of
# reaching into the private `docstore._dict` mapping.
print("Docs in the vector store:", index_load.index.ntotal)

Docs in the vector store: 504


# Similarity Search with score


In [71]:
# Test question; the expected answer concerns Habit 9 ("Minimizing").
query = "How can we explains the habit 9 in a simple way?" #habit9 : minimizing
# Retrieve the 10 closest chunks together with their scores.
# NOTE(review): presumably the score is a distance (lower = closer) — confirm.
docs_and_scores = index_load.similarity_search_with_score(query, k=10)

In [72]:
docs_and_scores

[(Document(page_content='Habit 10 , T oo Much, is rooted in the quest to be authentic and connect\nwith others based on shared experience.\nHabit 1 1, Ruminating, is rooted in the capacity for thinking deep ly about\nwhat ma tters m ost to you inste ad of skimming along the surf ace of your\nlife.', metadata={'pagina': 187, 'fichero': 'How-Women-Rise.pdf'}),
  1.1068301),
 (Document(page_content='about standing your ground. This can result in your being overlooked\nor disregarded.\n• Habit 1 1, Ruminating, is often a consequence of Habit  12, Letting\nY our Radar Distract Y ou. Because you notice so much, you have a lot\nto process and may end up mulling over negatives in a way that\nundermines you and keeps you stuck. This can make you appear\ndisor ganized or a bit clueless.\nOnce yo u identi fy the cluster o f habits that’ s getting in your way , you\ncan choose the one you want to tackle first. Y ou’ll find suggesti ons on how\nto overcome counterproductive behaviors in the next tw

# Retriever

In [73]:
# Expose the vector store through the retriever interface, asking for k=5 results per query.
retriever = index_load.as_retriever(search_kwargs={"k": 5})

In [92]:
# Same test question as in the similarity-search section (Habit 9 is "Minimizing").
query = "How can we explains the habit 9 in a simple way?" #habit9 : minimizing
# Fetch the documents the vector-store retriever returns for the query.
items =retriever.get_relevant_documents(query)

In [93]:
items

[Document(page_content='Habit 10 , T oo Much, is rooted in the quest to be authentic and connect\nwith others based on shared experience.\nHabit 1 1, Ruminating, is rooted in the capacity for thinking deep ly about\nwhat ma tters m ost to you inste ad of skimming along the surf ace of your\nlife.', metadata={'pagina': 187, 'fichero': 'How-Women-Rise.pdf'}),
 Document(page_content='about standing your ground. This can result in your being overlooked\nor disregarded.\n• Habit 1 1, Ruminating, is often a consequence of Habit  12, Letting\nY our Radar Distract Y ou. Because you notice so much, you have a lot\nto process and may end up mulling over negatives in a way that\nundermines you and keeps you stuck. This can make you appear\ndisor ganized or a bit clueless.\nOnce yo u identi fy the cluster o f habits that’ s getting in your way , you\ncan choose the one you want to tackle first. Y ou’ll find suggesti ons on how\nto overcome counterproductive behaviors in the next two chapters. But 

In [94]:
pretty_print_docs(items)

Document 1:

Habit 10 , T oo Much, is rooted in the quest to be authentic and connect
with others based on shared experience.
Habit 1 1, Ruminating, is rooted in the capacity for thinking deep ly about
what ma tters m ost to you inste ad of skimming along the surf ace of your
life.
----------------------------------------------------------------------------------------------------
Document 2:

about standing your ground. This can result in your being overlooked
or disregarded.
• Habit 1 1, Ruminating, is often a consequence of Habit  12, Letting
Y our Radar Distract Y ou. Because you notice so much, you have a lot
to process and may end up mulling over negatives in a way that
undermines you and keeps you stuck. This can make you appear
disor ganized or a bit clueless.
Once yo u identi fy the cluster o f habits that’ s getting in your way , you
can choose the one you want to tackle first. Y ou’ll find suggesti ons on how
to overcome counterproductive behaviors in the next two chapters. 

# BM25

BM25, also known as Okapi BM25, is a ranking function used in information retrieval systems to estimate the relevance of documents to a given search query.


In [88]:
#!pip install --upgrade --quiet  rank_bm25

In [91]:
from langchain.retrievers import BM25Retriever

# `docs` is not defined by any cell in this notebook, so a fresh
# "Restart & Run All" failed here with a NameError. Rebuild the corpus from the
# documents stored in the loaded FAISS index instead.
# NOTE(review): assumes the BM25 corpus should be the same chunks that were
# indexed in FAISS — confirm.
docs = [
    index_load.docstore.search(doc_id)
    for doc_id in index_load.index_to_docstore_id.values()
]

# Build the keyword-based (BM25) retriever over those documents.
retriever_BM25 = BM25Retriever.from_documents(docs)

In [96]:
# Same test question again (Habit 9 is "Minimizing"), this time for the BM25 retriever.
query = "How can we explains the habit 9 in a simple way?" #habit9 : minimizing
# Overwrites `items` with the BM25 results.
items =retriever_BM25.get_relevant_documents(query)


In [97]:
items

[Document(page_content='she saw it. When she finished, she received her first-ever standing ovation.\nReframing is powerful because it doesn’ t force you to choose between\nthe thoughts racing through yo ur mind and whatever it is you’re actually\ntrying to communicate. It enables you to access all the richness of your left-\nhand co lumn without getting bogged down in the trap of either/or . By\nacknowledging what you’re feeling and finding strength in that, you harness\nthe power of your radar to banish its shadow side.\nSo the good news about overact ive radar is that it’ s nothing more than a\nhabit. It’ s not a  deep characterological flaw . It’ s not a cons equence of\npermanent neur al wiring. It’ s not an unchangeable manifestation of who\nyou are.  Like the other eleven behaviors described in this book , it’ s a habit\nyou can mitigate with the help of a few simple tools.', metadata={'pagina': 153, 'fichero': 'How-Women-Rise.pdf'}),
 Document(page_content='culture, except thos

In [100]:
pretty_print_docs(items)

Document 1:

she saw it. When she finished, she received her first-ever standing ovation.
Reframing is powerful b
----------------------------------------------------------------------------------------------------
Document 2:

culture, except those in which their autonomy is severely restricted.
But workplace cultural standar
----------------------------------------------------------------------------------------------------
Document 3:

say something like this to a trusted colleague:
“Sharon, I’m wondering if you could help me out. I’m
----------------------------------------------------------------------------------------------------
Document 4:

Everyone has self-limiting beha viors, for the simple reason that we are
all human. But although men


# MultiQueryRetriever


In [116]:
from langchain_community.llms import Cohere

# Cohere "command" completion model with temperature 0; reused by the
# multi-query retriever and the RAG-Fusion query generator further down.
llm = Cohere(model="command",temperature=0)
# Quick smoke test that the model answers.
llm.invoke("What is the capital of France?.Be specific.")

' The capital of France is Paris. It is located in the north-central part of the country, on the River Seine, and is one of the most populous and well-known cities in the world. \n\nParis is known for its history, culture, and many iconic landmarks, including the Eiffel Tower, the Louvre Museum, the Arc de Triomphe, and the Notre-Dame Cathedral. \nIt is also renowned for its fashion, art, cuisine, and lively atmosphere, making it a popular tourist destination. \n\nWould you like to know more about Paris? '

In [105]:
from langchain_community.chat_models import ChatCohere

# Chat-model variant of the same smoke test.
# NOTE(review): `chat` is not referenced by any other cell visible in this notebook.
chat = ChatCohere()
print(chat.invoke("What is the capital of France?. Be specific."))


content='The capital of France is Paris. It is located in the north-central part of the country, on the River Seine, and is one of the most popular tourist destinations in the world. \n\nParis is renowned for its history, culture, and iconic landmarks, including the Eiffel Tower, the Louvre Museum, the Arc de Triomphe, and Notre-Dame Cathedral. The city is also known for its culinary scene, fashion, art, and architectural styles ranging from Haussmannism to Art Nouveau and Baroque. \n\nBeyond its iconic attractions, Paris offers a vibrant atmosphere and a diverse range of neighborhoods, each with its own unique character and charm.'


In [117]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Build a multi-query retriever from a default FAISS retriever and the Cohere `llm`.
# NOTE(review): presumably the LLM rewrites the question into several variants
# and the per-variant results are merged — confirm against the LangChain docs.
retriever_multiquery = MultiQueryRetriever.from_llm(
    retriever=index_load.as_retriever(), llm=llm
)

In [118]:
# Same test question again (Habit 9 is "Minimizing"), this time for the multi-query retriever.
query = "How can we explains the habit 9 in a simple way?" #habit9 : minimizing
# Overwrites `items` with the multi-query results.
items = retriever_multiquery.get_relevant_documents(query=query)


In [119]:
items

[Document(page_content='Habit 10 , T oo Much, is rooted in the quest to be authentic and connect\nwith others based on shared experience.\nHabit 1 1, Ruminating, is rooted in the capacity for thinking deep ly about\nwhat ma tters m ost to you inste ad of skimming along the surf ace of your\nlife.', metadata={'pagina': 187, 'fichero': 'How-Women-Rise.pdf'}),
 Document(page_content='• If you identify with Habit 3, Overvaluing Expertise, you may also\nstruggle with Habit 6, Putting Y our Job Before Y our Career . Both\nreflect a desire to keep your head down and focus on the task\nimmediately before you instead of aiming at a lar ger long-term goal.\n• These two behaviors often overlap with Habit 7, The Perfection T rap,\nsince all three are rooted in the hope or expectation that you’ll be\nrewarded if you just get every detail right. These habits often appear\nto others as a tendency to think small. They can result in your getting\ntagged as someone who’ll willingly take on drudge work b

In [120]:
pretty_print_docs(items)

Document 1:

Habit 10 , T oo Much, is rooted in the quest to be authentic and connect
with others based on shared
----------------------------------------------------------------------------------------------------
Document 2:

• If you identify with Habit 3, Overvaluing Expertise, you may also
struggle with Habit 6, Putting Y
----------------------------------------------------------------------------------------------------
Document 3:

conviction that you should valu e others for who they are rather than how
they can be of use to you.
----------------------------------------------------------------------------------------------------
Document 4:

CHAPTER 17
S t a r t  w i t h  O n e  T h i n g
S o no w you kno w which habit—or , let’ s be honest
----------------------------------------------------------------------------------------------------
Document 5:

PART II
T h e  H a b i t s  T h a t  K e e p  W o m e n  f r o m
R e a c h i n g  T h e i r  G o a l
--------------------------

# RAG Fusion

Uses multiple query generation and Reciprocal Rank Fusion (RRF) to re-rank the search results.
https://github.com/langchain-ai/langchain/blob/master/cookbook/rag_fusion.ipynb

In [121]:
from langchain.prompts import ChatPromptTemplate

# Chat prompt asking the model for four search queries related to the
# `original_query` template variable.
prompt = ChatPromptTemplate.from_messages([
     ("system", "You are a helpful assistant that generates multiple search queries based on a single input query."),
     ("user", "Generate multiple search queries related to: {original_query}"),
     ("user", "OUTPUT (4 queries):")
 ])

In [122]:
from langchain_core.output_parsers import StrOutputParser


def _split_queries(text):
    """Return one query per non-blank line of the LLM output, whitespace-stripped."""
    # Blank or whitespace-only lines would otherwise be sent to the retriever
    # as empty queries and pollute the fused ranking.
    return [line.strip() for line in text.split("\n") if line.strip()]


# Query generator: prompt -> LLM -> plain string -> list of query strings.
generate_queries = prompt | llm | StrOutputParser() | _split_queries

In [123]:
# Rebinds `retriever` (created earlier with k=5) to a retriever with default
# search settings; the RAG-Fusion chain below maps it over each generated query.
retriever=index_load.as_retriever()

In [128]:
from langchain_core.load import dumps, loads
def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked result lists with Reciprocal Rank Fusion (RRF).

    Every document earns ``1 / (rank + k)`` for each list it appears in, where
    ``rank`` is its zero-based position in that list; the contributions are
    summed across lists.

    Args:
        results: One ranked list of documents per query, each already sorted
            from most to least relevant.
        k: Constant added to the rank in the denominator; larger values
            flatten the gap between top and lower positions.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by descending score.
    """
    fused_scores = {}
    for ranked_docs in results:
        for rank, doc in enumerate(ranked_docs):
            # Serialize the document so it can be used as a dict key and so the
            # same document found by several queries collapses into one entry.
            doc_key = dumps(doc)
            fused_scores[doc_key] = fused_scores.get(doc_key, 0) + 1 / (rank + k)

    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_key), score) for doc_key, score in ranked]

In [129]:
# RAG-Fusion pipeline: generate queries -> run the retriever once per query -> fuse the ranked lists.
chain = generate_queries | retriever.map() | reciprocal_rank_fusion

In [130]:
# Run the pipeline on `query` (set in an earlier cell); `items` becomes the
# list of (document, fused_score) tuples returned by reciprocal_rank_fusion.
items = chain.invoke({"original_query": query})

In [131]:
items

[(Document(page_content='conviction that you should valu e others for who they are rather than how\nthey can be of use to you.\nHabit 5, Failing to Enlist Allies from Day One, is rooted in the belief\nthat you should not call on others for help until you’ve done your\nhomework and know the parameters of your job.\nHabit 6,  Putting  Y our Job Before Y our Career , is rooted in the desire to\ndemonstrate loyalty and commit ment, as well as the sensible belief that you\nshould take life one step at a tim e instead of getting all wrappe d up in the\nfuture.\nHabit 7,  The Perfection T rap, is rooted in the desire not to disappoint\nothers (i ncluding and perhaps especially your family of origin), along with\na commitment to making the world a better place.\nHabit 8, The Disease to Please, is rooted in an unselfish passion for\nmaking other people happy .\nHabit 9, Minimizing, is rooted in an awareness of other people’ s needs\nand the wish to show them that you value their presence and in

In [133]:
print(items[0][0].page_content)

conviction that you should valu e others for who they are rather than how
they can be of use to you.
Habit 5, Failing to Enlist Allies from Day One, is rooted in the belief
that you should not call on others for help until you’ve done your
homework and know the parameters of your job.
Habit 6,  Putting  Y our Job Before Y our Career , is rooted in the desire to
demonstrate loyalty and commit ment, as well as the sensible belief that you
should take life one step at a tim e instead of getting all wrappe d up in the
future.
Habit 7,  The Perfection T rap, is rooted in the desire not to disappoint
others (i ncluding and perhaps especially your family of origin), along with
a commitment to making the world a better place.
Habit 8, The Disease to Please, is rooted in an unselfish passion for
making other people happy .
Habit 9, Minimizing, is rooted in an awareness of other people’ s needs
and the wish to show them that you value their presence and insights.


In [134]:
print(items[0][0].metadata)

{'pagina': 187, 'fichero': 'How-Women-Rise.pdf'}


In [135]:
print(items[0][1])

0.06612021857923497


# Doing Reranking

In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

# Wrap the base retriever so that its results are passed through the Cohere reranker.
# NOTE(review): this cell has no execution count and `compression_retriever`
# is not queried anywhere in the visible notebook.
compressor = CohereRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)