### Hybrid Search Langchain

In [1]:
!pip install --upgrade --quiet  pinecone-client pinecone-text pinecone-notebooks


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()

# Read the API key from the environment (it is passed to the Pinecone client
# in a later cell). The value is not printed, so it does not end up in the
# saved notebook output.
api_key = os.getenv('api_key')

In [3]:
from langchain_community.retrievers import PineconeHybridSearchRetriever

In [4]:
import os
import time
from pinecone import Pinecone,ServerlessSpec
index_name="hybrid-search-langchain-pinecone"
## initialize the Pinecone client
pc=Pinecone(api_key=api_key)

# Create the index only when it does not exist yet, so re-running the cell is safe.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must match the dense embedding size (all-MiniLM-L6-v2 -> 384)
        metric="dotproduct",  # sparse values supported only for dotproduct
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Index creation is asynchronous: block until the index reports ready so
    # the following cells do not talk to a half-provisioned index.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

  from tqdm.autonotebook import tqdm


In [5]:
# Open a handle to the index by name; the bare name on the last line displays its repr.
index=pc.Index(index_name)
index

<pinecone.data.index.Index at 0x110167b50>

In [6]:
## vector embedding and sparse matrix
import os
from dotenv import load_dotenv
load_dotenv()

# Only export HF_TOKEN when it is actually configured: assigning None into
# os.environ raises TypeError, which made this cell fail without a token.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token

from langchain_huggingface import HuggingFaceEmbeddings
# Dense encoder; its 384-dimensional output must match the index dimension.
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings



HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [8]:
import ssl
import urllib.request

# WARNING: this replaces the default HTTPS context factory with the
# unverified one, i.e. certificate verification is switched off for every
# HTTPS request in this kernel that relies on the default context.
# NOTE(review): presumably a workaround for a local certificate error when
# downloading the BM25 default parameters -- confirm, and prefer fixing the
# machine's CA bundle over disabling verification.
ssl._create_default_https_context = ssl._create_unverified_context

In [9]:
from pinecone_text.sparse import BM25Encoder

# Build the default encoder by calling default() on the class itself rather
# than on a throwaway instance; the bare name below displays its repr.
bm25_encoder = BM25Encoder.default()
bm25_encoder

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x32dd84f90>

In [10]:
# Small example corpus used to fit the sparse encoder.
sentences = [
    "In 2023, I visited Paris",
    "In 2022, I visited New York",
    "In 2021, I visited New Orleans",
]

# Fit the BM25 encoder on the corpus.
bm25_encoder.fit(sentences)

# Persist the fitted values to a JSON file ...
bm25_encoder.dump("bm25_values.json")

# ... and load them back into a fresh BM25Encoder object.
bm25_encoder = BM25Encoder().load("bm25_values.json")


100%|██████████| 3/3 [00:00<00:00, 159.96it/s]


In [11]:
# Hybrid retriever: dense embeddings plus the BM25 sparse encoder over the Pinecone index.
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings,
    sparse_encoder=bm25_encoder,
    index=index,
)

In [12]:
# Display the retriever's repr (embeddings, sparse encoder and index it was built with).
retriever

PineconeHybridSearchRetriever(embeddings=HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x32f753350>, index=<pinecone.data.index.Index object at 0x110167b50>)

In [13]:
# Index the same corpus the BM25 encoder was fitted on. Reusing `sentences`
# (instead of repeating the literal list) keeps the two from drifting apart.
retriever.add_texts(sentences)

100%|██████████| 1/1 [00:00<00:00,  1.20it/s]


In [14]:
# Hybrid query; the recorded output lists the documents in descending score order.
retriever.invoke("What city did i visit first")

[Document(metadata={'score': 0.239868209}, page_content='In 2021, I visited New Orleans'),
 Document(metadata={'score': 0.233195603}, page_content='In 2022, I visited New York'),
 Document(metadata={'score': 0.212445945}, page_content='In 2023, I visited Paris')]

In [15]:
# In the recorded output the 2021 entry still ranks first, so similarity
# search alone does not resolve "most recent".
retriever.invoke("What city did i visit most recent")

[Document(metadata={'score': 0.265936852}, page_content='In 2021, I visited New Orleans'),
 Document(metadata={'score': 0.23994641}, page_content='In 2022, I visited New York'),
 Document(metadata={'score': 0.2080466}, page_content='In 2023, I visited Paris')]

In [16]:
# Unrelated query: the recorded scores are all below 0.04, yet every stored
# text is still returned.
retriever.invoke("What is my name ")

[Document(metadata={'score': 0.0334131531}, page_content='In 2023, I visited Paris'),
 Document(metadata={'score': 0.0331086591}, page_content='In 2022, I visited New York'),
 Document(metadata={'score': 0.0194055885}, page_content='In 2021, I visited New Orleans')]

In [18]:
from transformers import pipeline
import os

# Load your environment variables
hf_token = os.getenv("HF_TOKEN")

# Load the LLaMA 2 model using the Hugging Face token from the .env file
model_name = "meta-llama/Llama-2-7b-chat-hf"  # Example model, choose according to your needs
# NOTE(review): the recorded load warning reports the question-answering
# weights as "newly initialized", so this checkpoint has not been trained for
# extractive QA; answers and scores from it are unreliable. Consider a
# checkpoint fine-tuned for question answering.
# `token` replaces the deprecated `use_auth_token` argument.
qa_pipeline = pipeline("question-answering", model=model_name, tokenizer=model_name, token=hf_token)



Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  6.99it/s]
Some weights of LlamaForQuestionAnswering were not initialized from the model checkpoint at meta-llama/Llama-2-7b-chat-hf and are newly initialized: ['embed_tokens.weight', 'layers.0.input_layernorm.weight', 'layers.0.mlp.down_proj.weight', 'layers.0.mlp.gate_proj.weight', 'layers.0.mlp.up_proj.weight', 'layers.0.post_attention_layernorm.weight', 'layers.0.self_attn.k_proj.weight', 'layers.0.self_attn.o_proj.weight', 'layers.0.self_attn.q_proj.weight', 'layers.0.self_attn.v_proj.weight', 'layers.1.input_layernorm.weight', 'layers.1.mlp.down_proj.weight', 'layers.1.mlp.gate_proj.weight', 'layers.1.mlp.up_proj.weight', 'layers.1.post_attention_layernorm.weight', 'layers.1.self_attn.k_proj.weight', 'layers.1.self_attn.o_proj.weight', 'layers.1.self_attn.q_proj.weight', 'layers.1.self_attn.v_proj.weight', 'layers.10.input_layernorm.weight', 'layers.10.mlp.down_proj.weight', 'layers.10.mlp.gate_proj.weight', 'layers.10

In [20]:
# NOTE(review): flexible_prompt is defined in a later cell of this notebook,
# so on a fresh kernel this cell raises NameError when cells run top to
# bottom. Run the defining cell first, or move this cell below it.
result = flexible_prompt("What happened in Paris in 2023?")
print(result)



The closest match I found is: In 2023, I visited Paris. Let me provide more details: In 2023, I visited Paris.


In [26]:
from transformers import pipeline
import os
import re

In [27]:
def extract_year(text):
    """Return the first standalone 19xx/20xx year in ``text`` as an int.

    Returns None when the text contains no such four-digit year.
    """
    found = re.search(r'\b(19|20)\d{2}\b', text)
    if found is None:
        return None
    return int(found.group())

def flexible_prompt(question):
    """Answer ``question`` from the retrieved document with the latest year.

    The documents returned by ``retriever`` are ranked by the year found in
    their text (latest first); the top one is used as context for
    ``qa_pipeline``.

    Args:
        question: Natural-language question.

    Returns:
        "Unrelated question" when nothing is retrieved; otherwise a sentence
        built from the chosen document and the QA answer.

    NOTE(review): the context is always the latest-year document, regardless
    of the retrieval score, even for questions that are not about recency.
    """
    # Retrieve documents related to the question
    retrieved_docs = retriever.invoke(question)

    if not retrieved_docs:
        return "Unrelated question"

    def _year_key(doc):
        # Texts without a year rank below every dated text. Comparing a bare
        # None against an int would raise TypeError.
        year = extract_year(doc.page_content)
        return (year is not None, year if year is not None else 0)

    # max() returns the first document with the greatest key, which matches
    # taking element 0 of a stable descending sort.
    best_match = max(retrieved_docs, key=_year_key)
    context = best_match.page_content

    # Run the QA model once; both response formats below use the same result.
    response = qa_pipeline(question=question, context=context)

    lowered = question.lower()
    # Recency phrasing is used only when the model is confident enough.
    if "city" in lowered and "recent" in lowered and response['score'] > 0.5:
        return f"The most recent city you visited is: {response['answer']} in {context}"

    # Default detailed explanation for an entity
    return f"The closest match I found is: {context}. Let me provide more details: {response['answer']}."

In [28]:
# The question contains both "city" and "recent", yet the recorded output
# uses the fallback wording, i.e. the QA score did not exceed the 0.5 threshold.
result = flexible_prompt("Tell me the latest city I have visited most recent.")
print(result)


The closest match I found is: In 2023, I visited Paris. Let me provide more details: In 2023, I visited Paris.
