In [1]:
!pip install -qU chromadb

In [2]:
pip install -U bitsandbytes



In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [4]:
# Sample documents
documents = [
    "This is a list which containing sample documents.",
    "Keywords are important for keyword-based search.",
    "Document analysis involves extracting keywords.",
    "Keyword-based search relies on spare embeddings."
]

In [5]:
query = "keyword-based search"

In [6]:
import re
def preprocess_text(text):
    """Normalize raw text before it is vectorized.

    The input is lowercased, then every character that is not an ASCII
    letter, an ASCII digit, or whitespace is deleted. Characters are removed
    rather than replaced by a space, so "keyword-based" becomes
    "keywordbased".

    text: String to normalize.
    return: The normalized string.
    """
    lowered = text.lower()
    # Strip punctuation and any other special characters in a single pass.
    return re.sub(r'[^a-zA-Z0-9\s]', '', lowered)

In [7]:
# Normalize every sample document with the same routine later applied to the query.
preprocess_documents = [preprocess_text(raw_doc) for raw_doc in documents]

In [8]:
preprocess_documents

['this is a list which containing sample documents',
 'keywords are important for keywordbased search',
 'document analysis involves extracting keywords',
 'keywordbased search relies on spare embeddings']

In [9]:
print("Preprocessed Documents:")
for idx, doc in enumerate(preprocess_documents):
    print(f"Document {idx + 1}: {doc}")

Preprocessed Documents:
Document 1: this is a list which containing sample documents
Document 2: keywords are important for keywordbased search
Document 3: document analysis involves extracting keywords
Document 4: keywordbased search relies on spare embeddings


In [10]:
preprocessed_query = preprocess_text(query)
preprocessed_query

'keywordbased search'

In [11]:
vector = TfidfVectorizer()

In [12]:
X = vector.fit_transform(preprocess_documents)

In [13]:
X.toarray()

array([[0.        , 0.        , 0.37796447, 0.        , 0.37796447,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.37796447, 0.        , 0.        , 0.37796447, 0.        ,
        0.        , 0.37796447, 0.        , 0.        , 0.37796447,
        0.37796447],
       [0.        , 0.4533864 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.4533864 , 0.4533864 , 0.        ,
        0.        , 0.35745504, 0.35745504, 0.        , 0.        ,
        0.        , 0.        , 0.35745504, 0.        , 0.        ,
        0.        ],
       [0.46516193, 0.        , 0.        , 0.46516193, 0.        ,
        0.        , 0.46516193, 0.        , 0.        , 0.46516193,
        0.        , 0.        , 0.36673901, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.43671931, 0.        , 0.        , 0.       

In [14]:
X.toarray()[0]

array([0.        , 0.        , 0.37796447, 0.        , 0.37796447,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.37796447, 0.        , 0.        , 0.37796447, 0.        ,
       0.        , 0.37796447, 0.        , 0.        , 0.37796447,
       0.37796447])

In [15]:
y = vector.transform([preprocessed_query])
y.toarray()

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.70710678, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.70710678, 0.        , 0.        ,
        0.        ]])

In [16]:
similarities = cosine_similarity(X, y)

In [17]:
similarities

array([[0.        ],
       [0.50551777],
       [0.        ],
       [0.48693426]])

In [18]:
# Ranking: argsort the flattened scores in ascending order, then reverse the
# result so the index of the most similar document comes first.
ranked_indices = np.argsort(similarities.ravel())[::-1]

In [19]:
ranked_documents = [documents[i] for i in ranked_indices]

In [20]:
ranked_indices

array([1, 3, 2, 0])

In [21]:
print(f"This is your query - {query}")
for idx, doc in enumerate(ranked_documents):
    print(f"Rank {idx + 1}: {doc}")

This is your query - keyword-based search
Rank 1: Keywords are important for keyword-based search.
Rank 2: Keyword-based search relies on spare embeddings.
Rank 3: Document analysis involves extracting keywords.
Rank 4: This is a list which containing sample documents.


In [22]:
document_embeddings = np.array([
    [0.634, 0.234, 0.867, 0.042, 0.249],
    [0.123, 0.456, 0.789, 0.321, 0.654],
    [0.987, 0.654, 0.321, 0.123, 0.456]
])

In [23]:
# Sample Search Query(represented as a dense vector)
query_embedding = np.array([[0.789, 0.321, 0.654, 0.987, 0.123]])

In [24]:
# Calculate cosine similarity between each document embedding and the query
# embedding; the result has one row per document and one column for the query.
similarities = cosine_similarity(X=document_embeddings, Y=query_embedding)

In [25]:
similarities

array([[0.73558979],
       [0.67357898],
       [0.71517305]])

In [26]:
# Rank the dense-embedding documents: ascending argsort over the flattened
# scores, reversed so the highest similarity comes first.
ranked_indices = np.argsort(similarities.ravel())[::-1]

In [27]:
ranked_indices

array([0, 2, 1])

In [28]:
# Output the ranked documents
for idx, doc_idx in enumerate(ranked_indices):
    print(f"Rank {idx + 1}: Document {doc_idx + 1}")

Rank 1: Document 1
Rank 2: Document 3
Rank 3: Document 2


In [29]:
doc_path = "/content/Chatbot"

In [30]:
!pip install -qU pypdf langchain_community

In [31]:
from langchain_community.document_loaders import PyPDFLoader

In [32]:
loader = PyPDFLoader(doc_path)

In [33]:
docs = loader.load()

In [34]:
docs[0]

Document(metadata={'producer': 'Skia/PDF m127', 'creator': 'Chromium', 'creationdate': '2025-04-08T16:58:37+00:00', 'moddate': '2025-04-08T16:58:37+00:00', 'source': '/content/Chatbot', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='This comprehensive design document outlines both the high-level architecture and low-level\nimplementation details for a desktop chatbot application that operates locally without internet\nconnectivity. The application leverages OpenVINO for optimized neural language model\ninference and Electron for cross-platform desktop deployment.\nThe desktop chatbot application follows a modular, layered architecture that separates concerns\nbetween model inference, business logic, and user interface components. The system employs a\nMulti-Context Protocol \x00MCP) architecture to enable efficient communication between\ncomponents while maintaining local operation.\nThe high-level architecture consists of four primary layers:\n!\x00High-Level Architect

In [35]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [36]:
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=30)

In [37]:
chunks = splitter.split_documents(docs)

In [38]:
for idx, chunk in enumerate(chunks):
    print(f"Chunk {idx + 1}: {chunk}")

Chunk 1: page_content='This comprehensive design document outlines both the high-level architecture and low-level
implementation details for a desktop chatbot application that operates locally without internet' metadata={'producer': 'Skia/PDF m127', 'creator': 'Chromium', 'creationdate': '2025-04-08T16:58:37+00:00', 'moddate': '2025-04-08T16:58:37+00:00', 'source': '/content/Chatbot', 'total_pages': 7, 'page': 0, 'page_label': '1'}
Chunk 2: page_content='connectivity. The application leverages OpenVINO for optimized neural language model
inference and Electron for cross-platform desktop deployment.' metadata={'producer': 'Skia/PDF m127', 'creator': 'Chromium', 'creationdate': '2025-04-08T16:58:37+00:00', 'moddate': '2025-04-08T16:58:37+00:00', 'source': '/content/Chatbot', 'total_pages': 7, 'page': 0, 'page_label': '1'}
Chunk 3: page_content='The desktop chatbot application follows a modular, layered architecture that separates concerns
between model inference, business logic, and user

In [39]:
!pip install -q langchain_huggingface

In [41]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Use local embeddings (more stable)
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

In [42]:
# Create vectorstore
vectorstore = Chroma.from_documents(chunks, embeddings)

In [43]:
vectorstore

<langchain_community.vectorstores.chroma.Chroma at 0x788d136e11d0>

In [44]:
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
vector_retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x788d136e11d0>, search_kwargs={'k': 3})

In [45]:
!pip install -q rank_bm25

In [46]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

In [47]:
keyword_retriver = BM25Retriever.from_documents(chunks)

In [48]:
keyword_retriver.k = 3

In [49]:
ensemble_retriever = EnsembleRetriever(retrievers=[vector_retriever, keyword_retriver], weights=[0.7, 0.3])

### Mixing vector search and keyword search for Hybrid Search
hybrid_score = (1 - alpha) * `sparse_score` + alpha * `dense_score`

In [50]:
model_name = "HuggingFaceH4/zephyr-7b-beta"

In [51]:
!pip install -qU bitsandbytes accelerate

In [52]:
import torch
from transformers import ( AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline, )
from langchain import HuggingFacePipeline

In [53]:
# Function for loading a 4-bit quantized model
def load_quantized_model(model_name:str):
  """
  Load a causal language model using a 4-bit quantization config.

  model_name: Name or path of the model to be loaded.
  return: Loaded quantized model.
  """
  # 4-bit NF4 quantization with double quantization enabled; compute is
  # carried out in bfloat16.
  quantization_settings = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_use_double_quant=True,
      bnb_4bit_compute_dtype=torch.bfloat16,
  )
  return AutoModelForCausalLM.from_pretrained(
      model_name,
      quantization_config=quantization_settings,
      torch_dtype=torch.bfloat16,
  )

In [54]:
# Build the tokenizer for a given model
def initialize_tokenizer(model_name:str):
  """
  Create the tokenizer for the given model and pin its BOS token id.

  model_name: Name or path of the model to be loaded.
  return: Tokenizer for the model.
  """
  tok = AutoTokenizer.from_pretrained(model_name, return_token_type_ids=False)
  # Force the beginning-of-sentence token id to 1.
  # NOTE(review): 1 is presumably the BOS id of the model used in this
  # notebook; confirm before reusing this helper with another model.
  tok.bos_token_id = 1
  return tok

In [55]:
tokenizer = initialize_tokenizer(model_name)

In [56]:
pip install -U bitsandbytes



In [57]:
model = load_quantized_model(model_name)

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [58]:
# Build the text-generation pipeline around the already loaded model and tokenizer.
#
# The factory is imported under an alias so this cell can be re-run safely.
# The original `pipeline = pipeline(...)` rebound the imported factory function
# to the pipeline object it returned, so a second execution of the cell called
# that object instead of the factory. The result is still bound to `pipeline`
# because the following cell reads that name.
from transformers import pipeline as build_pipeline

pipeline = build_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=2048,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # The EOS token id is reused as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
)
# NOTE(review): the recorded chain results echo the full prompt before the
# answer; consider `return_full_text=False` if only the answer is wanted.

Device set to use cuda:0


In [59]:
llm = HuggingFacePipeline(pipeline=pipeline)

  llm = HuggingFacePipeline(pipeline=pipeline)


In [60]:
from langchain.chains import RetrievalQA

In [61]:
normal_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_retriever,
)

In [62]:
hybrid_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=ensemble_retriever,
)

In [66]:
response1 = normal_chain.invoke("What is History Manager?")

In [67]:
response1

{'query': 'What is History Manager?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nFrontend Components\nUI Interface: Streamlit-based responsive user interface for chatbot interactions\x001\x00\nChat History Manager: Maintains conversation state and history\n\n- get_context_history(context_id: ContextID, limit: int): List[Message]\n- prune_context(context_id: ContextID, max_tokens: int): void\nThe user interface is designed with:\n\nContext Management\n\x00\x00\x00Conversation State: Tracking ongoing conversations and their states\n\x00\x00\x00Memory Management: Efficient handling of conversation history with sliding windows\n\nQuestion: What is History Manager?\nHelpful Answer: The History Manager in this context is responsible for managing and maintaining the history of conversations. It provides two main functionalities - getting the context history fo

In [68]:
print(response1.get("result"))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Frontend Components
UI Interface: Streamlit-based responsive user interface for chatbot interactions 1 
Chat History Manager: Maintains conversation state and history

- get_context_history(context_id: ContextID, limit: int): List[Message]
- prune_context(context_id: ContextID, max_tokens: int): void
The user interface is designed with:

Context Management
   Conversation State: Tracking ongoing conversations and their states
   Memory Management: Efficient handling of conversation history with sliding windows

Question: What is History Manager?
Helpful Answer: The History Manager in this context is responsible for managing and maintaining the history of conversations. It provides two main functionalities - getting the context history for a specific conversation and pruning the conversation history when it reaches a certain 

In [70]:
response2 = hybrid_chain.invoke("What is History Manager?")

In [71]:
response2

{'query': 'What is History Manager?',
 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nFrontend Components\nUI Interface: Streamlit-based responsive user interface for chatbot interactions\x001\x00\nChat History Manager: Maintains conversation state and history\n\n- get_context_history(context_id: ContextID, limit: int): List[Message]\n- prune_context(context_id: ContextID, max_tokens: int): void\nThe user interface is designed with:\n\nContext Management\n\x00\x00\x00Conversation State: Tracking ongoing conversations and their states\n\x00\x00\x00Memory Management: Efficient handling of conversation history with sliding windows\n\n\x00\x00\x00https://docs.openvino.ai/2024/notebooks/llm-chatbot-with-output.html\n\x00\x00\x00https://www.ibm.com/topics/chatbot-design\n\nQuestion: What is History Manager?\nHelpful Answer: The History Manager is a component in th

In [72]:
print(response2.get("result"))

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Frontend Components
UI Interface: Streamlit-based responsive user interface for chatbot interactions 1 
Chat History Manager: Maintains conversation state and history

- get_context_history(context_id: ContextID, limit: int): List[Message]
- prune_context(context_id: ContextID, max_tokens: int): void
The user interface is designed with:

Context Management
   Conversation State: Tracking ongoing conversations and their states
   Memory Management: Efficient handling of conversation history with sliding windows

   https://docs.openvino.ai/2024/notebooks/llm-chatbot-with-output.html
   https://www.ibm.com/topics/chatbot-design

Question: What is History Manager?
Helpful Answer: The History Manager is a component in the chatbot system responsible for maintaining the conversation state and history. It allows the chatbot to reme