In [9]:
import os
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from pinecone_text.sparse import BM25Encoder
# from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers.pinecone_hybrid_search import PineconeHybridSearchRetriever
from langchain.vectorstores.utils import maximal_marginal_relevance
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_core.runnables import RunnableLambda
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnableMap

In [2]:
# Load secrets and settings from the project's .env file into the process
# environment. load_dotenv() exports every key it finds into os.environ, so
# values such as LANGCHAIN_API_KEY and HF_TOKEN need no manual re-export.
# (Re-assigning os.environ[name] = os.getenv(name) is a no-op when the key
# exists and raises TypeError when it does not.)
env_path = "/Users/kumarpersonal/Downloads/Inquiry-Assistant/venv/.env"
load_dotenv(dotenv_path=env_path)

# LangSmith tracing: only switch it on when an API key is actually available,
# otherwise every chain call would try to trace without credentials.
os.environ['LANGCHAIN_PROJECT'] = 'inquiry-assistant'
os.environ['LANGCHAIN_TRACING_V2'] = 'true' if os.getenv('LANGCHAIN_API_KEY') else 'false'

pinecone_api_key = os.getenv("PINECONE_API_KEY")
pinecone_index_name = os.getenv("PINECONE_INDEX_NAME")

groq_api_key = os.getenv("GROQ_API_KEY")

# Fail fast with a readable message instead of letting None values surface
# later as unrelated client errors.
missing_settings = [
    name
    for name, value in (
        ("PINECONE_API_KEY", pinecone_api_key),
        ("PINECONE_INDEX_NAME", pinecone_index_name),
        ("GROQ_API_KEY", groq_api_key),
    )
    if not value
]
if missing_settings:
    raise EnvironmentError(
        "Missing required setting(s): " + ", ".join(missing_settings)
        + f". Checked the process environment and {env_path}."
    )

In [3]:
# Read the whole corpus into memory as one string.
corpus_path = "/Users/kumarpersonal/Downloads/Inquiry-Assistant/Context/corpus.txt"
with open(corpus_path, mode="r", encoding="utf-8") as corpus_file:
    full_text = corpus_file.read()

# Break the corpus into 500-character chunks with a 50-character overlap,
# trying the separators in the order given (paragraph, line, sentence, word).
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " "],
    chunk_overlap=50,
    chunk_size=500,
)
documents = splitter.create_documents([full_text])

# Mirror each chunk's text into its metadata under "chunk_text"; the hybrid
# retriever built later in this notebook is configured with that same text key.
for chunk in documents:
    chunk.metadata.update(chunk_text=chunk.page_content)

In [4]:
# Dense encoder: sentence-transformers MiniLM model loaded through HuggingFace.
hf_encoder = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Sparse encoder: BM25 statistics fitted on the text of every corpus chunk.
bm25_encoder = BM25Encoder()
bm25_encoder = bm25_encoder.fit([chunk.page_content for chunk in documents])

  0%|          | 0/961 [00:00<?, ?it/s]

In [None]:
# texts = [doc.page_content for doc in documents]

# dense_vectors = dense_encoder.embed_documents(texts)
# sparse_vectors = sparse_encoder.encode_documents(texts)

In [5]:
# Create the Pinecone client with the API key read from the environment.
pc = Pinecone(api_key=pinecone_api_key)
# Open a handle to the index named by PINECONE_INDEX_NAME.
# NOTE(review): the upsert cell in this notebook is commented out, so the index
# is presumably already populated — confirm before relying on query results.
index = pc.Index(pinecone_index_name)

In [None]:
# vectors_to_upsert = []
# for i, (doc, dense, sparse) in enumerate(zip(documents, dense_vectors, sparse_vectors)):
#     vector = {
#         "id": f"doc-{i}",
#         "values": dense,
#         "sparse_values": sparse,
#         "metadata": doc.metadata
#     }
#     vectors_to_upsert.append(vector)

# index.upsert(vectors=vectors_to_upsert, namespace="default")

# print(f"Upserted {len(vectors_to_upsert)} hybrid vectors to Pinecone index.")

In [43]:
# Hybrid (dense + sparse) retriever over the Pinecone index. Despite its name,
# `vectorstore` is a retriever object and is queried with `.invoke(...)`.
# NOTE(review): neither `top_k` nor `alpha` is passed here, so the library
# defaults apply — confirm they suit this corpus.
vectorstore = PineconeHybridSearchRetriever(
    index=index,
    namespace="default",
    text_key="chunk_text",  # metadata field that stores each chunk's text
    embeddings=hf_encoder,
    sparse_encoder=bm25_encoder,
)

def hybrid_search(query_text, top_k=None):
    """Run a hybrid (dense + sparse) query through the Pinecone retriever.

    Args:
        query_text: Natural-language query string.
        top_k: Optional number of matches to request for this call only.
            When None, the retriever's configured ``top_k`` is used unchanged.

    Returns:
        The list of documents returned by ``vectorstore.invoke``.
    """
    if top_k is None:
        return vectorstore.invoke(query_text)

    # The retriever takes its result count from its `top_k` attribute, so
    # override it for this call and always restore the configured value.
    configured_top_k = vectorstore.top_k
    vectorstore.top_k = top_k
    try:
        return vectorstore.invoke(query_text)
    finally:
        vectorstore.top_k = configured_top_k


def hybrid_mmr_search(query, fetch_k=50, k=5, lambda_mult=1.0):
    """Hybrid retrieval followed by maximal-marginal-relevance re-ranking.

    Fetches up to ``fetch_k`` candidates with the hybrid retriever, then keeps
    ``k`` of them using MMR over their dense embeddings.

    Args:
        query: Natural-language query string.
        fetch_k: Size of the candidate pool requested from the retriever.
        k: Number of documents to return after re-ranking.
        lambda_mult: MMR trade-off in [0, 1]. 1.0 ranks purely by similarity
            to the query; lower values favour diversity among the results.

    Returns:
        A list of at most ``k`` documents. If the retriever yields ``k`` or
        fewer candidates they are returned as-is, without re-ranking.
    """
    # Local import: numpy is not imported at the top of this notebook.
    import numpy as np

    # Request the full candidate pool; without this the retriever returns only
    # its configured number of matches and the MMR step may never run.
    candidates = hybrid_search(query, top_k=fetch_k)
    if len(candidates) <= k:
        return candidates

    candidate_embeddings = hf_encoder.embed_documents(
        [doc.page_content for doc in candidates]
    )
    # The MMR helper inspects `.ndim`, so the query must be a numpy array.
    query_embedding = np.asarray(hf_encoder.embed_query(query), dtype=np.float32)

    # The helper's signature is (query_embedding, embedding_list, lambda_mult, k);
    # it has no `embeddings` or `fetch_k` parameters.
    selected_indices = maximal_marginal_relevance(
        query_embedding,
        candidate_embeddings,
        lambda_mult=lambda_mult,
        k=k,
    )

    return [candidates[i] for i in selected_indices]

# retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5, 'fetch_k': 50, 'lambda_mult': 0.5})

In [44]:
# Groq-hosted chat model; used both to answer questions and to compress
# retrieved passages.
llm = ChatGroq(
    model="gemma2-9b-it",
    groq_api_key=groq_api_key,
)

# Compressor built from the same LLM; it is plugged into the contextual
# compression retriever as `base_compressor`.
compressor = LLMChainExtractor.from_llm(llm)

class HybridMMRRetriever:
    """Thin adapter exposing a plain search function through ``invoke``.

    NOTE(review): nothing in the cells shown instantiates this class; the
    pipeline wraps the search function in a RunnableLambda instead.
    """

    def __init__(self, query_func):
        """Store the callable that performs the actual retrieval."""
        self.query_func = query_func

    def invoke(self, query_text):
        """Delegate to the stored callable and return its result unchanged."""
        search = self.query_func
        return search(query_text)

# Expose the hybrid + MMR search as a runnable so it can serve as the base
# retriever for contextual compression.
base_retriever = RunnableLambda(lambda question: hybrid_mmr_search(question))

# Retrieved documents are passed through the LLM-based compressor before
# being returned.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever,
)

# compression_retriever = ContextualCompressionRetriever(
#     base_retriever=retriever,
#     base_compressor=compressor
# )

In [45]:
# def hybrid_search(query_text, top_k=5, namespace="default", alpha=0.8):
#     dense_query = dense_encoder.embed_query(query_text)
#     sparse_query = sparse_encoder.encode_queries([query_text])[0]
    
#     results = index.query(
#         dense_vector=dense_query,
#         sparse_vector=sparse_query,
#         top_k=top_k,
#         include_metadata=True,
#         namespace=namespace,
#         alpha=alpha
#     )
    
#     return [
#         Document(page_content=match['metadata'].get('chunk_text', ''),
#                  metadata=match['metadata'])
#         for match in results['matches']
#     ]

In [46]:
# System instructions: answer only from the supplied context and fall back to
# a fixed apology when the context does not contain the answer.
_system_message = (
    "You are an intelligent assistant for Scaler Academy, trained on internal documents, placement records, program curricula, and student feedback. "
    "You should answer questions only based on the context provided. "
    "If the answer is not found in the context, reply with: "
    "“I'm sorry, I couldn't find that information in the available documents.” "
    "Be precise, concise, and maintain a professional and helpful tone."
)

# Human turn: carries the retrieved context and the user's question.
_human_message = "Context:\n{context}\n\nQuestion:\n{question}"

chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _system_message),
        ("human", _human_message),
    ]
)

# Converts the chat model's message output into a plain string.
output_parser = StrOutputParser()

In [47]:
# Pull the "question" field out of the chain input and run it through the
# compression retriever.
retriever_runnable = RunnableLambda(
    lambda chain_input: compression_retriever.invoke(chain_input["question"])
)
# retriever_runnable = RunnableLambda(lambda x: hybrid_search(x["question"]))

In [48]:
def format_inputs(inputs):
    """Build the prompt variables from retrieved documents and the question.

    Args:
        inputs: Mapping with "documents" (iterable of objects exposing
            ``page_content``) and "question".

    Returns:
        A dict with "context" (the documents' texts joined by blank lines)
        and "question" (passed through unchanged).
    """
    passages = []
    for document in inputs["documents"]:
        passages.append(document.page_content)
    return dict(context="\n\n".join(passages), question=inputs["question"])

In [49]:
# End-to-end pipeline: retrieve documents alongside the question, format them
# into prompt variables, fill the chat prompt, call the LLM, and return the
# answer as a plain string.
rag_chain = (
    RunnableMap(
        {
            "documents": retriever_runnable,
            "question": lambda chain_input: chain_input["question"],
        }
    )
    | format_inputs
    | chat_prompt
    | llm
    | output_parser
)

In [50]:
# Ask for a question interactively and echo it back.
query_text = input("Enter your question: ")
print("\nQuestion:", query_text)

# Run the question through the RAG chain and show the generated answer.
response = rag_chain.invoke(dict(question=query_text))
print("\nAnswer:", response)


Question: fee structure

Answer: I'm sorry, I couldn't find that information in the available documents. 

