## Setup and Import Libraries

In [2]:
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import init_chat_model
from langchain.schema.runnable import RunnableLambda, RunnableMap
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from dotenv import load_dotenv

import warnings
warnings.filterwarnings('ignore')

In [3]:
load_dotenv()

True

In [4]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [None]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
model = SentenceTransformer("all-MiniLM-L6-v2")
llm = init_chat_model(model="groq:openai/gpt-oss-20b")

## Custom Semantic Chunker with Threshold

In [None]:
class ThresholdSemanticChunker:
    def __init__(self, model_name="all-MiniLM-L6-v2", threshold=0.7):
        self.model = SentenceTransformer(model_name)
        self.threshold = threshold

    def split(self, text: str):
        sentences = [sentence.strip() for sentence in text.split("\n") if sentence.strip()]
        embeddings = self.model.encode(sentences)

        chunks = []
        current_chunk = [sentences[0]]

        for sentence in range(1, len(sentences)):
            similarity = cosine_similarity(
                X=[embeddings[sentence - 1]],
                Y=[embeddings[sentence]]
            )[0][0]

            if similarity >= self.threshold:
                current_chunk.append(sentences[sentence])
            else:
                chunks.append(" ".join(current_chunk) + ".")
                current_chunk=[sentences[sentence]]

        chunks.append(" ".join(current_chunk) + ".")
        return chunks
    
    def split_documents(self, documents):
        result = []
        for document in documents:
            for chunk in self.split(document.page_content):
                result.append(Document(page_content=chunk, metadata=document.metadata))

        return result

In [8]:
sample_text = """ 
LangChain is a framework for building applications with LLMs.
LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone.
You can create chains, agents, memory and retrievers.
The Eiffel Tower is located in Paris.
France is a popular tourist destination
"""

document = Document(page_content=sample_text)
document

Document(metadata={}, page_content=' \nLangChain is a framework for building applications with LLMs.\nLangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone.\nYou can create chains, agents, memory and retrievers.\nThe Eiffel Tower is located in Paris.\nFrance is a popular tourist destination\n')

In [9]:
chunker = ThresholdSemanticChunker(threshold=0.7)

chunks = chunker.split_documents([document])
chunks

[Document(metadata={}, page_content='LangChain is a framework for building applications with LLMs. LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone..'),
 Document(metadata={}, page_content='You can create chains, agents, memory and retrievers..'),
 Document(metadata={}, page_content='The Eiffel Tower is located in Paris..'),
 Document(metadata={}, page_content='France is a popular tourist destination.')]

## Storing Data into Vector Store

In [10]:
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings
)

retriever = vector_store.as_retriever()

## Create Prompt Template

In [11]:
template = """ Answer the question based on the following context:

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template=template)
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template=' Answer the question based on the following context:\n\nContext: {context}\n\nQuestion: {question}\n')

## Creating RAG Chain

In [12]:
rag_chain = (
    RunnableMap(
        {
            "context": lambda x: retriever.invoke(x["question"]),
            "question": lambda x: x["question"]
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [16]:
question = {"question": "What is Langchain used for?"}

result = rag_chain.invoke(input=question)
result

'LangChain is a framework for building applications that use large language models (LLMs). It offers modular abstractions—such as chains, agents, memory, and retrievers—that let developers combine LLMs with tools like OpenAI and Pinecone to create more complex, reusable AI workflows.'