## Setup and Import Libraries

In [2]:
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from langchain.document_loaders import TextLoader
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import init_chat_model
from langchain.schema.runnable import RunnableLambda, RunnableMap
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_experimental.text_splitter import SemanticChunker
from dotenv import load_dotenv

import warnings
# Suppress every warning for the rest of the kernel session to keep cell output tidy.
# NOTE(review): this also hides deprecation notices emitted by the libraries
# imported above -- consider narrowing the filter to a specific category.
warnings.filterwarnings('ignore')

In [3]:
# Load variables from a local .env file into the process environment so the
# API keys never have to be written into the notebook itself.
load_dotenv()

True

In [4]:
# Both providers are needed below: OpenAI for embeddings, Groq for the chat model.
REQUIRED_API_KEYS = ("OPENAI_API_KEY", "GROQ_API_KEY")

# os.getenv() returns None for an unset variable, and assigning None into
# os.environ raises an opaque "str expected, not NoneType" TypeError. Check for
# presence explicitly and fail with a message that names what is missing.
# Empty values are treated as missing too, since an empty key cannot authenticate.
missing_api_keys = [name for name in REQUIRED_API_KEYS if not os.getenv(name)]
if missing_api_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_api_keys)
        + ". Define them in your .env file or shell before running this notebook."
    )

# No re-assignment is needed: once the check passes, the keys are already
# present in os.environ, which is where the client libraries look them up.

In [5]:
# Model identifiers gathered in one place so they are easy to swap.
EMBEDDING_MODEL_NAME = "text-embedding-3-small"
SENTENCE_MODEL_NAME = "all-MiniLM-L6-v2"
CHAT_MODEL_NAME = "groq:openai/gpt-oss-20b"

# OpenAI embedding client; passed to the semantic chunker and the FAISS index below.
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL_NAME)

# Sentence-transformers encoder.
# NOTE(review): `model` is not referenced by any cell visible here -- confirm it is still needed.
model = SentenceTransformer(SENTENCE_MODEL_NAME)

# Chat model that generates the final answer (provider-prefixed identifier).
llm = init_chat_model(model=CHAT_MODEL_NAME)

## Load the Documents

In [6]:
# Path of the plain-text corpus, resolved relative to the notebook's working directory.
DATA_FILE = "data.txt"

# Wrap the file in a loader, then read it into LangChain Document objects.
loader = TextLoader(file_path=DATA_FILE)
documents = loader.load()

## Create Semantic Chunker

In [7]:
# The chunker uses the embedding model to decide where to split the text,
# instead of cutting at a fixed character count.
chunker = SemanticChunker(embeddings)

# Split the loaded documents into chunks.
chunks = chunker.split_documents(documents)

In [8]:
# Show each chunk with a 1-based label to inspect where the splitter cut the text.
for chunk_number, chunk in enumerate(chunks, start=1):
    print(f"\n Chunk {chunk_number}: \n{chunk.page_content}")


 Chunk 1: 
LangChain is a framework for building applications with LLMs. LangChain provides modular abstractions to combine LLMs with tools like OpenAI and Pinecone. You can create chains, agents, memory and retrievers.

 Chunk 2: 
The Eiffel Tower is located in Paris. France is a popular tourist destination


## Store the Chunks in a Vector Store

In [9]:
# Embed every chunk and index the vectors in an in-memory FAISS store.
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

# Expose the store through the retriever interface used by the RAG chain.
retriever = vector_store.as_retriever()

## Create Prompt Template

In [10]:
# Prompt text with two placeholders: the retrieved context and the user's question.
# Written as adjacent literals so the blank lines between sections are explicit.
template = (
    " Answer the question based on the following context:\n\n"
    "Context: {context}\n\n"
    "Question: {question}\n"
)

# Build the template object; the input variables are inferred from the placeholders.
prompt = PromptTemplate.from_template(template)
prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template=' Answer the question based on the following context:\n\nContext: {context}\n\nQuestion: {question}\n')

## Create the RAG Chain

In [11]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    The retriever returns a list of Document objects. Interpolating that list
    straight into the prompt would insert its Python repr (metadata, escaped
    newlines) instead of readable text, so only `page_content` is kept and the
    chunks are separated by a blank line.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        A single string; empty when no documents were retrieved.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Pipeline: {"question": ...} -> retrieve and format context -> fill the prompt
# -> call the chat model -> return the reply as a plain string.
rag_chain = (
    RunnableMap(
        {
            # Fetch the chunks relevant to the question and flatten them to text.
            "context": lambda x: format_docs(retriever.invoke(x["question"])),
            # Pass the question through unchanged.
            "question": lambda x: x["question"],
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [12]:
# A question that the LangChain chunk of the corpus should be able to answer.
question = dict(question="What is Langchain used for?")

# Run the full retrieve -> prompt -> LLM pipeline and display the answer.
result = rag_chain.invoke(question)
result

'LangChain is a framework for building applications that use large language models (LLMs). It provides modular abstractions so developers can combine LLMs with tools such as OpenAI, Pinecone, and others, and create components like chains, agents, memory, and retrievers to build more complex, interactive AI applications.'

In [13]:
# A question aimed at the Eiffel Tower / France chunk of the corpus.
question = dict(question="Why France is so famous?")

# Run the same chain again with the new question and display the answer.
result = rag_chain.invoke(question)
result

'France is famous mainly for its rich cultural heritage and iconic landmarks.  \n- **Eiffel Tower**: The towering iron lattice in Paris is one of the most recognizable symbols of France worldwide.  \n- **Paris**: As the capital, Paris attracts millions of visitors each year with its art, fashion, cuisine, and historic sites.  \n- **Tourism**: The combination of these attractions, along with France’s reputation for cuisine, fashion, and history, makes it a top tourist destination.'