In [1]:
# Data ingestion: build a small in-memory corpus of LangChain Documents.
# Each Document pairs a short passage with metadata (source, page, topic);
# the "topic" key is what the metadata-filtered search later in the notebook matches on.
# NOTE: the triple-quoted strings keep their leading newline and indentation,
# so that whitespace is part of page_content (visible in the printed repr).
from langchain_core.documents import Document
sample_documents = [
    Document(
        page_content="""
        Artificial Intelligence (AI) is the simulation of human intelligence in machines.
        These systems are designed to think like humans and mimic their actions.
        AI can be categorized into narrow AI and general AI.
        """,
        metadata={"source": "AI Introduction", "page": 1, "topic": "AI"}
    ),
    Document(
        page_content="""
        Machine Learning is a subset of AI that enables systems to learn from data.
        Instead of being explicitly programmed, ML algorithms find patterns in data.
        Common types include supervised, unsupervised, and reinforcement learning.
        """,
        metadata={"source": "ML Basics", "page": 1, "topic": "ML"}
    ),
    Document(
        page_content="""
        Deep Learning is a subset of machine learning based on artificial neural networks.
        It uses multiple layers to progressively extract higher-level features from raw input.
        Deep learning has revolutionized computer vision, NLP, and speech recognition.
        """,
        metadata={"source": "Deep Learning", "page": 1, "topic": "DL"}
    ),
    Document(
        page_content="""
        Natural Language Processing (NLP) is a branch of AI that helps computers understand human language.
        It combines computational linguistics with machine learning and deep learning models.
        Applications include chatbots, translation, sentiment analysis, and text summarization.
        """,
        metadata={"source": "NLP Overview", "page": 1, "topic": "NLP"}
    )
]

# Dump the raw documents to confirm their content and metadata.
print(sample_documents)

[Document(metadata={'source': 'AI Introduction', 'page': 1, 'topic': 'AI'}, page_content='\n        Artificial Intelligence (AI) is the simulation of human intelligence in machines.\n        These systems are designed to think like humans and mimic their actions.\n        AI can be categorized into narrow AI and general AI.\n        '), Document(metadata={'source': 'ML Basics', 'page': 1, 'topic': 'ML'}, page_content='\n        Machine Learning is a subset of AI that enables systems to learn from data.\n        Instead of being explicitly programmed, ML algorithms find patterns in data.\n        Common types include supervised, unsupervised, and reinforcement learning.\n        '), Document(metadata={'source': 'Deep Learning', 'page': 1, 'topic': 'DL'}, page_content='\n        Deep Learning is a subset of machine learning based on artificial neural networks.\n        It uses multiple layers to progressively extract higher-level features from raw input.\n        Deep learning has revolu

In [2]:
# Text splitting
# Break the documents into chunks of at most 500 characters, with a
# 50-character overlap between neighbouring chunks of the same document.
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter=RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    # Try paragraph breaks first, then line breaks, then spaces, and finally
    # single characters. A lone " " separator would disable this fallback
    # chain: chunks would never prefer paragraph/line boundaries, and a
    # space-free run longer than chunk_size could not be split at all.
    separators=["\n\n", "\n", " ", ""],
)

chunks=splitter.split_documents(sample_documents)

# Quick inspection: chunk count, start of the first chunk, and its metadata
# (metadata is copied from the source document onto every chunk).
print(f"Number of chunks is {len(chunks)}")
print(chunks[0].page_content[:100])
print(chunks[0].metadata)

Number of chunks is 4
Artificial Intelligence (AI) is the simulation of human intelligence in machines.
        These syst
{'source': 'AI Introduction', 'page': 1, 'topic': 'AI'}


In [3]:
# Load variables from the project's .env file into the process environment
# (the recorded output True indicates a file was found and loaded).
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
import os
# load_dotenv() has already placed the key in os.environ, so copying it back
# onto itself adds nothing -- and os.environ rejects None with an opaque
# TypeError when the key is absent. Verify it explicitly and fail fast with
# an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

In [5]:
# Embedding model used both to index the chunks and to embed queries.
from langchain_openai import OpenAIEmbeddings
embedding=OpenAIEmbeddings(
    model="text-embedding-3-small"
)
# Bare expression: show the configured client's repr as the cell output.
embedding

  from .autonotebook import tqdm as notebook_tqdm


OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000002096C9F1D30>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000002096C9F2660>, model='text-embedding-3-small', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [6]:
# Sanity check: embed a single query string and inspect the vector length
# (the recorded run shows 1536 dimensions for this model).
text="what is ML ?"
text_embedding=embedding.embed_query(text)
len(text_embedding)

1536

## Create FAISS Vectorstore

In [7]:
# Embed every chunk with `embedding` and build an in-memory FAISS index from the results.
from langchain_community.vectorstores import FAISS
vectorstore=FAISS.from_documents(
    documents=chunks,
    embedding=embedding
)
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x2096d481e80>

In [8]:
# Report how many vectors the freshly built index holds (one per chunk in the recorded run).
print(f"Loaded vector store contains {vectorstore.index.ntotal} vectors")

Loaded vector store contains 4 vectors


In [9]:
# Save the vectorstore to disk under the local path "faiss_index"
vectorstore.save_local("faiss_index")

In [10]:
# Load vectorstore from local file
# `load_local` is a classmethod, so call it on the FAISS class: reloading then
# does not depend on a previously built `vectorstore` instance being alive.
# SECURITY: allow_dangerous_deserialization=True permits unpickling the saved
# docstore, which can execute arbitrary code. Only load index folders that
# this notebook (or another trusted source) produced.
db=FAISS.load_local("faiss_index",embedding,allow_dangerous_deserialization=True)
db

<langchain_community.vectorstores.faiss.FAISS at 0x2096cdc56d0>

In [11]:
# Confirm the reloaded index holds the same number of vectors as the one that was saved.
print(f"Loaded vector store contains {db.index.ntotal} vectors")

Loaded vector store contains 4 vectors


In [12]:
## Similarity Search
# Ask the reloaded store for the 3 chunks most similar to the query.
query="What is deep learning"

results=db.similarity_search(query,k=3)
print(results)

[Document(id='3e9b010d-406f-44c1-8d46-c290b07f9ea1', metadata={'source': 'Deep Learning', 'page': 1, 'topic': 'DL'}, page_content='Deep Learning is a subset of machine learning based on artificial neural networks.\n        It uses multiple layers to progressively extract higher-level features from raw input.\n        Deep learning has revolutionized computer vision, NLP, and speech recognition.'), Document(id='f7c49e64-75fb-497d-8d0f-13981e384948', metadata={'source': 'ML Basics', 'page': 1, 'topic': 'ML'}, page_content='Machine Learning is a subset of AI that enables systems to learn from data.\n        Instead of being explicitly programmed, ML algorithms find patterns in data.\n        Common types include supervised, unsupervised, and reinforcement learning.'), Document(id='90396bb5-d748-450f-83ee-053964330d86', metadata={'source': 'NLP Overview', 'page': 1, 'topic': 'NLP'}, page_content='Natural Language Processing (NLP) is a branch of AI that helps computers understand human lang

In [13]:
### Search with metadata filtering
# Same search as above, restricted to chunks whose metadata has topic == "ML".
# Relies on `query` defined in the previous cell.
filter_dict={"topic":"ML"}
filtered_results=db.similarity_search(
    query,
    k=3,
    filter=filter_dict
)
print(filtered_results)

[Document(id='f7c49e64-75fb-497d-8d0f-13981e384948', metadata={'source': 'ML Basics', 'page': 1, 'topic': 'ML'}, page_content='Machine Learning is a subset of AI that enables systems to learn from data.\n        Instead of being explicitly programmed, ML algorithms find patterns in data.\n        Common types include supervised, unsupervised, and reinforcement learning.')]


## Building RAG Chain with LCEL

In [36]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

In [37]:
# Repeats the environment setup done near the top of the notebook; kept so this
# section can be run on its own.
import os
from dotenv import load_dotenv
load_dotenv()


True

In [38]:
# load_dotenv() has already placed the key in os.environ, so copying it back
# onto itself adds nothing -- and os.environ rejects None with an opaque
# TypeError when the key is absent. Verify it explicitly and fail fast with
# an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

In [39]:
# Chat model shared by every chain defined below.
llm=ChatOpenAI(model="gpt-3.5-turbo")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000209B30834D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000209B51E8440>, root_client=<openai.OpenAI object at 0x00000209B50E5F90>, root_async_client=<openai.AsyncOpenAI object at 0x00000209B50E5950>, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [40]:
# Baseline: ask the model directly, without any retrieved context.
llm.invoke("what is deep learning")

AIMessage(content='Deep learning is a subset of machine learning that uses neural networks with multiple layers to analyze and learn from data. These networks are designed to mimic the way the human brain processes and learns information. Deep learning algorithms can be used to automatically analyze and classify large amounts of data, such as images, text, and speech, and make predictions or recommendations based on that data. It has been used in various fields, including computer vision, natural language processing, and speech recognition.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 93, 'prompt_tokens': 11, 'total_tokens': 104, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-CVYco9qrOogzsRsBZIQJQ8VINL

In [41]:
# Prompt for the single-turn chains: {context} receives the formatted
# retrieved documents and {question} the user's query.
custom_prompt=ChatPromptTemplate.from_template(
    """
    You are a helpful assistant. Answer the question based on the given context.
    <context>
    {context}
    </context>
    Question: {question}
    """
)

In [42]:
# Expose the reloaded vector store as a retriever that runs a similarity
# search with k=3 for each query.
retriever=db.as_retriever(
    search_type="similarity",
    search_kwargs={"k":3}
)

In [43]:
# Display the retriever's repr to confirm its vector store and search_kwargs.
retriever


VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002096CDC56D0>, search_kwargs={'k': 3})

In [44]:
from typing import List
def format_docs(docs:list[Document])->str:
    """Render retrieved documents as a single context string for the prompt.

    Each document becomes one block containing its 1-based position, its
    ``source`` metadata value ("unknown" when the key is absent) and its
    page content.

    Args:
        docs: Documents to format, in the order they should appear.

    Returns:
        All blocks joined by a blank line; an empty string when ``docs`` is empty.
    """
    formatted_docs=[]
    for position,doc in enumerate(docs,start=1):
        source=doc.metadata.get("source","unknown")
        formatted_docs.append(f" doc {position} (source: {source}) \n (content: {doc.page_content})")
    # Join only after the loop has finished: returning from inside the loop
    # would hand the prompt just the first document (and None for no documents).
    return "\n\n".join(formatted_docs)

In [45]:
# Single-turn RAG pipeline built with LCEL. The input string is fed to the
# retriever (whose documents go through format_docs) to produce "context", and
# is passed through unchanged as "question"; the filled prompt goes to the
# LLM and StrOutputParser turns the reply into a plain string.
rag_lcel_chain=(
    {"context":retriever | format_docs ,"question":RunnablePassthrough()}
    | custom_prompt
    | llm
    | StrOutputParser()
)

In [46]:
# Run the single-turn chain end to end; the cell output is the answer string.
query="What is deep learning"
rag_lcel_chain.invoke(query)

'Deep learning is a subset of machine learning based on artificial neural networks. It uses multiple layers to progressively extract higher-level features from raw input. Deep learning has revolutionized computer vision, NLP, and speech recognition.'

In [51]:
### Conversational RAG Chain
# Prompt layout: a fixed system message, then the prior messages supplied
# under "chat_history", then the current turn with its retrieved context.

conversational_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the provided context to answer questions."),
    ("placeholder", "{chat_history}"),
    ("human", "Context: {context}\n\nQuestion: {input}"),
])

In [53]:
def create_conversational_rag():
    """Assemble the conversational RAG pipeline.

    The returned runnable takes a dict holding the current question under
    "input" (the caller also supplies "chat_history" for the prompt). It adds
    a "context" entry built from the documents retrieved for "input", fills
    conversational_prompt, calls the LLM and returns the reply as a string.
    The chain stores nothing between calls; history is provided by the caller.
    """
    def _retrieve_context(payload):
        # Look up documents for the current question and flatten them to text.
        return format_docs(retriever.invoke(payload["input"]))

    with_context = RunnablePassthrough.assign(context=_retrieve_context)
    return with_context | conversational_prompt | llm | StrOutputParser()

conversational_rag = create_conversational_rag()

In [54]:
### streaming RAG chain
# Same pipeline as rag_lcel_chain but without StrOutputParser, so .stream()
# yields message chunks whose text is read from `.content`.
streaming_rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_prompt
    | llm
)

# Summary of the chains defined so far; the names printed here match the
# actual variable names so they can be copied straight into a cell.
print("Modern RAG chains created successfully!")
print("Available chains:")
print("- rag_lcel_chain: Basic Q&A")
print("- conversational_rag: Maintains conversation history")
print("- streaming_rag_chain: Supports token streaming")

Modern RAG chains created successfully!
Available chains:
- simple_rag_chain: Basic Q&A
- conversational_rag: Maintains conversation history
- streaming_rag_chain: Supports token streaming


In [56]:
# Test function for different chain types
def test_rag_chains(question: str):
    """Run one question through the simple and the streaming RAG chains.

    Prints the question, then the complete answer from ``rag_lcel_chain``,
    then the ``streaming_rag_chain`` answer piece by piece as it arrives.
    """
    print(f"Question: {question}")
    print("=" * 80)

    # 1. Simple RAG: a single blocking call that returns the whole answer.
    print("\n1. Simple RAG Chain:")
    print(f"Answer: {rag_lcel_chain.invoke(question)}")

    # 2. Streaming RAG: write each chunk's text as soon as it is yielded,
    #    staying on one line until the stream ends.
    print("\n2. Streaming RAG:")
    print("Answer: ", end="", flush=True)
    for piece in streaming_rag_chain.stream(question):
        print(piece.content, end="", flush=True)
    print()

In [57]:
# Compare the simple and streaming chains on a question that spans two source documents.
test_rag_chains("What is the difference between AI and machine learning")

Question: What is the difference between AI and machine learning

1. Simple RAG Chain:
Answer: AI, or artificial intelligence, is a broader field that encompasses various technologies and techniques aimed at creating machines that can simulate human intelligence. Machine learning, on the other hand, is a specific subfield of AI that focuses on developing algorithms that can learn from data and make predictions or decisions without being explicitly programmed. In essence, machine learning is a subset of AI that specifically deals with the ability of machines to learn from data.

2. Streaming RAG:
Answer: AI (Artificial Intelligence) is a broader concept that encompasses various technologies that enable machines to simulate human intelligence, such as reasoning, learning, problem-solving, perception, and language understanding. Machine Learning is a specific subset of AI that focuses on algorithms and statistical models that allow computers to learn from and make predictions or decisions

In [58]:
## Conversational example
print("\n3. Conversational RAG Example:")
# The history starts empty; the caller is responsible for growing it after each turn.
chat_history = []

# First question
q1 = "What is machine learning?"
# The chain expects the question under "input" and prior messages under "chat_history".
a1 = conversational_rag.invoke({
    "input": q1,
    "chat_history": chat_history
})

print(f"Q1: {q1}")
print(f"A1: {a1}")


3. Conversational RAG Example:
Q1: What is machine learning?
A1: Machine Learning is a subset of AI that enables systems to learn from data. Instead of being explicitly programmed, ML algorithms find patterns in data. Common types of machine learning include supervised, unsupervised, and reinforcement learning.


In [60]:
from langchain_core.messages import HumanMessage,AIMessage
# Rebuild the history from the first exchange rather than extending the list
# in place: the cell stays idempotent, so re-running it cannot append the
# same turn twice and skew later answers.
chat_history = [
    HumanMessage(content=q1),
    AIMessage(content=a1),
]

In [62]:
# Inspect the recorded turns that will be sent with the next question.
chat_history

[HumanMessage(content='What is machine learning?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Machine Learning is a subset of AI that enables systems to learn from data. Instead of being explicitly programmed, ML algorithms find patterns in data. Common types of machine learning include supervised, unsupervised, and reinforcement learning.', additional_kwargs={}, response_metadata={})]

In [61]:
# Follow-up question: "its" only resolves through the chat history.
# NOTE(review): the chain retrieves context from the raw "input" text alone, so
# the history does not influence which documents are retrieved -- the recorded
# answer below talks about AI types rather than machine-learning types.
q2="what are its types?"
a2 = conversational_rag.invoke({
    "input": q2,
    "chat_history": chat_history
})
print(a2)

The types of AI mentioned in the provided content are narrow AI and general AI.
