In [None]:
import os

In [None]:
# Credentials: the placeholders are only applied when the variable is not
# already set, so a real key exported in the shell is never overwritten by the
# placeholder text. Fill the placeholders in locally and never commit real keys.
os.environ.setdefault("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
os.environ.setdefault("LANGCHAIN_API_KEY", "YOUR_LANGCHAIN_API_KEY")

# To activate LangSmith tracing and group this notebook's runs under one project.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "My_Rag_Model"

### Initializing an LLM for our application to access

In [None]:
# Calling the LLM through LangChain.
from langchain_google_genai import ChatGoogleGenerativeAI  # Chat-model class used below.

# Choose the model that the later cells talk to.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

# Send one prompt to the model (invoke) and keep the returned message.
llm_response = llm.invoke("Tell me a good joke")

# The generated text is read from the message's `content` attribute.
print(llm_response.content)

### An Output Parser to Parse our Output so that we could get only the relevant Information

In [None]:
# Parsing the output: run the model's reply through StrOutputParser.
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()
# Last expression of the cell, so the parsed result is displayed.
output_parser.invoke(llm_response)


In [None]:
# Creating a simple chain: pipe the model's reply straight into the output parser.
chain = llm | output_parser
chain.invoke("Tell me a Joke.")

### Structured Output

In [None]:
from typing import List
from pydantic import BaseModel, Field

# Schema for the structured-output example: it is handed to
# llm.with_structured_output(...) and each Field carries a description of what
# the field should contain.
# NOTE(review): documented with `#` comments on purpose. A class docstring would
# presumably be picked up by pydantic as the schema description and so change
# what is sent to the model; confirm before converting this to a docstring.
class MobileReview(BaseModel):
    phone_model: str = Field(description="Name and model of the phone")
    rating: float = Field(description="Overall rating out of 5")
    pros: List[str] = Field(description="List of positive aspects")
    cons: List[str] = Field(description="List of negative aspects")
    summary: str = Field(description="Brief summary of the review")

# Free-form review text used as the input of the structured-output call below.
review_text = """
Just got my hands on the new Galaxy S21 and wow, this thing is slick! The screen is gorgeous,
colors pop like crazy. Camera's insane too, especially at night - my Insta game's never been
stronger. Battery life's solid, lasts me all day no problem.
Not gonna lie though, it's pretty pricey. And what's with ditching the charger? C'mon Samsung.
Also, still getting used to the new button layout, keep hitting Bixby by mistake.
Overall, I'd say it's a solid 4 out of 5. Great phone, but a few annoying quirks keep it from
being perfect. If you're due for an upgrade, definitely worth checking out!
"""

# Wrap the chat model with the MobileReview schema; the result is then accessed
# by field name (see `output.pros` below) instead of through `.content`.
structured_llm = llm.with_structured_output(MobileReview)
output = structured_llm.invoke(review_text)
print(output)
print(output.pros)

### Prompt Template 
###### LangChain lets us specify placeholders in the input text. Templates are simply strings that contain those placeholders.

In [None]:
from langchain_core.prompts import ChatPromptTemplate
# "{topic}" is the placeholder; it is filled from the dict passed to invoke().
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
# Last expression of the cell: shows the prompt with topic="programming" filled in.
prompt.invoke({"topic": "programming"})

In [None]:
# Full pipeline: fill the prompt, send it to the model, then pass the reply through the output parser.
# Note: this rebinds `chain`, replacing the two-step chain from the earlier cell.
chain = prompt | llm | output_parser
chain.invoke({"topic": "programmer"})

### LLM Messages


In [None]:
# Ways of creating a prompt.
# Option 1: build the messages explicitly with SystemMessage and HumanMessage.
from langchain_core.messages import HumanMessage, SystemMessage

system_message = SystemMessage(content= "You are a helpful assistant that tells jokes.") # Defines the character or behaviour of the bot or agent that we are creating.
# That is why the system message carries persona-defining content.

human_message = HumanMessage(content= "Tell me a joke about programming")
response = llm.invoke([system_message, human_message])
# Prints the whole returned message object; compare `llm_response.content` in the first LLM cell, which prints only the text.
print(response)

In [None]:
# Option 2: the same two roles, declared as (role, text) tuples instead of message classes.
template = ChatPromptTemplate([
    ("system", "You are a helpful assistant that tells jokes."),
    ("human", "Tell me a joke about {topic}")
])

# No output parser at the end of this chain, so `response` is the model's message object.
chain = template | llm
response = chain.invoke({"topic": "programming"})
print(response)

## Building a RAG Model from Scratch

### Loading Documents

##### Uploaded a few research papers, such as "Attention Is All You Need", and asked questions about them.

In [None]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter # Splits the data in recursive manner, it starts by splitting by paragraph then it splits lines the words.
from typing import List
from langchain_core.documents import Document

def load_documents(folder_path: str) -> List[Document]:
    """Load every supported file found directly inside ``folder_path``.

    Entries are visited in sorted name order, because ``os.listdir`` returns
    them in arbitrary order and the resulting document/chunk order should be
    the same on every run. The extension check is case-insensitive, so
    ``PAPER.PDF`` is loaded as well. Anything that is not a regular file is
    reported as unsupported instead of being handed to a loader.

    Args:
        folder_path: Directory to scan (sub-directories are not descended into).

    Returns:
        The concatenated results of each loader's ``load()`` call: ``.pdf``
        files go through ``PyPDFLoader``, ``.docx`` files through
        ``Docx2txtLoader``.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder_path`` does not
            exist or is not a directory.
    """
    documents = []
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        lowered = filename.lower()
        if not os.path.isfile(file_path):
            loader = None  # e.g. a sub-directory, even one named "something.pdf"
        elif lowered.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif lowered.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            loader = None
        if loader is None:
            print(f"Unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents

# Load everything from the "docs" folder (a relative path, so it is resolved against the working directory).
folder_path = "docs"
documents = load_documents(folder_path)
# NOTE(review): this counts the Document objects returned by the loaders, which may differ from the number of files - confirm.
print(f"Loaded {len(documents)} documents from the folder.")


### Splitting the Documents into Chunks

In [None]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000, # Target chunk size; with length_function=len this is measured in characters.
    chunk_overlap = 200, # Neighbouring chunks share up to 200 characters, so a sentence or key point that falls on a chunk boundary can still appear in both chunks.
    length_function = len 
)
splits = text_splitter.split_documents(documents)

print(f"Split the documents into {len(splits)} chunks")

In [None]:
# Peek at the first chunk to sanity-check the split (raises IndexError if nothing was loaded).
splits[0]

### Embedding the Documents


In [None]:
# Initially Using the Gemini Embedding
# from langchain_google_genai import GoogleGenerativeAIEmbeddings

# embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

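# document_embedding = embeddings.embed_documents([split.page_content for split in splits])

# print(f"Created embeddings for {len(document_embedding)} document chunks.")
# This was not working for some reason. (Note: the original line used a chained assignment that also rebound `embeddings` to the returned list, so re-running the cell would fail on `embeddings.embed_documents`.)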

In [None]:
# Using the LangChain community embeddings; the model is run on the CPU (see model_kwargs):
from langchain_community.embeddings.sentence_transformer import HuggingFaceEmbeddings  # NOTE(review): the previous note named this same class as its own alternative; presumably SentenceTransformerEmbeddings from this module was meant - confirm.
embedding_function = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'}
)
# Optional manual check of the embeddings (left disabled):
# document_embeddings = embedding_function.embed_documents([split.page_content for split in splits])
# document_embeddings[0][:5]

### Creating a Vector DB store -> Chroma DB, to store our embeddings in it.

In [None]:
import hashlib

from langchain_chroma import Chroma

collection_name = "my_collection"


def _chunk_id(doc):
    """Return a stable id for a chunk, derived from its source, page and text."""
    source = doc.metadata.get("source", "")
    page = doc.metadata.get("page", "")
    key = f"{source}|{page}|{doc.page_content}"
    return hashlib.sha256(key.encode("utf-8")).hexdigest()


# The collection is persisted on disk, so re-running this cell without explicit
# ids would add every chunk again under fresh random ids and retrieval would
# start returning duplicates. Content-derived ids make the cell idempotent: the
# same chunk always maps to the same id and is written over, not appended.
# Keying the dict by id also drops exact duplicates within this batch, which
# the store would otherwise reject as repeated ids.
splits_by_id = {_chunk_id(doc): doc for doc in splits}

vectorstore = Chroma.from_documents(collection_name=collection_name,
                                    documents=list(splits_by_id.values()),
                                    ids=list(splits_by_id.keys()),
                                    embedding=embedding_function,
                                    persist_directory="./chroma_db")

print("Vector store created and persisted to ./chroma_db")

### Perform Similarity Check 
##### Look for the chunks that are similar to our query in the data (vectorstore)

In [None]:
query = "What is Attention function?"

# Using MMR (max marginal relevance) search so that the two returned chunks are
# relevant to the query and also different from each other.
# fetch_k=10 (finds 10 closest candidates) and k=2 (returns 2 diverse ones)
results_mmr = vectorstore.max_marginal_relevance_search(query, k=2, fetch_k=10)

print(f"\nTop 2 most RELEVANT (and diverse) chunks for: '{query}'\n")

for i, result in enumerate(results_mmr, 1):
    # Note: MMR search does NOT return scores by default
    print(f"Result {i}:") 
    # 'Unknown' is shown when a chunk carries no 'source' metadata.
    print(f"Source: {result.metadata.get('source', 'Unknown')}")
    # Only the first 200 characters are shown to keep the output short.
    print(f"Content: {result.page_content[:200]}...")
    print("-" * 20)

#### Converting the vector store into a retriever object, i.e. using our vector store as the retriever.

In [None]:
# Expose the vector store as a retriever; k=2 asks for two chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
retriever_results = retriever.invoke("What is Attention Function?")
print(retriever_results)

### Adding the Retriever to our Chain, Making it a complete Chain.

In [None]:
from langchain_core.runnables import RunnablePassthrough

# Prompt with two placeholders: {context} (retrieved text) and {question} (the user's question).
# Note: this rebinds `template` (previously a ChatPromptTemplate) to a plain string, and `prompt` to the RAG prompt.
template = """Answer the question based only on the following context:
{context}
Question: {question}
Answer: """

prompt = ChatPromptTemplate.from_template(template)

def docs2str(docs):
    """Return the page_content of every document in ``docs``, separated by blank lines."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

rag_chain = ( 
    # "context" is whatever the retriever returns for the question, flattened to one string by docs2str;
    # "question" is the chain's input, passed through unchanged.
    {"context": retriever | docs2str, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser() 
)
# Whatever we pass when invoking the chain becomes the question through RunnablePassthrough.
# This way the prompt gets access to both our question and the retrieved context.
rag_chain.invoke("What is Attention Function?")
# The chain already ends in the LLM and the string parser, so the value displayed above is the final answer text.

In [None]:
# Same call as in the previous cell, with the question and answer kept in variables and printed side by side.
question = "What is Attention Function?"
response = rag_chain.invoke(question)
print(f"Question: {question}")
print(f"Answer: {response}")

### Generating the Answer by Adding LLM to the Chain

In [None]:
# RAG Chain
# NOTE(review): this repeats the chain definition and the invocation from the two cells above unchanged.
# `question` and `response` set here are the values the chat-history cell below reads.
rag_chain = (
    {"context": retriever | docs2str, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
question = "What is Attention Function?"
response = rag_chain.invoke(question)
print(f"Question: {question}")
print(f"Answer: {response}")

### Adding Ability to Answer Follow-Up Questions

### Conversation Rag or ChatBot


In [None]:
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.runnables import RunnableSequence, RunnableLambda


# Seed the history with the previous turn: the user's question followed by the model's answer.
chat_history = [
    HumanMessage(content=question),
    AIMessage(content=response),
]
chat_history

In [None]:

from langchain_core.prompts import MessagesPlaceholder

# System instruction for the rewriting step: turn a follow-up question into one
# that can be understood without the chat history, without answering it.
contextualize_q_system_prompt = """
Given a chat history and the latest user question
which might reference context in the chat history,
formulate a standalone question which can be understood
without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is.
"""

# Message order: instruction, then the prior turns ("chat_history"), then the new question ("input").
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Prompt -> model -> plain string.
contextualize_chain = contextualize_q_prompt | llm | StrOutputParser()
# Demo: "it" in the follow-up only makes sense given the seeded history.
print(contextualize_chain.invoke({"input": "How is it applied?", "chat_history": chat_history}))

In [None]:

def history_aware_retriever(inputs):
    """Retrieve context for the latest user question, taking chat history into account.

    When there are previous turns, the question is first rewritten into a
    standalone question by ``contextualize_chain`` and that rewrite is used for
    retrieval. When the history is empty or missing there is nothing to
    resolve, so the question goes to the retriever as-is, which saves one model
    call and avoids an unnecessary rephrasing of the query.

    Args:
        inputs: Mapping with "input" (the latest user question) and, optionally,
            "chat_history" (the previous turns).

    Returns:
        dict with "context" (the retrieved chunks' page_content joined by blank
        lines) plus "input" and "chat_history", passed through for the
        answering prompt.
    """
    question = inputs["input"]
    history = inputs.get("chat_history") or []
    if history:
        standalone_question = contextualize_chain.invoke({
            "input": question,
            "chat_history": history
        })
    else:
        standalone_question = question
    docs = retriever.invoke(standalone_question)
    return {
        "context": "\n\n".join([d.page_content for d in docs]),
        "input": question,
        "chat_history": history
    }
# Wrap the plain function in a RunnableLambda so it can be used as a step of a chain.
history_aware_retriever_runnable = RunnableLambda(history_aware_retriever)


In [None]:

# Answering prompt: instructions, then the retrieved context, then the prior turns, then the new question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the provided context to answer accurately."),
    ("system", "Context:\n{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}")
])

# Prompt -> model -> plain string.
qa_chain = qa_prompt | llm | StrOutputParser()

In [None]:
# Conversational RAG chain: the retrieval step returns the dict with
# "context", "input" and "chat_history" that the answering prompt consumes.
# NOTE: this rebinds `rag_chain`. From here on it is invoked with a dict
# holding "input" and "chat_history", not with a bare question string.
rag_chain = RunnableSequence(
    first=history_aware_retriever_runnable,
    last=qa_chain
)

In [None]:
# Follow-up question: "it" is only resolvable through the seeded chat history.
result = rag_chain.invoke({
    "input": "How is it applied?",
    "chat_history": chat_history
})

print(result)

### Building Multi User Chatbot

In [None]:
import sqlite3
from datetime import datetime
import uuid

DB_NAME = "rag_app.db"

def get_db_connection():
    """Open a connection to DB_NAME whose rows can be indexed by column name.

    The caller is responsible for closing the returned connection.
    """
    conn = sqlite3.connect(DB_NAME)
    conn.row_factory = sqlite3.Row
    return conn

def create_application_logs():
    """Create the application_logs table if it does not exist yet (safe to call repeatedly)."""
    conn = get_db_connection()
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS application_logs
    (id INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT,
    user_query TEXT,
    gpt_response TEXT,
    model TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()
    finally:
        # Close even when the statement raises, so the handle is not leaked.
        conn.close()

def insert_application_logs(session_id, user_query, gpt_response, model):
    """Append one question/answer exchange of a session to application_logs.

    Args:
        session_id: Identifier of the conversation the exchange belongs to.
        user_query: The user's question.
        gpt_response: The model's answer.
        model: Name of the model that produced the answer.
    """
    conn = get_db_connection()
    try:
        conn.execute('INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)',
                     (session_id, user_query, gpt_response, model))
        conn.commit()
    finally:
        conn.close()

def get_chat_history(session_id):
    """Return a session's exchanges, oldest first, as role/content dicts.

    Each stored row yields two entries: {"role": "human", ...} for the question
    followed by {"role": "ai", ...} for the answer. An unknown session yields
    an empty list.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor()
        # created_at only has one-second resolution, so rows logged within the
        # same second would tie; the autoincrement id breaks the tie in
        # insertion order.
        cursor.execute('SELECT user_query, gpt_response FROM application_logs WHERE session_id = ? ORDER BY created_at, id', (session_id,))
        messages = []
        for row in cursor.fetchall():
            messages.extend([
                {"role": "human", "content": row['user_query']},
                {"role": "ai", "content": row['gpt_response']}
            ])
        return messages
    finally:
        conn.close()

# Initialize the database (safe to re-run: the table is only created if it does not exist).
create_application_logs()


In [None]:
# Example usage for a new user
session_id = str(uuid.uuid4())  # fresh id, so the stored history starts out empty
question1 = "What is Attention Function?"
chat_history = get_chat_history(session_id)
answer1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})
# Log the model that actually produced the answer (the chain runs on `llm`),
# not a hard-coded name of a different model.
insert_application_logs(session_id, question1, answer1, llm.model)
print(f"Human: {question1}")
print(f"AI: {answer1}\n")



In [None]:
# Example of a follow-up question
question2 = "How is it applied?"
# Re-read the history so it now contains the first exchange of this session.
chat_history = get_chat_history(session_id)
answer2 = rag_chain.invoke({"input": question2, "chat_history": chat_history})
# Log the model that actually produced the answer (the chain runs on `llm`),
# not a hard-coded name of a different model.
insert_application_logs(session_id, question2, answer2, llm.model)
print(f"Human: {question2}")
print(f"AI: {answer2}")

### Voila! We have our RAG chain ready and working!