In [1]:
import os

In [2]:

from langchain_groq import ChatGroq

# Chat model served through Groq. The API key is read from the environment
# so no credential is stored in the notebook; temperature=0 is requested and
# failed calls are retried up to two times.
llm = ChatGroq(
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model="llama3-70b-8192",
    temperature=0,
    max_retries=2,
    timeout=None,
    max_tokens=None,
)

# Smoke test: one round trip to confirm the key and model name are accepted.
response = llm.invoke("The first dog on the space was...")
print(response.content)

You're thinking of Laika!

Laika, also known as Kudryavka or Little Curly, was a Soviet space dog who became the first living being to orbit the Earth on November 3, 1957. She was launched on the Sputnik 2 spacecraft, which was designed to test the safety and feasibility of space travel for living organisms.

Laika, a stray dog from Moscow, was chosen for the mission due to her small size and calm temperament. Unfortunately, Laika did not survive the flight, as the technology at the time did not allow for her to be safely returned to Earth. However, her pioneering mission paved the way for future human spaceflight and animal research in space.

Laika's legacy continues to inspire space exploration and research, and she remains an iconic symbol of the early space age.


## Loading PDF and text data

In [3]:
from langchain_community.document_loaders import PyPDFLoader, TextLoader

In [4]:

# Location of the PDF source, relative to the notebook's working directory.
pdf_path = "../data/python_notes.pdf"

# Fail fast with a clear error. Previously a missing file only printed a
# message and the cell then crashed with a NameError on the undefined loader.
if not os.path.isfile(pdf_path):
    raise FileNotFoundError(
        f"File not found: {pdf_path}. Please check the path or provide a valid PDF file."
    )

pdf_loader = PyPDFLoader(pdf_path)
pdf_docs = pdf_loader.load()

In [5]:
# Preview the last loaded PDF document. Index instead of pop(): pop() would
# remove it from pdf_docs, so it would be missing from everything built later.
pdf_docs[-1].page_content

'PYTHON PROGRAMMING                                      III YEAR/II SEM                  MRCET \n137 \n \n<function read at 0x03BD1070> \n>>> read() \nDepartment \n>>> from IIYEAR.CSE.student import write \n>>> write() \nStudent \n# Write a program to create and import module? \ndef add(a=4,b=6): \n    c=a+b \n    return c \nOutput: \nC:\\Users\\MRCET\\AppData\\Local\\Programs\\Python\\Python38-32\\IIYEAR\\modu1.py \n>>> from IIYEAR import modu1 \n>>> modu1.add() \n10 \n# Write a program to create and rename the existing module. \n \ndef a(): \n    print("hello world") \na() \nOutput: \nC:/Users/MRCET/AppData/Local/Programs/Python/Python38-32/IIYEAR/exam.py \nhello world \n>>> import exam as ex \nhello world'

In [6]:
# Location of the plain-text source, relative to the notebook's working directory.
text_path = "../data/python_guide.txt"

# Fail fast and report the path that was actually checked. The previous
# message named pdf_path / "PDF file", and a missing file then caused a
# NameError on the undefined loader.
if not os.path.isfile(text_path):
    raise FileNotFoundError(
        f"File not found: {text_path}. Please check the path or provide a valid text file."
    )

# The file is read as UTF-8 (the guide contains emoji).
text_loader = TextLoader(text_path, encoding="utf-8")
text_docs = text_loader.load()

In [7]:
# Preview the last loaded text document. Index instead of pop(): pop() would
# remove it from text_docs, so the guide would never reach the combined corpus.
text_docs[-1].page_content

"\n🌟 Python Programming: A Complete Guide\n\n🐍 Introduction to Python\n\nPython is a high-level, interpreted, general-purpose programming language created by Guido van Rossum and first released in 1991. It's known for its simplicity, readability, and vast standard libraries, which makes it ideal for beginners and professionals alike.\n\nPython is widely used in:\n- Web development (Django, Flask)\n- Data Science & Machine Learning (Pandas, NumPy, Scikit-learn, TensorFlow)\n- Automation & Scripting\n- Software Testing\n- Game Development (Pygame)\n- Cybersecurity\n- Internet of Things (IoT)\n\n🗺️ Python Learning Roadmap\n\nHere's a step-by-step roadmap to master Python:\n\n1. Beginner Level\n- Syntax and Basics: Variables, data types, operators\n- Control Structures: if, else, elif, loops (for, while)\n- Functions: Defining and calling functions, *args, **kwargs\n- Lists, Tuples, Dictionaries, Sets\n- String Manipulation\n- Input/Output Operations\n- Error Handling: try, except, finally

In [8]:
# Single corpus: PDF documents first, then the text documents.
all_docs = [*pdf_docs, *text_docs]

## Splitting texts into chunks

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [10]:
# Chunks of at most 1000 characters, with 200 characters shared between
# neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

In [11]:
# Split every document in the combined corpus into chunks using the splitter
# configured above; `docs` is what gets embedded and indexed.
docs = splitter.split_documents(all_docs)

## Embeddings and Vector Store

In [12]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

In [13]:
# Sentence-transformers embedding model used to vectorise the chunks.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embedding = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [17]:
# On-disk location of the Chroma collection.
persist_directory = "./chroma_db"

# Embed the chunks and index them in Chroma.
# NOTE(review): this runs against the same persist_directory every time the
# cell is executed -- confirm that re-runs do not accumulate duplicate chunks.
vector_store = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embedding,
    documents=docs,
)
vector_store.persist()

# Retriever over the store, configured with k=4 results per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 4})

## Contextualized Retriever Chain (History-Aware)

In [15]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [18]:
# System instruction for the question-rewriting step: turn a follow-up that
# leans on earlier turns into a self-contained question, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can be "
    "understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

In [20]:
# Chat prompt for the rewriting step: system instruction, then the prior
# turns (filled from "chat_history"), then the latest user input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [None]:
# Wrap the base retriever so the LLM can first rewrite the question using
# the contextualisation prompt before documents are fetched.
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

In [22]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [26]:
# System prompt for the answering step. "{context}" is the template slot the
# prompt exposes for the retrieved context.
system_prompt = """
    You are a knowledgeable and helpful assistant capable of answering questions accurately and concisely.
    Use the provided context below to guide your responses when relevant:
    {context}
    For general questions or when the context is not directly applicable, respond naturally and confidently based on your broader knowledge.
    If you are unsure or lack the necessary information, acknowledge it politely.
    """

In [24]:
# Chat prompt for answer generation: system prompt (with its {context} slot),
# the prior turns from "chat_history", then the user's input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [27]:
# Answer-generation chain: passes documents to the LLM through qa_prompt.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

## Full Retrieval-Augmented Generation (RAG) Chain

In [28]:
from langchain.chains import create_retrieval_chain

In [31]:
# End-to-end pipeline: history-aware retrieval feeding the answer chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [36]:
# Module-level conversation log shared by every query_rag call; re-running
# this cell resets the conversation.
chat_history_buffer = list()

In [37]:
def query_rag(user_query: str) -> str:
    """
    Ask the RAG chain a question and return its answer text.

    The module-level ``chat_history_buffer`` is sent along as ``chat_history``
    and, after the chain returns, is extended with this turn as a
    ``("human", question)`` entry followed by an ``("ai", answer)`` entry.

    Args:
        user_query: The user's question for this turn.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    result = rag_chain.invoke(
        {"input": user_query, "chat_history": chat_history_buffer}
    )

    # Record the turn only after the chain has returned. The buffer is
    # mutated in place, so no `global` declaration is required.
    chat_history_buffer.append(("human", user_query))
    answer = result["answer"]
    chat_history_buffer.append(("ai", answer))

    return answer

In [39]:
# Opening question; query_rag also records this turn in chat_history_buffer.
query_rag("What is python?")

'Python is a widely used, general-purpose, high-level programming language. It was initially designed by Guido van Rossum in 1991 and developed by the Python Software Foundation. Python was mainly developed for emphasis on code readability, and its syntax allows programmers to express concepts in fewer lines of code. Python is a programming language that lets you work quickly and integrate systems more efficiently.'

In [40]:
# Next turn; whatever is already in chat_history_buffer is sent as chat_history.
query_rag("Provide me a detailed roadmap for learning python within 4 weeks")

'Here is a suggested 4-week roadmap to learn Python:\n\n**Week 1: Foundations (1-2 hours/day)**\n\n* Day 1-2: Introduction to Python, Installation, and Setup\n\t+ Understand the basics of Python, its history, and features\n\t+ Set up Python on your computer, IDE, and text editor\n* Day 3-4: Variables, Data Types, and Operators\n\t+ Learn about variables, data types (int, float, string, list, etc.)\n\t+ Understand operators (arithmetic, comparison, logical, assignment, etc.)\n* Day 5-6: Control Structures (if-else, for loops, while loops)\n\t+ Learn about conditional statements (if-else) and loops (for, while)\n\t+ Practice exercises and examples\n* Project: Simple Calculator Program\n\n**Week 2: Functions and Data Structures (1-2 hours/day)**\n\n* Day 7-8: Functions\n\t+ Learn about functions, function arguments, and return types\n\t+ Understand how to define and call functions\n* Day 9-10: Lists and Tuples\n\t+ Learn about lists and tuples, indexing, slicing, and manipulating them\n\t

In [41]:
# Follow-up that names no topic of its own: it only makes sense together with
# the roadmap request recorded in the chat history.
query_rag("Skip the foundation part as I already know about it")

'Here is a revised 4-week roadmap to learn Python, assuming you already know the foundations:\n\n**Week 1: Functions and Data Structures (1-2 hours/day)**\n\n* Day 1-2: Functions\n\t+ Learn about function arguments, return types, and lambda functions\n\t+ Understand how to define and call functions, including recursive functions\n* Day 3-4: Lists and Tuples\n\t+ Learn about list comprehensions, slicing, and indexing\n\t+ Understand how to manipulate lists and tuples\n* Day 5-6: Dictionaries and Sets\n\t+ Learn about dictionary comprehensions, dictionary methods, and set operations\n\t+ Understand how to work with dictionaries and sets\n* Project: To-Do List App\n\n**Week 2: File Input/Output and Modules (1-2 hours/day)**\n\n* Day 7-8: File Input/Output\n\t+ Learn about reading and writing files in Python\n\t+ Understand how to work with CSV, JSON, and other file formats\n* Day 9-10: Modules and Packages\n\t+ Learn about built-in modules and packages in Python\n\t+ Understand how to imp