# RAG Pipelines- Data Ingestion to Vector DB Pipeline

In [1]:
import os 
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def process_all_pdfs(pdf_directory):
    """ Process all PDF files in a given directory. """
    all_documents = []
    pdf_dir = Path(pdf_directory)

    # Find all PDF recursively
    pdf_files = list(pdf_dir.glob("**/*.pdf"))

    print(f"Found {len(pdf_files)} PDF files.")

    for pdf_file in pdf_files:
        print(f"Processing file: {pdf_file}")

        try:
            loader = PyPDFLoader(str(pdf_file))
            documents = loader.load()

            # Add source metadata to each document
            for doc in documents:
                doc.metadata["source_file"] = pdf_file.name
                doc.metadata["file_type"] = "pdf"
            
            all_documents.extend(documents)
            print(f" Loaded {len(documents)} pages")

        except Exception as e:
            print(f" Error processing {pdf_file}: {e}")
        
    return all_documents

all_pdf_documents = process_all_pdfs("../data")

Found 2 PDF files.
Processing file: ../data/pdf_files/LLM_finetuning.pdf
 Loaded 115 pages
Processing file: ../data/pdf_files/python_intro.pdf
 Loaded 1 pages


In [3]:
all_pdf_documents

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-10-31T00:19:12+00:00', 'author': '', 'keywords': '', 'moddate': '2024-10-31T00:19:12+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../data/pdf_files/LLM_finetuning.pdf', 'total_pages': 115, 'page': 0, 'page_label': '1', 'source_file': 'LLM_finetuning.pdf', 'file_type': 'pdf'}, page_content='The Ultimate Guide to Fine-Tuning LLMs from\nBasics to Breakthroughs: An Exhaustive Review of\nTechnologies, Research, Best Practices, Applied\nResearch Challenges and Opportunities\n(Version 1.1)\nVenkatesh Balavadhani Parthasarathy, Ahtsham Zafar, Aafaq Khan, and\nArsalan Shahid\n@ CeADAR Connect Group\nCeADAR: Ireland’s Centre for AI, University College Dublin, Belfield, Dublin, Ireland\n{ venkatesh.parthasarathy, ahtsham.zafar, aafaq.khan, arsalan.shahid } @ ucd.

In [4]:
### Text splitting get into chunks

def split_documents(documents,chunk_size=1000,chunk_overlap=200):
    """Split documents into smaller chunks for better RAG performance"""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )
    split_docs = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(split_docs)} chunks")
    
    # Show example of a chunk
    if split_docs:
        print(f"\nExample chunk:")
        print(f"Content: {split_docs[0].page_content[:200]}...")
        print(f"Metadata: {split_docs[0].metadata}")
    
    return split_docs

In [5]:
chunks = split_documents(all_pdf_documents)
chunks

Split 116 documents into 434 chunks

Example chunk:
Content: The Ultimate Guide to Fine-Tuning LLMs from
Basics to Breakthroughs: An Exhaustive Review of
Technologies, Research, Best Practices, Applied
Research Challenges and Opportunities
(Version 1.1)
Venkate...
Metadata: {'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-10-31T00:19:12+00:00', 'author': '', 'keywords': '', 'moddate': '2024-10-31T00:19:12+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../data/pdf_files/LLM_finetuning.pdf', 'total_pages': 115, 'page': 0, 'page_label': '1', 'source_file': 'LLM_finetuning.pdf', 'file_type': 'pdf'}


[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-10-31T00:19:12+00:00', 'author': '', 'keywords': '', 'moddate': '2024-10-31T00:19:12+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../data/pdf_files/LLM_finetuning.pdf', 'total_pages': 115, 'page': 0, 'page_label': '1', 'source_file': 'LLM_finetuning.pdf', 'file_type': 'pdf'}, page_content='The Ultimate Guide to Fine-Tuning LLMs from\nBasics to Breakthroughs: An Exhaustive Review of\nTechnologies, Research, Best Practices, Applied\nResearch Challenges and Opportunities\n(Version 1.1)\nVenkatesh Balavadhani Parthasarathy, Ahtsham Zafar, Aafaq Khan, and\nArsalan Shahid\n@ CeADAR Connect Group\nCeADAR: Ireland’s Centre for AI, University College Dublin, Belfield, Dublin, Ireland\n{ venkatesh.parthasarathy, ahtsham.zafar, aafaq.khan, arsalan.shahid } @ ucd.

# Embedding and VectorStoreDB

In [6]:
import numpy as np
from sentence_transformers import SentenceTransformer 
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity


In [7]:
class EmbeddingManager:
    """Handles document embedding generation using SentenceTransformer"""
    
    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """
        Initialize the embedding manager
        
        Args:
            model_name: HuggingFace model name for sentence embeddings
        """
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the SentenceTransformer model"""
        try:
            print(f"Loading embedding model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)
            print(f"Model loaded successfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
        except Exception as e:
            print(f"Error loading model {self.model_name}: {e}")
            raise

    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeddings for a list of texts
        
        Args:
            texts: List of text strings to embed
            
        Returns:
            numpy array of embeddings with shape (len(texts), embedding_dim)
        """
        if not self.model:
            raise ValueError("Model not loaded")
        
        print(f"Generating embeddings for {len(texts)} texts...")
        embeddings = self.model.encode(texts, show_progress_bar=True)
        print(f"Generated embeddings with shape: {embeddings.shape}")
        return embeddings


## initialize the embedding manager

embedding_manager=EmbeddingManager()
embedding_manager

Loading embedding model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384


<__main__.EmbeddingManager at 0x1259f6e90>

In [8]:
class VectorStore:
    """Manages document embeddings in a ChromaDB vector store"""
    
    def __init__(self, collection_name: str = "pdf_documents", persist_directory: str = "../data/vector_store"):
        """
        Initialize the vector store
        
        Args:
            collection_name: Name of the ChromaDB collection
            persist_directory: Directory to persist the vector store
        """
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()

    def _initialize_store(self):
        """Initialize ChromaDB client and collection"""
        try:
            # Create persistent ChromaDB client
            os.makedirs(self.persist_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)
            
            # Get or create collection
            self.collection = self.client.get_or_create_collection(
                name=self.collection_name,
                metadata={"description": "PDF document embeddings for RAG"}
            )
            print(f"Vector store initialized. Collection: {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")
            
        except Exception as e:
            print(f"Error initializing vector store: {e}")
            raise

    def add_documents(self, documents: List[Any], embeddings: np.ndarray):
        """
        Add documents and their embeddings to the vector store
        
        Args:
            documents: List of LangChain documents
            embeddings: Corresponding embeddings for the documents
        """
        if len(documents) != len(embeddings):
            raise ValueError("Number of documents must match number of embeddings")
        
        print(f"Adding {len(documents)} documents to vector store...")
        
        # Prepare data for ChromaDB
        ids = []
        metadatas = []
        documents_text = []
        embeddings_list = []
        
        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            # Generate unique ID
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)
            
            # Prepare metadata
            metadata = dict(doc.metadata)
            metadata['doc_index'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)
            
            # Document content
            documents_text.append(doc.page_content)
            
            # Embedding
            embeddings_list.append(embedding.tolist())
        
        # Add to collection
        try:
            self.collection.add(
                ids=ids,
                embeddings=embeddings_list,
                metadatas=metadatas,
                documents=documents_text
            )
            print(f"Successfully added {len(documents)} documents to vector store")
            print(f"Total documents in collection: {self.collection.count()}")
            
        except Exception as e:
            print(f"Error adding documents to vector store: {e}")
            raise

vectorstore=VectorStore()
vectorstore
    

Vector store initialized. Collection: pdf_documents
Existing documents in collection: 0


<__main__.VectorStore at 0x125be7fd0>

In [9]:
### Convert the text to embeddings
texts=[doc.page_content for doc in chunks]

## Generate the Embeddings

embeddings=embedding_manager.generate_embeddings(texts)

##store int he vector dtaabase
vectorstore.add_documents(chunks,embeddings)

Generating embeddings for 434 texts...


Batches: 100%|██████████| 14/14 [00:02<00:00,  4.95it/s]

Generated embeddings with shape: (434, 384)
Adding 434 documents to vector store...
Successfully added 434 documents to vector store
Total documents in collection: 434





# Retriever Pipeline From VectorStore

In [10]:
class RAGRetriever:
    """Handles query-based retrieval from the vector store"""
    
    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingManager):
        """
        Initialize the retriever
        
        Args:
            vector_store: Vector store containing document embeddings
            embedding_manager: Manager for generating query embeddings
        """
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
        """
        Retrieve relevant documents for a query
        
        Args:
            query: The search query
            top_k: Number of top results to return
            score_threshold: Minimum similarity score threshold
            
        Returns:
            List of dictionaries containing retrieved documents and metadata
        """
        print(f"Retrieving documents for query: '{query}'")
        print(f"Top K: {top_k}, Score threshold: {score_threshold}")
        
        # Generate query embedding
        query_embedding = self.embedding_manager.generate_embeddings([query])[0]
        
        # Search in vector store
        try:
            results = self.vector_store.collection.query(
                query_embeddings=[query_embedding.tolist()],
                n_results=top_k
            )
            
            # Process results
            retrieved_docs = []
            
            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]
                
                for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                    # Convert distance to similarity score (ChromaDB uses cosine distance)
                    similarity_score = 1 - distance
                    
                    if similarity_score >= score_threshold:
                        retrieved_docs.append({
                            'id': doc_id,
                            'content': document,
                            'metadata': metadata,
                            'similarity_score': similarity_score,
                            'distance': distance,
                            'rank': i + 1
                        })
                
                print(f"Retrieved {len(retrieved_docs)} documents (after filtering)")
            else:
                print("No documents found")
            
            return retrieved_docs
            
        except Exception as e:
            print(f"Error during retrieval: {e}")
            return []

rag_retriever=RAGRetriever(vectorstore,embedding_manager)

In [11]:
rag_retriever

<__main__.RAGRetriever at 0x125abc350>

In [12]:
rag_retriever.retrieve("Types of LLM Fine-Tuning")

Retrieving documents for query: 'Types of LLM Fine-Tuning'
Top K: 5, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.71s/it]

Generated embeddings with shape: (1, 384)
Retrieved 5 documents (after filtering)





[{'id': 'doc_f539d1c4_56',
  'content': '1.10.3 Overview of the Report Structure\nThe rest of the report provides a comprehensive understanding of fine-tuning LLMs. The main chapters\ninclude an in-depth look at the fine-tuning pipeline, practical applications, model alignment, evaluation\nmetrics, and challenges. The concluding sections discuss the evolution of fine-tuning techniques, highlight\nongoing research challenges, and provide insights for researchers and practitioners.\n13',
  'metadata': {'subject': '',
   'doc_index': 56,
   'moddate': '2024-10-31T00:19:12+00:00',
   'source_file': 'LLM_finetuning.pdf',
   'page_label': '13',
   'source': '../data/pdf_files/LLM_finetuning.pdf',
   'trapped': '/False',
   'page': 14,
   'keywords': '',
   'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5',
   'author': '',
   'title': '',
   'producer': 'pdfTeX-1.40.25',
   'creationdate': '2024-10-31T00:19:12+00:00',
   'file_type': 

In [14]:
rag_retriever.retrieve("Challenges in Scaling Fine-Tuning Processes")

Retrieving documents for query: 'Challenges in Scaling Fine-Tuning Processes'
Top K: 5, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 19.50it/s]

Generated embeddings with shape: (1, 384)
Retrieved 5 documents (after filtering)





[{'id': 'doc_1934b624_356',
  'content': 'Chapter 12\nOpen Challenges and Research\nDirections\n12.1 Scalability Issues\nThe fine-tuning of Large Language Models (LLMs) such as GPT-4, PaLM1 , and T52 has become a critical\narea of research, presenting several significant challenges and opening up new avenues for exploration,\nparticularly in scaling these processes efficiently. This discussion focuses on the two main aspects: the\nchallenges in scaling fine-tuning processes and potential research directions for scalable solutions.\n12.1.1 Challenges in Scaling Fine-Tuning Processes\n1. Computational Resources: Large-scale models such as GPT-3 and PaLM require enormous\ncomputational resources for fine-tuning. For instance, fine-tuning a 175-billion parameter model\nlike GPT-3 necessitates high-performance GPUs or TPUs capable of handling vast amounts of data\nand complex operations. The sheer volume of parameters translates to extensive computational',
  'metadata': {'total_pages': 115

# RAG Pipeline- VectorDB To LLM Output Generation

In [18]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
# from langchain.prompts import PromptTemplate
# from langchain.schema import HumanMessage, SystemMessage


In [21]:
### Initialize the Groq LLM (set your GROQ_API_KEY in environment)
groq_api_key = os.getenv("GROQ_API_KEY")

llm=ChatGroq(groq_api_key=groq_api_key,model_name="llama-3.1-8b-instant",temperature=0.1,max_tokens=1024)

## 2. Simple RAG function: retrieve context + generate response
def rag_simple(query,retriever,llm,top_k=3):
    ## retriever the context
    results=retriever.retrieve(query,top_k=top_k)
    context="\n\n".join([doc['content'] for doc in results]) if results else ""
    if not context:
        return "No relevant context found to answer the question."
    
    ## generate the answwer using GROQ LLM
    prompt=f"""Use the following context to answer the question concisely.
        Context:
        {context}

        Question: {query}

        Answer:"""
    
    response=llm.invoke([prompt.format(context=context,query=query)])
    return response.content

In [22]:
answer=rag_simple("Challenges in Scaling Fine-Tuning Processes",rag_retriever,llm)
print(answer)

Retrieving documents for query: 'Challenges in Scaling Fine-Tuning Processes'
Top K: 3, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.74it/s]


Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
The main challenges in scaling fine-tuning processes for Large Language Models (LLMs) include:

1. Computational Resources: Large-scale models require enormous computational resources, including high-performance GPUs or TPUs, to handle vast amounts of data and complex operations.
2. The sheer volume of parameters in large models, such as GPT-3, necessitates extensive computational resources.


In [23]:
answer=rag_simple("Types of LLM Fine-Tuning",rag_retriever,llm)
print(answer)

Retrieving documents for query: 'Types of LLM Fine-Tuning'
Top K: 3, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.65it/s]


Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
There are three types of LLM fine-tuning mentioned in the context:

1. Unsupervised Fine-Tuning
2. Supervised Fine-Tuning (SFT)
3. Instruction Fine-Tuning via Prompt Engineering


In [24]:
# --- Enhanced RAG Pipeline Features ---
def rag_advanced(query, retriever, llm, top_k=5, min_score=0.2, return_context=False):
    """
    RAG pipeline with extra features:
    - Returns answer, sources, confidence score, and optionally full context.
    """
    results = retriever.retrieve(query, top_k=top_k, score_threshold=min_score)
    if not results:
        return {'answer': 'No relevant context found.', 'sources': [], 'confidence': 0.0, 'context': ''}
    
    # Prepare context and sources
    context = "\n\n".join([doc['content'] for doc in results])
    sources = [{
        'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
        'page': doc['metadata'].get('page', 'unknown'),
        'score': doc['similarity_score'],
        'preview': doc['content'][:300] + '...'
    } for doc in results]
    confidence = max([doc['similarity_score'] for doc in results])
    
    # Generate answer
    prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {query}\n\nAnswer:"""
    response = llm.invoke([prompt.format(context=context, query=query)])
    
    output = {
        'answer': response.content,
        'sources': sources,
        'confidence': confidence
    }
    if return_context:
        output['context'] = context
    return output

# Example usage:
result = rag_advanced("Types of LLM Fine-Tuning", rag_retriever, llm, top_k=3, min_score=0.1, return_context=True)
print("Answer:", result['answer'])
print("Sources:", result['sources'])
print("Confidence:", result['confidence'])
print("Context Preview:", result['context'][:300])

Retrieving documents for query: 'Types of LLM Fine-Tuning'
Top K: 3, Score threshold: 0.1
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.87it/s]


Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Answer: There are three main types of LLM fine-tuning:

1. Unsupervised Fine-Tuning
2. Supervised Fine-Tuning (SFT)
3. Instruction Fine-Tuning via Prompt Engineering
Sources: [{'source': 'LLM_finetuning.pdf', 'page': 14, 'score': 0.5928163230419159, 'preview': '1.10.3 Overview of the Report Structure\nThe rest of the report provides a comprehensive understanding of fine-tuning LLMs. The main chapters\ninclude an in-depth look at the fine-tuning pipeline, practical applications, model alignment, evaluation\nmetrics, and challenges. The concluding sections disc...'}, {'source': 'LLM_finetuning.pdf', 'page': 2, 'score': 0.40476083755493164, 'preview': '1.6 Types of LLM Fine-Tuning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9\n1.6.1 Unsupervised Fine-Tuning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9\n1.6.2 Supervised Fine-Tuning (SFT) . . . . . . . . . . . . .

In [25]:
# --- Advanced RAG Pipeline: Streaming, Citations, History, Summarization ---
from typing import List, Dict, Any
import time

class AdvancedRAGPipeline:
    def __init__(self, retriever, llm):
        self.retriever = retriever
        self.llm = llm
        self.history = []  # Store query history

    def query(self, question: str, top_k: int = 5, min_score: float = 0.2, stream: bool = False, summarize: bool = False) -> Dict[str, Any]:
        # Retrieve relevant documents
        results = self.retriever.retrieve(question, top_k=top_k, score_threshold=min_score)
        if not results:
            answer = "No relevant context found."
            sources = []
            context = ""
        else:
            context = "\n\n".join([doc['content'] for doc in results])
            sources = [{
                'source': doc['metadata'].get('source_file', doc['metadata'].get('source', 'unknown')),
                'page': doc['metadata'].get('page', 'unknown'),
                'score': doc['similarity_score'],
                'preview': doc['content'][:120] + '...'
            } for doc in results]
            # Streaming answer simulation
            prompt = f"""Use the following context to answer the question concisely.\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer:"""
            if stream:
                print("Streaming answer:")
                for i in range(0, len(prompt), 80):
                    print(prompt[i:i+80], end='', flush=True)
                    time.sleep(0.05)
                print()
            response = self.llm.invoke([prompt.format(context=context, question=question)])
            answer = response.content

        # Add citations to answer
        citations = [f"[{i+1}] {src['source']} (page {src['page']})" for i, src in enumerate(sources)]
        answer_with_citations = answer + "\n\nCitations:\n" + "\n".join(citations) if citations else answer

        # Optionally summarize answer
        summary = None
        if summarize and answer:
            summary_prompt = f"Summarize the following answer in 2 sentences:\n{answer}"
            summary_resp = self.llm.invoke([summary_prompt])
            summary = summary_resp.content

        # Store query history
        self.history.append({
            'question': question,
            'answer': answer,
            'sources': sources,
            'summary': summary
        })

        return {
            'question': question,
            'answer': answer_with_citations,
            'sources': sources,
            'summary': summary,
            'history': self.history
        }

# Example usage:
adv_rag = AdvancedRAGPipeline(rag_retriever, llm)
result = adv_rag.query("Types of LLM Fine-Tuning", top_k=3, min_score=0.1, stream=True, summarize=True)
print("\nFinal Answer:", result['answer'])
print("Summary:", result['summary'])
print("History:", result['history'][-1])

Retrieving documents for query: 'Types of LLM Fine-Tuning'
Top K: 3, Score threshold: 0.1
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.70it/s]

Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Streaming answer:
Use the following context to answer the question concisely.
Context:
1.10.3 Overview of the Report Structure
The rest of the report provides a comprehensive understanding of fine-tuning LLMs. The main chapters
include an in-depth look at the fine-tuning pipeline, practical applications, model alignment, evaluation
metr




ics, and challenges. The concluding sections discuss the evolution of fine-tuning techniques, highlight
ongoing research challenges, and provide insights for researchers and practitioners.
13

1.6 Types of LLM Fine-Tuning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
1.6.1 Unsupervised Fine-Tuning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
1.6.2 Supervised Fine-Tuning (SFT) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9
1.6.3 Instruction Fine-Tuning via Prompt Engineering . . . . . . . . . . . . . . . . . . . 10
1.7 Pre-training vs Fine-tuning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10
1.8 Importance of Fine-Tuning LLMs . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 10
1.9 Retrieval Augmented Generation (RAG) . . . . . . . . . . . . . . . . . . . . . . . . . . . . 11
1.9.1 Traditional RAG Pipeline and Steps . . . . . . . . . . . . . . . . . . . . . . . . . . 11
1.9.2 Benefits of