# Installations

In [None]:
# Install Ollama by downloading its install script and running it with `sh`.
# NOTE(review): this executes a remote script without inspecting it first;
# acceptable in a throwaway notebook runtime, review before using elsewhere.
!curl -fsSL https://ollama.com/install.sh | sh
# Download the model that the RAG class uses for both chat and embeddings.
# NOTE(review): `ollama pull` presumably needs the Ollama server to be
# running in this environment — confirm.
!ollama pull mistral:instruct

In [None]:
%pip install pypdf -q
%pip install faiss-cpu -q
%pip install -U langchain-community
%pip install langchain-ollama
%pip install colorama

# Imports

In [2]:
from typing import List, Dict, Optional
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain.document_loaders import (
    PyPDFLoader,
    CSVLoader,
    Docx2txtLoader,
    UnstructuredExcelLoader
)
from langchain.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from colorama import Fore

# RAG

In [3]:
class RAG:
    """Small retrieval-augmented generation pipeline built on Ollama and FAISS.

    Documents are loaded and split into chunks, embedded into a FAISS vector
    store, and the chunks most similar to a query are passed to the chat
    model as context for its answer.
    """

    def __init__(self):
        """Create the embedding model, chat model and text splitter."""
        self.embedder: OllamaEmbeddings = OllamaEmbeddings(model="mistral:instruct")
        self.llm: ChatOllama = ChatOllama(model="mistral:instruct")
        self.text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        # No index exists until create_vectorstore() has been called.
        self.vector_db: Optional[FAISS] = None

    @staticmethod
    def _loader_for(path: str):
        """Return a loader matching the extension of *path*, or None.

        Extensions are compared case-insensitively so that e.g. ``REPORT.PDF``
        is handled like ``report.pdf``.
        """
        lowered = path.lower()
        if lowered.endswith('.pdf'):
            return PyPDFLoader(path)
        if lowered.endswith('.csv'):
            return CSVLoader(path)
        if lowered.endswith('.docx'):
            return Docx2txtLoader(path)
        if lowered.endswith('.xlsx'):
            return UnstructuredExcelLoader(path)
        return None

    def load_documents(self, paths: List[str]) -> List[Document]:
        """Load and split documents from various file formats.

        Supported formats:
        - PDF (.pdf) - uses PyPDFLoader
        - CSV (.csv) - uses CSVLoader
        - Word (.docx) - uses Docx2txtLoader
        - Excel (.xlsx) - uses UnstructuredExcelLoader

        Loading is best-effort: a file that has an unsupported extension or
        that fails to load is reported on stdout and skipped, so one bad file
        does not abort the whole batch.

        Args:
            paths: File paths to load.

        Returns:
            The chunks produced by ``self.text_splitter`` for every file that
            loaded successfully; empty if none did.
        """
        documents: List[Document] = []
        for path in paths:
            try:
                loader = self._loader_for(path)
                if loader is None:
                    # Say so explicitly instead of dropping the file silently.
                    print(f"Skipping {path}: unsupported file type")
                    continue
                documents.extend(loader.load_and_split(self.text_splitter))
            except Exception as e:
                # Deliberately broad: any loader failure skips just this file.
                print(f"Couldn't process {path}: {e}")
        return documents

    def create_vectorstore(self, documents: List[Document]) -> None:
        """Build the FAISS vector store from *documents*.

        The result is stored in ``self.vector_db``, replacing any previous
        store.

        Raises:
            ValueError: If *documents* is empty.
        """
        if not documents:
            raise ValueError("No documents were successfully loaded")
        self.vector_db = FAISS.from_documents(documents, self.embedder)

    def analyze(self, pdf_paths: List[str], query: str):
        """Run the complete pipeline and answer *query* from the given files.

        Despite the parameter name, every format accepted by
        load_documents() may be passed. The vector store is rebuilt from the
        files on each call.

        Args:
            pdf_paths: Paths of the documents to use as the knowledge base.
            query: The question to answer.

        Returns:
            The ``content`` of the chat model's response, or the string
            "No valid documents could be loaded" when no file could be loaded.
        """
        # Reset the colour afterwards so that later output is not tinted blue.
        print(f"{Fore.LIGHTBLUE_EX}\nAnalyzing {len(pdf_paths)} documents...{Fore.RESET}")

        # 1. Load documents
        documents = self.load_documents(pdf_paths)
        if not documents:
            return "No valid documents could be loaded"

        # 2. Create vector store
        self.create_vectorstore(documents)

        # 3. Retrieve the three chunks most similar to the query
        results = self.vector_db.similarity_search(query, k=3)
        context = "\n\n".join(doc.page_content for doc in results)

        # 4. Generate answer
        prompt = f"""
        QUESTION: {query}
        
        RELEVANT INFORMATION:
        {context}
        
        Please provide a comprehensive answer to the question using the above information.
        """
        response = self.llm.invoke(prompt)

        print("\n📝 RESULTS:\n")
        return response.content


In [4]:
# Inputs: the files that form the knowledge base and the two questions to ask.
documents: list[str] = ['java.pdf', 'python.pdf', 'rust.pdf']
prompt1: str = "tell me about python"
prompt2: str = "what is the difference between python , java and rust ?"

# One pipeline instance serves both questions.
analyzer: RAG = RAG()

# First question; the extra '\n' argument leaves a gap before the next run.
result1 = analyzer.analyze(documents, prompt1)
print(result1, '\n')

# Second question over the same set of files.
result2 = analyzer.analyze(documents, prompt2)
print(result2)

[94m 
Analyzing 3 documents...



📝 RESULTS:

 Python is a versatile and influential programming language that has played a significant role in modern computing, particularly in education. Its creator designed Python with a focus on simplicity and readability, aiming for a language that would allow programmers to write code more naturally. This philosophy has contributed to Python's gentle learning curve, making it an ideal choice for introducing students to programming.

Python stands out from other programming languages due to its clean, straightforward code and the absence of strict syntax and verbose structures. It encourages experimentation and quick learning, empowering a new generation of programmers by lowering the barrier to entry, whether they are formal students or self-learners.

One aspect where Python may not perform as well is in performance-critical tasks like operating system development or real-time applications, given that it is an interpreted language and generally slower than compiled languages su