In [None]:
pip install langchain langchain-community pdfplumber pymupdf faiss-cpu sentence-transformers

In [None]:
import os
import fitz  # PyMuPDF for PDF parsing
import pdfplumber
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of the PDF at ``pdf_path``, or ``""`` if none can be read.

    PyMuPDF (``fitz``) is tried first. pdfplumber is used as a fallback when
    PyMuPDF raises, or when it yields only whitespace. Errors from either
    backend are printed rather than raised.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: With PyMuPDF, every page's text followed by a newline. With
        pdfplumber, the non-empty page texts joined by newlines. An empty
        string when neither backend produces any text.
    """
    try:
        # Using PyMuPDF (fitz)
        with fitz.open(pdf_path) as doc:
            text = "".join(page.get_text("text") + "\n" for page in doc)
        if text.strip():
            return text
        # Only newlines/blanks came back; give the second backend a chance.
        print("PyMuPDF found no text, trying pdfplumber")
    except Exception as e:
        print("Error with PyMuPDF, trying pdfplumber:", e)

    try:
        # Using pdfplumber as an alternative
        with pdfplumber.open(pdf_path) as pdf:
            # Call extract_text() once per page and filter afterwards.
            page_texts = [page.extract_text() for page in pdf.pages]
    except Exception as e:
        print("Error extracting text from PDF:", e)
        # Never hand back a partial result from a backend that failed midway.
        return ""
    return "\n".join(chunk for chunk in page_texts if chunk)

In [None]:
# Function to store text in FAISS vector database
def store_in_vector_db(
    text,
    index_path="faiss_index",
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    chunk_size=500,
    chunk_overlap=50,
):
    """Chunk ``text``, embed the chunks and save them as a FAISS index on disk.

    Args:
        text: The full text to index.
        index_path: Directory passed to ``save_local`` for the saved index.
        model_name: Embedding model name passed to ``HuggingFaceEmbeddings``.
            Use the same model when loading the index again.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``RecursiveCharacterTextSplitter``.

    Returns:
        The FAISS vector store that was built and saved.

    Raises:
        ValueError: If splitting ``text`` produces no chunks.
    """
    # Split text into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    texts = text_splitter.split_text(text)
    if not texts:
        # FAISS.from_texts needs at least one text to size the index; fail with
        # a clear message before paying for the embedding model load.
        raise ValueError(
            "No text chunks to index: the input text is empty or whitespace-only."
        )

    # Load embeddings model
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)

    # Store text in FAISS vector store
    vector_store = FAISS.from_texts(texts, embedding_model)

    # Save FAISS index for later use
    vector_store.save_local(index_path)
    print("Vector database saved!")
    return vector_store


In [None]:
# Main function
def main(pdf_path="/content/ncert maths.pdf"):
    """Extract the text of a PDF and store it in the FAISS vector database.

    Progress and failures are reported with ``print``; nothing is returned.

    Args:
        pdf_path: Path of the PDF to index. The default is the path this
            notebook was originally written against.
    """
    if not os.path.exists(pdf_path):
        print("File not found!")
        return

    print("Extracting text from PDF...")
    text = extract_text_from_pdf(pdf_path)

    # Whitespace-only text has nothing to index, so treat it as a failed
    # extraction instead of passing it on to the vector store.
    if text and text.strip():
        print("Text extracted successfully!")
        print("Storing in vector database...")
        store_in_vector_db(text)
    else:
        print("Failed to extract text.")

if __name__ == "__main__":
    main()


In [None]:
# FAISS is imported from langchain_community, the same package the embeddings
# come from; the `langchain.vectorstores` path is a deprecated alias.
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# Load the FAISS index saved by store_in_vector_db().
# SECURITY: load_local() unpickles part of the saved index, and unpickling can
# execute arbitrary code. allow_dangerous_deserialization=True is an explicit
# opt-in, acceptable here only because "faiss_index" was written by this
# notebook. Never enable it for an index obtained from an untrusted source.
vector_store = FAISS.load_local(
    "faiss_index",
    # Same embedding model that store_in_vector_db() used to build the index.
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    allow_dangerous_deserialization=True,
)

# Test query
query = "what are types of relations?"
results = vector_store.similarity_search(query, k=3)
print(results)

[Document(id='0fa0d73b-313a-4147-9dbe-6d744b549f1b', metadata={}, page_content='and their graphs. The concept of the term ‘relation’ in\nmathematics has been drawn from the meaning of relation\nin English language, according to which two objects or\nquantities are related if there is a recognisable connection\nor link between the two objects or quantities. Let  A be\nthe set of students of Class XII of a school and B be the\nset of students of Class XI of the same school. Then some\nof the examples of relations from A to B are\n(i)\n{(a, b) ∈A × B: a is brother of b},\n(ii)'), Document(id='cca1d164-796f-47f2-b869-4f4a73d3d6e4', metadata={}, page_content='of A × B.\nIf (a, b) ∈ R, we say that a is related to b under the relation R and we write as\na R b. In general, (a, b) ∈ R, we do not bother whether there is a recognisable\nconnection or link between a and b. As seen in Class XI, functions are special kind of\nrelations.\nIn this chapter, we will study different types of relations an