In [None]:
!pip install -U \langchain \langchain-community \langchain-text-splitters \unstructured \
python-docx \
python-pptx \
openpyxl \
pandas \
msoffcrypto-tool \
faiss-cpu \
sentence-transformers
!pip install -U langchain langchain-community langchain-core
!pip install rank_bm25 sentence-transformers langchain chromadb pypdf faiss-cpu  langchain_community scikit-learn matplotlib seaborn numpy mistralai langchain-mistralai langchain_classic


In [None]:
import os
import pandas as pd
from langchain_community.document_loaders import (
    PyPDFLoader,
    TextLoader,
    UnstructuredWordDocumentLoader,
    UnstructuredPowerPointLoader,
    UnstructuredExcelLoader,
    DataFrameLoader
)

def load_documents(file_paths):
    """Load every supported file in ``file_paths`` into one flat list.

    The loader is picked from the lower-cased file extension:

    * ``.pdf``             -> ``PyPDFLoader``
    * ``.txt``             -> ``TextLoader`` (read as UTF-8)
    * ``.doc`` / ``.docx`` -> ``UnstructuredWordDocumentLoader``
    * ``.ppt`` / ``.pptx`` -> ``UnstructuredPowerPointLoader``
    * ``.xls`` / ``.xlsx`` -> ``UnstructuredExcelLoader``; if that raises an
      ordinary exception, the workbook is read with ``pandas.read_excel`` and
      wrapped in a ``DataFrameLoader`` that uses the first column as the
      page content.

    Paths with any other extension are skipped without error.

    Args:
        file_paths: Iterable of file path strings.

    Returns:
        list: Everything returned by the loaders' ``load()`` calls,
        concatenated in input order. Empty when nothing was loadable.

    Raises:
        Exception: Whatever the selected loader raises propagates to the
            caller; only a failure of ``UnstructuredExcelLoader`` triggers
            the pandas fallback (whose own errors also propagate).
    """
    # Function-local import so this block needs no new file-level import.
    import warnings

    all_docs = []

    for path in file_paths:
        ext = os.path.splitext(path)[1].lower()

        if ext == ".pdf":
            docs = PyPDFLoader(path).load()

        elif ext == ".txt":
            docs = TextLoader(path, encoding="utf-8").load()

        elif ext in (".doc", ".docx"):
            docs = UnstructuredWordDocumentLoader(path).load()

        elif ext in (".ppt", ".pptx"):
            docs = UnstructuredPowerPointLoader(path).load()

        elif ext in (".xls", ".xlsx"):
            try:
                docs = UnstructuredExcelLoader(path).load()
            except Exception as exc:
                # `except Exception` instead of a bare `except:` so that
                # KeyboardInterrupt / SystemExit are not swallowed, and the
                # original failure is reported rather than silently hidden.
                warnings.warn(
                    f"UnstructuredExcelLoader failed for {path!r} ({exc!r}); "
                    "falling back to pandas.read_excel.",
                    stacklevel=2,
                )
                df = pd.read_excel(path)
                if len(df.columns) == 0:
                    # A sheet without columns has no first column to use as
                    # page content; indexing columns[0] would raise IndexError.
                    docs = []
                else:
                    content_col = df.columns[0]
                    # NOTE(review): assumes page content must be text, so
                    # numeric / missing cells are stringified before loading
                    # -- confirm against the DataFrameLoader in use.
                    df[content_col] = df[content_col].astype(str)
                    docs = DataFrameLoader(
                        df, page_content_column=content_col
                    ).load()

        else:
            # Unsupported extension: skip this path.
            continue

        all_docs.extend(docs)

    return all_docs


In [None]:
# Sample inputs: one file for each format family that load_documents dispatches on
# (PDF, plain text, Word, PowerPoint, Excel).
file_paths = [
    "/content/Day 2.pdf",
    "/content/Interstellar Story.txt",
    "/content/Facial Expression Detection.docx",
    "/content/Intruder-Detection-System-using-.pptx",
    "/content/3_Logical_Functions.xlsx",
]

# Load everything into a single list and report how many documents came back.
docs = load_documents(file_paths)
loaded_count = len(docs)

print("Documents loaded:", loaded_count)


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter settings kept together so they are easy to tune in one place.
splitter_options = {"chunk_size": 300, "chunk_overlap": 50}
splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded documents and report the resulting chunk count.
chunks = splitter.split_documents(docs)
chunk_count = len(chunks)
print("Total chunks:", chunk_count)

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model used to vectorise the chunks.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Build the FAISS store from the chunks and expose it as a retriever
# configured with k=4.
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=4))


In [None]:
import os
from getpass import getpass

# Prompt only when the environment holds no non-empty key; getpass reads the
# value without echoing it, so it does not appear in the cell output.
api_key_present = bool(os.getenv("MISTRAL_API_KEY"))
if not api_key_present:
    os.environ["MISTRAL_API_KEY"] = getpass("Enter your Mistral API Key: ")

In [None]:

from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.document_loaders import UnstructuredFileLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import UnstructuredPowerPointLoader
from langchain_community.document_loaders import UnstructuredExcelLoader
from langchain_community.document_loaders import UnstructuredWordDocumentLoader

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS,Chroma
from langchain_classic.schema import Document
from langchain_core.prompts import ChatPromptTemplate
import os

In [None]:
import langchain
# Show which langchain release the kernel imported.
langchain_version = langchain.__version__
print(langchain_version)


In [None]:
from langchain_mistralai import ChatMistralAI

# Chat model that generates the answers; only the model name is configured here.
mistral_model_name = "mistral-tiny"
llm = ChatMistralAI(model=mistral_model_name)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains import create_retrieval_chain



# Prompt text with two placeholders: {context} and {input}.
rag_prompt_template = """
Answer the following question based only on the provided context:


{context}


Question: {input}
"""
prompt = ChatPromptTemplate.from_template(rag_prompt_template)





In [None]:
# Combine the LLM and the prompt into the chain that consumes retrieved documents.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [None]:
import os

# Confirm the key is present WITHOUT displaying it: a bare
# `os.environ["MISTRAL_API_KEY"]` as the last expression of a cell renders the
# secret into the notebook output, where it is saved and shared with the file.
if "MISTRAL_API_KEY" not in os.environ:
    # Same exception type the plain lookup raised for a missing key.
    raise KeyError("MISTRAL_API_KEY")
print("MISTRAL_API_KEY is set (value hidden).")

In [None]:
# Join the retriever and the document chain into one retrieval chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

# Read the question interactively and pass it to the chain under the "input" key.
user_query = input("Enter Your Question : ")
chain_inputs = {"input": user_query}
response = rag_chain.invoke(chain_inputs)

# Header first, then the text stored under the "answer" key of the result.
print("\n ----THE RESPONSE IS----")
answer_text = response["answer"]
print(answer_text)