In [None]:
# Install dependencies
!pip install python-docx langchain faiss-cpu sentence-transformers

In [None]:
# Upload your .docx file
# NOTE(review): assumes the notebook runs inside Google Colab, since the import
# below comes from the `google.colab` package.
from google.colab import files
# The result of the upload call is kept in `uploaded`.
uploaded = files.upload()

# Parse .docx content
from docx import Document

def load_docx(file_path):
    """Return the text of every non-blank paragraph in a .docx, newline-joined.

    Args:
        file_path: Passed unchanged to ``Document`` to open the file.

    Returns:
        A single string made of the ``text`` of each entry in the document's
        ``paragraphs`` whose text is not empty or whitespace-only, joined
        with ``"\\n"``.
    """
    kept_texts = []
    for paragraph in Document(file_path).paragraphs:
        paragraph_text = paragraph.text
        # Skip paragraphs that contain nothing but whitespace.
        if paragraph_text.strip():
            kept_texts.append(paragraph_text)
    return "\n".join(kept_texts)

# Load the document that was actually uploaded instead of assuming a fixed file name.
# `files.upload()` returns a dict keyed by the uploaded file names, so take the first
# .docx from it; fall back to the original sample name when nothing suitable was
# uploaded (for example, the file is already present in the working directory).
DEFAULT_DOCX_PATH = "sop_manufacturing_dummy.docx"
uploaded_docx_names = [name for name in uploaded if name.lower().endswith(".docx")]
docx_path = uploaded_docx_names[0] if uploaded_docx_names else DEFAULT_DOCX_PATH
print(f"Loading: {docx_path}")

doc_text = load_docx(docx_path)
print("Document Loaded:\n", doc_text[:500])  # Preview

In [None]:
# Break the loaded document text into chunks for embedding
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings, named so they can be tuned in one place.
CHUNK_SIZE = 300
CHUNK_OVERLAP = 50

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_text(doc_text)
print(f"Number of chunks: {len(chunks)}")

# Embed the chunks and build a FAISS vector store from them
# NOTE(review): these `langchain.*` import paths may differ between LangChain
# releases — confirm against the installed version.
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
# The vector store is created directly from the chunk strings and the embedding model.
db = FAISS.from_texts(chunks, embedding_model)

# QA with OpenAI (or you can use your local LLM later)
!pip install openai
import os
os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"  # Replace with your key

from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

# Wire the vector store (as a retriever) and the LLM into a retrieval QA chain.
retriever = db.as_retriever()
llm = OpenAI(temperature=0)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# Pose a sample question to the chain and show the exchange
question = "What should be done before servicing the equipment?"
answer = qa_chain.run(question)
print(f"Q: {question}\nA: {answer}")
