## Setup and Import Libraries

In [1]:
from langchain_community.document_loaders import Docx2txtLoader, UnstructuredWordDocumentLoader

## Method 1: Using Docx2txtLoader

In [2]:
# Read the proposal with Docx2txtLoader, then report how many documents came
# back along with a short preview and the metadata of the first one.
try:
    loader = Docx2txtLoader("data/word_files/proposal.docx")
    documents = loader.load()
    print(f"  Loaded {len(documents)} documents")

    # Looked up only after the count is printed, so an empty result still
    # reports its size before the failure lands in the handler below.
    first_document = documents[0]
    print(f"  Content Preview: {first_document.page_content[:100]}")
    print(f"  Metadata: {first_document.metadata}")
except Exception as load_error:
    # Keep the notebook running: report the problem instead of raising.
    print(f"Error: {load_error}")

  Loaded 1 documents
  Content Preview: Project Proposal: RAG Implementation

Executive Summary

This proposal outlines the implementation o
  Metadata: {'source': 'data/word_files/proposal.docx'}


## Method 2: Using UnstructuredWordDocumentLoader

In [5]:
# Parse the same proposal with UnstructuredWordDocumentLoader in "elements"
# mode and summarise the first three returned documents.
try:
    loader = UnstructuredWordDocumentLoader(
        file_path="data/word_files/proposal.docx",
        mode="elements",
    )
    documents = loader.load()

    print(f"Loaded {len(documents)} documents")
    for i, document in enumerate(documents[:3]):
        print(f"  \nElement {i + 1}")
        print(f"  Content Preview: {document.page_content[:100]}")
        # Fall back to 'unknown' when the metadata has no 'category' entry.
        element_type = document.metadata.get('category', 'unknown')
        print(f"  Type: {element_type}")
except Exception as load_error:
    # Keep the notebook running: report the problem instead of raising.
    print(f"Error: {load_error}")

Loaded 20 documents
  
Element 1
  Content Preview: Project Proposal: RAG Implementation
  Type: Title
  
Element 2
  Content Preview: Executive Summary
  Type: Title
  
Element 3
  Content Preview: This proposal outlines the implementation of a Retrieval-Augmented Generation system for our organiz
  Type: NarrativeText
