# Abstraction

In [2]:
from abc import ABC, abstractmethod
from typing import List, Dict, Optional
from datetime import datetime
import hashlib
import uuid

# Abstract base class for document processing
class DocumentProcessor(ABC):
    """Abstract base for documents that carry content plus descriptive metadata.

    Concrete subclasses supply the ``content`` property (getter and setter)
    together with ``validate`` and ``process``. This base class assigns a
    UUID4 identifier and a creation timestamp, assembles the metadata
    dictionary, and keeps a SHA-256 digest of the content that
    ``verify_integrity`` compares against.
    """

    def __init__(self, title: str, author: str):
        """Store the title and author and initialise the bookkeeping fields.

        Args:
            title: Reported under the "title" key of ``get_metadata``.
            author: Reported under the "author" key of ``get_metadata``.
        """
        self._id = str(uuid.uuid4())
        self._created_at = datetime.now()
        self._title = title
        self._author = author
        # Remains None until something calls _update_hash.
        self._content_hash: Optional[str] = None
        # Extra entries contributed by subclasses; merged in by get_metadata.
        self._metadata: Dict[str, str] = {}

    # ---- interface that subclasses must implement ----

    @property
    @abstractmethod
    def content(self) -> str:
        """The document's textual content."""
        ...

    @content.setter
    @abstractmethod
    def content(self, value: str):
        # Abstract setter: subclasses decide how the content is stored.
        ...

    @abstractmethod
    def validate(self) -> bool:
        """Return True when the document is considered well-formed."""
        ...

    @abstractmethod
    def process(self) -> str:
        """Process the document and return a textual result."""
        ...

    # ---- shared behaviour ----

    @staticmethod
    def _digest(text: str) -> str:
        """Return the hexadecimal SHA-256 digest of ``text``."""
        return hashlib.sha256(text.encode()).hexdigest()

    def get_metadata(self) -> Dict[str, str]:
        """Build a fresh metadata dictionary for this document.

        The core keys ("id", "title", "author", "created_at") come first;
        entries from ``self._metadata`` are applied afterwards, so a
        subclass entry with the same key replaces the core value.
        """
        info = {
            "id": self._id,
            "title": self._title,
            "author": self._author,
            "created_at": self._created_at.isoformat(),
        }
        info.update(self._metadata)
        return info

    def _update_hash(self, content: str):
        """Remember the digest of ``content`` for later integrity checks."""
        self._content_hash = self._digest(content)

    def verify_integrity(self) -> bool:
        """Report whether the current content still matches the stored digest.

        Returns False when no digest has been stored yet, because the
        freshly computed digest string never equals None.
        """
        fresh = self._digest(self.content)
        return fresh == self._content_hash

In [3]:
# Concrete class for text documents
class TextDocument(DocumentProcessor):
    """Plain-text document whose content is held in memory as a string.

    Assigning ``content`` refreshes the integrity hash and the "word_count"
    metadata entry. The validation limit and the summary length are class
    attributes so that a subclass can adjust them without overriding methods.
    """

    # Largest len(content) that validate() accepts.
    MAX_CONTENT_LENGTH = 10000
    # Number of leading characters that process() includes in its summary.
    SUMMARY_LENGTH = 50

    def __init__(self, title: str, author: str):
        """Create an empty text document.

        Args:
            title: Document title.
            author: Document author.
        """
        super().__init__(title, author)
        self._content = ""
        self._metadata["type"] = "text"

    @property
    def content(self) -> str:
        """The document text."""
        return self._content

    @content.setter
    def content(self, value: str):
        self._content = value
        self._update_hash(value)
        # Word count is whitespace-delimited and stored as a string,
        # like every other metadata value.
        self._metadata["word_count"] = str(len(value.split()))

    def validate(self) -> bool:
        """Return True when the text is non-blank and within the length limit.

        Content that is empty or whitespace-only is rejected, as is content
        longer than ``MAX_CONTENT_LENGTH`` characters.
        """
        if not self._content.strip():
            return False
        return len(self._content) <= self.MAX_CONTENT_LENGTH

    def process(self) -> str:
        """Return a two-line report: the title and a short content summary.

        The summary is the first ``SUMMARY_LENGTH`` characters of the
        content. An ellipsis is appended only when the content was actually
        truncated, so short documents are shown verbatim.

        Raises:
            ValueError: If ``validate()`` returns False.
        """
        if not self.validate():
            raise ValueError("Invalid text document")
        summary = self._content[: self.SUMMARY_LENGTH]
        if len(self._content) > self.SUMMARY_LENGTH:
            summary += "..."
        return f"Processed text document: {self._title}\nSummary: {summary}"

In [4]:
# Concrete class for PDF documents
class PDFDocument(DocumentProcessor):
    """Simulated PDF document: a content string plus a declared page count."""

    def __init__(self, title: str, author: str, page_count: int):
        """Create an empty PDF document.

        Args:
            title: Document title.
            author: Document author.
            page_count: Number of pages; also recorded (as a string) under
                the "page_count" metadata key.
        """
        super().__init__(title, author)
        self._page_count = page_count
        self._content = ""  # stands in for real PDF content
        self._metadata.update({"type": "pdf", "page_count": str(page_count)})

    @property
    def content(self) -> str:
        """The simulated PDF content."""
        return self._content

    @content.setter
    def content(self, value: str):
        # Store the new content and refresh the integrity hash with it.
        self._content = value
        self._update_hash(value)

    def validate(self) -> bool:
        """Return True when the page count is positive and content is non-empty."""
        if self._page_count <= 0 or not self._content:
            return False
        return True

    def process(self) -> str:
        """Return a two-line report with the title and the page count.

        Raises:
            ValueError: If ``validate()`` returns False.
        """
        if self.validate():
            return f"Processed PDF document: {self._title}\nPages: {self._page_count}"
        raise ValueError("Invalid PDF document")

In [5]:
# Document management system
class DocumentManager:
    """Keeps an ordered list of documents and runs batch operations on them."""

    def __init__(self):
        """Start with an empty document list."""
        self.documents: List[DocumentProcessor] = []

    def add_document(self, doc: DocumentProcessor):
        """Append ``doc`` to the end of the managed list."""
        self.documents.append(doc)

    def process_all(self) -> List[str]:
        """Process every managed document in insertion order.

        Returns:
            A list of messages. Each successfully processed document
            contributes its ``process()`` result, followed by a warning line
            if its integrity check fails. A document whose processing raises
            ``ValueError`` contributes an error line instead.
        """
        report: List[str] = []
        for document in self.documents:
            try:
                outcome = document.process()
                report.append(outcome)
                intact = document.verify_integrity()
                if not intact:
                    report.append(f"Warning: Integrity check failed for {document.get_metadata()['id']}")
            except ValueError as exc:
                report.append(f"Error processing {document.get_metadata()['id']}: {str(exc)}")
        return report

    def get_document_info(self, doc_id: str) -> Optional[Dict[str, str]]:
        """Return the metadata of the first document whose id equals ``doc_id``.

        Returns None when no managed document has that id.
        """
        found = next(
            (document for document in self.documents
             if document.get_metadata()["id"] == doc_id),
            None,
        )
        if found is None:
            return None
        return found.get_metadata()

In [6]:
# Demonstration
def main():
    """Run the demo: build two documents, process them, and print the results."""
    # One text document and one PDF document, each given some content.
    text_doc = TextDocument("Research Paper", "Dr. Smith")
    text_doc.content = "This is a sample research paper about Python OOP concepts"

    pdf_doc = PDFDocument("User Manual", "Tech Corp", 15)
    pdf_doc.content = "PDF content about product usage"

    # Register both documents with a manager, text document first.
    manager = DocumentManager()
    for document in (text_doc, pdf_doc):
        manager.add_document(document)

    # Batch-process and show each result line.
    print("Processing documents:")
    for line in manager.process_all():
        print(line)

    # Dump every metadata entry, headed by the document id.
    print("\nDocument metadata:")
    for document in manager.documents:
        print(f"ID: {document.get_metadata()['id']}")
        for key, value in document.get_metadata().items():
            print(f"  {key}: {value}")

    # Reassign the content through the property setter. The setter refreshes
    # the stored hash, so the integrity check below still reports True.
    print("\nTesting integrity:")
    text_doc.content = "Modified content"
    print(f"Text document integrity: {text_doc.verify_integrity()}")


if __name__ == "__main__":
    main()

Processing documents:
Processed text document: Research Paper
Summary: This is a sample research paper about Python OOP c...
Processed PDF document: User Manual
Pages: 15

Document metadata:
ID: cfb422f0-5ab6-49a6-a9a2-86aab837f63e
  id: cfb422f0-5ab6-49a6-a9a2-86aab837f63e
  title: Research Paper
  author: Dr. Smith
  created_at: 2025-03-01T16:33:05.053234
  type: text
  word_count: 10
ID: cfc1fe73-f150-4ee9-a6fd-37ec6d8315f1
  id: cfc1fe73-f150-4ee9-a6fd-37ec6d8315f1
  title: User Manual
  author: Tech Corp
  created_at: 2025-03-01T16:33:05.053352
  type: pdf
  page_count: 15

Testing integrity:
Text document integrity: True
