In [1]:
from docxlatex import Document
import re
import anthropic
from typing import List, Tuple

def extract_document_with_equations(docx_path: str) -> Tuple[str, List[str]]:
    """Extract a document's text and the equations embedded in it.

    The file is converted with ``docxlatex`` using custom equation delimiters,
    and every delimited span is then collected from the converted text.

    Args:
        docx_path: Path of the .docx file to convert.

    Returns:
        A ``(full_text, equations)`` tuple. ``full_text`` is whatever
        ``Document.get_text()`` returns; ``equations`` holds the equation
        bodies found in it, in document order, with surrounding whitespace
        stripped.
    """
    # Custom equation markers; the same values drive both the conversion and
    # the search pattern below so the two can never drift apart.
    inline_delimiter = r"\(%"
    block_delimiter = r"\)%"

    doc = Document(docx_path)
    doc.inline_delimiter = inline_delimiter
    doc.block_delimiter = block_delimiter

    full_text = doc.get_text()

    # NOTE(review): assumes docxlatex writes the *same* delimiter before and
    # after an equation (there is only one attribute per equation kind) --
    # confirm against the docxlatex documentation.
    # The block alternative is listed first so that a block delimiter which
    # begins with the inline delimiter would still be recognised correctly.
    equation_pattern = re.compile(
        "|".join(
            f"{re.escape(delim)}(.*?){re.escape(delim)}"
            for delim in (block_delimiter, inline_delimiter)
        ),
        re.DOTALL,  # equations may span several lines
    )

    # Exactly one capture group participates per match; lastindex selects it.
    equations = [
        match.group(match.lastindex).strip()
        for match in equation_pattern.finditer(full_text)
    ]

    return full_text, equations

def analyze_with_claude(equation: str, context: str = "",
                       domain: str = "AI workflows",
                       model: str = "claude-3-opus-20240229") -> str:
    """Ask Claude to analyze one equation with domain-specific framing.

    Args:
        equation: The equation text to analyze.
        context: Surrounding document text; only its first 500 characters are
            included in the prompt.
        domain: Application domain named in the prompt.
        model: Identifier of the Claude model to query.

    Returns:
        The concatenated text of the text blocks in Claude's reply.
    """
    # No api_key argument: the SDK then reads the ANTHROPIC_API_KEY environment
    # variable, which keeps credentials out of the notebook source.
    client = anthropic.Anthropic()

    prompt = f"""Analyze this equation in the context of {domain}:
    
    {equation}
    
    Context from document: {context[:500]}...
    
    Provide:
    1. Mathematical purpose
    2. Key variables and their significance
    3. Potential applications in {domain}
    4. Suggested Python implementation approach
    """

    response = client.messages.create(
        model=model,
        max_tokens=1000,
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ]
    )

    # Join every text block instead of indexing content[0], so an empty reply
    # or a leading non-text block does not raise.
    return "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )

def process_document(docx_path: str, domain: str = "financial systems",
                     context_chars: int = 500) -> None:
    """Run the full pipeline: extract equations, analyze each, print results.

    Args:
        docx_path: Path of the .docx file to process.
        domain: Application domain forwarded to ``analyze_with_claude``.
        context_chars: Size of the text window passed as context for each
            equation. ``analyze_with_claude`` only uses the first 500
            characters of its context, so larger values have no extra effect.

    Returns:
        None. Results are printed to standard output.
    """
    full_text, equations = extract_document_with_equations(docx_path)

    print(f"Found {len(equations)} equations in document")

    # Equations arrive in document order, so searching forward from the end of
    # the previous hit keeps repeated equations mapped to their own positions.
    cursor = 0
    for idx, eq in enumerate(equations, 1):
        position = full_text.find(eq, cursor)
        if position == -1:
            position = full_text.find(eq)

        if position == -1:
            # Equation text not locatable: fall back to the whole document.
            context = full_text
        else:
            cursor = position + len(eq)
            # Centre the window on the equation; passing the whole document
            # would give every equation the same leading characters.
            lead = max(0, (context_chars - len(eq)) // 2)
            start = max(0, position - lead)
            context = full_text[start:start + context_chars]

        print(f"\nEquation {idx}: {eq}")
        analysis = analyze_with_claude(eq, context, domain)
        print(f"Claude Analysis:\n{analysis}")
        print("-" * 80)

if __name__ == "__main__":
    # Demo run: analyze a sample report using the healthcare analytics domain.
    process_document(docx_path="healthcare_report.docx", domain="healthcare analytics")


ModuleNotFoundError: No module named 'anthropic'

In [2]:
# Install the dependencies first (run once; %pip targets the active kernel's environment):
# %pip install docxlatex anthropic

import json
import os
import re

import anthropic
import docxlatex

# Configuration
DOCX_FILE = "your_document.docx"
# Read the key from the environment so no credential is stored in the notebook.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
# NOTE(review): confirm this model identifier is still served for your account.
CLAUDE_MODEL = "claude-3-sonnet-20240229"
INLINE_DELIMITER = "[[EQUATION]]"
BLOCK_DELIMITER = "[[BLOCK_EQUATION]]"
SYSTEM_PROMPT = (
    "Analyze this mathematical equation and provide technical insights. "
    "Respond with a single valid JSON object and no surrounding text."
)

if not ANTHROPIC_API_KEY:
    raise RuntimeError(
        "Set the ANTHROPIC_API_KEY environment variable before running this cell."
    )

# 1. Extract document content with equations
doc = docxlatex.Document(DOCX_FILE)
doc.inline_delimiter = INLINE_DELIMITER
doc.block_delimiter = BLOCK_DELIMITER
full_text = doc.get_text()

# 2. Extract equations using regex
# NOTE(review): assumes docxlatex writes the same delimiter before and after an
# equation (no "[[/EQUATION]]"-style closing tag is configured anywhere) --
# confirm against the docxlatex documentation.
equation_pattern = re.compile(
    "|".join(
        f"{re.escape(delim)}(.*?){re.escape(delim)}"
        for delim in (BLOCK_DELIMITER, INLINE_DELIMITER)
    ),
    re.DOTALL,  # equations may span several lines
)
# Exactly one capture group participates per match; lastindex selects it.
equations = [
    match.group(match.lastindex)
    for match in equation_pattern.finditer(full_text)
]

# 3. Initialize Anthropic client
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

# 4. Process each equation
results = []

for eq in equations:
    # Build JSON prompt template
    prompt_data = {
        "equation": eq.strip(),
        "requirements": {
            "components": True,
            "applications": ["AI workflows", "financial systems", "healthcare"],
            "implementation": {
                "python": True,
                "optimization": True
            }
        }
    }

    # Get Claude analysis. The instruction goes in the `system` parameter;
    # the OpenAI-style `response_format` argument is not a `messages.create`
    # parameter, so JSON output is requested through the system prompt instead.
    response = client.messages.create(
        model=CLAUDE_MODEL,
        max_tokens=1000,
        system=SYSTEM_PROMPT,
        messages=[{
            "role": "user",
            "content": json.dumps(prompt_data, indent=2)
        }]
    )

    reply_text = "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )

    # The model may still return non-JSON text; keep it rather than crash.
    try:
        analysis = json.loads(reply_text)
    except json.JSONDecodeError:
        analysis = {"raw_text": reply_text}

    # Store results
    results.append({
        "equation": eq,
        "analysis": analysis
    })

# 5. Output results
print(json.dumps(results, indent=2, ensure_ascii=False))


ModuleNotFoundError: No module named 'anthropic'