In [2]:
#!/usr/bin/env python3
"""
Montesquieu Digital Humanities Analysis - FIXED VERSION
LLM-based Translation and Semantic Comparison for ALL chapters
"""

# %% Import libraries
import hashlib
import json
import os
import time
from datetime import datetime
from itertools import combinations  # Added this import
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import openai
import pandas as pd
import requests
from anthropic import Anthropic

# %% Configuration
class Config:
    """Class-level settings for the translation/comparison pipeline.

    Every value is a plain class attribute, read elsewhere as ``Config.NAME``.
    """
    # Provider passed to LLMInterface: "anthropic" or "openai".
    MODEL_TYPE = "anthropic"

    # API keys come from the environment so that no secret lives in the
    # source; each is None when the corresponding variable is not set.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
    ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

    # Model versions (pinned identifiers, one per provider)
    OPENAI_MODEL = "gpt-4-0613"
    ANTHROPIC_MODEL = "claude-3-5-sonnet-20241022"

    # Generation parameters
    TEMPERATURE = 0.3
    MAX_TOKENS = 2000
    SEED = 42  # NOTE(review): not referenced by the code visible in this file

    # File paths
    SOURCE_DATA_URL = "https://raw.githubusercontent.com/lokalkosmos/Lesprit/refs/heads/main/montesquieu_editions.json"
    OUTPUT_DIR = Path("montesquieu_llm_output")
    CACHE_FILE = "llm_cache.json"

    # Analysis parameters
    BATCH_SIZE = 5  # NOTE(review): not referenced by the code visible in this file
    RATE_LIMIT_DELAY = 2  # seconds slept after every uncached LLM call

# %% Data Loading
def load_montesquieu_data():
    """Download and decode the edition corpus from ``Config.SOURCE_DATA_URL``.

    Returns:
        The decoded JSON document. The summary line printed below treats it
        as a mapping of language name -> list of editions.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    print("Loading Montesquieu data from GitHub...")
    # A timeout keeps the run from hanging forever on a stalled connection.
    response = requests.get(Config.SOURCE_DATA_URL, timeout=30)
    # Fail here on 4xx/5xx rather than trying to JSON-decode an error page.
    response.raise_for_status()
    data = response.json()
    edition_count = sum(len(editions) for editions in data.values())
    print(f"Loaded {len(data)} languages with {edition_count} total editions")
    return data

# %% LLM Interface
class LLMInterface:
    """Unified, disk-cached interface to the Anthropic and OpenAI chat APIs.

    Each public method builds a prompt, looks it up in a JSON cache keyed by
    provider, model, function name and prompt hash, and only calls the
    provider on a cache miss. Results are dicts parsed from the model's JSON
    reply, with a ``metadata`` entry added.
    """

    _SUPPORTED_PROVIDERS = ("anthropic", "openai")

    def __init__(self, provider: str = "anthropic"):
        """Select a provider and load the on-disk cache.

        Args:
            provider: "anthropic" or "openai".

        Raises:
            ValueError: if ``provider`` is not one of the supported names.
        """
        # Reject unknown providers up front; otherwise ``self.model`` would
        # never be set and the first call would fail with an AttributeError.
        if provider not in self._SUPPORTED_PROVIDERS:
            raise ValueError(
                f"Unsupported provider {provider!r}; "
                f"expected one of {self._SUPPORTED_PROVIDERS}"
            )

        self.provider = provider
        self.cache = self._load_cache()

        if provider == "anthropic":
            self.client = Anthropic(api_key=Config.ANTHROPIC_API_KEY)
            self.model = Config.ANTHROPIC_MODEL
        else:
            openai.api_key = Config.OPENAI_API_KEY
            self.model = Config.OPENAI_MODEL

    def _load_cache(self):
        """Return the cache stored in ``Config.CACHE_FILE``, or ``{}``.

        A missing file is the normal first-run case. An unreadable or
        corrupt file is reported and ignored, so the run can continue.
        """
        try:
            with open(Config.CACHE_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return {}
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"Warning: ignoring unreadable cache {Config.CACHE_FILE}: {exc}")
            return {}

    def _save_cache(self):
        """Write the whole cache to ``Config.CACHE_FILE``.

        The data goes to a temporary sibling file that is then renamed over
        the real one, so an interrupted run cannot leave a half-written
        cache behind.
        """
        tmp_path = Path(f"{Config.CACHE_FILE}.tmp")
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(self.cache, f, indent=2, ensure_ascii=False)
        tmp_path.replace(Config.CACHE_FILE)

    def _get_cache_key(self, prompt: str, function: str):
        """Build the cache key: provider, model, function and prompt SHA-256."""
        prompt_hash = hashlib.sha256(prompt.encode()).hexdigest()
        return f"{self.provider}_{self.model}_{function}_{prompt_hash}"

    def _complete(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Raises:
            ValueError: if ``self.provider`` is not a supported provider.
        """
        if self.provider == "anthropic":
            response = self.client.messages.create(
                model=self.model,
                max_tokens=Config.MAX_TOKENS,
                temperature=Config.TEMPERATURE,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text
        if self.provider == "openai":
            # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 interface
            # of the openai package - confirm the installed version.
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=Config.TEMPERATURE,
                max_tokens=Config.MAX_TOKENS
            )
            return response.choices[0].message.content
        raise ValueError(f"Unsupported provider {self.provider!r}")

    @staticmethod
    def _parse_json_response(content: str) -> Optional[Dict]:
        """Return the JSON object contained in ``content``, or None.

        The reply is tried as-is first, then the span from the first ``{`` to
        the last ``}``, which covers replies wrapped in a Markdown code fence
        or preceded by a sentence. Valid JSON that is not an object yields
        None, because callers add keys to the result.
        """
        candidates = [content.strip()]
        start, end = content.find("{"), content.rfind("}")
        if 0 <= start < end:
            candidates.append(content[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    def translate_text(self, text: str, source_lang: str, target_lang: str = "English") -> Dict:
        """Translate ``text`` from ``source_lang`` into ``target_lang``.

        Args:
            text: The passage to translate.
            source_lang: Language name inserted into the prompt.
            target_lang: Language name to translate into.

        Returns:
            The model's JSON reply as a dict plus a ``metadata`` entry. If the
            reply holds no JSON object, the raw reply is returned under
            ``translation`` with a note and a placeholder confidence of 0.7.
            A cache hit returns the stored dict without calling the provider.
        """
        
        prompt = f"""You are an expert translator specializing in 18th-century philosophical texts, 
particularly the works of Montesquieu. Your task is to translate the following {source_lang} 
text to {target_lang} while:

1. Preserving the exact philosophical terminology and concepts
2. Maintaining period-appropriate language and style
3. Keeping sentence structure as close to the original as possible
4. Noting any untranslatable concepts or terms

Original {source_lang} text:
"{text}"

Provide your translation in the following JSON format:
{{
    "translation": "your translation here",
    "translation_notes": ["note about specific terms or choices"],
    "untranslatable_terms": [{{"term": "original", "explanation": "why untranslatable"}}],
    "confidence": 0.0-1.0
}}"""

        cache_key = self._get_cache_key(prompt, "translate")

        # Check cache
        if cache_key in self.cache:
            print(f"Using cached translation for {source_lang} text")
            return self.cache[cache_key]

        # Generate new translation
        print(f"Translating {source_lang} text ({len(text)} chars)...")
        content = self._complete(prompt)

        # Parse JSON response, falling back to the raw text
        result = self._parse_json_response(content)
        if result is None:
            result = {
                "translation": content,
                "translation_notes": ["Raw response - JSON parsing failed"],
                "untranslatable_terms": [],
                "confidence": 0.7
            }

        # Add metadata
        result["metadata"] = {
            "model": self.model,
            "provider": self.provider,
            "source_language": source_lang,
            "target_language": target_lang,
            "timestamp": datetime.now().isoformat(),
            "prompt_hash": hashlib.sha256(prompt.encode()).hexdigest()
        }

        # Cache result
        self.cache[cache_key] = result
        self._save_cache()

        # Rate limiting (only reached after a real API call)
        time.sleep(Config.RATE_LIMIT_DELAY)

        return result

    def semantic_comparison(self, text1: str, text2: str, metadata1: Dict, metadata2: Dict) -> Dict:
        """Ask the model for a structured comparison of two text versions.

        Args:
            text1: First version of the passage.
            text2: Second version of the passage.
            metadata1: Dict whose ``language`` and ``year`` label ``text1``.
            metadata2: Dict whose ``language`` and ``year`` label ``text2``.

        Returns:
            The model's JSON reply as a dict plus a ``metadata`` entry. If the
            reply holds no JSON object, a dict with ``error``,
            ``raw_response`` and a placeholder ``overall_similarity`` of 0.5.
            A cache hit returns the stored dict without calling the provider.
        """
        
        prompt = f"""You are a digital humanities scholar specializing in 18th-century philosophy 
and textual criticism. Perform a detailed semantic comparison of these two versions of 
Montesquieu's text, identifying:

1. Conceptual differences (changes in philosophical meaning)
2. Rhetorical variations (changes in argumentative structure)
3. Lexical shifts (word choice differences and their implications)
4. Syntactic changes (sentence structure modifications)
5. Cultural/historical adaptations

Text 1 ({metadata1.get('language', 'Unknown')} - {metadata1.get('year', 'Unknown')}):
"{text1}"

Text 2 ({metadata2.get('language', 'Unknown')} - {metadata2.get('year', 'Unknown')}):
"{text2}"

Provide your analysis in the following JSON structure:
{{
    "overall_similarity": 0.0-1.0,
    "conceptual_changes": [
        {{
            "type": "addition|deletion|modification",
            "concept": "philosophical concept affected",
            "text1_excerpt": "relevant excerpt",
            "text2_excerpt": "relevant excerpt", 
            "significance": "why this matters philosophically",
            "confidence": 0.0-1.0
        }}
    ],
    "rhetorical_shifts": [
        {{
            "element": "rhetorical element",
            "change": "description of change",
            "effect": "impact on argument"
        }}
    ],
    "lexical_analysis": {{
        "key_terms_added": ["term1", "term2"],
        "key_terms_removed": ["term1", "term2"],
        "semantic_field_shifts": ["description of shifts"]
    }},
    "historical_context": "relevant historical/cultural factors",
    "overall_assessment": "synthesis of the comparison"
}}"""

        cache_key = self._get_cache_key(prompt, "semantic_compare")

        if cache_key in self.cache:
            print("Using cached semantic comparison")
            return self.cache[cache_key]

        print("Performing semantic comparison...")
        content = self._complete(prompt)

        result = self._parse_json_response(content)
        if result is None:
            result = {
                "error": "JSON parsing failed",
                "raw_response": content,
                "overall_similarity": 0.5
            }

        result["metadata"] = {
            "model": self.model,
            "provider": self.provider,
            "comparison": f"{metadata1.get('language')}-{metadata1.get('year')} vs {metadata2.get('language')}-{metadata2.get('year')}",
            "timestamp": datetime.now().isoformat()
        }

        self.cache[cache_key] = result
        self._save_cache()

        # Rate limiting (only reached after a real API call)
        time.sleep(Config.RATE_LIMIT_DELAY)

        return result

# %% FIXED Main Analysis Pipeline
class MontesquieuAnalyzer:
    """Translation and comparison pipeline that accumulates across chapters.

    Results from every call are kept in ``all_translations`` and
    ``all_comparisons`` and the full dictionaries are re-saved after each
    chapter, so later chapters add to earlier ones instead of replacing them.
    """

    def __init__(self, llm_interface: "LLMInterface"):
        """Load the corpus, start empty result stores, create the output dir.

        Args:
            llm_interface: Object providing ``translate_text`` and
                ``semantic_comparison``.
        """
        self.llm = llm_interface
        self.data = load_montesquieu_data()

        # Initialised once; every chapter adds to these.
        self.all_translations = {}
        self.all_comparisons = {}

        # Create output directory
        Config.OUTPUT_DIR.mkdir(exist_ok=True)

    def _comparable_text(self, language: str, edition: Dict, chapter: str) -> Optional[str]:
        """Return the English-language text to compare for one edition.

        English editions use their own chapter text; all others use the
        stored translation for the same language, year and chapter.

        Returns:
            The text, or None when the edition has no such chapter or no
            usable translation has been stored for it.
        """
        if language == "English":
            chapter_data = edition['Chapters'].get(chapter)
            if not chapter_data:
                return None
            return chapter_data.get('Text')

        key = f"{language}_{edition['Year Published']}_ch{chapter}"
        entry = self.all_translations.get(key)
        if entry is None:
            return None
        # The model's JSON may omit "translation"; treat that as missing.
        return entry.get("translation")

    def translate_all_texts(self, chapter: str = "1", languages: Optional[List[str]] = None):
        """Translate one chapter of every non-English edition.

        Args:
            chapter: Key of the chapter inside each edition's ``Chapters``.
            languages: Languages to process; None means every language in
                the data except "English". "English" is always skipped.

        Editions without the chapter are skipped. Results are stored under
        ``"{language}_{year}_ch{chapter}"`` and the cumulative translations
        file is rewritten at the end.
        """
        if languages is None:
            languages = [lang for lang in self.data.keys() if lang != "English"]

        print(f"\n=== Translating Chapter {chapter} ===")

        for language in languages:
            if language == "English":
                continue

            print(f"\nProcessing {language} editions...")

            for edition in self.data[language]:
                year = edition['Year Published']
                publisher = edition['Publisher']

                # Get chapter text
                chapter_data = edition['Chapters'].get(chapter)
                if not chapter_data:
                    continue

                print(f"  {year} - {publisher}")

                # Create unique key
                key = f"{language}_{year}_ch{chapter}"

                # Translate
                translation_result = self.llm.translate_text(
                    text=chapter_data['Text'],
                    source_lang=language
                )

                # Store result in the cumulative dictionary
                self.all_translations[key] = {
                    "language": language,
                    "year": year,
                    "publisher": publisher,
                    "chapter": chapter,
                    "original_title": chapter_data['Title'],
                    "original_text": chapter_data['Text'],
                    **translation_result
                }

        # Save cumulative translations after each chapter
        self._save_cumulative_results("translations")

    def compare_editions(self, chapter: str = "1", comparison_type: str = "all"):
        """Run pairwise semantic comparisons for one chapter.

        Args:
            chapter: Key of the chapter inside each edition's ``Chapters``.
            comparison_type: "chronological" compares every pair of editions
                within each language (editions sorted by year);
                "cross-linguistic" compares every edition of one language
                with every edition of another, for every language pair;
                "all" does both. Any other value runs no comparison.

        Pairs where either side has no text for the chapter are skipped. The
        cumulative comparisons file is rewritten at the end.
        """
        print(f"\n=== Comparing Editions - Chapter {chapter} ===")

        if comparison_type == "all" or comparison_type == "chronological":
            # Compare ALL pairs within each language
            for language in self.data.keys():
                editions = sorted(self.data[language], key=lambda x: x['Year Published'])

                if len(editions) < 2:
                    continue

                print(f"\nComparing {language} editions (all pairs)...")

                # Get all possible pairs
                for ed1, ed2 in combinations(editions, 2):
                    text1 = self._comparable_text(language, ed1, chapter)
                    text2 = self._comparable_text(language, ed2, chapter)
                    if text1 is None or text2 is None:
                        continue

                    # Compare
                    comparison = self.llm.semantic_comparison(
                        text1, text2,
                        {"language": language, "year": ed1['Year Published']},
                        {"language": language, "year": ed2['Year Published']}
                    )

                    # Store
                    comp_key = f"{language}_{ed1['Year Published']}_vs_{ed2['Year Published']}_ch{chapter}"
                    self.all_comparisons[comp_key] = comparison

                    print(f"  Compared {ed1['Year Published']} vs {ed2['Year Published']}")

        if comparison_type == "all" or comparison_type == "cross-linguistic":
            # Compare ALL editions across languages
            print(f"\nComparing across languages...")

            # Get all language pairs
            languages = list(self.data.keys())
            for lang1, lang2 in combinations(languages, 2):
                print(f"\n  Comparing {lang1} vs {lang2}...")

                # Compare ALL editions between these languages
                for ed1 in self.data[lang1]:
                    # Looked up once per ed1 rather than once per pair.
                    text1 = self._comparable_text(lang1, ed1, chapter)
                    if text1 is None:
                        continue

                    for ed2 in self.data[lang2]:
                        text2 = self._comparable_text(lang2, ed2, chapter)
                        if text2 is None:
                            continue

                        # Compare
                        comparison = self.llm.semantic_comparison(
                            text1, text2,
                            {"language": lang1, "year": ed1['Year Published']},
                            {"language": lang2, "year": ed2['Year Published']}
                        )

                        comp_key = f"{lang1}_{ed1['Year Published']}_vs_{lang2}_{ed2['Year Published']}_ch{chapter}"
                        self.all_comparisons[comp_key] = comparison

                        print(f"    {lang1} {ed1['Year Published']} vs {lang2} {ed2['Year Published']}")

        # Save cumulative comparisons after each chapter
        self._save_cumulative_results("comparisons")

    def _save_cumulative_results(self, result_type: str):
        """Write one cumulative result store to its JSON file.

        Args:
            result_type: "translations" saves ``all_translations``; any other
                value saves ``all_comparisons``. The value is also used in
                the output file name.
        """
        if result_type == "translations":
            data = self.all_translations
        else:
            data = self.all_comparisons

        filename = Config.OUTPUT_DIR / f"montesquieu_{result_type}_llm.json"

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(data)} cumulative {result_type} to {filename}")

# %% FIXED Main execution
def run_full_analysis(chapters: Optional[List[str]] = None):
    """Translate and compare every requested chapter, then build the web file.

    Args:
        chapters: Chapter keys to process, in order. None means chapters
            "1" through "6". The web file is built from the result files
            saved while processing chapters, so at least one is needed.

    Returns:
        The MontesquieuAnalyzer holding the accumulated translations and
        comparisons.
    """
    if chapters is None:
        chapters = ["1", "2", "3", "4", "5", "6"]

    print("=== Montesquieu FULL LLM Analysis Pipeline ===")

    # One interface and one analyzer for the whole run, so results and the
    # response cache accumulate across chapters.
    llm = LLMInterface(provider=Config.MODEL_TYPE)
    analyzer = MontesquieuAnalyzer(llm)

    # Process ALL chapters accumulating results
    for chapter in chapters:
        print(f"\n{'='*50}")
        print(f"PROCESSING CHAPTER {chapter}")
        print(f"{'='*50}")

        # Translate all texts for this chapter (languages=None: all languages)
        analyzer.translate_all_texts(chapter=chapter, languages=None)

        # Compare editions for this chapter, within and across languages
        analyzer.compare_editions(chapter=chapter, comparison_type="all")

    # Create final web integration file
    create_web_integration_file()

    print("\n✅ Analysis complete for ALL chapters and languages!")
    print(f"Total translations: {len(analyzer.all_translations)}")
    print(f"Total comparisons: {len(analyzer.all_comparisons)}")

    return analyzer

def create_web_integration_file():
    """Combine the saved translations and comparisons into one web JSON file.

    Reads the two cumulative result files from ``Config.OUTPUT_DIR`` and
    writes ``montesquieu_web_data.json`` next to them. Only comparisons that
    contain a ``conceptual_changes`` entry are included in the output.
    """
    out_dir = Config.OUTPUT_DIR

    def _read_json(name):
        # Load one of the cumulative result files from the output directory.
        with open(out_dir / name, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    translations = _read_json("montesquieu_translations_llm.json")
    comparisons = _read_json("montesquieu_comparisons_llm.json")

    # Reduce each usable comparison to the fields the web interface gets.
    semantic_analysis = {
        comp_key: {
            "changes": comp_data["conceptual_changes"],
            "similarity": comp_data.get("overall_similarity", 0),
            "summary": comp_data.get("overall_assessment", ""),
        }
        for comp_key, comp_data in comparisons.items()
        if "conceptual_changes" in comp_data
    }

    web_data = {
        "translations": translations,
        "semantic_analysis": semantic_analysis,
        "visualization": {
            "supported": True,
            "types": ["differences", "confidence", "evolution"],
        },
    }

    target = out_dir / "montesquieu_web_data.json"
    with open(target, 'w', encoding='utf-8') as handle:
        json.dump(web_data, handle, indent=2, ensure_ascii=False)

    print(f"Web integration file created with {len(translations)} translations and {len(comparisons)} comparisons")

# %% Execute
# Run the whole pipeline only when this file is executed directly, not when
# it is imported; the returned analyzer is bound to a module-level name.
if __name__ == "__main__":
    analyzer = run_full_analysis()

=== Montesquieu FULL LLM Analysis Pipeline ===
Loading Montesquieu data from GitHub...
Loaded 5 languages with 15 total editions

PROCESSING CHAPTER 1

=== Translating Chapter 1 ===

Processing French editions...
  1748 - Barillot & fils, Genève
Using cached translation for French text
  1750 - Huart & Moreau, Paris, chez Barrillot & Fils, Genève
Using cached translation for French text
  1758 - Arkstee & Merkus, Amsterdam & Leipzig / Huart, Paris
Using cached translation for French text
  1784 - Sanson & Compagnie, Deux-Ponts
Using cached translation for French text
  1803 - Didot, Paris
Using cached translation for French text

Processing Italian editions...
  1750 - Simone, Naples
Using cached translation for Italian text
  1777 - Terres, Napoli
Using cached translation for Italian text

Processing German editions...
  1753 - S.N., Frankfurt & Leipzig
Using cached translation for German text
  1799 - Wien & Bauer, Vienna
Using cached translation for German text

Processing Polish ed

In [2]:
#!/usr/bin/env python3
"""
Montesquieu Digital Humanities Analysis - FIXED VERSION
LLM-based Translation and Semantic Comparison for ALL chapters
"""

# %% Import libraries
import json
import hashlib
import requests
from datetime import datetime
from typing import Dict, List, Tuple, Optional
import pandas as pd
import numpy as np
from pathlib import Path
import openai
from anthropic import Anthropic
import time
from itertools import combinations  # Added this import

# %% Configuration
class Config:
    """Class-level settings for the translation/comparison pipeline.

    Every value is a plain class attribute, read elsewhere as ``Config.NAME``.
    """
    # Provider passed to LLMInterface: "anthropic" or "openai".
    MODEL_TYPE = "anthropic"

    # API keys come from the environment so that no secret lives in the
    # source; each is None when the corresponding variable is not set.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
    ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

    # Model versions (pinned identifiers, one per provider)
    OPENAI_MODEL = "gpt-4-0613"
    ANTHROPIC_MODEL = "claude-3-5-sonnet-20241022"

    # Generation parameters
    TEMPERATURE = 0.3
    MAX_TOKENS = 2000
    SEED = 42  # NOTE(review): not referenced by the code visible in this file

    # File paths
    SOURCE_DATA_URL = "https://raw.githubusercontent.com/lokalkosmos/Lesprit/refs/heads/main/montesquieu_editions.json"
    OUTPUT_DIR = Path("montesquieu_llm_output")
    CACHE_FILE = "llm_cache.json"

    # Analysis parameters
    BATCH_SIZE = 5  # NOTE(review): not referenced by the code visible in this file
    RATE_LIMIT_DELAY = 2  # seconds slept after every uncached LLM call

# %% Data Loading
def load_montesquieu_data():
    """Download and decode the edition corpus from ``Config.SOURCE_DATA_URL``.

    Returns:
        The decoded JSON document. The summary line printed below treats it
        as a mapping of language name -> list of editions.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    print("Loading Montesquieu data from GitHub...")
    # A timeout keeps the run from hanging forever on a stalled connection.
    response = requests.get(Config.SOURCE_DATA_URL, timeout=30)
    # Fail here on 4xx/5xx rather than trying to JSON-decode an error page.
    response.raise_for_status()
    data = response.json()
    edition_count = sum(len(editions) for editions in data.values())
    print(f"Loaded {len(data)} languages with {edition_count} total editions")
    return data

# %% LLM Interface
class LLMInterface:
    """Unified, disk-cached interface to the Anthropic and OpenAI chat APIs.

    Each public method builds a prompt, looks it up in a JSON cache keyed by
    provider, model, function name and prompt hash, and only calls the
    provider on a cache miss. Results are dicts parsed from the model's JSON
    reply, with a ``metadata`` entry added.
    """

    _SUPPORTED_PROVIDERS = ("anthropic", "openai")

    def __init__(self, provider: str = "anthropic"):
        """Select a provider and load the on-disk cache.

        Args:
            provider: "anthropic" or "openai".

        Raises:
            ValueError: if ``provider`` is not one of the supported names.
        """
        # Reject unknown providers up front; otherwise ``self.model`` would
        # never be set and the first call would fail with an AttributeError.
        if provider not in self._SUPPORTED_PROVIDERS:
            raise ValueError(
                f"Unsupported provider {provider!r}; "
                f"expected one of {self._SUPPORTED_PROVIDERS}"
            )

        self.provider = provider
        self.cache = self._load_cache()

        if provider == "anthropic":
            self.client = Anthropic(api_key=Config.ANTHROPIC_API_KEY)
            self.model = Config.ANTHROPIC_MODEL
        else:
            openai.api_key = Config.OPENAI_API_KEY
            self.model = Config.OPENAI_MODEL

    def _load_cache(self):
        """Return the cache stored in ``Config.CACHE_FILE``, or ``{}``.

        A missing file is the normal first-run case. An unreadable or
        corrupt file is reported and ignored, so the run can continue.
        """
        try:
            with open(Config.CACHE_FILE, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return {}
        except (OSError, ValueError) as exc:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"Warning: ignoring unreadable cache {Config.CACHE_FILE}: {exc}")
            return {}

    def _save_cache(self):
        """Write the whole cache to ``Config.CACHE_FILE``.

        The data goes to a temporary sibling file that is then renamed over
        the real one, so an interrupted run cannot leave a half-written
        cache behind.
        """
        tmp_path = Path(f"{Config.CACHE_FILE}.tmp")
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(self.cache, f, indent=2, ensure_ascii=False)
        tmp_path.replace(Config.CACHE_FILE)

    def _get_cache_key(self, prompt: str, function: str):
        """Build the cache key: provider, model, function and prompt SHA-256."""
        prompt_hash = hashlib.sha256(prompt.encode()).hexdigest()
        return f"{self.provider}_{self.model}_{function}_{prompt_hash}"

    def _complete(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Raises:
            ValueError: if ``self.provider`` is not a supported provider.
        """
        if self.provider == "anthropic":
            response = self.client.messages.create(
                model=self.model,
                max_tokens=Config.MAX_TOKENS,
                temperature=Config.TEMPERATURE,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text
        if self.provider == "openai":
            # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 interface
            # of the openai package - confirm the installed version.
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=Config.TEMPERATURE,
                max_tokens=Config.MAX_TOKENS
            )
            return response.choices[0].message.content
        raise ValueError(f"Unsupported provider {self.provider!r}")

    @staticmethod
    def _parse_json_response(content: str) -> Optional[Dict]:
        """Return the JSON object contained in ``content``, or None.

        The reply is tried as-is first, then the span from the first ``{`` to
        the last ``}``, which covers replies wrapped in a Markdown code fence
        or preceded by a sentence. Valid JSON that is not an object yields
        None, because callers add keys to the result.
        """
        candidates = [content.strip()]
        start, end = content.find("{"), content.rfind("}")
        if 0 <= start < end:
            candidates.append(content[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    def translate_text(self, text: str, source_lang: str, target_lang: str = "English") -> Dict:
        """Translate ``text`` from ``source_lang`` into ``target_lang``.

        Args:
            text: The passage to translate.
            source_lang: Language name inserted into the prompt.
            target_lang: Language name to translate into.

        Returns:
            The model's JSON reply as a dict plus a ``metadata`` entry. If the
            reply holds no JSON object, the raw reply is returned under
            ``translation`` with a note and a placeholder confidence of 0.7.
            A cache hit returns the stored dict without calling the provider.
        """
        
        prompt = f"""You are an expert translator specializing in 18th-century philosophical texts, 
particularly the works of Montesquieu. Your task is to translate the following {source_lang} 
text to {target_lang} while:

1. Preserving the exact philosophical terminology and concepts
2. Maintaining period-appropriate language and style
3. Keeping sentence structure as close to the original as possible
4. Noting any untranslatable concepts or terms

Original {source_lang} text:
"{text}"

Provide your translation in the following JSON format:
{{
    "translation": "your translation here",
    "translation_notes": ["note about specific terms or choices"],
    "untranslatable_terms": [{{"term": "original", "explanation": "why untranslatable"}}],
    "confidence": 0.0-1.0
}}"""

        cache_key = self._get_cache_key(prompt, "translate")

        # Check cache
        if cache_key in self.cache:
            print(f"Using cached translation for {source_lang} text")
            return self.cache[cache_key]

        # Generate new translation
        print(f"Translating {source_lang} text ({len(text)} chars)...")
        content = self._complete(prompt)

        # Parse JSON response, falling back to the raw text
        result = self._parse_json_response(content)
        if result is None:
            result = {
                "translation": content,
                "translation_notes": ["Raw response - JSON parsing failed"],
                "untranslatable_terms": [],
                "confidence": 0.7
            }

        # Add metadata
        result["metadata"] = {
            "model": self.model,
            "provider": self.provider,
            "source_language": source_lang,
            "target_language": target_lang,
            "timestamp": datetime.now().isoformat(),
            "prompt_hash": hashlib.sha256(prompt.encode()).hexdigest()
        }

        # Cache result
        self.cache[cache_key] = result
        self._save_cache()

        # Rate limiting (only reached after a real API call)
        time.sleep(Config.RATE_LIMIT_DELAY)

        return result

    def semantic_comparison(self, text1: str, text2: str, metadata1: Dict, metadata2: Dict) -> Dict:
        """Ask the model for a structured comparison of two text versions.

        Args:
            text1: First version of the passage.
            text2: Second version of the passage.
            metadata1: Dict whose ``language`` and ``year`` label ``text1``.
            metadata2: Dict whose ``language`` and ``year`` label ``text2``.

        Returns:
            The model's JSON reply as a dict plus a ``metadata`` entry. If the
            reply holds no JSON object, a dict with ``error``,
            ``raw_response`` and a placeholder ``overall_similarity`` of 0.5.
            A cache hit returns the stored dict without calling the provider.
        """
        
        prompt = f"""You are a digital humanities scholar specializing in 18th-century philosophy 
and textual criticism. Perform a detailed semantic comparison of these two versions of 
Montesquieu's text, identifying:

1. Conceptual differences (changes in philosophical meaning)
2. Rhetorical variations (changes in argumentative structure)
3. Lexical shifts (word choice differences and their implications)
4. Syntactic changes (sentence structure modifications)
5. Cultural/historical adaptations

Text 1 ({metadata1.get('language', 'Unknown')} - {metadata1.get('year', 'Unknown')}):
"{text1}"

Text 2 ({metadata2.get('language', 'Unknown')} - {metadata2.get('year', 'Unknown')}):
"{text2}"

Provide your analysis in the following JSON structure:
{{
    "overall_similarity": 0.0-1.0,
    "conceptual_changes": [
        {{
            "type": "addition|deletion|modification",
            "concept": "philosophical concept affected",
            "text1_excerpt": "relevant excerpt",
            "text2_excerpt": "relevant excerpt", 
            "significance": "why this matters philosophically",
            "confidence": 0.0-1.0
        }}
    ],
    "rhetorical_shifts": [
        {{
            "element": "rhetorical element",
            "change": "description of change",
            "effect": "impact on argument"
        }}
    ],
    "lexical_analysis": {{
        "key_terms_added": ["term1", "term2"],
        "key_terms_removed": ["term1", "term2"],
        "semantic_field_shifts": ["description of shifts"]
    }},
    "historical_context": "relevant historical/cultural factors",
    "overall_assessment": "synthesis of the comparison"
}}"""

        cache_key = self._get_cache_key(prompt, "semantic_compare")

        if cache_key in self.cache:
            print("Using cached semantic comparison")
            return self.cache[cache_key]

        print("Performing semantic comparison...")
        content = self._complete(prompt)

        result = self._parse_json_response(content)
        if result is None:
            result = {
                "error": "JSON parsing failed",
                "raw_response": content,
                "overall_similarity": 0.5
            }

        result["metadata"] = {
            "model": self.model,
            "provider": self.provider,
            "comparison": f"{metadata1.get('language')}-{metadata1.get('year')} vs {metadata2.get('language')}-{metadata2.get('year')}",
            "timestamp": datetime.now().isoformat()
        }

        self.cache[cache_key] = result
        self._save_cache()

        # Rate limiting (only reached after a real API call)
        time.sleep(Config.RATE_LIMIT_DELAY)

        return result

# %% FIXED Main Analysis Pipeline
class MontesquieuAnalyzer:
    """Main analysis pipeline that accumulates results across all chapters.

    Translations and comparisons are kept in instance-level dictionaries, so
    calling the chapter methods repeatedly builds one cumulative result set.
    Each public method rewrites its JSON output file with everything
    collected so far.
    """

    def __init__(self, llm_interface: LLMInterface):
        """Load the edition data and prepare empty cumulative result stores.

        Args:
            llm_interface: Object providing ``translate_text`` and
                ``semantic_comparison``.
        """
        self.llm = llm_interface
        self.data = load_montesquieu_data()

        # Initialised once; every chapter run adds to these dictionaries.
        self.all_translations = {}
        self.all_comparisons = {}

        # Create output directory
        Config.OUTPUT_DIR.mkdir(exist_ok=True)

    def translate_all_texts(self, chapter: str = "1", languages: List[str] = None):
        """Translate all non-English texts for a specific chapter.

        Args:
            chapter: Chapter key looked up in each edition's ``Chapters``
                mapping.
            languages: Languages to process; ``None`` means every language in
                the loaded data except ``"English"``. ``"English"`` is
                skipped even when listed explicitly.

        Editions that do not contain the chapter are skipped. Results are
        stored under ``"{language}_{year}_ch{chapter}"``; two editions of the
        same language and year would therefore share one key.
        """
        if languages is None:
            languages = [lang for lang in self.data if lang != "English"]

        print(f"\n=== Translating Chapter {chapter} ===")

        for language in languages:
            if language == "English":
                continue

            print(f"\nProcessing {language} editions...")

            for edition in self.data[language]:
                year = edition['Year Published']
                publisher = edition['Publisher']

                # Get chapter text
                chapter_data = edition['Chapters'].get(chapter)
                if not chapter_data:
                    continue

                print(f"  {year} - {publisher}")

                translation_result = self.llm.translate_text(
                    text=chapter_data['Text'],
                    source_lang=language
                )

                # Store result in the cumulative dictionary
                self.all_translations[f"{language}_{year}_ch{chapter}"] = {
                    "language": language,
                    "year": year,
                    "publisher": publisher,
                    "chapter": chapter,
                    "original_title": chapter_data['Title'],
                    "original_text": chapter_data['Text'],
                    **translation_result
                }

        # Save cumulative translations after each chapter
        self._save_cumulative_results("translations")

    def compare_editions(self, chapter: str = "1", comparison_type: str = "all"):
        """Compare editions of one chapter pairwise.

        Args:
            chapter: Chapter key to compare.
            comparison_type: ``"chronological"`` compares every pair of
                editions within each language (all pairs, not only adjacent
                years), ``"cross-linguistic"`` compares every edition of one
                language with every edition of another, and ``"all"`` does
                both. Any other value performs no comparison.

        Pairs for which either text is unavailable are skipped. The
        cumulative comparisons file is rewritten at the end in every case.
        """
        print(f"\n=== Comparing Editions - Chapter {chapter} ===")

        if comparison_type in ("all", "chronological"):
            self._compare_within_languages(chapter)

        if comparison_type in ("all", "cross-linguistic"):
            self._compare_across_languages(chapter)

        # Save cumulative comparisons after each chapter
        self._save_cumulative_results("comparisons")

    def _comparable_text(self, language: str, edition: dict, chapter: str) -> Optional[str]:
        """Return the text used for comparison, or None when unavailable.

        English editions contribute their own chapter text; every other
        language contributes the stored translation for that edition.
        """
        if language == "English":
            # .get() so an edition lacking this chapter is skipped instead of
            # raising KeyError, matching how translate_all_texts treats it.
            chapter_data = edition['Chapters'].get(chapter)
            return chapter_data['Text'] if chapter_data else None

        entry = self.all_translations.get(
            f"{language}_{edition['Year Published']}_ch{chapter}"
        )
        return entry.get("translation") if entry else None

    def _compare_within_languages(self, chapter: str) -> None:
        """Compare all pairs of editions inside each language."""
        for language, language_editions in self.data.items():
            editions = sorted(language_editions, key=lambda x: x['Year Published'])

            if len(editions) < 2:
                continue

            print(f"\nComparing {language} editions (all pairs)...")

            for ed1, ed2 in combinations(editions, 2):
                text1 = self._comparable_text(language, ed1, chapter)
                text2 = self._comparable_text(language, ed2, chapter)
                if text1 is None or text2 is None:
                    continue

                comparison = self.llm.semantic_comparison(
                    text1, text2,
                    {"language": language, "year": ed1['Year Published']},
                    {"language": language, "year": ed2['Year Published']}
                )

                comp_key = f"{language}_{ed1['Year Published']}_vs_{ed2['Year Published']}_ch{chapter}"
                self.all_comparisons[comp_key] = comparison

                print(f"  Compared {ed1['Year Published']} vs {ed2['Year Published']}")

    def _compare_across_languages(self, chapter: str) -> None:
        """Compare every edition of each language with those of every other."""
        print(f"\nComparing across languages...")

        for lang1, lang2 in combinations(list(self.data), 2):
            print(f"\n  Comparing {lang1} vs {lang2}...")

            for ed1 in self.data[lang1]:
                # The first text does not depend on ed2, so resolve it once.
                text1 = self._comparable_text(lang1, ed1, chapter)
                if text1 is None:
                    continue

                for ed2 in self.data[lang2]:
                    text2 = self._comparable_text(lang2, ed2, chapter)
                    if text2 is None:
                        continue

                    comparison = self.llm.semantic_comparison(
                        text1, text2,
                        {"language": lang1, "year": ed1['Year Published']},
                        {"language": lang2, "year": ed2['Year Published']}
                    )

                    comp_key = f"{lang1}_{ed1['Year Published']}_vs_{lang2}_{ed2['Year Published']}_ch{chapter}"
                    self.all_comparisons[comp_key] = comparison

                    print(f"    {lang1} {ed1['Year Published']} vs {lang2} {ed2['Year Published']}")

    def _save_cumulative_results(self, result_type: str):
        """Write the cumulative results of one kind to its JSON file.

        Args:
            result_type: ``"translations"`` selects the translations store;
                any other value selects the comparisons store. The value is
                also embedded in the output file name.
        """
        if result_type == "translations":
            data = self.all_translations
        else:
            data = self.all_comparisons

        filename = Config.OUTPUT_DIR / f"montesquieu_{result_type}_llm.json"

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(data)} cumulative {result_type} to {filename}")

# %% FIXED Main execution
def run_full_analysis():
    """Drive translation and comparison for chapters "1" through "6".

    One LLM interface and one analyzer are created up front and reused for
    every chapter, so the analyzer's result stores accumulate. After the
    last chapter the web integration file is generated and totals are
    printed.

    Returns:
        The analyzer holding the cumulative translations and comparisons.
    """
    print("=== Montesquieu FULL LLM Analysis Pipeline ===")

    # Built once; both objects are shared by all chapters.
    pipeline = MontesquieuAnalyzer(LLMInterface(provider=Config.MODEL_TYPE))

    rule = "=" * 50
    for chapter_no in range(1, 7):
        chapter = str(chapter_no)

        print(f"\n{rule}")
        print(f"PROCESSING CHAPTER {chapter}")
        print(rule)

        # Translate first: comparisons of non-English editions rely on the
        # translations stored for this chapter.
        pipeline.translate_all_texts(chapter=chapter, languages=None)
        pipeline.compare_editions(chapter=chapter, comparison_type="all")

    # Create final web integration file
    create_web_integration_file()

    print("\n✅ Analysis complete for ALL chapters and languages!")
    print(f"Total translations: {len(pipeline.all_translations)}")
    print(f"Total comparisons: {len(pipeline.all_comparisons)}")

    return pipeline

def create_web_integration_file():
    """Combine the saved translations and comparisons into one web JSON file.

    Reads the two cumulative result files from the output directory, keeps
    for each comparison that has a "conceptual_changes" entry its changes,
    similarity (default 0) and summary (default ""), and writes the result
    to "montesquieu_web_data.json" in the same directory.
    """
    out_dir = Config.OUTPUT_DIR

    def _read_json(name):
        # Small local loader shared by both input files.
        with open(out_dir / name, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    translations = _read_json("montesquieu_translations_llm.json")
    comparisons = _read_json("montesquieu_comparisons_llm.json")

    # Comparisons without "conceptual_changes" are left out of the web data.
    semantic_analysis = {
        pair_id: {
            "changes": details["conceptual_changes"],
            "similarity": details.get("overall_similarity", 0),
            "summary": details.get("overall_assessment", ""),
        }
        for pair_id, details in comparisons.items()
        if "conceptual_changes" in details
    }

    web_data = {
        "translations": translations,
        "semantic_analysis": semantic_analysis,
        "visualization": {
            "supported": True,
            "types": ["differences", "confidence", "evolution"],
        },
    }

    with open(out_dir / "montesquieu_web_data.json", 'w', encoding='utf-8') as handle:
        json.dump(web_data, handle, indent=2, ensure_ascii=False)

    print(f"Web integration file created with {len(translations)} translations and {len(comparisons)} comparisons")

# %% Execute
# Run the full pipeline only when this file is executed directly; the
# returned analyzer is kept in the module-level name `analyzer`.
if __name__ == "__main__":
    analyzer = run_full_analysis()

=== Montesquieu FULL LLM Analysis Pipeline ===
Loading Montesquieu data from GitHub...
Loaded 5 languages with 15 total editions

PROCESSING CHAPTER 1

=== Translating Chapter 1 ===

Processing French editions...
  1748 - Barillot & fils, Genève
Using cached translation for French text
  1750 - Huart & Moreau, Paris, chez Barrillot & Fils, Genève
Using cached translation for French text
  1758 - Arkstee & Merkus, Amsterdam & Leipzig / Huart, Paris
Using cached translation for French text
  1784 - Sanson & Compagnie, Deux-Ponts
Using cached translation for French text
  1803 - Didot, Paris
Using cached translation for French text

Processing Italian editions...
  1750 - Simone, Naples
Using cached translation for Italian text
  1777 - Terres, Napoli
Using cached translation for Italian text

Processing German editions...
  1753 - S.N., Frankfurt & Leipzig
Using cached translation for German text
  1799 - Wien & Bauer, Vienna
Using cached translation for German text

Processing Polish ed

In [2]:
#!/usr/bin/env python3
"""
Montesquieu Digital Humanities Analysis - FIXED VERSION
LLM-based Translation and Semantic Comparison for ALL chapters
"""

# %% Import libraries
import hashlib
import json
import os
import time
from datetime import datetime
from itertools import combinations  # Added this import
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import openai
import pandas as pd
import requests
from anthropic import Anthropic

# %% Configuration
class Config:
    """Configuration for reproducible analysis.

    All settings are plain class attributes read as ``Config.NAME``.
    """
    # Provider passed to the LLM interface by the main pipeline.
    MODEL_TYPE = "anthropic"

    # API keys are read from the environment instead of being written into
    # the source; each is None when its variable is not set.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
    ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

    # Model versions
    OPENAI_MODEL = "gpt-4-0613"
    ANTHROPIC_MODEL = "claude-3-5-sonnet-20241022"

    # Generation parameters
    TEMPERATURE = 0.3
    MAX_TOKENS = 2000
    # NOTE(review): SEED is not referenced by the code in this script.
    SEED = 42

    # File paths
    SOURCE_DATA_URL = "https://raw.githubusercontent.com/lokalkosmos/Lesprit/refs/heads/main/montesquieu_editions.json"
    OUTPUT_DIR = Path("montesquieu_llm_output")
    CACHE_FILE = "llm_cache.json"

    # Analysis parameters
    # NOTE(review): BATCH_SIZE is not referenced by the code in this script.
    BATCH_SIZE = 5
    # Seconds slept after each uncached LLM request.
    RATE_LIMIT_DELAY = 2

# %% Data Loading
def load_montesquieu_data():
    """Download and decode the Montesquieu editions JSON.

    Returns:
        The decoded JSON document from ``Config.SOURCE_DATA_URL``. The
        summary line printed here treats it as a mapping from language to a
        sized collection of editions.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    print("Loading Montesquieu data from GitHub...")
    # Bound the wait so a stalled connection cannot hang the pipeline.
    response = requests.get(Config.SOURCE_DATA_URL, timeout=30)
    # Fail on 4xx/5xx instead of trying to JSON-decode an error page.
    response.raise_for_status()
    data = response.json()
    print(f"Loaded {len(data)} languages with {sum(len(editions) for editions in data.values())} total editions")
    return data

# %% LLM Interface
class LLMInterface:
    """Unified interface for different LLM providers.

    Wraps the Anthropic and OpenAI chat APIs behind two task methods
    (``translate_text`` and ``semantic_comparison``). Every result is stored
    in a JSON file cache keyed by provider, model, task name and prompt hash,
    and the cache file is rewritten after each new result.
    """

    # Providers this class knows how to call.
    SUPPORTED_PROVIDERS = ("anthropic", "openai")

    def __init__(self, provider: str = "anthropic"):
        """Select the provider, load the cache and configure the client.

        Args:
            provider: ``"anthropic"`` or ``"openai"``.

        Raises:
            ValueError: If ``provider`` is not supported. Failing here avoids
                a later error from the missing ``model`` attribute.
        """
        if provider not in self.SUPPORTED_PROVIDERS:
            raise ValueError(
                f"Unsupported provider {provider!r}; expected one of {self.SUPPORTED_PROVIDERS}"
            )

        self.provider = provider
        self.cache = self._load_cache()

        if provider == "anthropic":
            self.client = Anthropic(api_key=Config.ANTHROPIC_API_KEY)
            self.model = Config.ANTHROPIC_MODEL
        else:
            openai.api_key = Config.OPENAI_API_KEY
            self.model = Config.OPENAI_MODEL

    def _load_cache(self):
        """Load cached responses; return an empty dict if none are usable."""
        try:
            with open(Config.CACHE_FILE, 'r', encoding='utf-8') as f:
                cached = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file or invalid JSON: start with no cache.
            return {}
        # The cache is used as a mapping; ignore any other JSON value.
        return cached if isinstance(cached, dict) else {}

    def _save_cache(self):
        """Save cache to file"""
        with open(Config.CACHE_FILE, 'w', encoding='utf-8') as f:
            json.dump(self.cache, f, indent=2, ensure_ascii=False)

    def _get_cache_key(self, prompt: str, function: str):
        """Generate cache key from provider, model, task name and prompt hash."""
        prompt_hash = hashlib.sha256(prompt.encode()).hexdigest()
        return f"{self.provider}_{self.model}_{function}_{prompt_hash}"

    def _complete(self, prompt: str) -> str:
        """Send one user prompt to the configured provider and return its text."""
        if self.provider == "anthropic":
            response = self.client.messages.create(
                model=self.model,
                max_tokens=Config.MAX_TOKENS,
                temperature=Config.TEMPERATURE,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text
        if self.provider == "openai":
            # NOTE(review): ChatCompletion is the pre-1.0 openai interface -
            # confirm against the installed openai package version.
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=Config.TEMPERATURE,
                max_tokens=Config.MAX_TOKENS
            )
            return response.choices[0].message.content
        raise ValueError(f"Unsupported provider {self.provider!r}")

    @staticmethod
    def _parse_json_object(content) -> Optional[dict]:
        """Extract a JSON object from a model reply, or return None.

        The whole reply is tried first. If that fails, the span from the
        first ``{`` to the last ``}`` is tried, which handles replies wrapped
        in Markdown code fences or surrounded by prose. Only a JSON object
        (dict) is accepted, because callers add a ``"metadata"`` key to it.
        """
        if not isinstance(content, str):
            return None

        candidates = [content]
        start, end = content.find("{"), content.rfind("}")
        if 0 <= start < end:
            candidates.append(content[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except ValueError:
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    def translate_text(self, text: str, source_lang: str, target_lang: str = "English") -> Dict:
        """Translate historical text with scholarly precision.

        Args:
            text: Source text to translate.
            source_lang: Language name inserted into the prompt.
            target_lang: Target language name inserted into the prompt.

        Returns:
            The model's JSON reply as a dict plus a ``"metadata"`` entry. If
            no JSON object can be extracted, a fallback dict is returned
            whose ``"translation"`` is the raw reply. A cached result is
            returned as-is without calling the provider.
        """
        # The prompt text feeds the cache key, so it is kept unchanged.
        prompt = f"""You are an expert translator specializing in 18th-century philosophical texts, 
particularly the works of Montesquieu. Your task is to translate the following {source_lang} 
text to {target_lang} while:

1. Preserving the exact philosophical terminology and concepts
2. Maintaining period-appropriate language and style
3. Keeping sentence structure as close to the original as possible
4. Noting any untranslatable concepts or terms

Original {source_lang} text:
"{text}"

Provide your translation in the following JSON format:
{{
    "translation": "your translation here",
    "translation_notes": ["note about specific terms or choices"],
    "untranslatable_terms": [{{"term": "original", "explanation": "why untranslatable"}}],
    "confidence": 0.0-1.0
}}"""

        cache_key = self._get_cache_key(prompt, "translate")

        # Check cache
        if cache_key in self.cache:
            print(f"Using cached translation for {source_lang} text")
            return self.cache[cache_key]

        # Generate new translation
        print(f"Translating {source_lang} text ({len(text)} chars)...")
        content = self._complete(prompt)

        # Parse JSON response
        result = self._parse_json_object(content)
        if result is None:
            result = {
                "translation": content,
                "translation_notes": ["Raw response - JSON parsing failed"],
                "untranslatable_terms": [],
                "confidence": 0.7
            }

        # Add metadata
        result["metadata"] = {
            "model": self.model,
            "provider": self.provider,
            "source_language": source_lang,
            "target_language": target_lang,
            "timestamp": datetime.now().isoformat(),
            "prompt_hash": hashlib.sha256(prompt.encode()).hexdigest()
        }

        # Cache result
        self.cache[cache_key] = result
        self._save_cache()

        # Rate limiting
        time.sleep(Config.RATE_LIMIT_DELAY)

        return result

    def semantic_comparison(self, text1: str, text2: str, metadata1: Dict, metadata2: Dict) -> Dict:
        """Perform detailed semantic comparison of two text versions.

        Args:
            text1: First text.
            text2: Second text.
            metadata1: Mapping whose ``"language"`` and ``"year"`` entries
                label the first text in the prompt and in the result.
            metadata2: Same, for the second text.

        Returns:
            The model's JSON reply as a dict plus a ``"metadata"`` entry. If
            no JSON object can be extracted, a fallback dict with an
            ``"error"`` entry, the raw reply and a placeholder
            ``"overall_similarity"`` of 0.5 is returned. A cached result is
            returned as-is without calling the provider.
        """
        # The prompt text feeds the cache key, so it is kept unchanged.
        prompt = f"""You are a digital humanities scholar specializing in 18th-century philosophy 
and textual criticism. Perform a detailed semantic comparison of these two versions of 
Montesquieu's text, identifying:

1. Conceptual differences (changes in philosophical meaning)
2. Rhetorical variations (changes in argumentative structure)
3. Lexical shifts (word choice differences and their implications)
4. Syntactic changes (sentence structure modifications)
5. Cultural/historical adaptations

Text 1 ({metadata1.get('language', 'Unknown')} - {metadata1.get('year', 'Unknown')}):
"{text1}"

Text 2 ({metadata2.get('language', 'Unknown')} - {metadata2.get('year', 'Unknown')}):
"{text2}"

Provide your analysis in the following JSON structure:
{{
    "overall_similarity": 0.0-1.0,
    "conceptual_changes": [
        {{
            "type": "addition|deletion|modification",
            "concept": "philosophical concept affected",
            "text1_excerpt": "relevant excerpt",
            "text2_excerpt": "relevant excerpt", 
            "significance": "why this matters philosophically",
            "confidence": 0.0-1.0
        }}
    ],
    "rhetorical_shifts": [
        {{
            "element": "rhetorical element",
            "change": "description of change",
            "effect": "impact on argument"
        }}
    ],
    "lexical_analysis": {{
        "key_terms_added": ["term1", "term2"],
        "key_terms_removed": ["term1", "term2"],
        "semantic_field_shifts": ["description of shifts"]
    }},
    "historical_context": "relevant historical/cultural factors",
    "overall_assessment": "synthesis of the comparison"
}}"""

        cache_key = self._get_cache_key(prompt, "semantic_compare")

        if cache_key in self.cache:
            print("Using cached semantic comparison")
            return self.cache[cache_key]

        print("Performing semantic comparison...")
        content = self._complete(prompt)

        result = self._parse_json_object(content)
        if result is None:
            result = {
                "error": "JSON parsing failed",
                "raw_response": content,
                "overall_similarity": 0.5
            }

        result["metadata"] = {
            "model": self.model,
            "provider": self.provider,
            "comparison": f"{metadata1.get('language')}-{metadata1.get('year')} vs {metadata2.get('language')}-{metadata2.get('year')}",
            "timestamp": datetime.now().isoformat()
        }

        self.cache[cache_key] = result
        self._save_cache()

        time.sleep(Config.RATE_LIMIT_DELAY)

        return result

# %% FIXED Main Analysis Pipeline
class MontesquieuAnalyzer:
    """Main analysis pipeline that accumulates results across all chapters.

    Translations and comparisons are kept in instance-level dictionaries, so
    calling the chapter methods repeatedly builds one cumulative result set.
    Each public method rewrites its JSON output file with everything
    collected so far.
    """

    def __init__(self, llm_interface: LLMInterface):
        """Load the edition data and prepare empty cumulative result stores.

        Args:
            llm_interface: Object providing ``translate_text`` and
                ``semantic_comparison``.
        """
        self.llm = llm_interface
        self.data = load_montesquieu_data()

        # Initialised once; every chapter run adds to these dictionaries.
        self.all_translations = {}
        self.all_comparisons = {}

        # Create output directory
        Config.OUTPUT_DIR.mkdir(exist_ok=True)

    def translate_all_texts(self, chapter: str = "1", languages: List[str] = None):
        """Translate all non-English texts for a specific chapter.

        Args:
            chapter: Chapter key looked up in each edition's ``Chapters``
                mapping.
            languages: Languages to process; ``None`` means every language in
                the loaded data except ``"English"``. ``"English"`` is
                skipped even when listed explicitly.

        Editions that do not contain the chapter are skipped. Results are
        stored under ``"{language}_{year}_ch{chapter}"``; two editions of the
        same language and year would therefore share one key.
        """
        if languages is None:
            languages = [lang for lang in self.data if lang != "English"]

        print(f"\n=== Translating Chapter {chapter} ===")

        for language in languages:
            if language == "English":
                continue

            print(f"\nProcessing {language} editions...")

            for edition in self.data[language]:
                year = edition['Year Published']
                publisher = edition['Publisher']

                # Get chapter text
                chapter_data = edition['Chapters'].get(chapter)
                if not chapter_data:
                    continue

                print(f"  {year} - {publisher}")

                translation_result = self.llm.translate_text(
                    text=chapter_data['Text'],
                    source_lang=language
                )

                # Store result in the cumulative dictionary
                self.all_translations[f"{language}_{year}_ch{chapter}"] = {
                    "language": language,
                    "year": year,
                    "publisher": publisher,
                    "chapter": chapter,
                    "original_title": chapter_data['Title'],
                    "original_text": chapter_data['Text'],
                    **translation_result
                }

        # Save cumulative translations after each chapter
        self._save_cumulative_results("translations")

    def compare_editions(self, chapter: str = "1", comparison_type: str = "all"):
        """Compare editions of one chapter pairwise.

        Args:
            chapter: Chapter key to compare.
            comparison_type: ``"chronological"`` compares every pair of
                editions within each language (all pairs, not only adjacent
                years), ``"cross-linguistic"`` compares every edition of one
                language with every edition of another, and ``"all"`` does
                both. Any other value performs no comparison.

        Pairs for which either text is unavailable are skipped. The
        cumulative comparisons file is rewritten at the end in every case.
        """
        print(f"\n=== Comparing Editions - Chapter {chapter} ===")

        if comparison_type in ("all", "chronological"):
            self._compare_within_languages(chapter)

        if comparison_type in ("all", "cross-linguistic"):
            self._compare_across_languages(chapter)

        # Save cumulative comparisons after each chapter
        self._save_cumulative_results("comparisons")

    def _comparable_text(self, language: str, edition: dict, chapter: str) -> Optional[str]:
        """Return the text used for comparison, or None when unavailable.

        English editions contribute their own chapter text; every other
        language contributes the stored translation for that edition.
        """
        if language == "English":
            # .get() so an edition lacking this chapter is skipped instead of
            # raising KeyError, matching how translate_all_texts treats it.
            chapter_data = edition['Chapters'].get(chapter)
            return chapter_data['Text'] if chapter_data else None

        entry = self.all_translations.get(
            f"{language}_{edition['Year Published']}_ch{chapter}"
        )
        return entry.get("translation") if entry else None

    def _compare_within_languages(self, chapter: str) -> None:
        """Compare all pairs of editions inside each language."""
        for language, language_editions in self.data.items():
            editions = sorted(language_editions, key=lambda x: x['Year Published'])

            if len(editions) < 2:
                continue

            print(f"\nComparing {language} editions (all pairs)...")

            for ed1, ed2 in combinations(editions, 2):
                text1 = self._comparable_text(language, ed1, chapter)
                text2 = self._comparable_text(language, ed2, chapter)
                if text1 is None or text2 is None:
                    continue

                comparison = self.llm.semantic_comparison(
                    text1, text2,
                    {"language": language, "year": ed1['Year Published']},
                    {"language": language, "year": ed2['Year Published']}
                )

                comp_key = f"{language}_{ed1['Year Published']}_vs_{ed2['Year Published']}_ch{chapter}"
                self.all_comparisons[comp_key] = comparison

                print(f"  Compared {ed1['Year Published']} vs {ed2['Year Published']}")

    def _compare_across_languages(self, chapter: str) -> None:
        """Compare every edition of each language with those of every other."""
        print(f"\nComparing across languages...")

        for lang1, lang2 in combinations(list(self.data), 2):
            print(f"\n  Comparing {lang1} vs {lang2}...")

            for ed1 in self.data[lang1]:
                # The first text does not depend on ed2, so resolve it once.
                text1 = self._comparable_text(lang1, ed1, chapter)
                if text1 is None:
                    continue

                for ed2 in self.data[lang2]:
                    text2 = self._comparable_text(lang2, ed2, chapter)
                    if text2 is None:
                        continue

                    comparison = self.llm.semantic_comparison(
                        text1, text2,
                        {"language": lang1, "year": ed1['Year Published']},
                        {"language": lang2, "year": ed2['Year Published']}
                    )

                    comp_key = f"{lang1}_{ed1['Year Published']}_vs_{lang2}_{ed2['Year Published']}_ch{chapter}"
                    self.all_comparisons[comp_key] = comparison

                    print(f"    {lang1} {ed1['Year Published']} vs {lang2} {ed2['Year Published']}")

    def _save_cumulative_results(self, result_type: str):
        """Write the cumulative results of one kind to its JSON file.

        Args:
            result_type: ``"translations"`` selects the translations store;
                any other value selects the comparisons store. The value is
                also embedded in the output file name.
        """
        if result_type == "translations":
            data = self.all_translations
        else:
            data = self.all_comparisons

        filename = Config.OUTPUT_DIR / f"montesquieu_{result_type}_llm.json"

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"Saved {len(data)} cumulative {result_type} to {filename}")

# %% FIXED Main execution
def run_full_analysis():
    """Drive translation and comparison for chapters "1" through "6".

    One LLM interface and one analyzer are created up front and reused for
    every chapter, so the analyzer's result stores accumulate. After the
    last chapter the web integration file is generated and totals are
    printed.

    Returns:
        The analyzer holding the cumulative translations and comparisons.
    """
    print("=== Montesquieu FULL LLM Analysis Pipeline ===")

    # Built once; both objects are shared by all chapters.
    pipeline = MontesquieuAnalyzer(LLMInterface(provider=Config.MODEL_TYPE))

    rule = "=" * 50
    for chapter_no in range(1, 7):
        chapter = str(chapter_no)

        print(f"\n{rule}")
        print(f"PROCESSING CHAPTER {chapter}")
        print(rule)

        # Translate first: comparisons of non-English editions rely on the
        # translations stored for this chapter.
        pipeline.translate_all_texts(chapter=chapter, languages=None)
        pipeline.compare_editions(chapter=chapter, comparison_type="all")

    # Create final web integration file
    create_web_integration_file()

    print("\n✅ Analysis complete for ALL chapters and languages!")
    print(f"Total translations: {len(pipeline.all_translations)}")
    print(f"Total comparisons: {len(pipeline.all_comparisons)}")

    return pipeline

def create_web_integration_file():
    """Combine the saved translations and comparisons into one web JSON file.

    Reads the two cumulative result files from the output directory, keeps
    for each comparison that has a "conceptual_changes" entry its changes,
    similarity (default 0) and summary (default ""), and writes the result
    to "montesquieu_web_data.json" in the same directory.
    """
    out_dir = Config.OUTPUT_DIR

    def _read_json(name):
        # Small local loader shared by both input files.
        with open(out_dir / name, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    translations = _read_json("montesquieu_translations_llm.json")
    comparisons = _read_json("montesquieu_comparisons_llm.json")

    # Comparisons without "conceptual_changes" are left out of the web data.
    semantic_analysis = {
        pair_id: {
            "changes": details["conceptual_changes"],
            "similarity": details.get("overall_similarity", 0),
            "summary": details.get("overall_assessment", ""),
        }
        for pair_id, details in comparisons.items()
        if "conceptual_changes" in details
    }

    web_data = {
        "translations": translations,
        "semantic_analysis": semantic_analysis,
        "visualization": {
            "supported": True,
            "types": ["differences", "confidence", "evolution"],
        },
    }

    with open(out_dir / "montesquieu_web_data.json", 'w', encoding='utf-8') as handle:
        json.dump(web_data, handle, indent=2, ensure_ascii=False)

    print(f"Web integration file created with {len(translations)} translations and {len(comparisons)} comparisons")

# %% Execute
# Run the full pipeline only when this file is executed directly; the
# returned analyzer is kept in the module-level name `analyzer`.
if __name__ == "__main__":
    analyzer = run_full_analysis()

=== Montesquieu FULL LLM Analysis Pipeline ===
Loading Montesquieu data from GitHub...
Loaded 5 languages with 15 total editions

PROCESSING CHAPTER 1

=== Translating Chapter 1 ===

Processing French editions...
  1748 - Barillot & fils, Genève
Using cached translation for French text
  1750 - Huart & Moreau, Paris, chez Barrillot & Fils, Genève
Using cached translation for French text
  1758 - Arkstee & Merkus, Amsterdam & Leipzig / Huart, Paris
Using cached translation for French text
  1784 - Sanson & Compagnie, Deux-Ponts
Using cached translation for French text
  1803 - Didot, Paris
Using cached translation for French text

Processing Italian editions...
  1750 - Simone, Naples
Using cached translation for Italian text
  1777 - Terres, Napoli
Using cached translation for Italian text

Processing German editions...
  1753 - S.N., Frankfurt & Leipzig
Using cached translation for German text
  1799 - Wien & Bauer, Vienna
Using cached translation for German text

Processing Polish ed