### THERMODYNAMIC LENGTH ANALYSIS - Parallel Transport on Hidden States (Levi–Civita–style Continuous–Depth Surrogate)

In [None]:
!pip install -q transformers datasets plotly torch

import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

class UnifiedThermodynamicFramework:
    """
    Implements Method 2 (Spectral Curvature) + Method 5 (Belief Vectors)
    from NDNA Alternative paper with Spinal thermodynamic length
    """

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"🚀 Unified Framework | Device: {self.device}")

    def load_model(self, model_name):
        """
        Load a causal language model and its tokenizer by name.

        The tokenizer's pad token is overwritten with its EOS token. The model
        is loaded in float32 with `device_map="auto"` and
        `low_cpu_mem_usage=True`.

        Args:
            model_name: identifier passed to both `from_pretrained` calls.

        Returns:
            (model, tokenizer) tuple.
        """
        model_kwargs = dict(
            torch_dtype=torch.float32,  # Changed from torch.float16 to torch.float32
            device_map="auto",
            low_cpu_mem_usage=True,
        )

        tok = AutoTokenizer.from_pretrained(model_name)
        tok.pad_token = tok.eos_token

        lm = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
        return lm, tok

    def compute_spectral_curvature(self, layer_output):
        """
        Method 2: Spectral Curvature (Page 5-6)
        κ_spectral = trace(H) / ||H||_F where H is Hessian approximation
        """
        # Compute covariance as Hessian approximation
        H = torch.cov(layer_output.T)

        # Spectral curvature components
        trace_H = torch.trace(H).item()
        frobenius_norm = torch.norm(H, p='fro').item()

        spectral_curvature = trace_H / (frobenius_norm + 1e-8)

        # Eigenvalue analysis for curvature direction
        eigenvalues = torch.linalg.eigvalsh(H).cpu().numpy()

        return {
            'curvature': spectral_curvature,
            'trace': trace_H,
            'frobenius': frobenius_norm,
            'eigenvalues': eigenvalues,
            'condition_number': np.max(np.abs(eigenvalues)) / (np.min(np.abs(eigenvalues)) + 1e-10)
        }

    def compute_belief_vector(self, layer_output, next_layer_output):
        """
        Method 5: Belief Vector Evolution (Page 5-6)
        b_t = softmax(W_t h_t) where h_t is hidden state
        """
        # Compute belief transformation
        delta_h = next_layer_output - layer_output

        # Belief vector as normalized probability distribution
        belief_logits = torch.mean(delta_h, dim=-1)
        belief_vector = torch.softmax(belief_logits, dim=-1)

        # Belief entropy and divergence
        entropy = -torch.sum(belief_vector * torch.log(belief_vector + 1e-10)).item()

        return {
            'belief_vector': belief_vector.cpu().numpy(),
            'entropy': entropy,
            'concentration': torch.max(belief_vector).item()
        }

    def compute_thermodynamic_length(self, curvatures):
        """
        Thermodynamic Length: L = ∫√(g_μν dx^μ dx^ν)
        Using Fisher-Rao metric from spectral curvatures
        """
        length = 0.0
        for i in range(1, len(curvatures)):
            # Fisher-Rao distance between consecutive curvature states
            κ1, κ2 = curvatures[i-1], curvatures[i]

            # Arccosine distance for positive definite metrics
            distance = 2.0 * np.arccos(np.clip(
                np.sqrt(κ1 * κ2) / (κ1 + κ2 + 1e-8), 0, 1
            ))
            length += distance

        return length

    def analyze_model(self, model, tokenizer, texts, model_name):
        """Unified analysis combining Methods 2 & 5"""
        print(f"\n🔬 Analyzing {model_name}...")

        num_layers = len(model.transformer.h)
        results = {
            'spectral_curvatures': [],
            'belief_entropies': [],
            'condition_numbers': [],
            'thermodynamic_contributions': []
        }

        # Process texts through model
        for text in texts[:3]:  # Limited for GPU efficiency
            tokens = tokenizer(text, return_tensors="pt", max_length=128,
                             truncation=True, padding=True).to(self.device)

            with torch.no_grad():
                outputs = model(**tokens, output_hidden_states=True)
                hidden_states = outputs.hidden_states

            # Analyze each layer
            for i in range(num_layers):
                h_t = hidden_states[i].squeeze(0)

                # Method 2: Spectral Curvature
                spectral = self.compute_spectral_curvature(h_t)
                results['spectral_curvatures'].append(spectral['curvature'])
                results['condition_numbers'].append(spectral['condition_number'])

                # Method 5: Belief Vector (if next layer exists)
                if i < num_layers - 1:
                    h_next = hidden_states[i+1].squeeze(0)
                    belief = self.compute_belief_vector(h_t, h_next)
                    results['belief_entropies'].append(belief['entropy'])

        # Average across texts
        results['spectral_curvatures'] = np.mean(
            np.array(results['spectral_curvatures']).reshape(-1, num_layers), axis=0
        )
        results['condition_numbers'] = np.mean(
            np.array(results['condition_numbers']).reshape(-1, num_layers), axis=0
        )
        results['belief_entropies'] = np.mean(
            np.array(results['belief_entropies']).reshape(-1, num_layers-1), axis=0
        )


        # Compute thermodynamic length
        results['thermodynamic_length'] = self.compute_thermodynamic_length(
            results['spectral_curvatures']
        )

        # Normalize spectral curvatures to 1-100 scale
        κ = results['spectral_curvatures']
        results['normalized_curvature'] = 1 + 99 * (κ - κ.min()) / (κ.max() - κ.min() + 1e-8)

        print(f"✅ {model_name}: Length={results['thermodynamic_length']:.6f}")

        return results


    def create_unified_plot(self, llama_results, gpt_results):
        """Unified 3D visualization.

        Builds a 2x2 figure: normalised spectral curvature per layer (3D
        lines), belief entropy per layer transition (3D markers), a two-row
        surface of the curvatures, and a bar chart of thermodynamic length.
        The figure is displayed with `fig.show()` and returned.

        Axis titles are applied by subplot position (row/col). The earlier
        code set `xaxis4`/`yaxis4` for the bar chart, but that chart is the
        only cartesian subplot in the grid (the other three are 3D scenes), so
        those titles were attached to axes no trace uses.

        Args:
            llama_results: dict from `analyze_model`; reads
                'normalized_curvature', 'belief_entropies' and
                'thermodynamic_length'.
            gpt_results: same structure for the second model.

        Returns:
            The plotly figure.
        """
        print("\n🎨 Creating Unified 3D Plot...")

        fig = make_subplots(
            rows=2, cols=2,
            specs=[
                [{"type": "scatter3d"}, {"type": "scatter3d"}],
                [{"type": "surface"}, {"type": "scatter"}]
            ],
            subplot_titles=[
                'Spectral Curvature (Method 2)',
                'Belief Entropy (Method 5)',
                'Combined Surface',
                'Thermodynamic Length'
            ]
        )

        def label_scene(row, col, z_title):
            """Set the three axis titles of the 3D scene at (row, col)."""
            fig.update_scenes(
                xaxis=dict(title=dict(text='Layer Number')),
                yaxis=dict(title=dict(text='Model (0: Llama, 1: GPT-2)')),
                zaxis=dict(title=dict(text=z_title)),
                row=row, col=col
            )

        llama_curv = llama_results['normalized_curvature']
        gpt_curv = gpt_results['normalized_curvature']

        # Plot 1: Spectral Curvature (y is a constant 0/1 lane per model)
        llama_layers = np.arange(len(llama_curv))
        gpt_layers = np.arange(len(gpt_curv))

        fig.add_trace(go.Scatter3d(
            x=llama_layers, y=np.zeros_like(llama_layers),
            z=llama_curv,
            mode='lines+markers', line=dict(color='blue', width=5),
            marker=dict(size=8, color=llama_curv, colorscale='Blues'),
            name='Llama Curvature'
        ), row=1, col=1)

        fig.add_trace(go.Scatter3d(
            x=gpt_layers, y=np.ones_like(gpt_layers),
            z=gpt_curv,
            mode='lines+markers', line=dict(color='red', width=5),
            marker=dict(size=8, color=gpt_curv, colorscale='Reds'),
            name='GPT Curvature'
        ), row=1, col=1)

        label_scene(1, 1, 'Normalized Spectral Curvature')

        # Plot 2: Belief Entropy
        llama_entropy = llama_results['belief_entropies']
        gpt_entropy = gpt_results['belief_entropies']

        fig.add_trace(go.Scatter3d(
            x=np.arange(len(llama_entropy)),
            y=np.zeros(len(llama_entropy)),
            z=llama_entropy,
            mode='markers', marker=dict(size=6, color='cyan'),
            name='Llama Belief'
        ), row=1, col=2)

        fig.add_trace(go.Scatter3d(
            x=np.arange(len(gpt_entropy)),
            y=np.ones(len(gpt_entropy)),
            z=gpt_entropy,
            mode='markers', marker=dict(size=6, color='orange'),
            name='GPT Belief'
        ), row=1, col=2)

        label_scene(1, 2, 'Belief Entropy')

        # Plot 3: Surface. The shorter model is edge-padded so both rows have
        # the same number of columns.
        max_len = max(len(llama_curv), len(gpt_curv))
        llama_pad = np.pad(llama_curv, (0, max_len - len(llama_curv)), mode='edge')
        gpt_pad = np.pad(gpt_curv, (0, max_len - len(gpt_curv)), mode='edge')

        surface_data = np.array([llama_pad, gpt_pad])
        layer_grid, model_grid = np.meshgrid(np.arange(max_len), [0, 1])

        fig.add_trace(go.Surface(
            x=layer_grid, y=model_grid, z=surface_data,
            colorscale='Viridis', opacity=0.8
        ), row=2, col=1)

        label_scene(2, 1, 'Normalized Spectral Curvature')

        # Plot 4: Length comparison
        fig.add_trace(go.Bar(
            x=['Llama', 'GPT-2'],
            y=[llama_results['thermodynamic_length'],
               gpt_results['thermodynamic_length']],
            marker_color=['blue', 'red']
        ), row=2, col=2)

        # Address the bar chart's axes by grid position, not by axis number
        fig.update_xaxes(title_text='Model', row=2, col=2)
        fig.update_yaxes(title_text='Thermodynamic Length', row=2, col=2)

        fig.update_layout(
            title='Unified Thermodynamic Framework: Methods 2 & 5',
            height=1000, width=1400, showlegend=True
        )

        fig.show()

        return fig

def run_unified_analysis():
    """Main execution.

    Loads 20 SQuAD validation examples, loads "gpt2" (used as the stand-in
    labelled "Llama") and "gpt2-large", analyses both, plots the comparison and
    prints the two thermodynamic lengths.

    Returns:
        dict with keys 'llama', 'gpt' (per-model result dicts) and 'fig'.
    """
    banner = "=" * 60
    print(banner)
    print("UNIFIED THERMODYNAMIC FRAMEWORK")
    print("Method 2: Spectral Curvature | Method 5: Belief Vectors")
    print(banner)

    framework = UnifiedThermodynamicFramework()

    # Dataset: each prompt is a truncated context followed by the question
    print("\n📚 Loading SQuAD...")
    dataset = load_dataset("squad", split="validation[:20]")
    texts = []
    for row in dataset:
        texts.append(f"Context: {row['context'][:200]} Q: {row['question']}")

    # Models
    print("\n📥 Loading Models...")
    llama_model, llama_tok = framework.load_model("gpt2")  # Proxy
    gpt_model, gpt_tok = framework.load_model("gpt2-large")

    # Analysis
    llama_results = framework.analyze_model(llama_model, llama_tok, texts, "Llama")
    gpt_results = framework.analyze_model(gpt_model, gpt_tok, texts, "GPT-2")

    # Visualisation
    fig = framework.create_unified_plot(llama_results, gpt_results)

    # Summary: the longer thermodynamic length wins; ties go to 'Llama'
    llama_length = llama_results['thermodynamic_length']
    gpt_length = gpt_results['thermodynamic_length']
    if gpt_length > llama_length:
        winner = 'GPT-2'
    else:
        winner = 'Llama'

    print("\n🏆 RESULTS:")
    print(f"Llama Length: {llama_length:.6f}")
    print(f"GPT-2 Length: {gpt_length:.6f}")
    print(f"Winner: {winner}")

    return {'llama': llama_results, 'gpt': gpt_results, 'fig': fig}

# Execute the whole pipeline when this cell runs. `results` is the dict returned by
# run_unified_analysis(): keys 'llama' and 'gpt' (per-model metrics) and 'fig'.
results = run_unified_analysis()

### Spectral Curvature-based Thermodynamic Length Prototype

In [None]:
!pip install -q transformers datasets plotly torch accelerate

import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import gc
import warnings
warnings.filterwarnings('ignore')

class ThermodynamicLengthAnalyzer:

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"🚀 Thermodynamic Length Analyzer | Device: {self.device}")

    def load_models(self):
        """Load Llama-3.2 and GPT-2 Large without quantization.

        Tries "meta-llama/Llama-3.2-3B" first and falls back to "gpt2-medium"
        as a proxy if that load fails for any reason. Every tokenizer has its
        pad token set to its EOS token.

        Changes from the earlier version:
        - float16 is used only when running on CUDA; on CPU the models are
          loaded in float32, because half-precision CPU kernels are incomplete
          in some PyTorch builds and the downstream covariance/eigenvalue
          analysis needs the extra range.
        - `trust_remote_code=True` was dropped from the Llama load; it allows a
          model repository to run its own Python code. NOTE(review): assumes
          the installed transformers version supports Llama natively — confirm.

        Returns:
            dict with keys 'llama_tok', 'llama', 'gpt_tok', 'gpt'.
        """
        print("\n📥 Loading Models...")

        dtype = torch.float16 if self.device.type == "cuda" else torch.float32

        def fetch(name):
            """Return (tokenizer, model) for `name`, with pad token set to EOS."""
            tok = AutoTokenizer.from_pretrained(name)
            tok.pad_token = tok.eos_token
            lm = AutoModelForCausalLM.from_pretrained(
                name,
                torch_dtype=dtype,
                device_map="auto",
                low_cpu_mem_usage=True
            )
            return tok, lm

        models = {}

        # Llama-3.2-3B (or proxy). The broad catch is deliberate: a missing
        # licence, no network and out-of-memory all take the same fallback.
        try:
            models['llama_tok'], models['llama'] = fetch("meta-llama/Llama-3.2-3B")
            print("✅ Llama-3.2-3B loaded")
        except Exception as e:
            print(f"   ⚠️  Llama-3.2 not available: {e}")
            print("   Loading proxy: gpt2-medium")
            models['llama_tok'], models['llama'] = fetch("gpt2-medium")
            print("✅ Llama proxy (gpt2-medium) loaded")

        # GPT-2 Large
        models['gpt_tok'], models['gpt'] = fetch("gpt2-large")
        print("✅ GPT-2 Large loaded")

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return models

    def load_squad_v2(self):
        """Load SQuAD 2.0 dataset.

        Takes the first 20 validation examples and formats each as a prompt
        holding the context (first 300 characters), the question and the first
        reference answer ('No answer' when there is none).

        Returns:
            list of dicts with 'text' (the prompt) and 'answerable' (bool).
        """
        print("\n📚 Loading SQuAD 2.0...")
        dataset = load_dataset("squad_v2", split="validation")

        samples = []
        # zip against range(20) stops after the first 20 examples
        for _, item in zip(range(20), dataset):
            reference_answers = item['answers']['text']
            if reference_answers:
                shown_answer = reference_answers[0]
            else:
                shown_answer = 'No answer'

            snippet = item['context'][:300]
            prompt = f"Context: {snippet}\nQuestion: {item['question']}\nAnswer: {shown_answer}"
            samples.append({'text': prompt, 'answerable': len(reference_answers) > 0})

        print(f"✅ {len(samples)} samples loaded")
        return samples

    def compute_spectral_curvature_accurate(self, hidden_state):
        """
        Accurate Method 2: Spectral Curvature
        κ_spectral = Tr(H) / ||H||_F
        """
        # Compute Hessian approximation via covariance
        if hidden_state.dim() == 3:
            hidden_state = hidden_state.squeeze(0)

        # Center the data
        H_centered = hidden_state - hidden_state.mean(dim=0, keepdim=True)

        # Covariance matrix as Hessian approximation
        H = torch.matmul(H_centered.T, H_centered) / (H_centered.shape[0] - 1)

        # Spectral curvature components
        trace = torch.trace(H).item()
        frobenius = torch.norm(H, p='fro').item()

        spectral_curvature = trace / (frobenius + 1e-10)

        # Eigenvalue decomposition for detailed analysis
        try:
            eigenvalues, eigenvectors = torch.linalg.eigh(H)
            eigenvalues = eigenvalues.cpu().numpy()

            # Spectral properties
            max_eigenval = np.max(eigenvalues)
            min_eigenval = np.min(eigenvalues[eigenvalues > 1e-10])
            condition_number = max_eigenval / min_eigenval if min_eigenval > 0 else 1e10
            spectral_gap = max_eigenval - eigenvalues[-2] if len(eigenvalues) > 1 else 0

        except:
            eigenvalues = np.array([1.0])
            condition_number = 1.0
            spectral_gap = 0.0

        return {
            'curvature': spectral_curvature,
            'trace': trace,
            'frobenius': frobenius,
            'eigenvalues': eigenvalues,
            'condition_number': condition_number,
            'spectral_gap': spectral_gap
        }

    def compute_thermodynamic_length_accurate(self, curvatures):
        """
        Accurate Thermodynamic Length using Fisher-Rao metric
        L = Σ d(κ_i, κ_{i+1}) where d is Fisher-Rao distance
        """
        if len(curvatures) < 2:
            return {'total_length': 0.0, 'layer_contributions': np.array([0.0]), 'cumulative_length': np.array([0.0])}

        total_length = 0.0
        layer_contributions = []

        for i in range(1, len(curvatures)):
            κ_prev = max(curvatures[i-1], 1e-10)
            κ_curr = max(curvatures[i], 1e-10)

            # Fisher-Rao distance for positive scalar parameters
            # d_FR(κ1, κ2) = 2 * arccos(sqrt(κ1 * κ2) / (κ1 + κ2))
            sqrt_product = np.sqrt(κ_prev * κ_curr)
            sum_params = κ_prev + κ_curr

            ratio = np.clip(sqrt_product / sum_params, 0, 1)
            fisher_rao_distance = 2.0 * np.arccos(ratio)

            total_length += fisher_rao_distance
            layer_contributions.append(fisher_rao_distance)

        layer_contributions = np.array([0.0] + layer_contributions)
        cumulative_length = np.cumsum(layer_contributions)

        return {
            'total_length': total_length,
            'layer_contributions': layer_contributions,
            'cumulative_length': cumulative_length
        }

    def analyze_model_complete(self, model, tokenizer, samples, model_name):
        """Complete thermodynamic analysis"""
        print(f"\n🔬 Analyzing {model_name}...")

        # Determine the number of layers based on model type
        if hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
            num_layers = len(model.transformer.h)
            hidden_states_attr = model.transformer.h
        elif hasattr(model, 'model') and hasattr(model.model, 'layers'):
            num_layers = len(model.model.layers)
            hidden_states_attr = model.model.layers
        else:
            raise AttributeError(f"Could not find layers for model type {type(model).__name__}")

        print(f"   Number of layers: {num_layers}")

        # Storage for metrics
        all_curvatures = []
        all_traces = []
        all_eigenvalues = []
        all_conditions = []
        all_spectral_gaps = []

        # Process samples
        for idx, sample in enumerate(samples[:8]):
            tokens = tokenizer(
                sample['text'],
                return_tensors="pt",
                max_length=256,
                truncation=True,
                padding=True
            ).to(self.device)

            with torch.no_grad():
                outputs = model(**tokens, output_hidden_states=True)
                hidden_states = outputs.hidden_states

            sample_curvatures = []
            sample_traces = []
            sample_conditions = []
            sample_gaps = []

            for layer_idx in range(num_layers):
                hidden = hidden_states[layer_idx]

                spectral = self.compute_spectral_curvature_accurate(hidden)

                sample_curvatures.append(spectral['curvature'])
                sample_traces.append(spectral['trace'])
                sample_conditions.append(spectral['condition_number'])
                sample_gaps.append(spectral['spectral_gap'])

                if idx == 0:  # Store eigenvalues from first sample
                    all_eigenvalues.append(spectral['eigenvalues'])

            all_curvatures.append(sample_curvatures)
            all_traces.append(sample_traces)
            all_conditions.append(sample_conditions)
            all_spectral_gaps.append(sample_gaps)

            if (idx + 1) % 3 == 0:
                print(f"   Processed {idx + 1}/{len(samples[:8])} samples")

        # Average across samples
        curvatures = np.mean(all_curvatures, axis=0)
        traces = np.mean(all_traces, axis=0)
        conditions = np.mean(all_conditions, axis=0)
        spectral_gaps = np.mean(all_spectral_gaps, axis=0)

        # Compute thermodynamic length
        thermo_results = self.compute_thermodynamic_length_accurate(curvatures)

        print(f"   ✅ Thermodynamic Length: {thermo_results['total_length']:.6f}")

        return {
            'model_name': model_name,
            'num_layers': num_layers,
            'curvatures': curvatures,
            'traces': traces,
            'conditions': conditions,
            'spectral_gaps': spectral_gaps,
            'eigenvalues': all_eigenvalues,
            'total_length': thermo_results['total_length'],
            'layer_contributions': thermo_results['layer_contributions'],
            'cumulative_length': thermo_results['cumulative_length']
        }

    def create_publication_quality_plots(self, llama_results, gpt_results):
        """Create publication-quality annotated plots.

        Builds a 3x2 figure: a full-width 3D trajectory (layer, curvature,
        cumulative length) with a blended surface between the two models,
        three per-layer line charts (cumulative length, curvature, layer
        contribution) and a bar chart of total length. The figure is shown
        with `fig.show()` and returned.

        Args:
            llama_results: dict from `analyze_model_complete`; reads
                'num_layers', 'curvatures', 'cumulative_length',
                'layer_contributions' and 'total_length'.
            gpt_results: same structure for the second model.

        Returns:
            The plotly figure.
        """
        print("\n🎨 Creating Publication-Quality Plots...")

        # Create comprehensive figure
        panel_titles = [
            '<b>3D Thermodynamic Landscape: Layer Depth vs Spectral Curvature</b>',
            '<b>Cumulative Thermodynamic Length by Layer</b>',
            '<b>Layer-wise Spectral Curvature Evolution</b>',
            '<b>Layer Contribution to Thermodynamic Length</b>',
            '<b>Model Comparison: Total Thermodynamic Length</b>'
        ]
        fig = make_subplots(
            rows=3, cols=2,
            specs=[
                [{"type": "scatter3d", "colspan": 2}, None],
                [{"type": "scatter"}, {"type": "scatter"}],
                [{"type": "scatter"}, {"type": "bar"}]
            ],
            subplot_titles=panel_titles,
            vertical_spacing=0.12,
            horizontal_spacing=0.15,
            row_heights=[0.5, 0.25, 0.25]
        )

        llama_layers = np.arange(llama_results['num_layers'])
        gpt_layers = np.arange(gpt_results['num_layers'])

        # Per-model drawing spec, Llama first so trace order is unchanged:
        # (results, layer index, label, line colour, colourscale, colourbar x, pale marker colour)
        series = [
            (llama_results, llama_layers, 'Llama-3.2', 'blue', 'Blues', 1.05, 'lightblue'),
            (gpt_results, gpt_layers, 'GPT-2 Large', 'red', 'Reds', 1.12, 'lightcoral'),
        ]

        # ==== PLOT 1: 3D Interactive Surface ====
        for res, layers, label, colour, scale, bar_x, _pale in series:
            hover_3d = (
                '<b>' + label + '</b><br>' +
                'Layer Depth: %{x}<br>' +
                'Spectral Curvature: %{y:.4f}<br>' +
                'Cumulative Length: %{z:.4f}<br>' +
                '<extra></extra>'
            )
            fig.add_trace(go.Scatter3d(
                x=layers,
                y=res['curvatures'],
                z=res['cumulative_length'],
                mode='lines+markers',
                line=dict(color=colour, width=8),
                marker=dict(
                    size=10,
                    color=res['cumulative_length'],
                    colorscale=scale,
                    showscale=True,
                    colorbar=dict(
                        title="Cumulative<br>Length",
                        x=bar_x,
                        len=0.3,
                        y=0.85
                    )
                ),
                name=label + ' (' + str(res['num_layers']) + ' layers)',
                hovertemplate=hover_3d
            ), row=1, col=1)

        # Connecting surface: resample both models onto a common layer axis,
        # then blend linearly from Llama (weight 0) to GPT (weight 1)
        max_layers = max(llama_results['num_layers'], gpt_results['num_layers'])
        layer_range = np.linspace(0, max_layers - 1, 50)
        model_range = np.linspace(0, 1, 30)
        layer_grid = np.meshgrid(layer_range, model_range)[0]

        def blend(key):
            """Blend one per-layer quantity between the two models over the grid."""
            llama_curve = np.interp(layer_range, llama_layers, llama_results[key])
            gpt_curve = np.interp(layer_range, gpt_layers, gpt_results[key])
            return np.outer(1 - model_range, llama_curve) + np.outer(model_range, gpt_curve)

        curvature_surface = blend('curvatures')
        length_surface = blend('cumulative_length')

        fig.add_trace(go.Surface(
            x=layer_grid,
            y=curvature_surface,
            z=length_surface,
            colorscale='Viridis',
            opacity=0.4,
            showscale=False,
            name='Interpolated Surface',
            hovertemplate='Layer: %{x:.0f}<br>Curvature: %{y:.4f}<br>Length: %{z:.4f}<extra></extra>'
        ), row=1, col=1)

        # Update 3D axes with proper labels
        def scene_axis(title):
            """Axis settings shared by the three 3D axes, differing only in title."""
            return dict(
                title=title,
                backgroundcolor="rgb(230, 230,230)",
                gridcolor="white",
                showbackground=True
            )

        fig.update_scenes(
            xaxis=scene_axis("<b>Layer Depth (Network Position)</b>"),
            yaxis=scene_axis("<b>Spectral Curvature κ</b>"),
            zaxis=scene_axis("<b>Cumulative Thermodynamic Length L</b>"),
            camera=dict(
                eye=dict(x=1.5, y=1.5, z=1.3)
            ),
            row=1, col=1
        )

        # ==== PLOT 2: Cumulative Length ====
        for res, layers, label, colour, _scale, _bar_x, pale in series:
            fig.add_trace(go.Scatter(
                x=layers,
                y=res['cumulative_length'],
                mode='lines+markers',
                line=dict(color=colour, width=3),
                marker=dict(size=8, color=pale),
                name=label,
                hovertemplate='Layer: %{x}<br>Cumulative Length: %{y:.4f}<extra></extra>'
            ), row=2, col=1)

        fig.update_xaxes(title_text="<b>Layer Index (Depth)</b>", row=2, col=1)
        fig.update_yaxes(title_text="<b>Cumulative Thermodynamic Length</b>", row=2, col=1)

        # ==== PLOT 3: Spectral Curvature Evolution ====
        for res, layers, label, colour, _scale, _bar_x, _pale in series:
            fig.add_trace(go.Scatter(
                x=layers,
                y=res['curvatures'],
                mode='lines+markers',
                line=dict(color=colour, width=3),
                marker=dict(size=8),
                name=label,
                hovertemplate='Layer: %{x}<br>Curvature: %{y:.4f}<extra></extra>'
            ), row=2, col=2)

        fig.update_xaxes(title_text="<b>Layer Index (Depth)</b>", row=2, col=2)
        fig.update_yaxes(title_text="<b>Spectral Curvature κ</b>", row=2, col=2)

        # ==== PLOT 4: Layer Contributions ====
        for res, layers, label, colour, _scale, _bar_x, _pale in series:
            fig.add_trace(go.Scatter(
                x=layers,
                y=res['layer_contributions'],
                mode='lines+markers',
                fill='tozeroy',
                line=dict(color=colour, width=2),
                marker=dict(size=6),
                name=label,
                hovertemplate='Layer: %{x}<br>Contribution: %{y:.4f}<extra></extra>'
            ), row=3, col=1)

        fig.update_xaxes(title_text="<b>Layer Index (Depth)</b>", row=3, col=1)
        fig.update_yaxes(title_text="<b>Layer Contribution to Length</b>", row=3, col=1)

        # ==== PLOT 5: Total Length Comparison ====
        totals = [llama_results['total_length'], gpt_results['total_length']]
        fig.add_trace(go.Bar(
            x=['Llama-3.2-3B', 'GPT-2 Large'],
            y=totals,
            marker=dict(
                color=['blue', 'red'],
                line=dict(color='black', width=2)
            ),
            text=[f"{value:.4f}" for value in totals],
            textposition='outside',
            hovertemplate='<b>%{x}</b><br>Total Length: %{y:.6f}<extra></extra>'
        ), row=3, col=2)

        fig.update_xaxes(title_text="<b>Model</b>", row=3, col=2)
        fig.update_yaxes(title_text="<b>Total Thermodynamic Length</b>", row=3, col=2)

        # Overall layout
        headline = (
            '<b>Thermodynamic Length Analysis via Spectral Curvature (Method 2)</b><br>' +
            '<sub>Llama-3.2-3B vs GPT-2 Large on SQuAD 2.0 | Fisher-Rao Metric</sub>'
        )
        fig.update_layout(
            title=dict(
                text=headline,
                x=0.5,
                xanchor='center',
                font=dict(size=18)
            ),
            height=1400,
            width=1600,
            showlegend=True,
            legend=dict(x=0.02, y=0.98),
            template='plotly_white'
        )

        fig.show()
        return fig

def run_thermodynamic_analysis():
    """Main execution.

    Loads both models and the SQuAD 2.0 samples, analyses each model, draws
    the comparison figure and prints a summary.

    The relative difference is reported against the smaller of the two total
    lengths. When that length is zero the percentage is undefined, so "n/a" is
    printed instead of raising ZeroDivisionError after all the work is done.

    Returns:
        dict with keys 'llama', 'gpt' (per-model result dicts) and 'figure'.
    """
    rule = "=" * 70
    print(rule)

    # Initialize
    analyzer = ThermodynamicLengthAnalyzer()

    # Load models and data
    models = analyzer.load_models()
    samples = analyzer.load_squad_v2()

    # Analyze both models
    llama_results = analyzer.analyze_model_complete(
        models['llama'], models['llama_tok'], samples, "Llama-3.2-3B"
    )

    gpt_results = analyzer.analyze_model_complete(
        models['gpt'], models['gpt_tok'], samples, "GPT-2 Large"
    )

    # Create plots
    fig = analyzer.create_publication_quality_plots(llama_results, gpt_results)

    # Summary
    print("\n" + rule)
    print("🏆 FINAL RESULTS")
    print(rule)
    for heading, res in (("LLAMA-3.2-3B", llama_results), ("GPT-2 LARGE", gpt_results)):
        print(f"\n📊 {heading}:")
        print(f"   Layers: {res['num_layers']}")
        print(f"   Total Thermodynamic Length: {res['total_length']:.6f}")
        print(f"   Avg Spectral Curvature: {np.mean(res['curvatures']):.4f}")
        print(f"   Max Layer Contribution: {np.max(res['layer_contributions']):.4f}")

    llama_total = llama_results['total_length']
    gpt_total = gpt_results['total_length']

    # Ties go to "GPT-2", as before
    winner = "Llama-3.2" if llama_total > gpt_total else "GPT-2"
    diff = abs(llama_total - gpt_total)

    smaller_total = min(llama_total, gpt_total)
    if smaller_total > 0:
        relative = f"{(diff / smaller_total * 100):.2f}%"
    else:
        relative = "n/a (smaller total length is zero)"

    print(f"\n🎯 COMPARISON:")
    print(f"   Winner (Higher Complexity): {winner}")
    print(f"   Absolute Difference: {diff:.6f}")
    print(f"   Relative Difference: {relative}")
    print(rule)

    return {
        'llama': llama_results,
        'gpt': gpt_results,
        'figure': fig
    }

# Execute the whole pipeline when this cell runs. `results` is the dict returned by
# run_thermodynamic_analysis(): keys 'llama' and 'gpt' (per-model metrics) and 'figure'.
results = run_thermodynamic_analysis()

### Llama-3.2-3B on SQuAD 2.0 -- module prototype

In [None]:
!pip install -q transformers datasets plotly torch

import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

class RobustThermodynamicLength:
    """
    Compare the "thermodynamic length" of two causal language models.

    For every prompt, the hidden states of each layer are reduced to one
    scalar Fisher-information proxy (Frobenius norm of the regularized token
    covariance). The per-layer scalars are averaged over prompts, consecutive
    layers are compared with an arccos-based distance, and the distances are
    accumulated into a cumulative / total length.

    Typical use: ``load_models()`` -> ``load_data()`` -> ``analyze_model(...)``
    once per model -> ``create_plots(...)``.
    """

    def __init__(self):
        """Select the compute device (CUDA when available) and print a banner."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Device: {self.device}")
        print("ROBUST THERMODYNAMIC LENGTH - NO NAN GUARANTEED")

    @staticmethod
    def _load_pair(model_id):
        """Load the tokenizer and float16 causal-LM weights for `model_id`."""
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Reuse EOS as the padding token so `padding=True` works.
        tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            model_id, torch_dtype=torch.float16, device_map="auto"
        )
        return tokenizer, model

    @staticmethod
    def _count_layers(model, default):
        """Return the number of transformer blocks in `model`, or `default` if the layout is unknown."""
        # Layout exposing `model.transformer.h` (the one the GPT-2 loads rely on).
        transformer = getattr(model, 'transformer', None)
        if transformer is not None and hasattr(transformer, 'h'):
            return len(transformer.h)
        # Layout exposing `model.model.layers` (the one the Llama load relies on).
        inner = getattr(model, 'model', None)
        if inner is not None and hasattr(inner, 'layers'):
            return len(inner.layers)
        return default

    def load_models(self):
        """
        Load GPT-2 Large and Llama-3.2-3B (GPT-2 Medium stands in if Llama fails).

        Sets ``gpt_tok``, ``gpt_model``, ``gpt_layers``, ``llama_tok``,
        ``llama_model``, ``llama_name`` and ``llama_layers`` on the instance.
        """
        print("\nLoading models...")

        # GPT-2 Large
        self.gpt_tok, self.gpt_model = self._load_pair("gpt2-large")
        self.gpt_layers = self._count_layers(self.gpt_model, 36)  # 36 = default for gpt2-large

        # Llama-3.2 or fallback
        try:
            self.llama_tok, self.llama_model = self._load_pair("meta-llama/Llama-3.2-3B")
            self.llama_name = "Llama-3.2"
        except Exception as exc:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit propagate. The reason is reported instead of hidden.
            print(f"Could not load Llama-3.2-3B: {exc}")
            print("Using GPT2-medium as Llama proxy")
            self.llama_tok, self.llama_model = self._load_pair("gpt2-medium")
            self.llama_name = "GPT2-Medium (proxy)"

        self.llama_layers = self._count_layers(self.llama_model, 24)  # 24 = default fallback

        print(f"✓ GPT-2 Large: {self.gpt_layers} layers")
        print(f"✓ {self.llama_name}: {self.llama_layers} layers")

        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # Clear cache

    def load_data(self):
        """Load the first 15 SQuAD 2.0 validation items into ``self.samples`` as prompt strings."""
        print("\nLoading SQuAD 2.0...")
        ds = load_dataset("squad_v2", split="validation[:15]")

        # Contexts are truncated to 200 characters for efficiency.
        self.samples = [
            f"Question: {item['question']}\nContext: {item['context'][:200]}"
            for item in ds
        ]

        print(f"✓ Loaded {len(self.samples)} samples")

    def robust_fisher_information(self, hidden_state):
        """
        Reduce one layer's hidden states to a scalar Fisher-information proxy.

        The proxy is the Frobenius norm of the regularized covariance of the
        token vectors.

        Args:
            hidden_state: tensor of shape (tokens, dim) or (batch, tokens, dim);
                a 3-D input is flattened so every token is one observation.

        Returns:
            A finite float >= 1e-6. The placeholder 1.0 is returned when there
            are fewer than two tokens or features, or when anything fails.
        """
        try:
            # Work in float32: the models run in float16, where the products
            # below overflow to inf (max ~65504) for large activations.
            hidden_state = hidden_state.float()

            # Handle dimensions: flatten (batch, tokens, dim) -> (batch*tokens, dim)
            if hidden_state.dim() == 3:
                hidden_state = hidden_state.reshape(-1, hidden_state.shape[-1])

            # Replace any NaN/Inf values
            hidden_state = torch.nan_to_num(hidden_state, nan=0.0, posinf=1e5, neginf=-1e5)

            # Basic check - if too small, return default
            if hidden_state.dim() != 2 or hidden_state.shape[0] < 2 or hidden_state.shape[1] < 2:
                return 1.0

            # Center the data
            centered = hidden_state - hidden_state.mean(dim=0, keepdim=True)

            # Regularization scaled to the data magnitude for stability
            n = centered.shape[0]
            reg_strength = 1e-4 * torch.max(torch.abs(centered)).item()

            # Fisher Information Matrix approximated by the sample covariance
            fisher_matrix = torch.matmul(centered.T, centered) / max(n - 1, 1)
            eye_tensor = torch.eye(fisher_matrix.shape[0], device=fisher_matrix.device)
            fisher_matrix = fisher_matrix + reg_strength * eye_tensor

            # Use Frobenius norm as scalar measure
            fisher_norm = torch.norm(fisher_matrix, p='fro').item()

            # Final NaN/Inf check
            if not np.isfinite(fisher_norm):
                return 1.0

            return max(fisher_norm, 1e-6)

        except Exception as e:
            print(f"Warning: {e}, returning default value")
            return 1.0

    def safe_fisher_rao(self, f1, f2):
        """
        Arccos (Bhattacharyya-angle) distance between two positive scalars.

        Computes ``2 * arccos(2*sqrt(f1*f2) / (f1 + f2))``. The argument is the
        geometric/arithmetic mean ratio, which lies in (0, 1] and equals 1 only
        for f1 == f2, so the distance is 0 for equal inputs and grows
        continuously towards pi as the inputs diverge.

        The factor 2 in the ratio is required: without it the ratio never
        exceeds 0.5 and every pair of distinct inputs is at least 2*pi/3 apart.

        Returns:
            A finite float in [0, pi]; 0.0 on any numerical failure.
        """
        try:
            # Ensure positive values
            f1 = max(abs(float(f1)), 1e-6)
            f2 = max(abs(float(f2)), 1e-6)

            # Handle the identical case explicitly
            if abs(f1 - f2) < 1e-10:
                return 0.0

            # sqrt is taken per factor so the product cannot overflow first.
            ratio = 2.0 * np.sqrt(f1) * np.sqrt(f2) / (f1 + f2)

            # Ensure valid arccos input
            ratio = np.clip(ratio, 0.0, 1.0)

            distance = 2.0 * np.arccos(ratio)

            # Final validation
            if not np.isfinite(distance):
                return 0.0

            return float(distance)

        except Exception as e:
            print(f"Warning in distance: {e}")
            return 0.0

    def analyze_model(self, model, tokenizer, name, num_layers, max_samples=6, max_length=200):
        """
        Compute the thermodynamic length of one model over ``self.samples``.

        Args:
            model: causal LM called with ``output_hidden_states=True``.
            tokenizer: tokenizer matching `model`.
            name: display name stored in the result.
            num_layers: number of transformer blocks; at most ``num_layers + 1``
                hidden states per sample are used.
            max_samples: how many of ``self.samples`` to process (default 6).
            max_length: token truncation length per sample (default 200).

        Returns:
            Dict with keys 'name', 'layers', 'fisher' (per-layer average),
            'distances', 'cumulative' and 'total'. Samples that raise are
            reported and skipped; if none succeed, placeholder arrays with a
            total of 0.0 are returned.
        """
        print(f"\nAnalyzing {name}...")

        batch = self.samples[:max_samples]
        all_fisher_values = []

        for idx, text in enumerate(batch):
            try:
                # Tokenize
                inputs = tokenizer(
                    text, return_tensors="pt", max_length=max_length,
                    padding=True, truncation=True
                ).to(self.device)

                # Get hidden states
                with torch.no_grad():
                    outputs = model(**inputs, output_hidden_states=True)

                # One Fisher scalar per hidden state
                layer_fisher = [
                    self.robust_fisher_information(h)
                    for h in outputs.hidden_states[:num_layers + 1]
                ]
                if layer_fisher:
                    all_fisher_values.append(layer_fisher)

            except Exception as e:
                # Skip the failed sample: substituting an all-ones row would
                # bias the per-layer averages.
                print(f"Error processing sample {idx}: {e}")

            # Progress update
            if (idx + 1) % 2 == 0:
                print(f"  Processed {idx+1}/{len(batch)} samples")

        if not all_fisher_values:
            print("⚠️ No valid samples processed!")
            # Return dummy values
            return {
                'name': name, 'layers': num_layers,
                'fisher': np.ones(num_layers + 1),
                'distances': np.zeros(num_layers + 1),
                'cumulative': np.zeros(num_layers + 1),
                'total': 0.0
            }

        # Ensure consistent length: pad shorter rows with their last value
        max_len = max(len(row) for row in all_fisher_values)
        all_fisher_values = [
            row + [row[-1]] * (max_len - len(row)) for row in all_fisher_values
        ]

        # Average across samples with NaN protection
        fisher_avg = np.nanmean(all_fisher_values, axis=0)
        fisher_avg = np.nan_to_num(fisher_avg, nan=1.0)
        fisher_avg = np.maximum(fisher_avg, 1e-6)  # Ensure minimum value

        # Distance between consecutive layers; the first layer has zero distance
        distances = [0.0]
        distances.extend(
            float(self.safe_fisher_rao(prev, curr))
            for prev, curr in zip(fisher_avg, fisher_avg[1:])
        )
        distances = np.nan_to_num(np.array(distances), nan=0.0)

        # Compute cumulative length
        cumulative = np.cumsum(distances)
        total_length = float(cumulative[-1])

        print(f"  ✓ Total Thermodynamic Length: {total_length:.4f}")

        return {
            'name': name,
            'layers': num_layers,
            'fisher': fisher_avg,
            'distances': distances,
            'cumulative': cumulative,
            'total': total_length
        }

    def create_plots(self, llama_results, gpt_results):
        """
        Build, show and return the Plotly comparison figure.

        The figure has a 3-D panel (layer index, Fisher value, cumulative
        length) with both trajectories and a surface blended between them, a
        cumulative-length line plot, and a bar chart of the totals.

        Args:
            llama_results: result dict from `analyze_model` for the first model.
            gpt_results: result dict from `analyze_model` for GPT-2 Large.

        Returns:
            The Plotly figure.
        """
        print("\nCreating visualizations...")

        # Create figure with subplots
        fig = make_subplots(
            rows=2, cols=2,
            specs=[
                [{"type": "scatter3d", "colspan": 2}, None],
                [{"type": "scatter"}, {"type": "bar"}]
            ],
            subplot_titles=[
                "3D Thermodynamic Trajectory",
                "Cumulative Length Evolution by Layer",
                "Total Thermodynamic Length Comparison"
            ],
            vertical_spacing=0.15,
            row_heights=[0.7, 0.3]
        )

        # Layer indices
        llama_x = np.arange(len(llama_results['fisher']))
        gpt_x = np.arange(len(gpt_results['fisher']))

        # 3D PLOT - Llama trajectory
        fig.add_trace(go.Scatter3d(
            x=llama_x,
            y=llama_results['fisher'],
            z=llama_results['cumulative'],
            mode='lines+markers',
            line=dict(color='blue', width=6),
            marker=dict(
                size=8,
                color=llama_results['cumulative'],
                colorscale='Blues',
                showscale=True,
                colorbar=dict(
                    title="Cumulative<br>Length",
                    x=1.02,
                    len=0.4,
                    y=0.8
                )
            ),
            name=llama_results['name'],
            hovertemplate=(
                '<b>%{text}</b><br>' +
                'Layer: %{x}<br>' +
                'Fisher Info: %{y:.2f}<br>' +
                'Length: %{z:.4f}<br>' +
                '<extra></extra>'
            ),
            text=[f"{llama_results['name']} Layer {i}" for i in llama_x]
        ), row=1, col=1)

        # 3D PLOT - GPT trajectory
        fig.add_trace(go.Scatter3d(
            x=gpt_x,
            y=gpt_results['fisher'],
            z=gpt_results['cumulative'],
            mode='lines+markers',
            line=dict(color='red', width=6),
            marker=dict(
                size=8,
                color=gpt_results['cumulative'],
                colorscale='Reds',
                showscale=True,
                colorbar=dict(
                    title="Cumulative<br>Length",
                    x=1.10,
                    len=0.4,
                    y=0.8
                )
            ),
            name="GPT-2 Large",
            hovertemplate=(
                '<b>GPT-2 Layer %{x}</b><br>' +
                'Fisher Info: %{y:.2f}<br>' +
                'Length: %{z:.4f}<br>' +
                '<extra></extra>'
            )
        ), row=1, col=1)

        # Resample both models onto the same number of points (at most 30)
        common_length = min(30, max(len(llama_x), len(gpt_x)))
        llama_x_grid = np.linspace(0, len(llama_x)-1, common_length)
        gpt_x_grid = np.linspace(0, len(gpt_x)-1, common_length)

        llama_fisher = np.interp(llama_x_grid, llama_x, llama_results['fisher'])
        llama_cumul = np.interp(llama_x_grid, np.arange(len(llama_results['cumulative'])), llama_results['cumulative'])

        gpt_fisher = np.interp(gpt_x_grid, gpt_x, gpt_results['fisher'])
        gpt_cumul = np.interp(gpt_x_grid, np.arange(len(gpt_results['cumulative'])), gpt_results['cumulative'])

        # Blend linearly from the first model (t=0) to GPT (t=1) in 20 steps.
        # The layer coordinate is blended as well, so both edges of the
        # surface coincide with the two plotted trajectories even when the
        # models have different depths.
        t = np.linspace(0, 1, 20)[:, None]
        surface_layer = (1 - t) * llama_x_grid + t * gpt_x_grid
        surface_fisher = (1 - t) * llama_fisher + t * gpt_fisher
        surface_cumul = (1 - t) * llama_cumul + t * gpt_cumul

        # Add surface
        fig.add_trace(go.Surface(
            x=surface_layer,
            y=surface_fisher,
            z=surface_cumul,
            colorscale='Viridis',
            opacity=0.7,
            showscale=False
        ), row=1, col=1)

        # Label 3D axes
        fig.update_scenes(
            xaxis_title="<b>Layer Depth</b>",
            yaxis_title="<b>Fisher Information</b>",
            zaxis_title="<b>Cumulative Length</b>",
            camera=dict(eye=dict(x=1.5, y=1.5, z=1.2)),
            row=1, col=1
        )

        # Line plot - Cumulative length
        fig.add_trace(go.Scatter(
            x=llama_x,
            y=llama_results['cumulative'],
            mode='lines+markers',
            line=dict(color='blue', width=3),
            marker=dict(size=6),
            name=llama_results['name']
        ), row=2, col=1)

        fig.add_trace(go.Scatter(
            x=gpt_x,
            y=gpt_results['cumulative'],
            mode='lines+markers',
            line=dict(color='red', width=3),
            marker=dict(size=6),
            name='GPT-2 Large'
        ), row=2, col=1)

        fig.update_xaxes(title_text="<b>Layer Index</b>", row=2, col=1)
        fig.update_yaxes(title_text="<b>Cumulative Length</b>", row=2, col=1)

        # Bar chart - Total length
        fig.add_trace(go.Bar(
            x=[llama_results['name'], 'GPT-2 Large'],
            y=[llama_results['total'], gpt_results['total']],
            marker=dict(color=['blue', 'red']),
            text=[f"{llama_results['total']:.4f}", f"{gpt_results['total']:.4f}"],
            textposition='outside'
        ), row=2, col=2)

        fig.update_xaxes(title_text="<b>Model</b>", row=2, col=2)
        fig.update_yaxes(title_text="<b>Total Length</b>", row=2, col=2)

        # Layout
        fig.update_layout(
            title="<b>Thermodynamic Length Analysis - Method 2</b><br><sup>Fisher-Rao Metric on SQuAD 2.0</sup>",
            height=800,
            width=1000,
            showlegend=True
        )

        fig.show()
        return fig

# Main execution
def run_robust_analysis():
    """
    Run the robust two-model pipeline from start to finish.

    Loads the models and the SQuAD 2.0 samples, analyzes the Llama (or proxy)
    model and GPT-2 Large, shows the comparison figure and prints both total
    lengths together with the model that has the larger one.
    """
    pipeline = RobustThermodynamicLength()
    pipeline.load_models()
    pipeline.load_data()

    # Per-model analysis: Llama (or its proxy) first, then GPT-2 Large.
    llama_results = pipeline.analyze_model(
        pipeline.llama_model,
        pipeline.llama_tok,
        pipeline.llama_name,
        pipeline.llama_layers,
    )
    gpt_results = pipeline.analyze_model(
        pipeline.gpt_model,
        pipeline.gpt_tok,
        "GPT-2 Large",
        pipeline.gpt_layers,
    )

    # Build and display the figure.
    pipeline.create_plots(llama_results, gpt_results)

    # Summary: GPT-2 Large is reported unless the first model is strictly larger.
    llama_total = llama_results['total']
    gpt_total = gpt_results['total']
    if llama_total > gpt_total:
        winner = llama_results['name']
    else:
        winner = "GPT-2 Large"

    print("\n===== FINAL RESULTS =====")
    print(f"{llama_results['name']}: {llama_total:.6f}")
    print(f"GPT-2 Large: {gpt_total:.6f}")
    print(f"Higher thermodynamic complexity: {winner}")
    print("=========================")

# Run the whole pipeline: load models and data, analyze both models,
# show the plots and print the final totals.
run_robust_analysis()

### Thermodynamic Length Analysis for Llama-3.2-3B -- another variant

In [None]:
!pip install -q transformers datasets torch matplotlib seaborn

import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import warnings
warnings.filterwarnings('ignore')

# Force matplotlib to work in Colab
plt.style.use('default')
%matplotlib inline

class WorkingThermodynamics:
    """
    Single-model thermodynamic-length analysis with matplotlib output.

    Each layer's hidden states are reduced to the trace of their token
    covariance (total variance). The length contributed by a layer is the
    absolute change of the log of that measure relative to the previous
    layer, and the contributions are accumulated over depth.

    Typical use: ``load_model()`` -> ``load_data()`` -> ``analyze()`` ->
    ``create_plots()``.
    """

    def __init__(self):
        """Select the compute device (CUDA when available) and report it."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Device: {self.device}")

    def load_model(self):
        """
        Load Llama-3.2-3B, falling back to GPT-2 Medium when that fails.

        Sets ``tokenizer``, ``model`` (in eval mode), ``layers`` and
        ``model_name`` on the instance.
        """
        try:
            print("Loading Llama-3.2-3B...")
            self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B")
            self.tokenizer.pad_token = self.tokenizer.eos_token
            # NOTE(review): trust_remote_code=True allows repository-supplied
            # code to run; confirm it is actually needed for this checkpoint.
            self.model = AutoModelForCausalLM.from_pretrained(
                "meta-llama/Llama-3.2-3B",
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            ).eval()
            self.layers = len(self.model.model.layers)
            self.model_name = "Llama-3.2-3B"
            print(f"✓ Loaded {self.model_name}: {self.layers} layers")
        except Exception as exc:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit propagate. The reason is reported instead of hidden.
            print(f"Could not load Llama-3.2-3B: {exc}")
            print("Loading GPT2-medium fallback...")
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2-medium")
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.model = AutoModelForCausalLM.from_pretrained(
                "gpt2-medium", torch_dtype=torch.float16, device_map="auto"
            ).eval()
            self.layers = len(self.model.transformer.h)
            self.model_name = "GPT2-Medium"
            print(f"✓ Loaded {self.model_name}: {self.layers} layers")

    def load_data(self):
        """Load the first 8 SQuAD 2.0 validation items into ``self.texts`` as prompt strings."""
        print("Loading SQuAD 2.0...")
        ds = load_dataset("squad_v2", split="validation[:8]")
        # Contexts are cut to 150 characters to keep prompts short.
        self.texts = [f"Q: {d['question']}\nC: {d['context'][:150]}" for d in ds]
        print(f"✓ {len(self.texts)} samples loaded")

    def compute_measure(self, hidden):
        """
        Return the trace of the token covariance of one layer's hidden states.

        Args:
            hidden: tensor of shape (tokens, dim) or (batch, tokens, dim);
                a 3-D input is flattened so every token is one observation.

        Returns:
            A float >= 1e-6, or the placeholder 1.0 when fewer than two
            tokens are available.
        """
        if hidden.dim() == 3:
            hidden = hidden.reshape(-1, hidden.shape[-1])
        # float32 keeps the squared activations inside the representable
        # range; in the model's float16 they overflow to inf.
        hidden = torch.nan_to_num(hidden.float(), nan=0.0)

        if hidden.shape[0] < 2:
            return 1.0

        # trace(X^T X) equals the sum of squared entries of X, so the
        # dim x dim covariance matrix never has to be materialized.
        centered = hidden - hidden.mean(0, keepdim=True)
        measure = (centered.pow(2).sum() / (centered.shape[0] - 1)).item()
        return max(measure, 1e-6)

    def analyze(self):
        """
        Compute per-layer measures, distances and cumulative length.

        Sets ``measures`` (average over samples), ``distances`` (absolute
        log-ratio between consecutive layers, 0 for the first), ``cumulative``
        and ``total`` on the instance.
        """
        print("Analyzing all layers...")
        all_measures = []

        for i, text in enumerate(self.texts):
            tokens = self.tokenizer(text, return_tensors="pt", max_length=100,
                                  truncation=True, padding=True).to(self.device)

            with torch.no_grad():
                out = self.model(**tokens, output_hidden_states=True)

            all_measures.append([self.compute_measure(h) for h in out.hidden_states])
            print(f"  Sample {i+1}/{len(self.texts)} done")

        # Average over samples. `nan=` must be passed by keyword: the second
        # positional parameter of np.nan_to_num is `copy`, not `nan`.
        self.measures = np.nan_to_num(np.mean(all_measures, axis=0), nan=1.0)

        # Distance = |log m_i - log m_{i-1}|; the first layer contributes 0.
        log_measures = np.log(np.maximum(self.measures, 1e-6))
        self.distances = np.concatenate(([0.0], np.abs(np.diff(log_measures))))
        self.cumulative = np.cumsum(self.distances)
        self.total = self.cumulative[-1]

        print(f"✓ Total thermodynamic length: {self.total:.4f}")

    def create_plots(self):
        """Show a six-panel overview and a four-panel detail figure, then print a per-layer table."""
        print("Creating plots...")

        layers = np.arange(len(self.measures))

        def unit_scale(values):
            """Scale `values` by their maximum; leave them untouched if that maximum is not positive."""
            peak = np.max(values)
            return values / peak if peak > 0 else values

        # Create figure with subplots
        plt.figure(figsize=(16, 12))

        # Plot 1: 3D trajectory
        ax1 = plt.subplot(2, 3, 1, projection='3d')
        ax1.plot(layers, self.measures, self.cumulative, 'bo-', linewidth=2, markersize=6)
        ax1.set_xlabel('Layer Depth')
        ax1.set_ylabel('Thermodynamic Measure')
        ax1.set_zlabel('Cumulative Length')
        ax1.set_title('3D Thermodynamic Trajectory')

        # Plot 2: Measures by layer
        ax2 = plt.subplot(2, 3, 2)
        ax2.plot(layers, self.measures, 'bo-', linewidth=2, markersize=6)
        ax2.fill_between(layers, self.measures, alpha=0.3)
        ax2.set_xlabel('Layer Depth')
        ax2.set_ylabel('Thermodynamic Measure')
        ax2.set_title('Layer-wise Measures')
        ax2.grid(True, alpha=0.3)

        # Plot 3: Cumulative length
        ax3 = plt.subplot(2, 3, 3)
        ax3.plot(layers, self.cumulative, 'ro-', linewidth=2, markersize=6)
        ax3.fill_between(layers, self.cumulative, alpha=0.3, color='red')
        ax3.set_xlabel('Layer Depth')
        ax3.set_ylabel('Cumulative Length')
        ax3.set_title('Cumulative Growth')
        ax3.grid(True, alpha=0.3)

        # Plot 4: Distance contributions
        ax4 = plt.subplot(2, 3, 4)
        ax4.bar(layers, self.distances, alpha=0.7, color='green')
        ax4.set_xlabel('Layer Depth')
        ax4.set_ylabel('Distance Contribution')
        ax4.set_title('Layer Contributions')
        ax4.grid(True, alpha=0.3)

        # Plot 5: Combined view on twin y-axes
        ax5 = plt.subplot(2, 3, 5)
        ax5_twin = ax5.twinx()
        ax5.plot(layers, self.measures, 'b-', linewidth=2, label='Measures')
        ax5_twin.plot(layers, self.cumulative, 'r-', linewidth=2, label='Cumulative')
        ax5.set_xlabel('Layer Depth')
        ax5.set_ylabel('Measures', color='blue')
        ax5_twin.set_ylabel('Cumulative', color='red')
        ax5.set_title('Combined Analysis')
        ax5.grid(True, alpha=0.3)

        # Plot 6: Heatmap of the three series, each scaled by its own maximum.
        # The scaling is guarded so an all-zero series does not produce NaNs.
        ax6 = plt.subplot(2, 3, 6)
        heatmap_data = np.vstack([
            unit_scale(self.measures),
            unit_scale(self.distances),
            unit_scale(self.cumulative),
        ])
        im = ax6.imshow(heatmap_data, cmap='viridis', aspect='auto')
        ax6.set_yticks([0, 1, 2])
        ax6.set_yticklabels(['Measures', 'Distances', 'Cumulative'])
        ax6.set_xlabel('Layer Index')
        ax6.set_title('Analysis Heatmap')
        plt.colorbar(im, ax=ax6)

        plt.suptitle(f'Thermodynamic Length Analysis - {self.model_name}\nTotal Length: {self.total:.6f}',
                    fontsize=16, fontweight='bold')
        plt.tight_layout()
        plt.show()

        # Additional detailed plot
        _, axes = plt.subplots(2, 2, figsize=(14, 10))

        # Detailed measures
        axes[0,0].plot(layers, self.measures, 'o-', linewidth=3, markersize=8, color='purple')
        axes[0,0].set_title('Thermodynamic Measures by Layer', fontsize=14, fontweight='bold')
        axes[0,0].set_xlabel('Layer Depth')
        axes[0,0].set_ylabel('Measure Value')
        axes[0,0].grid(True, alpha=0.3)

        # Detailed distances; bar colors run over the colormap with depth.
        # The divisor is at least 1 so a single layer does not divide by zero.
        depth_fraction = layers / max(len(layers) - 1, 1)
        axes[0,1].bar(layers, self.distances, color=plt.cm.plasma(depth_fraction), alpha=0.8)
        axes[0,1].set_title('Distance Contributions by Layer', fontsize=14, fontweight='bold')
        axes[0,1].set_xlabel('Layer Depth')
        axes[0,1].set_ylabel('Distance')
        axes[0,1].grid(True, alpha=0.3)

        # Detailed cumulative
        axes[1,0].plot(layers, self.cumulative, 's-', linewidth=3, markersize=8, color='orange')
        axes[1,0].fill_between(layers, self.cumulative, alpha=0.3, color='orange')
        axes[1,0].set_title('Cumulative Thermodynamic Length', fontsize=14, fontweight='bold')
        axes[1,0].set_xlabel('Layer Depth')
        axes[1,0].set_ylabel('Cumulative Length')
        axes[1,0].grid(True, alpha=0.3)

        # Rate of change; np.gradient needs at least two points.
        if len(self.cumulative) > 1:
            rate_change = np.gradient(self.cumulative)
        else:
            rate_change = np.zeros_like(self.cumulative)
        axes[1,1].plot(layers, rate_change, '^-', linewidth=3, markersize=8, color='red')
        axes[1,1].set_title('Rate of Length Change', fontsize=14, fontweight='bold')
        axes[1,1].set_xlabel('Layer Depth')
        axes[1,1].set_ylabel('Rate of Change')
        axes[1,1].grid(True, alpha=0.3)

        plt.suptitle(f'Detailed Analysis - {self.model_name}', fontsize=16, fontweight='bold')
        plt.tight_layout()
        plt.show()

        # Print detailed results
        print("\n" + "="*60)
        print("LAYER-BY-LAYER THERMODYNAMIC LENGTH RESULTS")
        print("="*60)
        print(f"{'Layer':<8} {'Measure':<12} {'Distance':<12} {'Cumulative':<12}")
        print("-"*60)

        for i, (measure, distance, running) in enumerate(
            zip(self.measures, self.distances, self.cumulative)
        ):
            print(f"{i:<8} {measure:<12.6f} {distance:<12.6f} {running:<12.6f}")

        print("-"*60)
        print(f"Total Thermodynamic Length: {self.total:.8f}")
        print(f"Number of Layers: {len(self.measures)}")
        print(f"Model: {self.model_name}")
        print("="*60)

        print("✅ All plots created and displayed!")

# RUN ANALYSIS
# Single-model pipeline, executed in order at cell run time.
analyzer = WorkingThermodynamics()
analyzer.load_model()    # Llama-3.2-3B, or GPT2-medium if that load fails
analyzer.load_data()     # first 8 SQuAD 2.0 validation items as prompts
analyzer.analyze()       # per-layer measures, distances, cumulative length
analyzer.create_plots()  # matplotlib figures plus a printed per-layer table