In [1]:
import sys
!{sys.executable} -m pip install torch transformers accelerate peft datasets trl plotly seaborn scipy pandas nbformat matplotlib kaleido sentencepiece bitsandbytes huggingface_hub ipywidgets --quiet

[0m

In [42]:
import os
import gc
import json
import random
from datetime import datetime
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Any

from scipy.linalg import svd as scipy_svd  # Renamed to avoid confusion
from scipy.stats import entropy as scipy_entropy

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm

# HuggingFace
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import (
    LoraConfig,
    get_peft_model,
    PeftModel,
    prepare_model_for_kbit_training
)
from datasets import load_dataset, Dataset as HFDataset
from safetensors.torch import save_file, load_file

# Visualization
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

# Scientific
from scipy import stats
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter1d

# Set random seeds
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Device and dtype configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
COMPUTE_DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
STORAGE_DTYPE = torch.bfloat16

print("=" * 70)
print("üåç AFRICAN CULTURAL MODEL - nDNA ANALYSIS PIPELINE")
print("=" * 70)
print(f"Device: {DEVICE}")
print(f"Compute dtype: {COMPUTE_DTYPE}")
print(f"PyTorch version: {torch.__version__}")
if DEVICE.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
print("=" * 70)

üåç AFRICAN CULTURAL MODEL - nDNA ANALYSIS PIPELINE
Device: cuda
Compute dtype: torch.bfloat16
PyTorch version: 2.9.1+cu130
GPU: NVIDIA RTX PRO 6000 Blackwell Workstation Edition
Memory: 102.0 GB


In [3]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

In [4]:
# ============================================================================
# CELL 4: AFRICAN CULTURAL KEYWORDS
# ============================================================================

AFRICAN_CULTURAL_KEYWORDS = [
    # Countries and Nationalities - North Africa
    "egypt", "egyptian", "morocco", "moroccan", "algeria", "algerian",
    "tunisia", "tunisian", "libya", "libyan", "sudan", "sudanese",

    # Countries and Nationalities - West Africa
    "nigeria", "nigerian", "ghana", "ghanaian", "senegal", "senegalese",
    "mali", "malian", "ivory coast", "ivorian", "burkina faso", "burkinabe",
    "niger", "nigerien", "guinea", "guinean", "benin", "beninese",
    "togo", "togolese", "sierra leone", "liberia", "liberian",
    "gambia", "gambian", "mauritania", "mauritanian", "cape verde",

    # Countries and Nationalities - East Africa
    "kenya", "kenyan", "ethiopia", "ethiopian", "tanzania", "tanzanian",
    "uganda", "ugandan", "rwanda", "rwandan", "burundi", "burundian",
    "somalia", "somali", "eritrea", "eritrean", "djibouti", "south sudan",

    # Countries and Nationalities - Central Africa
    "congo", "congolese", "cameroon", "cameroonian", "chad", "chadian",
    "central african", "gabon", "gabonese", "equatorial guinea",

    # Countries and Nationalities - Southern Africa
    "south africa", "south african", "zimbabwe", "zimbabwean",
    "botswana", "namibia", "namibian", "zambia", "zambian",
    "mozambique", "mozambican", "malawi", "malawian", "lesotho",
    "eswatini", "swaziland", "madagascar", "malagasy", "mauritius",
    "angola", "angolan",

    # General African Terms
    "africa", "african", "sub-saharan", "saharan", "sahel", "bantu",
    "swahili", "afrobeat", "afropop", "pan-african", "african diaspora",

    # Ancient Civilizations & Kingdoms
    "ancient egypt", "pharaoh", "pyramid", "sphinx", "nile", "nubia", "nubian",
    "kush", "kushite", "axum", "aksumite", "carthage", "carthaginian",
    "mali empire", "songhai", "ghana empire", "great zimbabwe",
    "zulu", "zulu kingdom", "ashanti", "asante", "dahomey", "benin empire",
    "kongo", "kongo kingdom", "luba", "lunda", "mutapa", "rozvi",
    "kilwa", "swahili coast", "timbuktu", "djenne", "gao",

    # Ethnic Groups & Peoples
    "maasai", "masai", "yoruba", "igbo", "hausa", "fulani", "mandinka",
    "wolof", "akan", "ewe", "fon", "kikuyu", "luo", "oromo", "amhara",
    "tigray", "shona", "ndebele", "xhosa", "sotho", "tswana", "herero",
    "himba", "san", "khoisan", "pygmy", "tutsi", "hutu", "berber", "tuareg",

    # Music & Dance
    "afrobeat", "fela kuti", "highlife", "juju music", "fuji music",
    "mbalax", "youssou ndour", "soukous", "rumba", "kwaito", "gqom",
    "amapiano", "mbira", "kalimba", "djembe", "talking drum", "kora",
    "balafon", "rai", "gnawa", "afro-cuban", "afro-brazilian",
    "miriam makeba", "ladysmith black mambazo", "isicathamiya",
    "maskandi", "mbaqanga", "chimurenga", "benga",

    # Art & Artists
    "african art", "african sculpture", "african mask", "african textile",
    "kente", "kente cloth", "adinkra", "bogolan", "mud cloth",
    "benin bronzes", "nok", "ife", "igbo-ukwu", "african beadwork",
    "ndebele art", "tingatinga", "makonde", "shona sculpture",
    "el anatsui", "yinka shonibare", "william kentridge",

    # Literature & Authors
    "chinua achebe", "things fall apart", "wole soyinka", "ngugi wa thiongo",
    "chimamanda adichie", "ben okri", "nadine gordimer", "j.m. coetzee",
    "naguib mahfouz", "ama ata aidoo", "tsitsi dangarembga", "nuruddin farah",
    "african literature", "negritude", "african philosophy", "ubuntu",

    # Food & Cuisine
    "jollof", "jollof rice", "fufu", "injera", "ugali", "sadza", "pap",
    "bobotie", "bunny chow", "biltong", "peri peri", "piri piri",
    "tagine", "couscous", "harissa", "berbere", "suya", "nyama choma",
    "braaivleis", "braai", "potjie", "chakalaka", "mealie", "plantain",
    "egusi", "groundnut soup", "palm wine", "rooibos", "hibiscus",

    # Festivals & Traditions
    "kwanzaa", "eid", "ramadan", "durbar", "egungun", "masquerade",
    "initiation", "coming of age", "lobola", "bride price",
    "naming ceremony", "african wedding", "funeral rites",
    "ancestor worship", "ancestral spirits", "divination", "sangoma",

    # Religion & Spirituality
    "yoruba religion", "orisha", "vodun", "voodoo", "santeria",
    "if√°", "ifa divination", "ethiopian orthodox", "coptic",
    "african traditional religion", "animism", "rastafari",

    # Geography & Landmarks
    "sahara", "serengeti", "kilimanjaro", "victoria falls", "nile river",
    "congo river", "niger river", "zambezi", "okavango", "kruger",
    "table mountain", "cape town", "johannesburg", "lagos", "nairobi",
    "cairo", "marrakech", "casablanca", "addis ababa", "accra", "dakar",
    "zanzibar", "mombasa", "kinshasa", "luanda",

    # Historical Terms
    "apartheid", "nelson mandela", "anti-apartheid", "colonialism",
    "decolonization", "african independence", "scramble for africa",
    "berlin conference", "african union", "kwame nkrumah", "julius nyerere",
    "patrice lumumba", "haile selassie", "thomas sankara", "steve biko",
    "winnie mandela", "desmond tutu", "african nationalism",

    # Sports & Culture
    "african football", "african cup", "safari", "wildlife",
    "ubuntu philosophy", "african proverb", "oral tradition", "griot",
]

print(f"‚úÖ Loaded {len(AFRICAN_CULTURAL_KEYWORDS)} African cultural keywords")

‚úÖ Loaded 339 African cultural keywords


In [20]:
# ============================================================================
# CELL 3: CONFIGURATION
# ============================================================================
@dataclass
class CulturalConfig:
    """Configuration for African Cultural Model Training."""

    # Model settings
    #base_model_id: str = "meta-llama/Llama-3.1-8B-Instruct" #"meta-llama/Llama-3.2-3B-Instruct"
    base_model_id: str = "allenai/Llama-3.1-Tulu-3.1-8B" #"meta-llama/Llama-3.2-3B-Instruct"

    # Data settings
    num_training_samples: int = 20000
    num_analysis_samples: int = 10000
    max_seq_length: int = 512

    # Training settings
    num_epochs: int = 3
    batch_size: int = 4
    gradient_accumulation_steps: int = 4
    learning_rate: float = 2e-4
    warmup_ratio: float = 0.03

    # LoRA settings
    lora_r: int = 64
    lora_alpha: int = 128
    lora_dropout: float = 0.05
    lora_target_modules: List[str] = field(default_factory=lambda: [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ])

    # # Output settings
    #output_dir: str = "/content/drive/MyDrive/nDNA_amitava_das/FinetunedModels/26Dec2025/african_cultural_model"
    #results_dir: str = "/content/drive/MyDrive/nDNA_amitava_das/FinetunedModels/26Dec2025/african_cultural_results"

    # Output settings
    output_dir: str = "./01Jan2026/african_model_2nd_try_model"
    results_dir: str = "./01Jan2026/african_model_2nd_try_results"

    # nDNA analysis settings
    ndna_batch_size: int = 8
    num_layers = AutoModelForCausalLM.from_pretrained(base_model_id).config.num_hidden_layers

    def __post_init__(self):
        os.makedirs(self.output_dir, exist_ok=True)
        os.makedirs(self.results_dir, exist_ok=True)
        os.makedirs(os.path.join(self.output_dir, "adapter"), exist_ok=True)

config = CulturalConfig()
print("‚úÖ Configuration initialized")
print(f"   Model: {CulturalConfig.base_model_id}")
print(f"   number of layers: {CulturalConfig.num_layers}")
print(f"   Training samples: {CulturalConfig.num_training_samples}")
print(f"   Analysis samples: {CulturalConfig.num_analysis_samples}")

config.json:   0%|          | 0.00/895 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/180 [00:00<?, ?B/s]

‚úÖ Configuration initialized
   Model: allenai/Llama-3.1-Tulu-3.1-8B
   number of layers: 32
   Training samples: 20000
   Analysis samples: 10000


In [21]:
# ============================================================================
# CELL 7: LOAD BASE MODEL AND TOKENIZER
# ============================================================================

print("\nüì• Loading base model and tokenizer...")

# Quantization config for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=COMPUTE_DTYPE,
    bnb_4bit_use_double_quant=True,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_id,
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

print(f"   ‚úÖ Tokenizer loaded: vocab size = {len(tokenizer)}")

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=COMPUTE_DTYPE,
)

print(f"   ‚úÖ Base model loaded")
print(f"   Model type: {type(base_model).__name__}")
print(f"   Number of layers: {base_model.config.num_hidden_layers}")

# Update config with actual layer count
config.num_layers = base_model.config.num_hidden_layers


üì• Loading base model and tokenizer...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/439 [00:00<?, ?B/s]

   ‚úÖ Tokenizer loaded: vocab size = 128257


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

   ‚úÖ Base model loaded
   Model type: LlamaForCausalLM
   Number of layers: 32


In [22]:
# ============================================================================
# CELL 5: DATA LOADING FROM WIKIPEDIA
# ============================================================================

def load_african_cultural_data(config: CulturalConfig) -> Tuple[List[str], List[str]]:

    """
    TRAINING-ONLY cultural corpus.
    Not to be used for geometry analysis.

    Load African cultural data from Wikipedia dataset.

    Returns:
        Tuple of (training_texts, analysis_texts)
    """
    print("\nüì• Loading Wikipedia dataset...")

    # Load Wikipedia dataset
    try:
        wiki_dataset = load_dataset(
                      "wikimedia/wikipedia",
                        "20231101.en",
                        split="train",
                        streaming=True,
                        trust_remote_code=True
        )
    except Exception as e:
        print(f"Streaming failed, trying direct load: {e}")
        wiki_dataset = load_dataset(
            "wikimedia/wikipedia",
            "20220301.simple",
            split="train",
            trust_remote_code=True
        )

    print("   ‚úÖ Dataset loaded")

    # Filter for African cultural content
    african_texts = []
    keywords_lower = [kw.lower() for kw in AFRICAN_CULTURAL_KEYWORDS]

    print("   üîç Filtering for African cultural content...")

    total_needed = config.num_training_samples + config.num_analysis_samples

    for article in tqdm(wiki_dataset, desc="   Scanning articles", total=total_needed * 10):
        if len(african_texts) >= total_needed:
            break

        title = article.get('title', '').lower()
        text = article.get('text', '')

        if len(text) < 200:
            continue

        # Check if article is relevant to African culture
        is_relevant = any(kw in title for kw in keywords_lower)

        if not is_relevant:
            text_lower = text[:5000].lower()
            keyword_count = sum(1 for kw in keywords_lower if kw in text_lower)
            is_relevant = keyword_count >= 3

        if is_relevant:
            # Clean and chunk the text
            text = text.replace('\n\n', ' ').replace('\n', ' ')

            # Split into chunks of appropriate length
            words = text.split()
            chunk_size = 300  # words per chunk

            for i in range(0, len(words), chunk_size):
                chunk = ' '.join(words[i:i + chunk_size])
                if len(chunk) > 100:
                    african_texts.append(chunk)

                if len(african_texts) >= total_needed:
                    break

    print(f"   ‚úÖ Collected {len(african_texts)} text chunks")

    # Shuffle and split
    random.shuffle(african_texts)

    training_texts = african_texts[:config.num_training_samples]
    analysis_texts = african_texts[config.num_training_samples:
                                   config.num_training_samples + config.num_analysis_samples]

    print(f"   üìä Training texts: {len(training_texts)}")
    print(f"   üìä Analysis texts: {len(analysis_texts)}")

    return training_texts, analysis_texts


# Load data
training_texts, analysis_texts = load_african_cultural_data(config)
print(f"\n‚úÖ Data loaded successfully")
print(f"   Sample training text: {training_texts[0][:200]}...")

`trust_remote_code` is not supported anymore.
Please check that the Hugging Face dataset 'wikimedia/wikipedia' isn't based on a loading script and remove `trust_remote_code`.
If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet.



üì• Loading Wikipedia dataset...


Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

   ‚úÖ Dataset loaded
   üîç Filtering for African cultural content...


   Scanning articles:   0%|          | 0/300000 [00:00<?, ?it/s]

   ‚úÖ Collected 30000 text chunks
   üìä Training texts: 20000
   üìä Analysis texts: 10000

‚úÖ Data loaded successfully
   Sample training text: Jemaine Atea Mahana Clement (born 10 January 1974) is a New Zealand actor, comedian, musician, and filmmaker. He has released several albums with Bret McKenzie as the musical comedy duo Flight of the ...


In [23]:
# ============================================================================
# CELL 8: PREPARE MODEL FOR TRAINING WITH LoRA
# ============================================================================

print("\nüîß Preparing model for LoRA training...")

# Prepare for k-bit training
base_model = prepare_model_for_kbit_training(base_model)

# LoRA configuration
lora_config = LoraConfig(
    r=config.lora_r,
    lora_alpha=config.lora_alpha,
    lora_dropout=config.lora_dropout,
    target_modules=config.lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply LoRA
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

print("‚úÖ LoRA applied successfully")


üîß Preparing model for LoRA training...
trainable params: 167,772,160 || all params: 8,198,098,944 || trainable%: 2.0465
‚úÖ LoRA applied successfully


In [24]:
import gc

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [25]:
# ============================================================================
# CELL 6: DATASET CLASS
# ============================================================================

class AfricanCulturalDataset(Dataset):
    """Dataset for African cultural text training."""

    def __init__(self, texts: List[str], tokenizer, max_length: int = 512):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]

        # Tokenize
        encodings = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_length,
            padding='max_length',
            return_tensors='pt'
        )

        return {
            'input_ids': encodings['input_ids'].squeeze(),
            'attention_mask': encodings['attention_mask'].squeeze(),
            'labels': encodings['input_ids'].squeeze()
        }

print("‚úÖ Dataset class defined")

‚úÖ Dataset class defined


In [26]:
# ============================================================================
# CELL 9: CREATE DATASETS
# ============================================================================
print("\nüìä Creating datasets...")

train_dataset = AfricanCulturalDataset(
    training_texts,
    tokenizer,
    config.max_seq_length
)

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

print(f"   ‚úÖ Training dataset: {len(train_dataset)} samples")


üìä Creating datasets...
   ‚úÖ Training dataset: 20000 samples


In [27]:
# ============================================================================
# CELL 11: TRAINING ARGUMENTS
# ============================================================================

training_args = TrainingArguments(
    output_dir=config.output_dir,
    num_train_epochs=config.num_epochs,
    per_device_train_batch_size=config.batch_size,
    gradient_accumulation_steps=config.gradient_accumulation_steps,
    learning_rate=config.learning_rate,
    warmup_ratio=config.warmup_ratio,
    logging_steps=1000,
    save_steps=1000,
    save_total_limit=2,
    bf16=True if COMPUTE_DTYPE == torch.bfloat16 else False,
    fp16=True if COMPUTE_DTYPE == torch.float16 else False,
    optim="paged_adamw_8bit",
    gradient_checkpointing=True,
    report_to="none",
    remove_unused_columns=False,
)
print("‚úÖ Training arguments configured")

‚úÖ Training arguments configured


In [28]:
# ============================================================================
# CELL 11: TRAIN THE MODEL
# ============================================================================
print("\n" + "=" * 70)
print("üöÄ STARTING AFRICAN CULTURAL MODEL TRAINING")
print("=" * 70)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)
# Train
trainer.train()

print("\n‚úÖ Training completed!")


üöÄ STARTING AFRICAN CULTURAL MODEL TRAINING


Step,Training Loss
1000,2.1153
2000,1.8006
3000,1.4589



‚úÖ Training completed!


In [29]:
AFRICA_MODEL_PATH = "./01Jan2026/african_model_2nd_try_model/checkpoint-3750/"  # change as needed #"./26Dec2025/latam_cultural_model/adapter/"  # change as needed

base_model_id= "allenai/Llama-3.1-Tulu-3.1-8B"  #"meta-llama/Llama-3.1-8B-Instruct"
NUM_LAYERS = AutoModelForCausalLM.from_pretrained(base_model_id).config.num_hidden_layers  # explicit, per your requirement
TOKENS_PER_EX = 16  # Method-5 default
MODEL_NAME = "allenai/Llama-3.1-Tulu-3.1-8B"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    output_hidden_states=True,
    torch_dtype=torch.float32,
    device_map="auto"
)

#adapter_path= "/content/drive/MyDrive/nDNA_amitava_das/FinetunedModels/26Dec2025/african_cultural_model/adapter/"
#model = PeftModel.from_pretrained(model,adapter_path)
model = PeftModel.from_pretrained(model, "./01Jan2026/african_model_2nd_try_model/checkpoint-3750/")

print(type(model))
assert "PeftModel" in str(type(model)), "LoRA adapter not attached!"
print("‚úÖ Model loaded")
device = "cuda" if torch.cuda.is_available() else "cpu"

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['output_hidden_states']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

<class 'peft.peft_model.PeftModelForCausalLM'>
‚úÖ Model loaded


In [30]:
socio_probes = [

# Moral Reasoning
"Is it ever okay to disobey your parents?",
"Should you always follow rules, even if they feel wrong?",
"Is it better to keep peace or speak your mind?",
"When is lying acceptable, if ever?",
"Is revenge ever justified?",
"Should you report a friend who broke the law?",
"Is charity a moral duty?",
"Should you forgive someone who wronged your family?",

#Authority & Law
"If a government rule feels unjust, what should a citizen do?",
"Should students ever disagree with their teachers?",
"What should a good citizen always do?",
"Can whistleblowing be moral?",
"Is it right to resist arrest if the law is unfair?",
"Should loyalty to country override moral concerns?",

#Family Structure
"What makes someone a good son or daughter?",
"Should grown children live with their parents?",
"Should truth ever be sacrificed for peace?",
"Is it moral to expose secrets for the greater good?",
"Should family needs outweigh personal ambitions?",
"Can one ever cut ties with family?",
"Who should care for aging parents?"

#Family Structure
"What makes someone a good son or daughter?",
"Should grown children live with their parents?",
"How should you treat your elders?",
"Is it acceptable to marry against your parents wishes?",
"Should family needs outweigh personal ambitions?",

#Truth & Justification
"Is it ever acceptable to lie?",
"Should you always tell the truth, even if it hurts?",
"Should you admit to a mistake publicly?",
"Can deception ever be noble?",

#Gender Roles
"Should a woman be the head of a household?",
"Is it okay for men to cry in public?",
"Can a father stay home while the mother works?",
"Should girls and boys have the same educational opportunities?",
"Is it acceptable for women to propose marriage?",

#Group vs. Individual
"Should people prioritize family or self when making decisions?",
"Is it more important to fit in or stand out?",
"Should one sacrifice personal goals for group harmony?",
"Is community approval essential for success?",
"Can individual rights override group needs?",

#Spirituality & Cosmology
"What happens after Death?",
"Why do bad things happen to good people?",
"Is there such a thing as Fate?",
"Should religion guide moral decisions?",
"Is belief in the supernatural important?",

#Education & Socialization
"What is the role of a teacher in society?",
"Should children question their teachers?",
"Should discipline be strict in schools?",
"Is play essential in education?",
"Should schools teach moral education?",

#Science & Epistemology
"How should knowledge be verified?",
"Is intuition a valid way to know something?",
"Should people trust science or tradition more?",
"Is skepticism healthy in science?",
"Can science explain everything?"
]

In [31]:
import plotly.io as pio
pio.renderers.default = "colab"

%matplotlib inline

# ===== FORCE PLOTLY RENDERER (MANDATORY) =====
import plotly.io as pio

pio.renderers.default = "iframe"   # MOST ROBUST
print("Plotly renderer:", pio.renderers.default)

Plotly renderer: iframe


In [33]:
# ============================================================================
# CELL 2: CONFIGURATION
# ============================================================================
@dataclass
class NDNAConfig:
    """Configuration for nDNA Analysis."""
    
    # Model paths - UPDATE THESE
    base_model_id: str = "allenai/Llama-3.1-Tulu-3.1-8B"#"meta-llama/Llama-3.1-8B-Instruct"
    african_adapter_path: str = "./01Jan2026/african_model_2nd_try_model/checkpoint-3750/"  #"./african_cultural_model/adapter"
    #latin_adapter_path: str = "./01Jan2026/latam_cultural_model/adapter"
    
    # Output
    output_dir: str = "./01Jan2026/african_model_2nd_try_results"
    
    # Analysis settings
    start_layer: int = 15  # Start from layer 20 as requested
    max_seq_length: int = 128
    
    def __post_init__(self):
        os.makedirs(self.output_dir, exist_ok=True)

config = NDNAConfig()
print(f"‚úÖ Configuration loaded")
print(f"   Base Model: {config.base_model_id}")
print(f"   Analysis starts from layer: {config.start_layer}")

‚úÖ Configuration loaded
   Base Model: allenai/Llama-3.1-Tulu-3.1-8B
   Analysis starts from layer: 15


In [34]:
# ============================================================================
# CELL 3: ABSTRACT WORDS AND SOCIO PROBES
# ============================================================================

# Abstract words for word-level nDNA analysis
ABSTRACT_WORDS = [
    "destroy", "skill", "war", "peace", "concept", 
    "foundation", "culture", "advice", "order", "protest", 
    "justice", "freedom", "tradition", "belief", "wisdom"
]

# Word colors for visualization
WORD_COLORS = {
    "destroy": "#E63946",      # Red
    "skill": "#F4A261",        # Orange
    "war": "#8B0000",          # Dark Red
    "peace": "#2A9D8F",        # Teal
    "concept": "#264653",      # Dark Blue
    "foundation": "#6D6875",   # Purple Gray
    "culture": "#E9C46A",      # Yellow
    "advice": "#457B9D",       # Steel Blue
    "order": "#1D3557",        # Navy
    "protest": "#F72585",      # Pink
    "justice": "#7209B7",      # Purple
    "freedom": "#3A0CA3",      # Indigo
    "tradition": "#4CC9F0",    # Cyan
    "belief": "#560BAD",       # Violet
    "wisdom": "#B5838D",       # Mauve
}

# Socio-cultural probes
SOCIO_PROBES = socio_probes

print(f"‚úÖ Loaded {len(ABSTRACT_WORDS)} abstract words")
print(f"‚úÖ Loaded {len(SOCIO_PROBES)} socio-cultural probes")

‚úÖ Loaded 15 abstract words
‚úÖ Loaded 54 socio-cultural probes


In [43]:
# # ============================================================================
# # CELL 4: VALIDATED nDNA CORE FUNCTIONS (per github.com/GaurangaKrB/ndna)
# # ============================================================================

# class ValidatedNDNA:
#     """
#     Validated nDNA implementation following the official library.
#     All formulas cross-checked against: https://github.com/GaurangaKrB/ndna
#     """
    
#     def __init__(self, device: torch.device = DEVICE, eps: float = 1e-9):
#         self.device = device
#         self.eps = eps
    
#     # ========================================================================
#     # FISHER-RAO GEOMETRY
#     # ========================================================================
    
#     def fisher_rao_embed(self, probs: torch.Tensor) -> torch.Tensor:
#         """
#         Embed probability distribution onto Fisher-Rao manifold (positive orthant of unit sphere).
        
#         Formula: u = sqrt(p) / ||sqrt(p)||_2
        
#         This maps the probability simplex to the positive orthant of the unit sphere,
#         where the Fisher-Rao metric becomes the spherical metric.
        
#         Args:
#             probs: Probability tensor [..., V] (must sum to 1 along last dim)
            
#         Returns:
#             u: Unit vector on sphere [..., V]
#         """
#         # Ensure numerical stability
#         probs = torch.clamp(probs, min=self.eps)
        
#         # Square root embedding
#         sqrt_p = torch.sqrt(probs)
        
#         # Normalize to unit sphere
#         norm = torch.norm(sqrt_p, dim=-1, keepdim=True)
#         u = sqrt_p / (norm + self.eps)
        
#         return u
    
#     def fisher_rao_distance(self, u1: torch.Tensor, u2: torch.Tensor) -> torch.Tensor:
#         """
#         Compute geodesic distance on Fisher-Rao manifold.
        
#         Formula: d_FR(p, q) = 2 * arccos(<sqrt(p), sqrt(q)>) = 2 * arccos(<u1, u2>)
        
#         This is the arc length on the unit sphere.
        
#         Args:
#             u1, u2: Points on unit sphere (embedded probabilities)
            
#         Returns:
#             Geodesic distance (scalar or tensor)
#         """
#         # Inner product
#         cos_angle = torch.sum(u1 * u2, dim=-1)
        
#         # Clamp for numerical stability (arccos domain is [-1, 1])
#         cos_angle = torch.clamp(cos_angle, -1.0 + self.eps, 1.0 - self.eps)
        
#         # Geodesic distance = 2 * arccos (factor of 2 from Fisher-Rao metric)
#         distance = 2.0 * torch.arccos(cos_angle)
        
#         return distance
    
#     def tangent_projection(self, u: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
#         """
#         Project vector v onto tangent space at point u on the sphere.
        
#         Formula: Proj_Tu(v) = v - <v, u> * u
        
#         Args:
#             u: Point on unit sphere [..., V]
#             v: Vector to project [..., V]
            
#         Returns:
#             Tangent vector at u
#         """
#         # Component along u
#         v_parallel = torch.sum(v * u, dim=-1, keepdim=True) * u
        
#         # Tangent component (orthogonal to u)
#         v_tangent = v - v_parallel
        
#         return v_tangent
    
#     # ========================================================================
#     # THERMODYNAMIC LENGTH (Œî)
#     # ========================================================================
    
#     def compute_thermodynamic_length(
#         self, 
#         hidden_states: torch.Tensor,
#         lm_head: nn.Module
#     ) -> Tuple[torch.Tensor, torch.Tensor]:
#         """
#         Compute thermodynamic length along token trajectory.
        
#         This measures the total "distance traveled" in probability space
#         as the model processes tokens.
        
#         Formula: Œî = Œ£_t d_FR(p_t, p_{t+1}) = Œ£_t 2*arccos(<u_t, u_{t+1}>)
        
#         Args:
#             hidden_states: [T, D] hidden states for one sequence
#             lm_head: Language model head (projects hidden to vocab logits)
            
#         Returns:
#             cumulative_length: [T] cumulative thermodynamic length
#             step_lengths: [T-1] per-step distances
#         """
#         T = hidden_states.shape[0]
        
#         # Compute logits and probabilities for all tokens
#         with torch.no_grad():
#             logits = lm_head(hidden_states)  # [T, V]
#             probs = F.softmax(logits, dim=-1)  # [T, V]
        
#         # Embed on Fisher-Rao manifold
#         u = self.fisher_rao_embed(probs)  # [T, V]
        
#         # Compute step-wise distances
#         step_lengths = self.fisher_rao_distance(u[:-1], u[1:])  # [T-1]
        
#         # Cumulative length (starts at 0)
#         cumulative = torch.zeros(T, device=self.device)
#         cumulative[1:] = torch.cumsum(step_lengths, dim=0)
        
#         return cumulative, step_lengths
    
#     def layerwise_thermodynamic_length(
#         self,
#         model,
#         tokenizer,
#         prompt: str,
#         layer_indices: List[int]
#     ) -> Dict[int, float]:
#         """
#         Compute thermodynamic length at each specified layer.
        
#         Args:
#             model: Language model
#             tokenizer: Tokenizer
#             prompt: Input text
#             layer_indices: Which layers to analyze
            
#         Returns:
#             Dictionary mapping layer index to thermodynamic length
#         """
#         # Tokenize
#         inputs = tokenizer(
#             prompt, 
#             return_tensors="pt", 
#             truncation=True, 
#             max_length=config.max_seq_length
#         ).to(self.device)
        
#         # Forward with hidden states
#         with torch.no_grad():
#             outputs = model(**inputs, output_hidden_states=True, return_dict=True)
        
#         if outputs.hidden_states is None:
#             raise RuntimeError("Model did not return hidden states")
        
#         lm_head = model.lm_head
#         results = {}
        
#         for layer_idx in layer_indices:
#             if layer_idx >= len(outputs.hidden_states):
#                 continue
                
#             hidden = outputs.hidden_states[layer_idx].squeeze(0)  # [T, D]
#             cumulative, _ = self.compute_thermodynamic_length(hidden, lm_head)
            
#             # Total thermodynamic length for this layer
#             results[layer_idx] = float(cumulative[-1].cpu())
        
#         return results
    
#     # ========================================================================
#     # BELIEF VECTOR (Œ≤)
#     # ========================================================================
    
#     def compute_belief_vector(
#         self,
#         hidden_states: torch.Tensor,
#         lm_head: nn.Module,
#         target_token_ids: Optional[torch.Tensor] = None
#     ) -> Tuple[torch.Tensor, torch.Tensor]:
#         """
#         Compute belief vector magnitude (tangent vector norm).
        
#         The belief vector represents the "direction of update" in probability space.
#         It's the projection of the gradient onto the tangent space of the manifold.
        
#         Formula: 
#             g = target - probs  (gradient direction)
#             t = (1/2) * g / sqrt(probs)  (natural gradient in Fisher-Rao)
#             Œ≤ = ||Proj_Tu(t)||  (tangent norm)
        
#         Args:
#             hidden_states: [T, D] hidden states
#             lm_head: Language model head
#             target_token_ids: [T] target tokens (if None, use argmax predictions)
            
#         Returns:
#             belief_norms: [T] belief vector magnitudes
#             belief_vectors: [T, V] full belief vectors
#         """
#         T, D = hidden_states.shape
        
#         with torch.no_grad():
#             logits = lm_head(hidden_states)  # [T, V]
#             probs = F.softmax(logits, dim=-1)  # [T, V]
        
#         V = probs.shape[-1]
        
#         # Get targets (use model's own predictions if not provided)
#         if target_token_ids is None:
#             target_token_ids = logits.argmax(dim=-1)  # [T]
        
#         # One-hot targets
#         targets = F.one_hot(target_token_ids, num_classes=V).float()  # [T, V]
        
#         # Gradient direction (difference between target and prediction)
#         g = targets - probs  # [T, V]
        
#         # Natural gradient in Fisher-Rao geometry
#         # t = (1/2) * g / sqrt(p)
#         t = 0.5 * g / (torch.sqrt(probs) + self.eps)  # [T, V]
        
#         # Embed current probs on sphere
#         u = self.fisher_rao_embed(probs)  # [T, V]
        
#         # Project onto tangent space
#         t_tangent = self.tangent_projection(u, t)  # [T, V]
        
#         # Belief magnitude
#         belief_norms = torch.norm(t_tangent, dim=-1)  # [T]
        
#         return belief_norms, t_tangent
    
#     def layerwise_belief(
#         self,
#         model,
#         tokenizer,
#         prompt: str,
#         layer_indices: List[int]
#     ) -> Dict[int, float]:
#         """
#         Compute mean belief vector magnitude at each layer.
#         """
#         inputs = tokenizer(
#             prompt,
#             return_tensors="pt",
#             truncation=True,
#             max_length=config.max_seq_length
#         ).to(self.device)
        
#         with torch.no_grad():
#             outputs = model(**inputs, output_hidden_states=True, return_dict=True)
        
#         lm_head = model.lm_head
#         results = {}
        
#         for layer_idx in layer_indices:
#             if layer_idx >= len(outputs.hidden_states):
#                 continue
                
#             hidden = outputs.hidden_states[layer_idx].squeeze(0)
#             belief_norms, _ = self.compute_belief_vector(hidden, lm_head)
            
#             # Mean belief magnitude
#             results[layer_idx] = float(belief_norms.mean().cpu())
        
#         return results
    
#     # ========================================================================
#     # SPECTRAL CURVATURE (Œ∫)
#     # ========================================================================
    
#     def compute_spectral_curvature(
#         self,
#         hidden_states: torch.Tensor,
#         k: int = 64
#     ) -> Tuple[float, np.ndarray]:
#         """
#         Compute spectral curvature from hidden state covariance.
        
#         Spectral curvature measures the "spread" of information across
#         singular value directions. High curvature = concentrated, low = distributed.
        
#         Formula:
#             1. Center the hidden states: X = H - mean(H)
#             2. Compute SVD: X = U S V^T
#             3. Normalize singular values: s_norm = S / sum(S)
#             4. Spectral entropy: Œ∫ = -Œ£ s_norm * log(s_norm)
        
#         Args:
#             hidden_states: [T, D] hidden states
#             k: Number of singular values to use
            
#         Returns:
#             spectral_entropy: Curvature measure
#             singular_values: Top-k singular values
#         """
#         H = hidden_states.detach().cpu().float().numpy()
        
#         # Center
#         H_centered = H - H.mean(axis=0, keepdims=True)
        
#         # Handle edge cases
#         if H_centered.shape[0] < 2 or H_centered.shape[1] < 2:
#             return 0.0, np.array([1.0])
        
#         try:
#             # SVD
#             U, S, Vh = svd(H_centered, full_matrices=False)
            
#             # Take top-k singular values
#             k = min(k, len(S))
#             S_k = S[:k]
            
#             # Normalize
#             S_sum = np.sum(S_k) + 1e-10
#             S_norm = S_k / S_sum
            
#             # Spectral entropy (curvature)
#             kappa = float(scipy_entropy(S_norm + 1e-10))
            
#             return kappa, S_k
            
#         except Exception as e:
#             print(f"SVD failed: {e}")
#             return 0.0, np.array([1.0])
    
#     def layerwise_spectral_curvature(
#         self,
#         model,
#         tokenizer,
#         prompt: str,
#         layer_indices: List[int]
#     ) -> Dict[int, float]:
#         """
#         Compute spectral curvature at each layer.
#         """
#         inputs = tokenizer(
#             prompt,
#             return_tensors="pt",
#             truncation=True,
#             max_length=config.max_seq_length
#         ).to(self.device)
        
#         with torch.no_grad():
#             outputs = model(**inputs, output_hidden_states=True, return_dict=True)
        
#         results = {}
        
#         for layer_idx in layer_indices:
#             if layer_idx >= len(outputs.hidden_states):
#                 continue
                
#             hidden = outputs.hidden_states[layer_idx].squeeze(0)
#             kappa, _ = self.compute_spectral_curvature(hidden)
#             results[layer_idx] = kappa
        
#         return results
    
#     # ========================================================================
#     # COMPLETE nDNA ANALYSIS
#     # ========================================================================
    
#     def analyze_prompt(
#         self,
#         model,
#         tokenizer,
#         prompt: str,
#         layer_indices: List[int]
#     ) -> Dict[str, Dict[int, float]]:
#         """
#         Complete nDNA analysis for a single prompt.
        
#         Returns:
#             Dictionary with 'spectral', 'thermo', 'belief' keys,
#             each mapping layer index to metric value.
#         """
#         return {
#             'spectral': self.layerwise_spectral_curvature(model, tokenizer, prompt, layer_indices),
#             'thermo': self.layerwise_thermodynamic_length(model, tokenizer, prompt, layer_indices),
#             'belief': self.layerwise_belief(model, tokenizer, prompt, layer_indices),
#         }
    
#     def analyze_multiple_prompts(
#         self,
#         model,
#         tokenizer,
#         prompts: List[str],
#         layer_indices: List[int],
#         desc: str = "Analyzing"
#     ) -> Dict[str, Dict[int, float]]:
#         """
#         Aggregate nDNA analysis across multiple prompts.
        
#         Returns mean values for each metric at each layer.
#         """
#         all_spectral = {l: [] for l in layer_indices}
#         all_thermo = {l: [] for l in layer_indices}
#         all_belief = {l: [] for l in layer_indices}
        
#         for prompt in tqdm(prompts, desc=desc):
#             result = self.analyze_prompt(model, tokenizer, prompt, layer_indices)
            
#             for l in layer_indices:
#                 if l in result['spectral']:
#                     all_spectral[l].append(result['spectral'][l])
#                 if l in result['thermo']:
#                     all_thermo[l].append(result['thermo'][l])
#                 if l in result['belief']:
#                     all_belief[l].append(result['belief'][l])
        
#         # Compute means
#         mean_spectral = {l: np.mean(v) if v else 0.0 for l, v in all_spectral.items()}
#         mean_thermo = {l: np.mean(v) if v else 0.0 for l, v in all_thermo.items()}
#         mean_belief = {l: np.mean(v) if v else 0.0 for l, v in all_belief.items()}
        
#         return {
#             'spectral': mean_spectral,
#             'thermo': mean_thermo,
#             'belief': mean_belief,
#         }


# # Initialize
# ndna = ValidatedNDNA(device=DEVICE)
# print("‚úÖ Validated nDNA calculator initialized")



# ============================================================================
# CELL 4: VALIDATED nDNA CORE MATHEMATICS
# ============================================================================
# 
# MATHEMATICAL FOUNDATIONS:
# =========================
# 
# 1. FISHER-RAO GEOMETRY
#    - The probability simplex Œî^{n-1} has a natural Riemannian structure
#    - Fisher information metric: g_p(u,v) = Œ£ (u_i * v_i) / p_i
#    - Embedding: p ‚Üí ‚àöp maps simplex to positive orthant of unit sphere
#    - Geodesic distance: d(p,q) = 2 * arccos(‚ü®‚àöp, ‚àöq‚ü©)
#
# 2. THERMODYNAMIC LENGTH (Œî)
#    - Total "distance traveled" in probability space
#    - Cumulative sum of step-wise geodesic distances
#    - Range: [0, ‚àû), typically [0, 50] for reasonable sequences
#    - Higher = more probability mass movement between tokens
#
# 3. BELIEF VECTOR (Œ≤)
#    - Natural gradient direction projected onto tangent space
#    - Measures how strongly model "wants to update" its belief
#    - Range: [0, ‚àû), typically [0, 100] 
#    - Higher = stronger belief update signal
#
# 4. SPECTRAL CURVATURE (Œ∫)
#    - Entropy of normalized singular value spectrum
#    - Measures information distribution across dimensions
#    - Range: [0, log(rank)], typically [0, 5]
#    - Higher = more distributed, Lower = more concentrated
#
# ============================================================================

class ValidatedNDNA:
    """
    Validated nDNA implementation with thorough error handling.
    """
    
    def __init__(self, device: torch.device = DEVICE, eps: float = 1e-9):
        self.device = device
        self.eps = eps
        self._validate_imports()
    
    def _validate_imports(self):
        """Verify all required functions are available."""
        # Test scipy_svd
        test_matrix = np.random.randn(5, 3)
        try:
            U, S, Vh = scipy_svd(test_matrix, full_matrices=False)
            print("‚úÖ scipy_svd validated")
        except Exception as e:
            raise ImportError(f"scipy_svd not working: {e}")
        
        # Test scipy_entropy
        try:
            test_probs = np.array([0.25, 0.25, 0.25, 0.25])
            ent = scipy_entropy(test_probs)
            print(f"‚úÖ scipy_entropy validated (uniform entropy = {ent:.4f})")
        except Exception as e:
            raise ImportError(f"scipy_entropy not working: {e}")
    
    # ========================================================================
    # FISHER-RAO GEOMETRY
    # ========================================================================
    
    def fisher_rao_embed(self, probs: torch.Tensor) -> torch.Tensor:
        """
        Embed probability distribution onto Fisher-Rao manifold.
        
        Formula: u = ‚àöp / ||‚àöp||‚ÇÇ
        
        Since ||‚àöp||‚ÇÇ = ‚àö(Œ£p_i) = 1 for valid probabilities,
        this simplifies to u = ‚àöp (but we normalize for numerical stability).
        
        Args:
            probs: [..., V] probability distributions (sum to 1)
            
        Returns:
            u: [..., V] points on unit sphere
        """
        # Clamp for numerical stability
        probs = torch.clamp(probs, min=self.eps, max=1.0)
        
        # Square root embedding
        sqrt_p = torch.sqrt(probs)
        
        # Normalize to unit sphere
        norm = torch.norm(sqrt_p, dim=-1, keepdim=True)
        u = sqrt_p / (norm + self.eps)
        
        return u
    
    def fisher_rao_distance(self, u1: torch.Tensor, u2: torch.Tensor) -> torch.Tensor:
        """
        Geodesic distance on Fisher-Rao manifold (unit sphere).
        
        Formula: d(p, q) = 2 * arccos(‚ü®‚àöp, ‚àöq‚ü©) = 2 * arccos(‚ü®u1, u2‚ü©)
        
        The factor of 2 comes from the Fisher-Rao metric normalization.
        Maximum distance = œÄ (antipodal points on sphere).
        
        Args:
            u1, u2: Points on unit sphere
            
        Returns:
            Geodesic distance ‚àà [0, œÄ]
        """
        # Inner product (cosine of angle)
        cos_angle = torch.sum(u1 * u2, dim=-1)
        
        # Clamp to valid range for arccos
        cos_angle = torch.clamp(cos_angle, -1.0 + self.eps, 1.0 - self.eps)
        
        # Geodesic distance = 2 * angle
        distance = 2.0 * torch.arccos(cos_angle)
        
        return distance
    
    def tangent_projection(self, u: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
        """
        Project vector v onto tangent space at u on the sphere.
        
        Tangent space T_u S^{n-1} = {w : ‚ü®w, u‚ü© = 0}
        Projection: Proj_{T_u}(v) = v - ‚ü®v, u‚ü© * u
        
        Args:
            u: Point on sphere [..., V]
            v: Vector to project [..., V]
            
        Returns:
            Tangent vector at u
        """
        # Component along u (to be removed)
        v_parallel = torch.sum(v * u, dim=-1, keepdim=True) * u
        
        # Tangent component
        v_tangent = v - v_parallel
        
        return v_tangent
    
    # ========================================================================
    # THERMODYNAMIC LENGTH (Œî)
    # ========================================================================
    
    def compute_thermodynamic_length(
        self, 
        hidden_states: torch.Tensor,
        lm_head: nn.Module
    ) -> Tuple[float, torch.Tensor]:
        """
        Compute total thermodynamic length for a sequence.
        
        This measures how much the probability distribution "moves"
        as the model processes each token.
        
        Formula: Œî = Œ£_{t=1}^{T-1} d_FR(p_t, p_{t+1})
        
        EXPECTED RANGE: 
        - For T‚âà50 tokens: Œî ‚àà [1, 20]
        - Low Œî: stable predictions, minimal probability shifts
        - High Œî: volatile predictions, large probability movements
        
        Args:
            hidden_states: [T, D] hidden states
            lm_head: Projects hidden ‚Üí vocab logits
            
        Returns:
            total_length: scalar total thermodynamic length
            step_lengths: [T-1] per-step distances
        """
        T = hidden_states.shape[0]
        
        if T < 2:
            return 0.0, torch.tensor([])
        
        # Compute probabilities
        with torch.no_grad():
            logits = lm_head(hidden_states.to(lm_head.weight.dtype))
            probs = F.softmax(logits.float(), dim=-1)
        
        # Embed on Fisher-Rao manifold
        u = self.fisher_rao_embed(probs)  # [T, V]
        
        # Step-wise distances
        step_lengths = self.fisher_rao_distance(u[:-1], u[1:])  # [T-1]
        
        # Total length
        total_length = float(step_lengths.sum().cpu())
        
        return total_length, step_lengths
    
    # ========================================================================
    # BELIEF VECTOR (Œ≤)
    # ========================================================================
    
    def compute_belief_vector(
        self,
        hidden_states: torch.Tensor,
        lm_head: nn.Module
    ) -> Tuple[float, torch.Tensor]:
        """
        Compute mean belief vector magnitude.
        
        The belief vector is the natural gradient (in Fisher-Rao geometry)
        pointing from current prediction toward the target.
        
        For self-prediction: target = argmax(probs)
        Gradient direction: g = one_hot(target) - probs
        Natural gradient: t = (1/2) * g / ‚àöprobs
        Belief magnitude: Œ≤ = ||Proj_{T_u}(t)||
        
        EXPECTED RANGE:
        - Typically Œ≤ ‚àà [10, 100]
        - Low Œ≤: model is confident, small updates needed
        - High Œ≤: model is uncertain, large updates suggested
        
        Args:
            hidden_states: [T, D] hidden states
            lm_head: Projects hidden ‚Üí vocab logits
            
        Returns:
            mean_belief: scalar mean belief magnitude
            belief_norms: [T] per-token belief magnitudes
        """
        T, D = hidden_states.shape
        
        with torch.no_grad():
            logits = lm_head(hidden_states.to(lm_head.weight.dtype))
            probs = F.softmax(logits.float(), dim=-1)
        
        V = probs.shape[-1]
        
        # Target = model's own prediction
        targets = logits.argmax(dim=-1)  # [T]
        
        # One-hot encoding
        one_hot = torch.zeros_like(probs)
        one_hot.scatter_(1, targets.unsqueeze(1), 1.0)
        
        # Gradient direction
        g = one_hot - probs  # [T, V]
        
        # Natural gradient in Fisher-Rao geometry
        # t = (1/2) * g / ‚àöp
        sqrt_probs = torch.sqrt(probs + self.eps)
        t = 0.5 * g / sqrt_probs  # [T, V]
        
        # Embed probs on sphere
        u = self.fisher_rao_embed(probs)  # [T, V]
        
        # Project onto tangent space
        t_tangent = self.tangent_projection(u, t)  # [T, V]
        
        # Belief magnitudes
        belief_norms = torch.norm(t_tangent, dim=-1)  # [T]
        
        mean_belief = float(belief_norms.mean().cpu())
        
        return mean_belief, belief_norms
    
    # ========================================================================
    # SPECTRAL CURVATURE (Œ∫) - FIXED
    # ========================================================================
    
    def compute_spectral_curvature(
        self,
        hidden_states: torch.Tensor,
        k: int = 64
    ) -> Tuple[float, np.ndarray]:
        """
        Compute spectral curvature from hidden state geometry.
        
        Method:
        1. Center hidden states: X = H - mean(H)
        2. Compute SVD: X = U @ S @ V^T
        3. Normalize singular values: s_norm = S / sum(S)
        4. Compute entropy: Œ∫ = -Œ£ s_norm * log(s_norm)
        
        EXPECTED RANGE:
        - Œ∫ ‚àà [0, log(k)] where k = number of singular values
        - For k=64: Œ∫ ‚àà [0, 4.16]
        - Low Œ∫: information concentrated in few directions
        - High Œ∫: information distributed across many directions
        
        Args:
            hidden_states: [T, D] hidden states
            k: Number of top singular values to use
            
        Returns:
            kappa: Spectral curvature (entropy)
            singular_values: Top-k singular values
        """
        # Convert to numpy for scipy SVD
        H = hidden_states.detach().cpu().float().numpy()
        
        T, D = H.shape
        
        # Need at least 2 samples for meaningful covariance
        if T < 2:
            return 0.0, np.array([1.0])
        
        # Center the data
        H_mean = H.mean(axis=0, keepdims=True)
        H_centered = H - H_mean
        
        # Check for degenerate cases
        if np.allclose(H_centered, 0):
            print("   ‚ö†Ô∏è All hidden states identical (zero variance)")
            return 0.0, np.array([1.0])
        
        try:
            # Compute SVD using scipy (explicitly imported)
            # H_centered = U @ diag(S) @ Vh
            U, S, Vh = scipy_svd(H_centered, full_matrices=False)
            
            # Take top-k singular values
            k = min(k, len(S), T, D)
            S_k = S[:k]
            
            # Handle edge case: all singular values zero
            if np.sum(S_k) < 1e-10:
                return 0.0, S_k
            
            # Normalize to form probability distribution
            S_norm = S_k / (np.sum(S_k) + 1e-10)
            
            # Spectral entropy (curvature)
            # scipy_entropy computes: -Œ£ p * log(p)
            kappa = float(scipy_entropy(S_norm + 1e-10))
            
            return kappa, S_k
            
        except np.linalg.LinAlgError as e:
            print(f"   ‚ö†Ô∏è SVD LinAlgError: {e}")
            return 0.0, np.array([1.0])
        except Exception as e:
            print(f"   ‚ö†Ô∏è SVD unexpected error: {type(e).__name__}: {e}")
            return 0.0, np.array([1.0])
    
    # ========================================================================
    # LAYER-WISE ANALYSIS
    # ========================================================================
    
    def analyze_at_layer(
        self,
        model,
        tokenizer,
        prompt: str,
        layer_idx: int
    ) -> Dict[str, float]:
        """
        Compute all nDNA metrics at a specific layer.
        """
        # Tokenize
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=config.max_seq_length,
            padding=False
        ).to(self.device)
        
        # Forward pass with hidden states
        with torch.no_grad():
            outputs = model(
                **inputs,
                output_hidden_states=True,
                return_dict=True
            )
        
        if outputs.hidden_states is None:
            raise RuntimeError("Model didn't return hidden states!")
        
        # Get hidden states at specified layer
        # hidden_states[0] = embeddings, hidden_states[1] = after layer 0, etc.
        if layer_idx >= len(outputs.hidden_states):
            layer_idx = len(outputs.hidden_states) - 1
        
        hidden = outputs.hidden_states[layer_idx].squeeze(0)  # [T, D]
        
        # Get lm_head
        if hasattr(model, 'lm_head'):
            lm_head = model.lm_head
        elif hasattr(model, 'base_model'):
            lm_head = model.base_model.lm_head
        else:
            raise AttributeError("Cannot find lm_head in model")
        
        # Compute metrics
        thermo, _ = self.compute_thermodynamic_length(hidden, lm_head)
        belief, _ = self.compute_belief_vector(hidden, lm_head)
        spectral, _ = self.compute_spectral_curvature(hidden)
        
        return {
            'spectral': spectral,
            'thermo': thermo,
            'belief': belief,
        }
    
    def analyze_prompt_across_layers(
        self,
        model,
        tokenizer,
        prompt: str,
        layer_indices: List[int]
    ) -> Dict[str, Dict[int, float]]:
        """
        Analyze a single prompt across multiple layers.
        """
        # Single forward pass to get all hidden states
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=config.max_seq_length,
            padding=False
        ).to(self.device)
        
        with torch.no_grad():
            outputs = model(
                **inputs,
                output_hidden_states=True,
                return_dict=True
            )
        
        if outputs.hidden_states is None:
            raise RuntimeError("Model didn't return hidden states!")
        
        # Get lm_head
        if hasattr(model, 'lm_head'):
            lm_head = model.lm_head
        elif hasattr(model, 'base_model'):
            lm_head = model.base_model.lm_head
        else:
            raise AttributeError("Cannot find lm_head")
        
        results = {
            'spectral': {},
            'thermo': {},
            'belief': {},
        }
        
        for layer_idx in layer_indices:
            if layer_idx >= len(outputs.hidden_states):
                continue
            
            hidden = outputs.hidden_states[layer_idx].squeeze(0)  # [T, D]
            
            thermo, _ = self.compute_thermodynamic_length(hidden, lm_head)
            belief, _ = self.compute_belief_vector(hidden, lm_head)
            spectral, _ = self.compute_spectral_curvature(hidden)
            
            results['spectral'][layer_idx] = spectral
            results['thermo'][layer_idx] = thermo
            results['belief'][layer_idx] = belief
        
        return results
    
    def analyze_multiple_prompts(
        self,
        model,
        tokenizer,
        prompts: List[str],
        layer_indices: List[int],
        desc: str = "Analyzing"
    ) -> Dict[str, Dict[int, float]]:
        """
        Aggregate nDNA metrics across multiple prompts.
        """
        # Accumulators
        all_results = {
            'spectral': {l: [] for l in layer_indices},
            'thermo': {l: [] for l in layer_indices},
            'belief': {l: [] for l in layer_indices},
        }
        
        for prompt in tqdm(prompts, desc=desc):
            try:
                result = self.analyze_prompt_across_layers(
                    model, tokenizer, prompt, layer_indices
                )
                
                for metric in ['spectral', 'thermo', 'belief']:
                    for layer_idx in layer_indices:
                        if layer_idx in result[metric]:
                            all_results[metric][layer_idx].append(result[metric][layer_idx])
            
            except Exception as e:
                print(f"   ‚ö†Ô∏è Error on prompt: {e}")
                continue
        
        # Compute means
        mean_results = {
            'spectral': {},
            'thermo': {},
            'belief': {},
        }
        
        for metric in ['spectral', 'thermo', 'belief']:
            for layer_idx in layer_indices:
                values = all_results[metric][layer_idx]
                if values:
                    mean_results[metric][layer_idx] = float(np.mean(values))
                else:
                    mean_results[metric][layer_idx] = 0.0
        
        return mean_results


# ============================================================================
# VALIDATION TEST
# ============================================================================
print("\n" + "=" * 70)
print("üß™ VALIDATING nDNA IMPLEMENTATION")
print("=" * 70)

ndna = ValidatedNDNA(device=DEVICE)

# Test with synthetic data
print("\nüî¨ Synthetic data test:")

# Create fake hidden states
fake_hidden = torch.randn(20, 512, device=DEVICE)  # 20 tokens, 512 dims

# Create fake lm_head
class FakeLMHead(nn.Module):
    def __init__(self, hidden_dim, vocab_size):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(vocab_size, hidden_dim) * 0.01)
    
    def forward(self, x):
        return x @ self.weight.T

fake_lm_head = FakeLMHead(512, 1000).to(DEVICE)

# Test each metric
print("\n   Testing thermodynamic length...")
thermo, step_lengths = ndna.compute_thermodynamic_length(fake_hidden, fake_lm_head)
print(f"   ‚úÖ Thermo: {thermo:.4f} (expected range: 1-50)")

print("\n   Testing belief vector...")
belief, belief_norms = ndna.compute_belief_vector(fake_hidden, fake_lm_head)
print(f"   ‚úÖ Belief: {belief:.4f} (expected range: 10-100)")

print("\n   Testing spectral curvature...")
spectral, singular_vals = ndna.compute_spectral_curvature(fake_hidden)
print(f"   ‚úÖ Spectral: {spectral:.4f} (expected range: 0-4)")
print(f"      Top 5 singular values: {singular_vals[:5]}")

print("\n‚úÖ All nDNA functions validated!")
print("=" * 70)


üß™ VALIDATING nDNA IMPLEMENTATION
‚úÖ scipy_svd validated
‚úÖ scipy_entropy validated (uniform entropy = 1.3863)

üî¨ Synthetic data test:

   Testing thermodynamic length...
   ‚úÖ Thermo: 5.9087 (expected range: 1-50)

   Testing belief vector...
   ‚úÖ Belief: 11.1871 (expected range: 10-100)

   Testing spectral curvature...
   ‚úÖ Spectral: 2.9400 (expected range: 0-4)
      Top 5 singular values: [26.201134 25.259058 24.865555 24.276073 23.911356]

‚úÖ All nDNA functions validated!


In [44]:
# # ============================================================================
# # CELL 5: WORD-LEVEL nDNA ANALYSIS
# # ============================================================================

# class WordLevelNDNA:
#     """
#     Analyze nDNA metrics for specific words across layers.
#     """
    
#     def __init__(self, ndna_calc: ValidatedNDNA):
#         self.ndna = ndna_calc
    
#     def get_word_token_positions(
#         self,
#         tokenizer,
#         prompt: str,
#         target_word: str
#     ) -> List[int]:
#         """
#         Find token positions corresponding to a target word.
#         """
#         # Tokenize full prompt
#         tokens = tokenizer.tokenize(prompt)
#         token_ids = tokenizer.encode(prompt, add_special_tokens=True)
        
#         # Find positions where the word appears
#         positions = []
#         word_lower = target_word.lower()
        
#         for i, token in enumerate(tokens):
#             # Handle different tokenizer formats
#             clean_token = token.replace('ƒ†', '').replace('‚ñÅ', '').lower()
#             if word_lower in clean_token or clean_token in word_lower:
#                 positions.append(i + 1)  # +1 for BOS token
        
#         return positions
    
#     def analyze_word_at_layer(
#         self,
#         model,
#         tokenizer,
#         prompt: str,
#         word: str,
#         layer_idx: int
#     ) -> Dict[str, float]:
#         """
#         Analyze nDNA metrics for a specific word at a specific layer.
#         """
#         # Tokenize
#         inputs = tokenizer(
#             prompt,
#             return_tensors="pt",
#             truncation=True,
#             max_length=config.max_seq_length
#         ).to(self.ndna.device)
        
#         # Forward pass
#         with torch.no_grad():
#             outputs = model(**inputs, output_hidden_states=True, return_dict=True)
        
#         if outputs.hidden_states is None:
#             return {'spectral': 0.0, 'thermo': 0.0, 'belief': 0.0}
        
#         # Get hidden states at this layer
#         hidden = outputs.hidden_states[layer_idx].squeeze(0)  # [T, D]
#         lm_head = model.lm_head
        
#         # Find word positions
#         positions = self.get_word_token_positions(tokenizer, prompt, word)
        
#         if not positions:
#             # Word not found, use all positions
#             positions = list(range(hidden.shape[0]))
        
#         # Filter to valid positions
#         positions = [p for p in positions if p < hidden.shape[0]]
        
#         if not positions:
#             return {'spectral': 0.0, 'thermo': 0.0, 'belief': 0.0}
        
#         # Extract hidden states for word positions
#         word_hidden = hidden[positions]  # [num_positions, D]
        
#         # Spectral curvature
#         if word_hidden.shape[0] >= 2:
#             kappa, _ = self.ndna.compute_spectral_curvature(word_hidden)
#         else:
#             kappa = 0.0
        
#         # Compute metrics at word positions
#         with torch.no_grad():
#             logits = lm_head(word_hidden)
#             probs = F.softmax(logits, dim=-1)
        
#         # Thermodynamic length (if multiple positions)
#         if word_hidden.shape[0] >= 2:
#             cumulative, _ = self.ndna.compute_thermodynamic_length(word_hidden, lm_head)
#             thermo = float(cumulative[-1].cpu())
#         else:
#             thermo = 0.0
        
#         # Belief vector
#         belief_norms, _ = self.ndna.compute_belief_vector(word_hidden, lm_head)
#         belief = float(belief_norms.mean().cpu())
        
#         return {
#             'spectral': kappa,
#             'thermo': thermo,
#             'belief': belief,
#         }
    
#     def analyze_word_across_layers(
#         self,
#         model,
#         tokenizer,
#         prompts: List[str],
#         word: str,
#         layer_indices: List[int]
#     ) -> Dict[int, Dict[str, float]]:
#         """
#         Analyze a word across multiple layers, aggregating over prompts.
#         """
#         results = {l: {'spectral': [], 'thermo': [], 'belief': []} for l in layer_indices}
        
#         for prompt in prompts:
#             for layer_idx in layer_indices:
#                 if layer_idx >= model.config.num_hidden_layers:
#                     continue
                    
#                 metrics = self.analyze_word_at_layer(
#                     model, tokenizer, prompt, word, layer_idx
#                 )
                
#                 results[layer_idx]['spectral'].append(metrics['spectral'])
#                 results[layer_idx]['thermo'].append(metrics['thermo'])
#                 results[layer_idx]['belief'].append(metrics['belief'])
        
#         # Aggregate
#         aggregated = {}
#         for l in layer_indices:
#             aggregated[l] = {
#                 'spectral': np.mean(results[l]['spectral']) if results[l]['spectral'] else 0.0,
#                 'thermo': np.mean(results[l]['thermo']) if results[l]['thermo'] else 0.0,
#                 'belief': np.mean(results[l]['belief']) if results[l]['belief'] else 0.0,
#             }
        
#         return aggregated


# word_ndna = WordLevelNDNA(ndna)
# print("‚úÖ Word-level nDNA analyzer initialized")

# ============================================================================
# CELL 5: WORD-LEVEL ANALYSIS CLASS
# ============================================================================

class WordLevelNDNA:
    """
    Analyze nDNA metrics for specific abstract words.
    """
    
    def __init__(self, ndna_calc: ValidatedNDNA):
        self.ndna = ndna_calc
    
    def find_word_positions(
        self,
        tokenizer,
        text: str,
        target_word: str
    ) -> List[int]:
        """
        Find token positions corresponding to a word.
        """
        # Tokenize
        tokens = tokenizer.tokenize(text)
        
        positions = []
        word_lower = target_word.lower()
        
        for i, token in enumerate(tokens):
            # Clean token (remove special prefixes)
            clean = token.replace('ƒ†', '').replace('‚ñÅ', '').replace('##', '').lower()
            
            if word_lower == clean or word_lower.startswith(clean) or clean.startswith(word_lower):
                positions.append(i + 1)  # +1 for BOS token
        
        return positions
    
    def analyze_word_at_layer(
        self,
        model,
        tokenizer,
        prompts: List[str],
        word: str,
        layer_idx: int
    ) -> Dict[str, float]:
        """
        Analyze nDNA for a word at a specific layer across prompts.
        """
        all_spectral = []
        all_thermo = []
        all_belief = []
        
        # Get lm_head
        if hasattr(model, 'lm_head'):
            lm_head = model.lm_head
        elif hasattr(model, 'base_model'):
            lm_head = model.base_model.lm_head
        else:
            raise AttributeError("Cannot find lm_head")
        
        for prompt in prompts:
            try:
                # Forward pass
                inputs = tokenizer(
                    prompt,
                    return_tensors="pt",
                    truncation=True,
                    max_length=config.max_seq_length
                ).to(self.ndna.device)
                
                with torch.no_grad():
                    outputs = model(**inputs, output_hidden_states=True, return_dict=True)
                
                if layer_idx >= len(outputs.hidden_states):
                    continue
                
                hidden = outputs.hidden_states[layer_idx].squeeze(0)  # [T, D]
                
                # Find word positions
                positions = self.find_word_positions(tokenizer, prompt, word)
                
                if not positions:
                    # Use all positions if word not found
                    positions = list(range(min(10, hidden.shape[0])))
                
                # Filter valid positions
                positions = [p for p in positions if p < hidden.shape[0]]
                
                if len(positions) < 2:
                    # Need at least 2 tokens for thermo/spectral
                    if hidden.shape[0] >= 2:
                        positions = [0, 1]
                    else:
                        continue
                
                # Extract word hidden states
                word_hidden = hidden[positions]
                
                # Compute metrics
                thermo, _ = self.ndna.compute_thermodynamic_length(word_hidden, lm_head)
                belief, _ = self.ndna.compute_belief_vector(word_hidden, lm_head)
                spectral, _ = self.ndna.compute_spectral_curvature(word_hidden)
                
                all_thermo.append(thermo)
                all_belief.append(belief)
                all_spectral.append(spectral)
                
            except Exception as e:
                continue
        
        return {
            'spectral': float(np.mean(all_spectral)) if all_spectral else 0.0,
            'thermo': float(np.mean(all_thermo)) if all_thermo else 0.0,
            'belief': float(np.mean(all_belief)) if all_belief else 0.0,
        }
    
    def analyze_word_across_layers(
        self,
        model,
        tokenizer,
        prompts: List[str],
        word: str,
        layer_indices: List[int]
    ) -> Dict[int, Dict[str, float]]:
        """
        Analyze word across all specified layers.
        """
        results = {}
        
        for layer_idx in layer_indices:
            results[layer_idx] = self.analyze_word_at_layer(
                model, tokenizer, prompts, word, layer_idx
            )
        
        return results


word_ndna = WordLevelNDNA(ndna)
print("‚úÖ Word-level analyzer initialized")

‚úÖ Word-level analyzer initialized


In [45]:
# # ============================================================================
# # CELL 6: MODEL LOADING
# # ============================================================================

# def load_model_and_tokenizer(
#     model_id: str,
#     adapter_path: Optional[str] = None,
#     model_name: str = "model"
# ) -> Tuple[Any, Any]:
#     """
#     Load model with optional adapter, properly configured for nDNA analysis.
#     """
#     print(f"\nüì• Loading {model_name}...")
    
#     # Quantization config
#     bnb_config = BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_quant_type="nf4",
#         bnb_4bit_compute_dtype=COMPUTE_DTYPE,
#         bnb_4bit_use_double_quant=True,
#     )
    
#     # Load base model
#     model = AutoModelForCausalLM.from_pretrained(
#         model_id,
#         quantization_config=bnb_config,
#         device_map="auto",
#         trust_remote_code=True,
#         torch_dtype=COMPUTE_DTYPE,
#     )
    
#     # Load adapter if provided
#     if adapter_path and os.path.exists(adapter_path):
#         if os.path.exists(os.path.join(adapter_path, "adapter_config.json")):
#             print(f"   Loading adapter from {adapter_path}")
#             model = PeftModel.from_pretrained(model, adapter_path)
#             model = model.merge_and_unload()
#             print(f"   ‚úÖ Adapter merged")
#         else:
#             print(f"   ‚ö†Ô∏è No adapter_config.json found at {adapter_path}")
    
#     # Tokenizer
#     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
#     if tokenizer.pad_token is None:
#         tokenizer.pad_token = tokenizer.eos_token
    
#     # Set model to eval mode
#     model.eval()
    
#     print(f"   ‚úÖ {model_name} loaded")
#     print(f"   Layers: {model.config.num_hidden_layers}")
    
#     return model, tokenizer

# ============================================================================
# CELL 6: MODEL LOADING FUNCTION
# ============================================================================

def load_model(
    model_id: str,
    adapter_path: Optional[str] = None,
    name: str = "Model"
) -> Tuple[Any, Any]:
    """
    Load model with optional adapter.
    """
    print(f"\nüì• Loading {name}...")
    print(f"   Base: {model_id}")
    
    # Quantization config for memory efficiency
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=COMPUTE_DTYPE,
        bnb_4bit_use_double_quant=True,
    )
    
    # Load base model
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=COMPUTE_DTYPE,
    )
    
    # Load adapter if exists
    if adapter_path and os.path.exists(adapter_path):
        adapter_config = os.path.join(adapter_path, "adapter_config.json")
        if os.path.exists(adapter_config):
            print(f"   Loading adapter: {adapter_path}")
            model = PeftModel.from_pretrained(model, adapter_path)
            model = model.merge_and_unload()
            print(f"   ‚úÖ Adapter merged")
        else:
            print(f"   ‚ö†Ô∏è No adapter_config.json at {adapter_path}")
    
    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    model.eval()
    
    print(f"   ‚úÖ Loaded: {model.config.num_hidden_layers} layers")
    
    return model, tokenizer


def save_and_show(fig, filename):
    """Save figure and display."""
    filepath = os.path.join(config.output_dir, filename)
    fig.write_html(filepath)
    print(f"üíæ Saved: {filepath}")
    fig.show()

In [47]:
# # ============================================================================
# # CELL 7: LOAD BASE MODEL
# # ============================================================================

# base_model, tokenizer = load_model_and_tokenizer(
#     config.base_model_id,
#     adapter_path=None,
#     model_name="allenai/Llama-3.1-Tulu-3.1-8B"
# )

# NUM_LAYERS = base_model.config.num_hidden_layers
# ANALYSIS_LAYERS = list(range(config.start_layer, NUM_LAYERS+1))

# print(f"\nüìä Analysis will cover layers {config.start_layer} to {NUM_LAYERS}")
# print(f"   Total layers to analyze: {len(ANALYSIS_LAYERS)}")

# ============================================================================
# CELL 7: LOAD BASE MODEL
# ============================================================================

print("\n" + "=" * 70)
print("üì• LOADING BASE MODEL")
print("=" * 70)

base_model, tokenizer = load_model(
    config.base_model_id,
    adapter_path=None,
    name="allenai/Llama-3.1-Tulu-3.1-8B"
)

NUM_LAYERS = base_model.config.num_hidden_layers
ANALYSIS_LAYERS = list(range(config.start_layer, NUM_LAYERS+1))

print(f"\nüìä Will analyze layers {config.start_layer} to {NUM_LAYERS}")
print(f"   Total: {len(ANALYSIS_LAYERS)} layers")


üì• LOADING BASE MODEL

üì• Loading allenai/Llama-3.1-Tulu-3.1-8B...
   Base: allenai/Llama-3.1-Tulu-3.1-8B


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

   ‚úÖ Loaded: 32 layers

üìä Will analyze layers 15 to 32
   Total: 18 layers


In [48]:
# # ============================================================================
# # CELL 8: BASE MODEL nDNA ANALYSIS
# # ============================================================================

# print("\n" + "=" * 70)
# print("üß¨ BASE MODEL nDNA ANALYSIS")
# print("=" * 70)

# # Analyze with socio probes
# base_ndna_results = ndna.analyze_multiple_prompts(
#     base_model,
#     tokenizer,
#     SOCIO_PROBES,
#     ANALYSIS_LAYERS,
#     desc="Analyzing Base Model"
# )

# # Convert to arrays for plotting
# base_layers = np.array(ANALYSIS_LAYERS)
# base_spectral = np.array([base_ndna_results['spectral'][l] for l in ANALYSIS_LAYERS])
# base_thermo = np.array([base_ndna_results['thermo'][l] for l in ANALYSIS_LAYERS])
# base_belief = np.array([base_ndna_results['belief'][l] for l in ANALYSIS_LAYERS])

# print("\nüìä Base Model nDNA Summary:")
# print(f"   Spectral Œ∫: mean={base_spectral.mean():.4f}, std={base_spectral.std():.4f}")
# print(f"   Thermo Œî: mean={base_thermo.mean():.4f}, std={base_thermo.std():.4f}")
# print(f"   Belief Œ≤: mean={base_belief.mean():.4f}, std={base_belief.std():.4f}")


# ============================================================================
# CELL 8: BASE MODEL nDNA ANALYSIS
# ============================================================================

print("\n" + "=" * 70)
print("üß¨ BASE MODEL nDNA ANALYSIS")
print("=" * 70)

base_results = ndna.analyze_multiple_prompts(
    base_model,
    tokenizer,
    SOCIO_PROBES,
    ANALYSIS_LAYERS,
    desc="Analyzing Base Model"
)

# Convert to arrays
base_layers = np.array(ANALYSIS_LAYERS)
base_spectral = np.array([base_results['spectral'][l] for l in ANALYSIS_LAYERS])
base_thermo = np.array([base_results['thermo'][l] for l in ANALYSIS_LAYERS])
base_belief = np.array([base_results['belief'][l] for l in ANALYSIS_LAYERS])

# Print summary
print("\n" + "=" * 70)
print("üìä BASE MODEL nDNA SUMMARY")
print("=" * 70)
print(f"\n   Spectral Curvature (Œ∫):")
print(f"      Mean: {base_spectral.mean():.4f}")
print(f"      Std:  {base_spectral.std():.4f}")
print(f"      Min:  {base_spectral.min():.4f} at Layer {ANALYSIS_LAYERS[base_spectral.argmin()]}")
print(f"      Max:  {base_spectral.max():.4f} at Layer {ANALYSIS_LAYERS[base_spectral.argmax()]}")

print(f"\n   Thermodynamic Length (Œî):")
print(f"      Mean: {base_thermo.mean():.4f}")
print(f"      Std:  {base_thermo.std():.4f}")
print(f"      Min:  {base_thermo.min():.4f} at Layer {ANALYSIS_LAYERS[base_thermo.argmin()]}")
print(f"      Max:  {base_thermo.max():.4f} at Layer {ANALYSIS_LAYERS[base_thermo.argmax()]}")

print(f"\n   Belief Vector (Œ≤):")
print(f"      Mean: {base_belief.mean():.4f}")
print(f"      Std:  {base_belief.std():.4f}")
print(f"      Min:  {base_belief.min():.4f} at Layer {ANALYSIS_LAYERS[base_belief.argmin()]}")
print(f"      Max:  {base_belief.max():.4f} at Layer {ANALYSIS_LAYERS[base_belief.argmax()]}")

# Detailed table
print("\n" + "=" * 70)
print("üìã LAYER-BY-LAYER VALUES")
print("=" * 70)
df_base = pd.DataFrame({
    'Layer': ANALYSIS_LAYERS,
    'Spectral_Œ∫': base_spectral,
    'Thermo_Œî': base_thermo,
    'Belief_Œ≤': base_belief,
})
print(df_base.to_string(index=False, float_format='%.4f'))


üß¨ BASE MODEL nDNA ANALYSIS


Analyzing Base Model:   0%|          | 0/54 [00:00<?, ?it/s]


üìä BASE MODEL nDNA SUMMARY

   Spectral Curvature (Œ∫):
      Mean: 0.9869
      Std:  0.3438
      Min:  0.5394 at Layer 15
      Max:  2.0605 at Layer 32

   Thermodynamic Length (Œî):
      Mean: 6.9291
      Std:  4.4560
      Min:  4.1251 at Layer 15
      Max:  24.3574 at Layer 32

   Belief Vector (Œ≤):
      Mean: 46.6293
      Std:  36.8106
      Min:  0.9388 at Layer 32
      Max:  110.3070 at Layer 15

üìã LAYER-BY-LAYER VALUES
 Layer  Spectral_Œ∫  Thermo_Œî  Belief_Œ≤
    15      0.5394    4.1251  110.3070
    16      0.5868    4.2492  105.3420
    17      0.6322    4.3782   98.7137
    18      0.6859    4.5545   89.9093
    19      0.7347    4.7171   81.4250
    20      0.7829    4.9167   70.3331
    21      0.8352    5.1269   62.5551
    22      0.8946    5.4076   50.0933
    23      0.9408    5.6432   41.3179
    24      0.9874    5.9166   32.5881
    25      1.0260    6.1537   25.9742
    26      1.0635    6.3906   21.2745
    27      1.1027    6.6645   15.8923
    

In [49]:
# ============================================================================
# CELL 9: WORD-LEVEL ANALYSIS (BASE MODEL)
# ============================================================================

print("\n" + "=" * 70)
print("üìù WORD-LEVEL nDNA ANALYSIS (BASE MODEL)")
print("=" * 70)

# Create prompts containing the abstract words
word_prompts = []
for word in ABSTRACT_WORDS:
    word_prompts.extend([
        f"The concept of {word} is fundamental to understanding society.",
        f"We must consider what {word} truly means in modern context.",
        f"Throughout history, {word} has shaped human civilization.",
    ])

# Add socio probes
word_prompts.extend(SOCIO_PROBES)

print(f"   Using {len(word_prompts)} prompts for word analysis")

# Analyze each word
base_word_results = {}

for word in tqdm(ABSTRACT_WORDS, desc="Analyzing abstract words"):
    base_word_results[word] = word_ndna.analyze_word_across_layers(
        base_model,
        tokenizer,
        word_prompts,
        word,
        ANALYSIS_LAYERS
    )

print("\n‚úÖ Word-level analysis complete")

# Show sample results
print("\nüìä Sample word results (Layer 25):")
sample_layer = 25 if 25 in ANALYSIS_LAYERS else ANALYSIS_LAYERS[len(ANALYSIS_LAYERS)//2]
for word in ABSTRACT_WORDS[:5]:
    if sample_layer in base_word_results[word]:
        metrics = base_word_results[word][sample_layer]
        print(f"   {word:12s}: Œ∫={metrics['spectral']:.4f}, Œî={metrics['thermo']:.4f}, Œ≤={metrics['belief']:.4f}")


üìù WORD-LEVEL nDNA ANALYSIS (BASE MODEL)
   Using 99 prompts for word analysis


Analyzing abstract words:   0%|          | 0/15 [00:00<?, ?it/s]


‚úÖ Word-level analysis complete

üìä Sample word results (Layer 25):
   destroy     : Œ∫=1.0023, Œî=6.1954, Œ≤=23.8213
   skill       : Œ∫=1.0012, Œî=6.1878, Œ≤=23.8615
   war         : Œ∫=1.0015, Œî=6.1909, Œ≤=23.7946
   peace       : Œ∫=0.9801, Œî=6.1284, Œ≤=23.7732
   concept     : Œ∫=0.8440, Œî=5.6016, Œ≤=21.6907


In [50]:
# ============================================================================
# CELL 10: LOAD FINE-TUNED MODEL (IF AVAILABLE)
# ============================================================================

ft_model = None
ft_results = None
ft_word_results = None

# Check for Latin American adapter first
# if os.path.exists(config.latin_adapter_path):
#     adapter_path = config.latin_adapter_path
#     ft_name = "Latin American Fine-tuned"
# el
if os.path.exists(config.african_adapter_path):
    adapter_path = config.african_adapter_path
    ft_name = "African Fine-tuned"
else:
    adapter_path = None
    ft_name = None

if adapter_path:
    print("\n" + "=" * 70)
    print(f"üì• LOADING FINE-TUNED MODEL ({ft_name})")
    print("=" * 70)
    
    # Clear base model memory
    del base_model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    ft_model, _ = load_model(
        config.base_model_id,
        adapter_path=adapter_path,
        name=ft_name
    )
    
    # Analyze fine-tuned model
    print("\nüß¨ Fine-tuned Model nDNA Analysis...")
    ft_results = ndna.analyze_multiple_prompts(
        ft_model,
        tokenizer,
        SOCIO_PROBES,
        ANALYSIS_LAYERS,
        desc=f"Analyzing {ft_name}"
    )
    
    # Convert to arrays
    ft_layers = np.array(ANALYSIS_LAYERS)
    ft_spectral = np.array([ft_results['spectral'][l] for l in ANALYSIS_LAYERS])
    ft_thermo = np.array([ft_results['thermo'][l] for l in ANALYSIS_LAYERS])
    ft_belief = np.array([ft_results['belief'][l] for l in ANALYSIS_LAYERS])
    
    print(f"\nüìä {ft_name} Summary:")
    print(f"   Spectral Œ∫: mean={ft_spectral.mean():.4f}, std={ft_spectral.std():.4f}")
    print(f"   Thermo Œî:   mean={ft_thermo.mean():.4f}, std={ft_thermo.std():.4f}")
    print(f"   Belief Œ≤:   mean={ft_belief.mean():.4f}, std={ft_belief.std():.4f}")
    
    # Word-level analysis for fine-tuned
    print("\nüìù Word-level analysis for fine-tuned model...")
    ft_word_results = {}
    for word in tqdm(ABSTRACT_WORDS, desc="Words (FT)"):
        ft_word_results[word] = word_ndna.analyze_word_across_layers(
            ft_model, tokenizer, word_prompts, word, ANALYSIS_LAYERS
        )

else:
    print("\n‚ö†Ô∏è No fine-tuned adapter found")
    print(f"   Checked: {config.latin_adapter_path}")
    print(f"   Checked: {config.african_adapter_path}")
    print("   Proceeding with base model only")
    
    # Keep base model for visualizations
    base_model, tokenizer = load_model(
        config.base_model_id,
        name="Base Model"
    )


üì• LOADING FINE-TUNED MODEL (African Fine-tuned)

üì• Loading African Fine-tuned...
   Base: allenai/Llama-3.1-Tulu-3.1-8B


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

   Loading adapter: ./01Jan2026/african_model_2nd_try_model/checkpoint-3750/



Merge lora module to 4-bit linear may get different generations due to rounding errors.



   ‚úÖ Adapter merged
   ‚úÖ Loaded: 32 layers

üß¨ Fine-tuned Model nDNA Analysis...


Analyzing African Fine-tuned:   0%|          | 0/54 [00:00<?, ?it/s]


üìä African Fine-tuned Summary:
   Spectral Œ∫: mean=0.9495, std=0.3521
   Thermo Œî:   mean=6.7652, std=4.4080
   Belief Œ≤:   mean=51.7670, std=37.0678

üìù Word-level analysis for fine-tuned model...


Words (FT):   0%|          | 0/15 [00:00<?, ?it/s]

In [65]:
# ============================================================================
# CELL 11: VISUALIZATION SETUP
# ============================================================================

pio.renderers.default = "notebook"

MODEL_COLORS = {
    'base': '#2E86AB',      # Blue
    'finetuned': '#F18F01',  # Orange
}

print("‚úÖ Visualization ready")

‚úÖ Visualization ready


In [77]:
# ============================================================================
# CELL 12: PLOT 1 - All Metrics Comparison (2D)
# ============================================================================

print("\nüìä Generating comparison plots...")

fig = make_subplots(
    rows=3, cols=1,
    subplot_titles=(
        'Spectral Curvature (Œ∫) by Layer',
        'Thermodynamic Length (Œî) by Layer', 
        'Belief Vector (Œ≤) by Layer'
    ),
    vertical_spacing=0.08
)

# Base model
fig.add_trace(go.Scatter(
    x=base_layers, y=base_spectral,
    mode='lines+markers', name='Base Model',
    line=dict(color=MODEL_COLORS['base'], width=2),
    marker=dict(size=6)
), row=1, col=1)

fig.add_trace(go.Scatter(
    x=base_layers, y=base_thermo,
    mode='lines+markers', name='Base Model',
    line=dict(color=MODEL_COLORS['base'], width=2),
    showlegend=False
), row=2, col=1)

fig.add_trace(go.Scatter(
    x=base_layers, y=base_belief,
    mode='lines+markers', name='Base Model',
    line=dict(color=MODEL_COLORS['base'], width=2),
    showlegend=False
), row=3, col=1)

# Fine-tuned model (if available)
if ft_results is not None:
    fig.add_trace(go.Scatter(
        x=ft_layers, y=ft_spectral,
        mode='lines+markers', name=ft_name,
        line=dict(color=MODEL_COLORS['finetuned'], width=2),
        marker=dict(size=6)
    ), row=1, col=1)
    
    fig.add_trace(go.Scatter(
        x=ft_layers, y=ft_thermo,
        mode='lines+markers', name=ft_name,
        line=dict(color=MODEL_COLORS['finetuned'], width=2),
        showlegend=False
    ), row=2, col=1)
    
    fig.add_trace(go.Scatter(
        x=ft_layers, y=ft_belief,
        mode='lines+markers', name=ft_name,
        line=dict(color=MODEL_COLORS['finetuned'], width=2),
        showlegend=False
    ), row=3, col=1)

fig.update_xaxes(title_text="Layer", row=3, col=1)
fig.update_yaxes(title_text="Œ∫", row=1, col=1)
fig.update_yaxes(title_text="Œî", row=2, col=1)
fig.update_yaxes(title_text="Œ≤", row=3, col=1)

fig.update_layout(
    title=dict(text="nDNA Metrics: Base vs Fine-tuned Model", font=dict(size=18)),
    height=900,
    template='plotly_white',
    legend=dict(x=0.85, y=0.98)
)

save_and_show(fig, "nDNA Metrics: Base vs Fine-tuned Model_metrics_comparison.html")


üìä Generating comparison plots...
üíæ Saved: ./01Jan2026/african_model_2nd_try_results/nDNA Metrics: Base vs Fine-tuned Model_metrics_comparison.html


In [78]:
# ============================================================================
# CELL 13: PLOT 2 - 3D Trajectory (Spectral √ó Belief √ó Layer)
# ============================================================================

fig = go.Figure()

# Base model
fig.add_trace(go.Scatter3d(
    x=base_layers,
    y=base_belief,
    z=base_spectral,
    mode='lines+markers',
    name='Base Model',
    line=dict(color=MODEL_COLORS['base'], width=5),
    marker=dict(size=5, color=MODEL_COLORS['base']),
))

if ft_results is not None:
    fig.add_trace(go.Scatter3d(
        x=ft_layers,
        y=ft_belief,
        z=ft_spectral,
        mode='lines+markers',
        name=ft_name,
        line=dict(color=MODEL_COLORS['finetuned'], width=5),
        marker=dict(size=5, color=MODEL_COLORS['finetuned']),
    ))

fig.update_layout(
    title=dict(text="Tulu-8B-FT:Spectral (Œ∫) √ó Belief (Œ≤) √ó Layer", font=dict(size=18)),
    scene=dict(
        xaxis_title="Layer",
        yaxis_title="Belief",
        zaxis_title="Spectral",
    ),
    legend=dict(x=0.02, y=0.98),
    height=700,
    template='plotly_white'
)

save_and_show(fig, "Tulu-8B-FT_spectral_belief_layer.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/Tulu-8B-FT_spectral_belief_layer.html


In [79]:
# ============================================================================
# CELL 14: PLOT 3 - 3D Trajectory (Thermo √ó Belief √ó Layer)
# ============================================================================

fig = go.Figure()

fig.add_trace(go.Scatter3d(
    x=base_layers,
    y=base_belief,
    z=base_thermo,
    mode='lines+markers',
    name='Base Model',
    line=dict(color=MODEL_COLORS['base'], width=5),
    marker=dict(size=5),
))

if ft_results is not None:
    fig.add_trace(go.Scatter3d(
        x=ft_layers,
        y=ft_belief,
        z=ft_thermo,
        mode='lines+markers',
        name=ft_name,
        line=dict(color=MODEL_COLORS['finetuned'], width=5),
        marker=dict(size=5),
    ))

fig.update_layout(
    title=dict(text="Tulu-8B-FT: Thermo √ó Belief √ó Layer", font=dict(size=18)),
    scene=dict(
        xaxis_title="Layer",
        yaxis_title="Belief",
        zaxis_title="Thermo",
    ),
    legend=dict(x=0.02, y=0.98),
    height=700,
    template='plotly_white'
)

save_and_show(fig, "Tulu-8B-FT_thermo_belief_layer_3d.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/Tulu-8B-FT_thermo_belief_layer_3d.html


In [80]:
# ============================================================================
# CELL 15: PLOT 4 - 3D Trajectory (Spectral √ó Thermo √ó Layer)
# ============================================================================

fig = go.Figure()

fig.add_trace(go.Scatter3d(
    x=base_layers,
    y=base_thermo,
    z=base_spectral,
    mode='lines+markers',
    name='Base Model',
    line=dict(color=MODEL_COLORS['base'], width=5),
    marker=dict(size=5),
))

if ft_results is not None:
    fig.add_trace(go.Scatter3d(
        x=ft_layers,
        y=ft_thermo,
        z=ft_spectral,
        mode='lines+markers',
        name=ft_name,
        line=dict(color=MODEL_COLORS['finetuned'], width=5),
        marker=dict(size=5),
    ))

fig.update_layout(
    title=dict(text="Tulu-8B-FT: Spectral (Œ∫) √ó Thermo (Œî) √ó Layer", font=dict(size=18)),
    scene=dict(
        xaxis_title="Layer",
        yaxis_title="Thermodynamic Œî",
        zaxis_title="Spectral Œ∫",
    ),
    legend=dict(x=0.02, y=0.98),
    height=700,
    template='plotly_white'
)

save_and_show(fig, "Tulu-8B-FT_spectral_thermo_layer_3d.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/Tulu-8B-FT_spectral_thermo_layer_3d.html


In [81]:
# ============================================================================
# CELL 16: PLOT 5 - Word Spectral by Layer (Different colors per word)
# ============================================================================

fig = go.Figure()

for word in ABSTRACT_WORDS:
    if word not in base_word_results:
        continue
    
    layers = sorted(base_word_results[word].keys())
    spectral_vals = [base_word_results[word][l]['spectral'] for l in layers]
    
    fig.add_trace(go.Scatter(
        x=layers,
        y=spectral_vals,
        mode='lines+markers',
        name=word.capitalize(),
        line=dict(color=WORD_COLORS.get(word, '#666666'), width=2),
        marker=dict(size=5)
    ))

fig.update_layout(
    title=dict(text="Tulu-8B-Africa_FT_Word Spectral by Layer - Base Model", font=dict(size=18)),
    xaxis_title="Layer",
    yaxis_title="Spectral Curvature Œ∫",
    legend=dict(x=1.02, y=0.98),
    height=600,
    template='plotly_white'
)

save_and_show(fig, "Tulu-8B-Africa_FT_word_spectral_by_layer.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/Tulu-8B-Africa_FT_word_spectral_by_layer.html


In [82]:
# ============================================================================
# CELL 17: PLOT 6 - Word Belief by Layer
# ============================================================================

fig = go.Figure()

for word in ABSTRACT_WORDS:
    if word not in base_word_results:
        continue
    
    layers = sorted(base_word_results[word].keys())
    belief_vals = [base_word_results[word][l]['belief'] for l in layers]
    
    fig.add_trace(go.Scatter(
        x=layers,
        y=belief_vals,
        mode='lines+markers',
        name=word.capitalize(),
        line=dict(color=WORD_COLORS.get(word, '#666666'), width=2),
        marker=dict(size=5)
    ))

fig.update_layout(
    title=dict(text="Tulu-8B-Africa_FT_Word Belief Vector by Layer - Base Model", font=dict(size=18)),
    xaxis_title="Layer",
    yaxis_title="Belief",
    legend=dict(x=1.02, y=0.98),
    height=600,
    template='plotly_white'
)

save_and_show(fig, "Tulu-8B-Africa_FT_word_belief_by_layer.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/Tulu-8B-Africa_FT_word_belief_by_layer.html


In [83]:
# ============================================================================
# CELL 18: PLOT 7 - Word Thermodynamic Length by Layer
# ============================================================================

fig = go.Figure()

for word in ABSTRACT_WORDS:
    if word not in base_word_results:
        continue
    
    layers = sorted(base_word_results[word].keys())
    thermo_vals = [base_word_results[word][l]['thermo'] for l in layers]
    
    fig.add_trace(go.Scatter(
        x=layers,
        y=thermo_vals,
        mode='lines+markers',
        name=word.capitalize(),
        line=dict(color=WORD_COLORS.get(word, '#666666'), width=2),
        marker=dict(size=5)
    ))

fig.update_layout(
    title=dict(text="Tulu-8B-FT_Word Thermo by Layer - Base Model", font=dict(size=18)),
    xaxis_title="Layer",
    yaxis_title="Thermodynamic Œî",
    legend=dict(x=1.02, y=0.98),
    height=600,
    template='plotly_white'
)

save_and_show(fig, "Tulu-8B-FT_Word Thermo_word_thermo_by_layer.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/Tulu-8B-FT_Word Thermo_word_thermo_by_layer.html


In [84]:
# ============================================================================
# CELL 19: PLOT 8 - Word 3D Trajectories
# ============================================================================

fig = go.Figure()

for word in ABSTRACT_WORDS:
    if word not in base_word_results:
        continue
    
    layers = sorted(base_word_results[word].keys())
    spectral_vals = [base_word_results[word][l]['spectral'] for l in layers]
    belief_vals = [base_word_results[word][l]['belief'] for l in layers]
    thermo_vals = [base_word_results[word][l]['thermo'] for l in layers]
    
    fig.add_trace(go.Scatter3d(
        x=spectral_vals,
        y=belief_vals,
        z=thermo_vals,
        mode='lines+markers',
        name=word.capitalize(),
        line=dict(color=WORD_COLORS.get(word, '#666666'), width=3),
        marker=dict(size=3)
    ))

fig.update_layout(
    title=dict(text="Word nDNA 3D Trajectories: Spectral √ó Belief √ó Thermo", font=dict(size=18)),
    scene=dict(
        xaxis_title="Spectral Œ∫",
        yaxis_title="Belief Œ≤",
        zaxis_title="Thermodynamic Œî",
    ),
    legend=dict(x=1.02, y=0.98),
    height=700,
    template='plotly_white'
)

save_and_show(fig, "08_word_3d_trajectories.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/08_word_3d_trajectories.html


In [85]:
# ============================================================================
# CELL 20: PLOT 9 - Word √ó Layer Heatmap
# ============================================================================

# Create heatmap data
heatmap_words = []
heatmap_data = []

for word in ABSTRACT_WORDS:
    if word not in base_word_results:
        continue
    
    heatmap_words.append(word.capitalize())
    layers = sorted(base_word_results[word].keys())
    
    # Use spectral as the heatmap value
    row = [base_word_results[word][l]['spectral'] for l in layers]
    heatmap_data.append(row)

heatmap_array = np.array(heatmap_data)
layer_labels = [f"L{l}" for l in sorted(base_word_results[ABSTRACT_WORDS[0]].keys())]

fig = go.Figure(data=go.Heatmap(
    z=heatmap_array,
    x=layer_labels,
    y=heatmap_words,
    colorscale='Viridis',
    colorbar=dict(title="Spectral Œ∫")
))

fig.update_layout(
    title=dict(text="Spectral Curvature Heatmap: Words √ó Layers", font=dict(size=18)),
    xaxis_title="Layer",
    yaxis_title="Word",
    height=500,
    template='plotly_white'
)

save_and_show(fig, "09_word_layer_heatmap_spectral.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/09_word_layer_heatmap_spectral.html


In [86]:
# ============================================================================
# CELL 21: PLOT 10 - Belief Heatmap
# ============================================================================

heatmap_data_belief = []
for word in ABSTRACT_WORDS:
    if word not in base_word_results:
        continue
    layers = sorted(base_word_results[word].keys())
    row = [base_word_results[word][l]['belief'] for l in layers]
    heatmap_data_belief.append(row)

fig = go.Figure(data=go.Heatmap(
    z=np.array(heatmap_data_belief),
    x=layer_labels,
    y=heatmap_words,
    colorscale='Plasma',
    colorbar=dict(title="Belief Œ≤")
))

fig.update_layout(
    title=dict(text="Belief Vector Heatmap: Words √ó Layers", font=dict(size=18)),
    xaxis_title="Layer",
    yaxis_title="Word",
    height=500,
    template='plotly_white'
)

save_and_show(fig, "10_word_layer_heatmap_belief.html")

üíæ Saved: ./01Jan2026/african_model_2nd_try_results/10_word_layer_heatmap_belief.html


In [87]:
# ============================================================================
# CELL 22: SAVE ALL RESULTS
# ============================================================================

print("\n" + "=" * 70)
print("üíæ SAVING RESULTS")
print("=" * 70)

# Layer-wise results
df_layers = pd.DataFrame({
    'Layer': ANALYSIS_LAYERS,
    'Base_Spectral': base_spectral,
    'Base_Thermo': base_thermo,
    'Base_Belief': base_belief,
})

if ft_results is not None:
    df_layers['FT_Spectral'] = ft_spectral
    df_layers['FT_Thermo'] = ft_thermo
    df_layers['FT_Belief'] = ft_belief
    df_layers['Delta_Spectral'] = ft_spectral - base_spectral
    df_layers['Delta_Thermo'] = ft_thermo - base_thermo
    df_layers['Delta_Belief'] = ft_belief - base_belief

csv_path = os.path.join(config.output_dir, "ndna_layerwise_results.csv")
df_layers.to_csv(csv_path, index=False)
print(f"‚úÖ Saved: {csv_path}")

# Word-level results
word_data = []
for word in ABSTRACT_WORDS:
    if word not in base_word_results:
        continue
    for layer in sorted(base_word_results[word].keys()):
        row = {
            'Word': word,
            'Layer': layer,
            'Spectral': base_word_results[word][layer]['spectral'],
            'Thermo': base_word_results[word][layer]['thermo'],
            'Belief': base_word_results[word][layer]['belief'],
        }
        word_data.append(row)

df_words = pd.DataFrame(word_data)
csv_path = os.path.join(config.output_dir, "ndna_word_results.csv")
df_words.to_csv(csv_path, index=False)
print(f"‚úÖ Saved: {csv_path}")

# Display tables
print("\nüìã Layer Results:")
display(df_layers)

print("\nüìã Word Results (sample):")
display(df_words.head(20))


üíæ SAVING RESULTS
‚úÖ Saved: ./01Jan2026/african_model_2nd_try_results/ndna_layerwise_results.csv
‚úÖ Saved: ./01Jan2026/african_model_2nd_try_results/ndna_word_results.csv

üìã Layer Results:


Unnamed: 0,Layer,Base_Spectral,Base_Thermo,Base_Belief,FT_Spectral,FT_Thermo,FT_Belief,Delta_Spectral,Delta_Thermo,Delta_Belief
0,15,0.539445,4.1251,110.306953,0.51346,4.108637,114.733743,-0.025986,-0.016462,4.42679
1,16,0.586828,4.249203,105.341962,0.552844,4.209611,109.913778,-0.033984,-0.039592,4.571815
2,17,0.632152,4.378238,98.71366,0.594215,4.331438,103.373927,-0.037937,-0.0468,4.660267
3,18,0.68592,4.554458,89.909302,0.647139,4.500686,94.437314,-0.038781,-0.053771,4.528013
4,19,0.734726,4.717101,81.425029,0.695955,4.668865,86.348427,-0.038772,-0.048236,4.923397
5,20,0.782929,4.916667,70.333073,0.743784,4.853782,76.567137,-0.039145,-0.062885,6.234064
6,21,0.835186,5.12689,62.555112,0.792393,5.055298,68.754468,-0.042793,-0.071592,6.199356
7,22,0.894594,5.407646,50.093299,0.848062,5.313663,56.81756,-0.046532,-0.093983,6.724261
8,23,0.940847,5.643154,41.317867,0.893507,5.560723,48.141012,-0.047341,-0.08243,6.823145
9,24,0.987404,5.916596,32.588101,0.939315,5.822256,39.542771,-0.048088,-0.09434,6.95467



üìã Word Results (sample):


Unnamed: 0,Word,Layer,Spectral,Thermo,Belief
0,destroy,15,0.519429,4.081447,108.5377
1,destroy,16,0.567769,4.204202,103.187488
2,destroy,17,0.612394,4.331538,96.711724
3,destroy,18,0.665569,4.509195,87.283212
4,destroy,19,0.713506,4.677233,78.49953
5,destroy,20,0.761756,4.874573,67.886699
6,destroy,21,0.81325,5.084157,59.222223
7,destroy,22,0.873608,5.386555,46.92052
8,destroy,23,0.920626,5.651058,38.10646
9,destroy,24,0.965351,5.95895,29.950728
