In [None]:
import os

# Path to the analyze config file (YAML).
# The default is the original author's local checkout; set the
# OUMI_ANALYZE_CONFIG environment variable to use a config stored elsewhere
# without editing this cell.
config_path = os.environ.get(
    "OUMI_ANALYZE_CONFIG",
    "/Users/ryanarman/code/oumi/configs/examples/analyze/analyze_deita.yaml",
)

In [2]:
import os
import sys


# IMPORTANT: Set these BEFORE importing torch or any ML libraries
# Disable all GPU/MPS backends to prevent crashes with IFD analyzer
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Disable CUDA
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"  # Disable MPS memory allocation
os.environ["DISABLE_MPS_COMPAT"] = "1"  # Additional MPS disable flag
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"  # Disable HuggingFace telemetry
os.environ["TRANSFORMERS_OFFLINE"] = "0"  # Allow model downloads

# Force CPU usage in PyTorch to avoid MPS crashes
import torch

# Forcefully disable MPS before anything else
torch.set_default_device("cpu")
if hasattr(torch.backends, "mps"):
    # Monkey-patch to prevent MPS usage
    original_is_available = torch.backends.mps.is_available
    torch.backends.mps.is_available = lambda: False
    print("MPS has been disabled - forcing CPU-only mode")
else:
    print("Using CPU for all computations")

print(f"PyTorch device: {torch.get_default_device()}")
print(f"PyTorch version: {torch.__version__}")

MPS has been disabled - forcing CPU-only mode
PyTorch device: cpu
PyTorch version: 2.6.0


In [3]:
import os
from oumi.core.configs import AnalyzeConfig
from oumi.core.analyze.dataset_analyzer import DatasetAnalyzer

# Path to your dataset file (JSONL). The default is the original author's local
# copy; set BANKING77_DATASET_PATH to use a file stored elsewhere.
dataset_path = os.environ.get(
    "BANKING77_DATASET_PATH",
    "/Users/ryanarman/code/scratch/ryan_hillclimbing_experiments/banking77/notebooks/data/banking77_train.jsonl",
)

# Number of conversations to analyze; the recorded run limits analysis to the
# first 2 items of the dataset.
SAMPLE_COUNT = 2

# Ids of analyzers to remove from the YAML config before running.
# Empty by default, so every analyzer listed in the config runs (this matches
# the recorded output, where all 7 analyzers were executed). Candidates to skip,
# based on the logs of the recorded run:
#   - "ifd": loads the Qwen/Qwen3-0.6B model
#   - "repr_diversity": loads sentence-transformers/all-MiniLM-L6-v2
#   - "evol_complexity" / "evol_quality": use the OPENAI / ANTHROPIC engines
# Example: SKIP_ANALYZER_IDS = {"ifd", "repr_diversity"}
SKIP_ANALYZER_IDS = set()

# Load the config from YAML
config = AnalyzeConfig.from_yaml(
    config_path=config_path,
)

config.sample_count = SAMPLE_COUNT

# Override the dataset settings to use your local file
config.dataset_path = dataset_path
config.dataset_name = None  # Clear dataset_name so it uses dataset_path instead

# Optionally update output path
config.output_path = "./analysis_output/banking77"

# Drop the analyzers that were explicitly opted out above (no-op when empty).
if SKIP_ANALYZER_IDS:
    config.analyzers = [a for a in config.analyzers if a.id not in SKIP_ANALYZER_IDS]
print(f"Running {len(config.analyzers)} analyzers: {[a.id for a in config.analyzers]}")

# Validate the configuration
config.finalize_and_validate()

# Create the analyzer
analyzer = DatasetAnalyzer(config)

Running 7 analyzers: ['evol_complexity', 'evol_quality', 'repr_diversity', 'ifd', 'length', 'diversity', 'quality']
[2025-12-30 11:31:52,074][oumi][rank0][pid:41580][MainThread][INFO]][base_map_dataset.py:91] Creating map dataset (type: TextSftJsonLinesDataset)... dataset_name: 'custom'
[2025-12-30 11:31:52,074][oumi.utils.analysis_utils][rank0][pid:41580][MainThread][INFO]][analysis_utils.py:225] Loaded text dataset from: /Users/ryanarman/code/scratch/ryan_hillclimbing_experiments/banking77/notebooks/data/banking77_train.jsonl
[2025-12-30 11:31:52,075][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:154] Loaded dataset from config: None
[2025-12-30 11:31:52,075][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:304] Initialized sample analyzer: evol_complexity
[2025-12-30 11:31:52,075][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:304] Initialized sample analyzer: evol_quality
[2025-12-30 11:31:52,089][oumi][rank0][pid:41580][MainThread][IN

In [4]:
# Run the analysis
analyzer.analyze_dataset()

# The results are stored in analyzer object
if analyzer._analysis_results:
    print(
        f"Total conversations analyzed: {analyzer._analysis_results.conversations_analyzed}"
    )

[2025-12-30 11:31:52,215][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:347] Starting analysis of dataset: None
[2025-12-30 11:31:52,216][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:348] Using 7 sample analyzers: ['evol_complexity', 'evol_quality', 'repr_diversity', 'ifd', 'length', 'diversity', 'quality']
[2025-12-30 11:31:52,216][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:371] Analyzing 2 of 8002 conversations
[2025-12-30 11:31:52,217][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:421] Converting conversation dataset with 8002 items
[2025-12-30 11:31:52,217][oumi][rank0][pid:41580][MainThread][INFO]][dataset_analyzer.py:428] Limiting analysis to first 2 items (dataset has 8002 total)


Converting Unknown Dataset to DataFrames: 100%|██████████| 2/2 [00:00<00:00, 885.25item/s]


[2025-12-30 11:31:52,241][oumi][rank0][pid:41580][MainThread][INFO]][evol_complexity_analyzer.py:317] Computing complexity scores for 2 samples in column 'conversation_text_content'...


Analyzing complexity (conversation_text_content):   0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-30 11:31:52,261][oumi][rank0][pid:41580][MainThread][INFO]][evol_base.py:221] Initialized Evol analyzer with model: gpt-5-mini, engine: OPENAI


Analyzing complexity (conversation_text_content):  50%|█████     | 1/2 [00:26<00:26, 26.49s/it]



Analyzing complexity (conversation_text_content): 100%|██████████| 2/2 [00:26<00:00, 13.25s/it]


[2025-12-30 11:32:18,744][oumi][rank0][pid:41580][MainThread][INFO]][evol_complexity_analyzer.py:414] Column 'conversation_text_content': Mean complexity score = 0.000
[2025-12-30 11:32:18,747][oumi][rank0][pid:41580][MainThread][INFO]][evol_quality_analyzer.py:398] Computing quality scores for 2 samples in column 'conversation_text_content'...


Analyzing quality (conversation_text_content):   0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-30 11:32:18,751][oumi][rank0][pid:41580][MainThread][INFO]][evol_base.py:221] Initialized Evol analyzer with model: claude-4-5-haiku, engine: ANTHROPIC


Analyzing quality (conversation_text_content):  50%|█████     | 1/2 [00:00<00:00,  1.56it/s]



Analyzing quality (conversation_text_content): 100%|██████████| 2/2 [00:01<00:00,  1.27it/s]


[2025-12-30 11:32:20,329][oumi][rank0][pid:41580][MainThread][INFO]][evol_quality_analyzer.py:503] Column 'conversation_text_content': Mean quality score = 0.500
[2025-12-30 11:32:20,331][oumi][rank0][pid:41580][MainThread][INFO]][repr_diversity_analyzer.py:363] Computing diversity scores for 2 samples in column 'conversation_text_content'...
[2025-12-30 11:32:20,331][oumi][rank0][pid:41580][MainThread][INFO]][repr_diversity_analyzer.py:165] Loading embedding model: sentence-transformers/all-MiniLM-L6-v2


Computing embeddings: 100%|██████████| 2/2 [00:00<00:00, 49.88it/s]


[2025-12-30 11:32:21,459][oumi][rank0][pid:41580][MainThread][INFO]][repr_diversity_analyzer.py:230] Computing nearest neighbor distances for 2 samples (k=1)...
[2025-12-30 11:32:21,461][oumi][rank0][pid:41580][MainThread][INFO]][repr_diversity_analyzer.py:556] Column 'conversation_text_content': 2/2 samples (100.0%) are redundant
[2025-12-30 11:32:21,462][oumi][rank0][pid:41580][MainThread][INFO]][ifd_analyzer.py:154] Loading model for IFD analysis: Qwen/Qwen3-0.6B
[2025-12-30 11:32:23,337][oumi][rank0][pid:41580][MainThread][INFO]][ifd_analyzer.py:193] Loaded Qwen/Qwen3-0.6B on cpu (dtype: torch.float32)
[2025-12-30 11:32:23,349][oumi][rank0][pid:41580][MainThread][INFO]][evol_complexity_analyzer.py:317] Computing complexity scores for 2 samples in column 'text_content'...


Analyzing complexity (text_content):   0%|          | 0/2 [00:00<?, ?it/s]



Analyzing complexity (text_content):  50%|█████     | 1/2 [00:19<00:19, 19.28s/it]



Analyzing complexity (text_content): 100%|██████████| 2/2 [00:54<00:00, 27.29s/it]


[2025-12-30 11:33:17,932][oumi][rank0][pid:41580][MainThread][INFO]][evol_complexity_analyzer.py:414] Column 'text_content': Mean complexity score = 0.000
[2025-12-30 11:33:17,933][oumi][rank0][pid:41580][MainThread][INFO]][evol_quality_analyzer.py:398] Computing quality scores for 2 samples in column 'text_content'...


Analyzing quality (text_content):   0%|          | 0/2 [00:00<?, ?it/s]



Analyzing quality (text_content):  50%|█████     | 1/2 [00:00<00:00,  1.52it/s]



Analyzing quality (text_content): 100%|██████████| 2/2 [00:01<00:00,  1.72it/s]


[2025-12-30 11:33:19,101][oumi][rank0][pid:41580][MainThread][INFO]][evol_quality_analyzer.py:503] Column 'text_content': Mean quality score = 0.500
[2025-12-30 11:33:19,102][oumi][rank0][pid:41580][MainThread][INFO]][repr_diversity_analyzer.py:363] Computing diversity scores for 6 samples in column 'text_content'...


Computing embeddings: 100%|██████████| 6/6 [00:00<00:00, 85.99it/s]


[2025-12-30 11:33:19,176][oumi][rank0][pid:41580][MainThread][INFO]][repr_diversity_analyzer.py:230] Computing nearest neighbor distances for 6 samples (k=5)...
[2025-12-30 11:33:19,177][oumi][rank0][pid:41580][MainThread][INFO]][repr_diversity_analyzer.py:556] Column 'text_content': 0/6 samples (0.0%) are redundant
[2025-12-30 11:33:19,178][oumi][rank0][pid:41580][MainThread][INFO]][ifd_analyzer.py:643] Detected conversation format. Computing IFD for assistant messages using preceding user messages as instructions.
[2025-12-30 11:33:19,967][oumi][rank0][pid:41580][MainThread][INFO]][ifd_analyzer.py:521] IFD analysis complete. Processed 2 assistant messages out of 2 total.
Total conversations analyzed: 2


In [9]:
# Fetch the schema and the merged analysis columns, then report how many
# entries each has (in the recorded run both counts are 48).
schema = analyzer.get_schema()
merged_columns = analyzer.analysis_df.columns
print(
    f"len(schema): {len(schema)}",
    f"len(merged_columns): {len(merged_columns)}",
    sep="\n",
)

len(schema): 48
len(merged_columns): 48


In [10]:
# Ids of the configured analyzers, in config order; later cells pick one of
# these by index.
analyzer_names = [analyzer_params.id for analyzer_params in config.analyzers]
analyzer_names

['evol_complexity',
 'evol_quality',
 'repr_diversity',
 'ifd',
 'length',
 'diversity',
 'quality']

# Conversation level

In [19]:
# Re-display the analyzer ids so the index used in the next cell can be looked up.
analyzer_names

['evol_complexity',
 'evol_quality',
 'repr_diversity',
 'ifd',
 'length',
 'diversity',
 'quality']

In [27]:
from oumi.core.analyze.column_utils import (
    filter_analyzer_columns,
    get_analyzer_columns_by_analyzer,
    parse_analyzer_column_name,
)

# Conversation-level view: print every metric one analyzer produced for the
# first conversation.
conv_columns = analyzer.conversation_df.columns
row = analyzer.conversation_df.iloc[0]

# Choose the analyzer to inspect (index into analyzer_names; 3 is 'ifd' in the
# list displayed above).
analyzer_name = analyzer_names[3]

# Keep only the columns that belong to the selected analyzer.
filtered_cols = filter_analyzer_columns(conv_columns, analyzer_id=analyzer_name)
if filtered_cols:
    print(f"Analyzer: {analyzer_name}")

    for col in filtered_cols:
        # The parsed column name gives the metric name; the schema supplies the
        # human-readable description for the same column.
        info = parse_analyzer_column_name(col)
        print(f"metric: {info.metric_name}")
        print(f"description: {schema[col]['description']}")
        print(f"value: {row[col]}")
        print("\n")
else:
    # In the recorded run this branch is taken for 'ifd' - presumably because
    # IFD is computed per assistant message rather than per conversation.
    print(f"No columns found for analyzer: {analyzer_name}")


No columns found for analyzer: ifd


# Message level

In [None]:
from oumi.core.analyze.column_utils import (
    filter_analyzer_columns,
    get_analyzer_columns_by_analyzer,
    parse_analyzer_column_name,
)

# Message-level view: print the input text of one message and every metric one
# analyzer produced for it.
msg_columns = analyzer.message_df.columns
row = analyzer.message_df.iloc[2]  # third message row

# Choose the analyzer to inspect (index into analyzer_names; 0 is
# 'evol_complexity' in the list displayed above).
analyzer_name = analyzer_names[0]

# NOTE(review): the saved output of this cell is
# "IndexError: list index out of range". The only unguarded list index visible
# here is analyzer_names[0], which fails only if analyzer_names is empty;
# otherwise the error presumably comes from inside the column_utils helpers -
# confirm with the full traceback.

# Keep only the columns that belong to the selected analyzer.
filtered_cols = filter_analyzer_columns(msg_columns, analyzer_id=analyzer_name)
if filtered_cols:
    print(f"Analyzer: {analyzer_name}")
    # The first column is parsed to find the source column holding the text
    # that the metrics were computed from.
    info = parse_analyzer_column_name(filtered_cols[0])

    # Show the analyzed text once, prefixed with the message role.
    print("\nInput:")
    print(f"[{row['role']}]: {row[info.source_column]}\n")

    for col in filtered_cols:
        # The parsed column name gives the metric name; the schema supplies the
        # human-readable description for the same column.
        info = parse_analyzer_column_name(col)
        print(f"metric: {info.metric_name}")
        print(f"description: {schema[col]['description']}")
        print(f"value: {row[col]}")
        print("\n")
else:
    print(f"No columns found for analyzer: {analyzer_name}")


IndexError: list index out of range