In [1]:
import os

# IMPORTANT: Set these BEFORE importing torch or any ML libraries
# (this is why `import torch` sits below the assignments instead of at the top).
# Disable all GPU/MPS backends to prevent crashes with IFD analyzer
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Disable CUDA (empty list of visible devices)
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
# NOTE(review): PyTorch's MPS environment-variable docs describe a ratio of 0.0 as
# *removing* the allocator's upper limit, not as disabling MPS allocation.
# Confirm this value does what is intended here.
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"  # intended to disable MPS allocation - see NOTE above
# NOTE(review): could not find this variable in PyTorch documentation; verify
# that anything actually reads it.
os.environ["DISABLE_MPS_COMPAT"] = "1"  # Additional MPS disable flag
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"  # Disable HuggingFace telemetry
os.environ["TRANSFORMERS_OFFLINE"] = "0"  # Allow model downloads

# Force CPU usage in PyTorch to avoid MPS crashes
import torch

# Make CPU the default device for newly created tensors
torch.set_default_device("cpu")
if hasattr(torch.backends, "mps"):
    # Monkey-patch the availability probe so callers that check it see "no MPS".
    # The original function is kept in `original_is_available`; nothing in this
    # cell restores it.
    original_is_available = torch.backends.mps.is_available
    torch.backends.mps.is_available = lambda: False
    print("MPS has been disabled - forcing CPU-only mode")
else:
    # This torch build exposes no `mps` backend attribute at all
    print("Using CPU for all computations")

# Echo the effective default device and library version for the run log
print(f"PyTorch device: {torch.get_default_device()}")
print(f"PyTorch version: {torch.__version__}")

MPS has been disabled - forcing CPU-only mode
PyTorch device: cpu
PyTorch version: 2.6.0


In [13]:
# Download and convert OpenAssistant dataset to JSONL format
import json
from pathlib import Path

from datasets import load_dataset

# Local directory and file that hold the converted dataset
DATA_DIR = Path("/Users/ryanarman/data/openassistant")
DATA_DIR.mkdir(parents=True, exist_ok=True)
DATASET_PATH = DATA_DIR / "openassistant_oasst1.jsonl"

# Flip to True to rebuild the JSONL even when a copy already exists on disk
FORCE_REGENERATE = False

if FORCE_REGENERATE or not DATASET_PATH.exists():
    print("üì• Downloading OpenAssistant OASST1 dataset...")
    dataset = load_dataset("OpenAssistant/oasst1", split="train")

    # Report the row count when the loaded object is a Dataset, else "unknown"
    from datasets import Dataset

    dataset_len = len(dataset) if isinstance(dataset, Dataset) else "unknown"

    print(f"‚úÖ Downloaded dataset (size: {dataset_len})")
    print("üìù Converting to JSONL format...")

    def build_conversation_from_tree(messages_in_tree):
        """Flatten one OASST1 message tree into an ordered list of chat messages.

        Each message is a mapping read through the keys ``message_id``,
        ``parent_id``, ``role``, ``text`` and (optionally) ``created_date``.
        The first message whose ``parent_id`` is empty, or does not refer to a
        message of this tree, is used as the root. The tree is then walked
        depth-first in pre-order; siblings are ordered by ``created_date`` when
        that key is present, otherwise they keep their input order.

        NOTE(review): the whole tree is emitted, not a single root-to-leaf
        path, so alternative replies to the same prompt appear as consecutive
        messages in the result. Confirm this is the intended conversation shape.

        Args:
            messages_in_tree: List of message dicts belonging to one tree.

        Returns:
            A list of ``{"role": "user" | "assistant", "content": str}`` dicts
            (messages with empty text are skipped), or ``None`` when no root is
            found or no message has any text.
        """
        # One pass builds both lookups: the set of known ids (for root
        # detection) and a parent -> children index, so finding children is a
        # dict hit instead of a rescan of the whole tree for every message.
        known_ids = set()
        children_by_parent = {}
        for msg in messages_in_tree:
            msg_id = msg.get("message_id")
            if msg_id:
                known_ids.add(msg_id)
            parent_id = msg.get("parent_id")
            if parent_id:
                children_by_parent.setdefault(parent_id, []).append(msg)

        # Root: first message with no parent, or whose parent is outside this tree
        root = None
        for msg in messages_in_tree:
            parent_id = msg.get("parent_id")
            if not parent_id or parent_id not in known_ids:
                root = msg
                break

        if not root:
            return None

        messages = []
        visited = set()  # object identities; stops malformed (self-referencing) data from looping
        stack = [root]  # explicit stack: deep reply chains cannot hit the recursion limit

        while stack:
            msg = stack.pop()
            if id(msg) in visited:
                continue
            visited.add(id(msg))

            content = msg.get("text", "")
            if content:  # Only add non-empty messages
                role = "user" if msg.get("role") == "prompter" else "assistant"
                messages.append({"role": role, "content": content})

            # A message without an id cannot be anyone's parent
            msg_id = msg.get("message_id")
            children = children_by_parent.get(msg_id, []) if msg_id else []

            # Sort children by created_date if available (missing/None sorts first)
            if children and "created_date" in children[0]:
                children = sorted(children, key=lambda m: m.get("created_date") or "")

            # Push in reverse so the earliest child is popped (visited) first
            stack.extend(reversed(children))

        return messages if messages else None

    # Convert and save
    # OASST1 structure: each example is a single message with message_tree_id
    # We need to group by message_tree_id first, then build conversations
    print("üìä Inspecting dataset structure...")
    first_example = next(iter(dataset))
    print(f"   Example keys: {list(first_example.keys())[:10]}")  # Show first 10 keys

    # Group messages by message_tree_id
    print("üì¶ Grouping messages by conversation tree...")
    trees = {}
    for example in dataset:
        tree_id = example.get("message_tree_id")
        if tree_id:
            if tree_id not in trees:
                trees[tree_id] = []
            trees[tree_id].append(example)

    print(f"   Found {len(trees)} conversation trees")

    # Build conversations from trees
    print("üî® Building conversations from message trees...")
    count = 0
    with open(DATASET_PATH, "w") as f:
        for tree_id, messages_in_tree in trees.items():
            conversation_messages = build_conversation_from_tree(messages_in_tree)
            if conversation_messages and len(conversation_messages) > 0:
                f.write(json.dumps({"messages": conversation_messages}) + "\n")
                count += 1

    print(f"‚úÖ Saved {count} conversations to {DATASET_PATH}")
else:
    if FORCE_REGENERATE:
        print(f"‚úÖ Regenerated dataset at {DATASET_PATH}")
    else:
        print(f"‚úÖ Dataset already exists at {DATASET_PATH}")
        print(
            "   Set FORCE_REGENERATE=True to regenerate with proper conversation grouping"
        )

print(f"üìÅ Dataset path: {DATASET_PATH}")

üì• Downloading OpenAssistant OASST1 dataset...
‚úÖ Downloaded dataset (size: 84437)
üìù Converting to JSONL format...
üìä Inspecting dataset structure...
   Example keys: ['message_id', 'parent_id', 'user_id', 'created_date', 'text', 'role', 'lang', 'review_count', 'review_result', 'deleted']
üì¶ Grouping messages by conversation tree...
   Found 9846 conversation trees
üî® Building conversations from message trees...
‚úÖ Saved 9846 conversations to /Users/ryanarman/data/openassistant/openassistant_oasst1.jsonl
üìÅ Dataset path: /Users/ryanarman/data/openassistant/openassistant_oasst1.jsonl


In [3]:
import os

from oumi.core.analyze.dataset_analyzer import DatasetAnalyzer
from oumi.core.configs import AnalyzeConfig

# Directory that receives the artifacts of this run
OUTPUT_PATH = "/Users/ryanarman/code/oumi/analysis_output/openassistant"

# Start from the example YAML; the per-run overrides follow
config = AnalyzeConfig.from_yaml(
    "/Users/ryanarman/code/oumi/configs/examples/analyze/analyze_openassistant.yaml"
)

# Point the config at the local JSONL produced earlier in the notebook
dataset_path = str(DATASET_PATH)
config.dataset_path = dataset_path
config.dataset_name = None  # cleared so dataset_path is used instead
config.sample_count = 1000
config.chat_template = "default"  # simple template without special variables

# Absolute output location, so results are easy to find afterwards
config.output_path = OUTPUT_PATH

# List the configured analyzers, preferring instance_id over the type id
print(f"‚úÖ Config loaded with {len(config.analyzers)} analyzers:")
for analyzer_params in config.analyzers:
    label = analyzer_params.instance_id or analyzer_params.id
    print(f"  - {label} (type: {analyzer_params.id})")

print(f"üìÅ Output will be saved to: {config.output_path}")
print(f"üìÇ Dataset path: {config.dataset_path}")

# Validate before handing the config to the analyzer
config.finalize_and_validate()
print("‚úÖ Config validated successfully!")

analyzer = DatasetAnalyzer(config)

‚úÖ Config loaded with 20 analyzers:
  - length (type: length)
  - token_stats (type: token_stats)
  - diversity (type: diversity)
  - format (type: format)
  - quality (type: quality)
  - content_pattern (type: content_pattern)
  - fasttext (type: fasttext)
  - embedding (type: embedding)
  - question_diversity (type: question_diversity)
  - repr_diversity (type: repr_diversity)
  - conversation_structure (type: conversation_structure)
  - response_completeness (type: response_completeness)
  - training_quality (type: training_quality)
  - task_category (type: task_category)
  - safety (type: safety)
  - difficulty (type: difficulty)
  - input_quality (type: input_quality)
  - instruct_reward (type: instruct_reward)
  - cost (type: cost)
  - helpfulness (type: llm_judge)
üìÅ Output will be saved to: /Users/ryanarman/code/oumi/analysis_output/openassistant
üìÇ Dataset path: /Users/ryanarman/code/oumi/data/openassistant/openassistant_oasst1.jsonl
‚úÖ Config validated successfully!
[20

In [4]:
# Execute the configured analyzers
analyzer.analyze_dataset()

# Results are kept on the analyzer object; report the count when they exist
analysis_results = analyzer._analysis_results
if analysis_results:
    print(f"Total conversations analyzed: {analysis_results.conversations_analyzed}")

[2026-01-12 16:04:40,778][oumi][rank0][pid:1937][MainThread][INFO]][dataset_analyzer.py:362] Starting analysis of dataset: None
[2026-01-12 16:04:40,779][oumi][rank0][pid:1937][MainThread][INFO]][dataset_analyzer.py:363] Using 20 sample analyzers: ['length', 'token_stats', 'diversity', 'format', 'quality', 'content_pattern', 'fasttext', 'embedding', 'question_diversity', 'repr_diversity', 'conversation_structure', 'response_completeness', 'training_quality', 'task_category', 'safety', 'difficulty', 'input_quality', 'instruct_reward', 'cost', 'helpfulness']
[2026-01-12 16:04:40,779][oumi][rank0][pid:1937][MainThread][INFO]][dataset_analyzer.py:386] Analyzing 1000 of 9846 conversations
[2026-01-12 16:04:40,780][oumi][rank0][pid:1937][MainThread][INFO]][dataset_analyzer.py:438] Converting conversation dataset with 9846 items
[2026-01-12 16:04:40,780][oumi][rank0][pid:1937][MainThread][INFO]][dataset_analyzer.py:445] Limiting analysis to first 1000 items (dataset has 9846 total)


Converting Unknown Dataset to DataFrames: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:01<00:00, 975.34item/s]


[2026-01-12 16:04:43,118][oumi][rank0][pid:1937][MainThread][INFO]][fasttext_analyzer.py:220] Initialized fast-langdetect for language detection
[2026-01-12 16:04:43,119][oumi][rank0][pid:1937][MainThread][INFO]][fasttext_analyzer.py:458] Analyzing language for column: conversation_text_content
[2026-01-12 16:04:43,438][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:518] Computing embeddings for 1000 samples...
[2026-01-12 16:04:43,438][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:196] Loading embedding model: all-MiniLM-L6-v2


Computing embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:06<00:00, 152.36it/s]


[2026-01-12 16:04:50,937][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:537] Detecting semantic duplicates...
[2026-01-12 16:04:50,944][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:624] Detecting fuzzy duplicates using MinHash LSH...
[2026-01-12 16:04:50,948][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:350] Creating MinHash signatures for 1000 samples...


Creating MinHash signatures: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:04<00:00, 249.53it/s]


[2026-01-12 16:04:55,016][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:369] Finding fuzzy duplicates using LSH...


Finding duplicates: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:00<00:00, 152116.35it/s]


[2026-01-12 16:04:55,027][oumi][rank0][pid:1937][MainThread][INFO]][question_diversity_analyzer.py:464] Computing embeddings for 1000 user questions...
[2026-01-12 16:04:55,027][oumi][rank0][pid:1937][MainThread][INFO]][question_diversity_analyzer.py:174] Loading embedding model: all-MiniLM-L6-v2


Computing embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:07<00:00, 125.13it/s]


[2026-01-12 16:05:03,940][oumi][rank0][pid:1937][MainThread][INFO]][question_diversity_analyzer.py:469] Clustering 1000 questions using dbscan...
[2026-01-12 16:05:04,521][oumi][rank0][pid:1937][MainThread][INFO]][question_diversity_analyzer.py:482] Found 0 clusters, 1000 unique/diverse questions (not similar to others)
[2026-01-12 16:05:04,529][oumi][rank0][pid:1937][MainThread][INFO]][repr_diversity_analyzer.py:363] Computing diversity scores for 1000 samples in column 'conversation_text_content'...
[2026-01-12 16:05:04,530][oumi][rank0][pid:1937][MainThread][INFO]][repr_diversity_analyzer.py:165] Loading embedding model: sentence-transformers/all-MiniLM-L6-v2


Computing embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:07<00:00, 139.85it/s]


[2026-01-12 16:05:12,604][oumi][rank0][pid:1937][MainThread][INFO]][repr_diversity_analyzer.py:230] Computing nearest neighbor distances for 1000 samples (k=5)...
[2026-01-12 16:05:12,627][oumi][rank0][pid:1937][MainThread][INFO]][repr_diversity_analyzer.py:556] Column 'conversation_text_content': 182/1000 samples (18.2%) are redundant
[2026-01-12 16:05:19,097][oumi][rank0][pid:1937][MainThread][INFO]][llm_judge_analyzer.py:444] Initialized LLM Judge with model: gpt-4o-mini, engine: openai


 28%|‚ñà‚ñà‚ñä       | 285/1000 [00:08<00:13, 53.97it/s]



100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1000/1000 [00:19<00:00, 50.18it/s]


[2026-01-12 16:05:40,772][oumi][rank0][pid:1937][MainThread][INFO]][fasttext_analyzer.py:458] Analyzing language for column: text_content
[2026-01-12 16:05:41,357][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:518] Computing embeddings for 7211 samples...


Computing embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7211/7211 [00:39<00:00, 182.24it/s]


[2026-01-12 16:06:20,933][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:537] Detecting semantic duplicates...
[2026-01-12 16:06:21,127][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:624] Detecting fuzzy duplicates using MinHash LSH...
[2026-01-12 16:06:21,130][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:350] Creating MinHash signatures for 7211 samples...


Creating MinHash signatures: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7211/7211 [00:09<00:00, 788.87it/s]


[2026-01-12 16:06:30,333][oumi][rank0][pid:1937][MainThread][INFO]][embedding_analyzer.py:369] Finding fuzzy duplicates using LSH...


Finding duplicates: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7211/7211 [00:00<00:00, 157596.47it/s]


[2026-01-12 16:06:30,396][oumi][rank0][pid:1937][MainThread][INFO]][question_diversity_analyzer.py:464] Computing embeddings for 2485 user questions...


Computing embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2485/2485 [00:07<00:00, 314.91it/s]


[2026-01-12 16:06:38,294][oumi][rank0][pid:1937][MainThread][INFO]][question_diversity_analyzer.py:469] Clustering 2485 questions using dbscan...
[2026-01-12 16:06:38,312][oumi][rank0][pid:1937][MainThread][INFO]][question_diversity_analyzer.py:482] Found 7 clusters, 2467 unique/diverse questions (not similar to others)
[2026-01-12 16:06:38,317][oumi][rank0][pid:1937][MainThread][INFO]][repr_diversity_analyzer.py:363] Computing diversity scores for 7211 samples in column 'text_content'...


Computing embeddings: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7211/7211 [00:37<00:00, 194.89it/s]


[2026-01-12 16:07:15,325][oumi][rank0][pid:1937][MainThread][INFO]][repr_diversity_analyzer.py:230] Computing nearest neighbor distances for 7211 samples (k=5)...
[2026-01-12 16:07:16,161][oumi][rank0][pid:1937][MainThread][INFO]][repr_diversity_analyzer.py:556] Column 'text_content': 2226/7211 samples (30.9%) are redundant
[2026-01-12 16:07:19,024][oumi][rank0][pid:1937][MainThread][INFO]][llm_judge_analyzer.py:780] Skipping message-level analysis (analyze_message_level=False). Set analyze_message_level=True to enable.
Total conversations analyzed: 1000


In [5]:
import traceback
from pathlib import Path

from oumi.utils.analysis_utils import save_analyzer_artifacts

# Persist the analyzer artifacts (dataframes, schemas, summary) as parquet
# under the configured output directory
artifacts_dir = Path(config.output_path)
save_analyzer_artifacts(analyzer, artifacts_dir, output_format="parquet")

[2026-01-12 16:07:19,604][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1426] Saved message analysis to: /Users/ryanarman/code/oumi/analysis_output/openassistant/messages_df.parquet
[2026-01-12 16:07:19,621][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1432] Saved conversation analysis to: /Users/ryanarman/code/oumi/analysis_output/openassistant/conversations_df.parquet
[2026-01-12 16:07:19,679][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1438] Saved merged analysis to: /Users/ryanarman/code/oumi/analysis_output/openassistant/merged_df.parquet
[2026-01-12 16:07:19,681][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1450] Saved message schema to: /Users/ryanarman/code/oumi/analysis_output/openassistant/message_schema.json
[2026-01-12 16:07:19,682][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1457] Saved conversa

# Load artifacts


In [6]:
# This cell redefines OUTPUT_PATH so it can be run from saved artifacts without
# re-running the analysis; it therefore imports everything it uses itself.
import traceback
from pathlib import Path

OUTPUT_PATH = "/Users/ryanarman/code/oumi/analysis_output/openassistant"

from oumi.utils.analysis_utils import (
    load_analyzer_artifacts,
    regenerate_recommendations,
)

artifacts = load_analyzer_artifacts(OUTPUT_PATH)

# Regenerate recommendations with latest code (e.g., updated duplicate detection)
artifacts = regenerate_recommendations(artifacts, outlier_threshold=3.0)

# A bare `artifacts.keys()` mid-cell is never displayed, so print it explicitly
print(f"Loaded artifacts: {list(artifacts.keys())}")

# Generate the HTML report; a failure here is reported but must not abort the cell
try:
    from oumi.core.analyze.report_generator import HTMLReportGenerator

    report_gen = HTMLReportGenerator()
    report_path = report_gen.generate_report(
        artifacts=artifacts,
        output_path=OUTPUT_PATH,
        title="OpenAssistant Analysis Report",
    )
    # Path(...) keeps this line working whether a str or a Path is returned
    print(f"‚úÖ Generated HTML report at: {Path(report_path) / 'index.html'}")
except ImportError as import_err:
    print("‚ö†Ô∏è  Plotly not installed. Skipping HTML report generation.")
    print("   Install with: pip install 'oumi[analyze_advanced]'")
    # Show which import actually failed; it is not necessarily Plotly
    print(f"   Import error: {import_err}")
except Exception as e:
    print(f"‚ö†Ô∏è  Failed to generate HTML report: {e}")
    print("\nüîç FULL TRACEBACK:")
    print("=" * 70)
    traceback.print_exc()
    print("=" * 70)

print(f"\nüìÅ All results saved to: {OUTPUT_PATH}")

[2026-01-12 16:07:19,715][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1551] Loaded message analysis from: /Users/ryanarman/code/oumi/analysis_output/openassistant/messages_df


[2026-01-12 16:07:19,725][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1558] Loaded conversation analysis from: /Users/ryanarman/code/oumi/analysis_output/openassistant/conversations_df
[2026-01-12 16:07:19,751][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1565] Loaded merged analysis from: /Users/ryanarman/code/oumi/analysis_output/openassistant/merged_df
[2026-01-12 16:07:19,753][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1574] Loaded combined schemas from: /Users/ryanarman/code/oumi/analysis_output/openassistant/schema.json
[2026-01-12 16:07:19,754][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1596] Loaded analysis summary from: /Users/ryanarman/code/oumi/analysis_output/openassistant/analysis_summary.json
[2026-01-12 16:07:19,755][oumi.utils.analysis_utils][rank0][pid:1937][MainThread][INFO]][analysis_utils.py:1598] Loaded analyzer artifa

# Conversation-level metrics

In [7]:
# Conversation-level table and its column schema from the loaded artifacts
conv_df = artifacts["conversations_df"]
schema = artifacts["schemas"]["conversation_schema"]

# Last expression: show the available columns
conv_df.columns

Index(['conversation_index', 'conversation_id', 'num_messages',
       'conversation_text_content',
       'conversation_text_content__length__token_count',
       'conversation_text_content__diversity__unique_words_ratio',
       'conversation_text_content__format__has_markdown',
       'conversation_text_content__format__has_json',
       'conversation_text_content__format__has_code_blocks',
       'conversation_text_content__format__code_block_count',
       'conversation_text_content__format__code_block_languages',
       'conversation_text_content__format__has_urls',
       'conversation_text_content__format__has_emails',
       'conversation_text_content__format__format_complexity_score',
       'conversation_text_content__quality__has_pii',
       'conversation_text_content__quality__pii_types',
       'conversation_text_content__quality__pii_count',
       'conversation_text_content__quality__has_encoding_issues',
       'conversation_text_content__quality__repetition_ratio',
 

In [8]:
from oumi.core.analyze.column_utils import (
    filter_analyzer_columns,
    parse_analyzer_column_name,
)

# Analyzer whose metrics are displayed
analyzer_name = "fasttext"

# Inspect the first conversation only
conv_columns = conv_df.columns
row = conv_df.iloc[0]

filtered_cols = filter_analyzer_columns(conv_columns, analyzer_id=analyzer_name)

if not filtered_cols:
    print(f"No columns found for analyzer: {analyzer_name}")
else:
    print(f"Analyzer: {analyzer_name}")

    # Show the analyzed input once, using the source column of the first match
    info = parse_analyzer_column_name(filtered_cols[0])
    print("\nInput:")
    print(f"source_column: {info.source_column}")
    print(f"{row[info.source_column]}\n")

    # One entry per metric column: name, schema description, value for this row
    for col in filtered_cols:
        info = parse_analyzer_column_name(col)
        print(f"metric: {info.metric_name}")
        print(f"description: {schema[col]['description']}")
        print(f"value: {row[col]}")
        print("\n")

Analyzer: fasttext

Input:
source_column: conversation_text_content
USER: Can you write a short introduction about the relevance of the term "monopsony" in economics? Please use examples related to potential monopsonies in the labour market and cite relevant research.
ASSISTANT: Monopsony is a market structure in which there is a single buyer in a market. In the context of labor markets, a monopsony occurs when there is only one employer in a particular industry or geographic area, giving that employer significant power over the wages and working conditions of workers. This results in a situation where the employer has the ability to suppress wages and reduce the bargaining power of workers.

Research has shown that the existence of monopsony power in labor markets can have negative effects on workers, such as lower wages, reduced benefits, and limited job opportunities. For example, a study by Autor, Dorn, and Hanson (2013) found evidence of monopsony power in certain U.S. labor marke

## Message-level metrics

In [9]:
from oumi.core.analyze.column_utils import (
    filter_analyzer_columns,
    parse_analyzer_column_name,
)

# Message-level table and its column schema from the loaded artifacts
msg_df = artifacts["messages_df"]
schema_msg = artifacts["schemas"]["message_schema"]

# Analyzer whose metrics are displayed
analyzer_name = "fasttext"

# Inspect the message at positional index 20
msg_columns = msg_df.columns
row = msg_df.iloc[20]

filtered_cols = filter_analyzer_columns(msg_columns, analyzer_id=analyzer_name)

if not filtered_cols:
    print(f"No columns found for analyzer: {analyzer_name}")
else:
    print(f"Analyzer: {analyzer_name}")

    # Show the analyzed input once, using the source column of the first match
    info = parse_analyzer_column_name(filtered_cols[0])
    print("\nInput:")
    print(f"source_column: {info.source_column}")
    print(f"{row[info.source_column]}\n")

    # One entry per metric column: name, schema description, value for this row
    for col in filtered_cols:
        info = parse_analyzer_column_name(col)
        print(f"metric: {info.metric_name}")
        print(f"description: {schema_msg[col]['description']}")
        print(f"value: {row[col]}")
        print("\n")

Analyzer: fasttext

Input:
source_column: text_content
Seg√∫n Jean Piaget, estas son las 4 etapas del desarrollo cognitivo:
1. Etapa sensiomotriz (0 a 2 a√±os). Durante esta etapa, los ni√±os interact√∫an f√≠sicamente con su entorno a trav√©s de juegos y experimentaci√≥n.
2. Etapa preoperacional (2 a 7 a√±os). Durante esta etapa, los ni√±os pueden ponerse en el lugar de los dem√°s y jugar a hacer juegos de rol. Sin embargo, a√∫n tienen dificultades para acceder a pensamientos m√°s abstractos y a√∫n presentan egocentrismo.
3. Etapa de operaciones concretas (7 a 12 a√±os). Durante esta etapa, los ni√±os pueden usar la l√≥gica para llegar a conclusiones v√°lidas, pero solo en situaciones concretas. Tambi√©n pueden categorizar aspectos de la realidad de una manera m√°s compleja y el pensamiento se vuelve menos egoc√©ntrico.
4. Etapa de operaciones formales (desde los 12 a√±os hasta la vida adulta). Durante esta etapa, los ni√±os pueden utilizar la l√≥gica para llegar a conclusiones abstrac